for i_eojeol, eojeol in enumerate(eojeols):
if self.verbose and i_eojeol % 1000 == 0:
perc = "%.3f"% (100 * i_eojeol / n_eojeols)
message = "lemma candidates ... {} %".format(perc)
self._print(message, replace=True, newline=False)
n = len(eojeol)
lemma_candidates = set()
for i in range(1, n+1):
l, r = eojeol[:i], eojeol[i:]
for stem, eomi in _lemma_candidate(l, r):
if (stem in self._stems) and (eomi, self._eomis):
lemma_candidates.add((stem, eomi, "Both"))
elif (stem in self._stems):
lemma_candidates.add((stem, eomi, "Only stem"))
elif (eomi in self._eomis):
lemma_candidates.add((stem, eomi, "Only eomi"))
if lemma_candidates:
lemmas[eojeol] = lemma_candidates
if self.verbose:
message = "lemma candidating was done "self._print(message, replace=True, newline=True)
return lemmas