# Build `gazetteers`, a dict mapping a gazetteer name to a set, with one key
# per entry yielded by `gazetteers_path.iterdir()` (the key is the entry's stem).
# NOTE(review): indentation was lost in this excerpt, and the fragment ends
# before the final `normalized` value is stored anywhere -- confirm the nesting
# and the missing tail against the full source.
gazetteers = dict()
for filepath in gazetteers_path.iterdir():
gazetteer_name = filepath.stem
with filepath.open(encoding="utf8") as f:
# Each gazetteer starts as an empty set.
gazetteers[gazetteer_name] = set()
for line in f:
# Strip surrounding whitespace, then apply the `normalize` helper
# (defined outside this excerpt).
normalized = normalize(line.strip())
# Only lines with a truthy normalized value are processed further.
if normalized:
# Tokenize the normalized text for `language` and re-join the token values
# using the separator returned by `get_default_sep(language)`.
token_values = (t.value
for t in tokenize(normalized, language))
normalized = get_default_sep(language).join(token_values)
After Change
# For every name in `gazetteer_names`, read the matching ".txt" file under
# `gazetteers_dir` and store its whitespace-stripped lines as a set in
# `gazetteers[gazetteer_name]`.
# NOTE(review): `gazetteers` is not created inside this fragment and the
# enclosing `def` for the `return` is not visible -- both presumably live
# outside the excerpt; indentation was also lost here.
for gazetteer_name in gazetteer_names:
# Assumes `gazetteers_dir` is a pathlib path: `with_suffix(".txt")` replaces an
# existing suffix, so a name that already contains a dot would lose its last
# dotted part -- TODO confirm names never contain dots.
gazetteer_path = (gazetteers_dir / gazetteer_name).with_suffix(".txt")
with gazetteer_path.open(encoding="utf8") as f:
# Lines are only stripped, not filtered or normalized, so a blank line in the
# file yields an empty-string entry in the set.
gazetteers[gazetteer_name] = set(v.strip() for v in f)
return gazetteers