1463f45807433d3d7ca110ee0eed012bd58e1418,data.py,,create_words_file,#Any#Any#,7

Before Change



	words = []
	normalizer = Normalizer()
	stemmer = Stemmer(words_file=None)
	words_file = codecs.open(output, "w", "utf8")
	for line in codecs.open(dic_file, encoding="utf8"):
		word = line.split("\t")[0]
		words.append(normalizer.normalize(word))

	word_set = set(words + list(stemmer.tenses.values()))
	for word in words:
		stem = stemmer.stem(word)
		if stem == word or stem not in word_set:
			print(word, file=words_file)

	print(output, "created")


def create_verbs_file(valency_file="resources/valency.txt", output="data/verbs.dat"):

After Change


	

	dic_words = [line.split("\t")[0] for line in codecs.open(dic_file, encoding="utf8")]
	print(*dic_words, sep="\n", file=codecs.open(output, "w", "utf8"))
	print(output, "created")


def create_verbs_file(valency_file="resources/valency.txt", output="data/verbs.dat"):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: sobhe/hazm
Commit Name: 1463f45807433d3d7ca110ee0eed012bd58e1418
Time: 2013-11-25
Author: alireza.nournia@gmail.com
File Name: data.py
Class Name:
Method Name: create_words_file


Project Name: unitedstates/congress-legislators
Commit Name: 4a317ef89f89e335446bfb9c554d381284873c52
Time: 2016-03-16
Author: jconline@gmail.com
File Name: scripts/house_contacts.py
Class Name:
Method Name: run


Project Name: merenlab/anvio
Commit Name: 26c3431f27d56d6bb8757160d7c7374b45ac8f50
Time: 2019-08-21
Author: quentin.clayssen@gmail.com
File Name: anvio/tables/taxoestimation.py
Class Name: TablesForTaxoestimation
Method Name: get_data_for_taxonomy_estimation