1463f45807433d3d7ca110ee0eed012bd58e1418,data.py,,create_words_file,#Any#Any#,7

Before Change



	words = []
	normalizer = Normalizer()
	stemmer = Stemmer(words_file=None)
	words_file = codecs.open(output, "w", "utf8")
	for line in codecs.open(dic_file, encoding="utf8"):
		word = line.split("\t")[0]
		words.append(normalizer.normalize(word))

	word_set = set(words + list(stemmer.tenses.values()))
	for word in words:
		stem = stemmer.stem(word)
		if stem == word or stem not in word_set:
			print(word, file=words_file)

	print(output, "created")


def create_verbs_file(valency_file="resources/valency.txt", output="data/verbs.dat"):

After Change


	

	dic_words = [line.split("\t")[0] for line in codecs.open(dic_file, encoding="utf8")]
	print(*dic_words, sep="\n", file=codecs.open(output, "w", "utf8"))
	print(output, "created")


def create_verbs_file(valency_file="resources/valency.txt", output="data/verbs.dat"):

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances

Link

Project Name: sobhe/hazm

Commit Name: 1463f45807433d3d7ca110ee0eed012bd58e1418

Time: 2013-11-25

Author: alireza.nournia@gmail.com

File Name: data.py

Class Name:

Method Name: create_words_file

Link

Project Name: unitedstates/congress-legislators

Commit Name: 4a317ef89f89e335446bfb9c554d381284873c52

Time: 2016-03-16

Author: jconline@gmail.com

File Name: scripts/house_contacts.py

Class Name:

Method Name: run

Link

Project Name: merenlab/anvio

Commit Name: 26c3431f27d56d6bb8757160d7c7374b45ac8f50

Time: 2019-08-21

Author: quentin.clayssen@gmail.com

File Name: anvio/tables/taxoestimation.py

Class Name: TablesForTaxoestimation

Method Name: get_data_for_taxonomy_estimation