2ca3b0d5a88d414a87c343981b80ed1204b8dd8d,analyze_chunked_corpus.py,,,#,13

Before Change


////////////////////////////

# Accumulators filled in a single pass over chunked_corpus.chunked_words().
# wc: total number of (word, tag) tokens seen, inside or outside a chunk.
wc = 0
# tag_counts: tag -> occurrences (in this version only chunk leaves are counted).
tag_counts = FreqDist()
# iob_counts: chunk node label (obj.node) -> number of chunk subtrees seen.
iob_counts = FreqDist()
# tag_iob_counts: tag -> distribution over the node labels of the chunks
# in which that tag appears.
tag_iob_counts = ConditionalFreqDist()
# word_set: distinct words (in this version only words that are chunk leaves).
word_set = set()

# Each yielded item is handled either as a Tree (a chunk) or as a bare
# (word, tag) pair.
for obj in chunked_corpus.chunked_words():
	if isinstance(obj, Tree):
		# One count per chunk subtree, keyed by its node label.
		iob_counts.inc(obj.node)
		
		# Every leaf of the chunk is unpacked as a (word, tag) pair.
		for word, tag in obj.leaves():
			wc += 1
			word_set.add(word)
			tag_counts.inc(tag)
			tag_iob_counts[tag].inc(obj.node)
	else:
		# Non-Tree item: unpacked as (word, tag) but only wc is updated.
		# NOTE(review): word and tag are not added to word_set / tag_counts
		# here, so tokens outside any chunk are missing from those totals --
		# looks like an omission; confirm against the intended statistics.
		word, tag = obj
		wc += 1

After Change


////////////////////////////

# Accumulators filled in a single pass over chunked_corpus.chunked_words().
# wc: total number of (word, tag) tokens seen, inside or outside a chunk.
wc = 0
# tag_counts: tag -> occurrences; missing keys start at 0.
tag_counts = collections.defaultdict(int)
# iob_counts: chunk label -> number of chunk subtrees seen.
iob_counts = collections.defaultdict(int)
# tag_iob_counts: tag -> (chunk label -> occurrences); both levels are
# created on first access, so no explicit initialisation is needed below.
tag_iob_counts = collections.defaultdict(lambda: collections.defaultdict(int))
# word_set: distinct words seen anywhere in the corpus.
word_set = set()

# Each yielded item is handled either as a Tree (a chunk) or as a bare
# (word, tag) pair.
for obj in chunked_corpus.chunked_words():
	if isinstance(obj, Tree):
		# node_label is defined outside this snippet; presumably it returns
		# the tree's label regardless of NLTK version -- TODO confirm.
		label = node_label(obj)
		iob_counts[label] += 1
		
		# Every leaf of the chunk is unpacked as a (word, tag) pair.
		for word, tag in obj.leaves():
			wc += 1
			word_set.add(word)
			tag_counts[tag] += 1
			tag_iob_counts[tag][label] += 1
	else:
		# Token outside any chunk: it contributes to the word count, the
		# vocabulary and the tag totals, but not to tag_iob_counts because
		# there is no chunk label to attribute it to.
		word, tag = obj
		wc += 1
		word_set.add(word)
		tag_counts[tag] += 1

////////////////////////
//// output ////
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: japerk/nltk-trainer
Commit Name: 2ca3b0d5a88d414a87c343981b80ed1204b8dd8d
Time: 2014-01-05
Author: japerk@gmail.com
File Name: analyze_chunked_corpus.py
Class Name:
Method Name:


Project Name: japerk/nltk-trainer
Commit Name: bc128d9596ed07d1c8d5d98f35b1f6905ad4d819
Time: 2014-01-05
Author: japerk@gmail.com
File Name: analyze_tagged_corpus.py
Class Name:
Method Name:


Project Name: japerk/nltk-trainer
Commit Name: 46248d91fbec1af87b58502c7169d3d21ef47376
Time: 2014-04-21
Author: japerk@gmail.com
File Name: nltk_trainer/classification/scoring.py
Class Name:
Method Name: sum_category_word_scores