504410df92f74a6471247909d01f4ca4639b91d8,tests/test_readme.py,ReadmeTestCase,test_term_counting,#ReadmeTestCase#,221
Before Change
bot = self.doc.as_bag_of_terms(weighting="tf", normalized=False,
lemmatize="auto", ngram_range=(1, 1))
// sort by term ascending, then count descending
observed_2 = sorted([(self.doc.spacy_stringstore[term_id], count)
for term_id, count in bot.most_common(n=10)],
key=itemgetter(0), reverse=False)
observed_2 = sorted(observed_2, key=itemgetter(1), reverse=True)
expected_2 = [("nation", 6), ("incarceration", 4), ("world", 4),
("drug", 3), ("lead", 3), ("mandatory", 3), ("minimum", 3),
("people", 3), ("problem", 3), ("male", 2)]
After Change
bot = self.doc.to_bag_of_terms(
ngrams=1, normalize=False, lemmatize=True, as_strings=True)
// sort by count descending, ties broken by term descending; keep the top 10
observed_2 = sorted(bot.items(), key=itemgetter(1, 0), reverse=True)[:10]
expected_2 = [
("nation", 6), ("world", 4), ("u.s.", 4), ("incarceration", 4),
("decade", 4), ("state", 3), ("record", 3), ("problem", 3),
("people", 3), ("minimum", 3)]
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: chartbeat-labs/textacy
Commit Name: 504410df92f74a6471247909d01f4ca4639b91d8
Time: 2016-08-11
Author: burton@chartbeat.com
File Name: tests/test_readme.py
Class Name: ReadmeTestCase
Method Name: test_term_counting
Project Name: fxsjy/jieba
Commit Name: c6b386f65b6295d4fbe691f7eb78ec4982009ef9
Time: 2014-11-29
Author: abcdoyle888@gmail.com
File Name: jieba/posseg/viterbi.py
Class Name:
Method Name: get_top_states
Project Name: fxsjy/jieba
Commit Name: 7b7c6955a9119734ac70f902129b7fbfd1f0b660
Time: 2014-11-28
Author: abcdoyle888@gmail.com
File Name: jieba/posseg/viterbi.py
Class Name:
Method Name: get_top_states