a8f7080fbcc91a80391f93c82e21238fc6f82823,tests/test_extract.py,ExtractTestCase,test_words_min_freq,#ExtractTestCase#,48
Before Change
",", "I", "in", "in", "an", "for", ".", "For", ",", "we",
"the", "education", "in", "the", ".", "And", "an",
"education", "and", "he", """, "I", "we", "to", "in"]
observed = [tok.text for tok in extract.words(
self.spacy_doc, filter_stops=False, filter_punct=False, filter_nums=False,
min_freq=2)][:25]
self.assertEqual(observed, expected)
def test_ngrams_less_than_1(self):
with self.assertRaises(ValueError):
After Change
self.assertTrue(all(tok.pos_ == "NOUN" for tok in result))
def test_words_min_freq(self):
counts = collections.Counter()
counts.update(tok.lower_ for tok in self.spacy_doc)
result = [tok for tok in extract.words(
self.spacy_doc, filter_stops=False, filter_punct=False, filter_nums=False,
min_freq=2)]
self.assertTrue(all(counts[tok.lower_] >= 2 for tok in result))
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances Project Name: chartbeat-labs/textacy
Commit Name: a8f7080fbcc91a80391f93c82e21238fc6f82823
Time: 2017-11-28
Author: burton@chartbeat.com
File Name: tests/test_extract.py
Class Name: ExtractTestCase
Method Name: test_words_min_freq
Project Name: chartbeat-labs/textacy
Commit Name: f127dccf797744d04255582e89bfac5163da5e6e
Time: 2017-11-30
Author: burton@chartbeat.com
File Name: tests/test_extract.py
Class Name: ExtractTestCase
Method Name: test_words_min_freq
Project Name: pytorch/tutorials
Commit Name: 4794be6f7e3827228b6e0dc9b1cfe432a3ecdeb3
Time: 2021-03-04
Author: brianjo@fb.com
File Name: beginner_source/text_sentiment_ngrams_tutorial.py
Class Name:
Method Name: