observed = [tok.text for tok in extract.words(
self.spacy_doc, filter_stops=False, filter_punct=False, filter_nums=False,
min_freq=2)][:25]
self.assertEqual(observed, expected)
def test_ngrams_less_than_1(self):
    """Verify that ``extract.ngrams`` raises ``ValueError`` when given 0.

    The call is wrapped in ``list()`` so that, should ``extract.ngrams``
    produce its results lazily, the iteration actually runs inside the
    assertion.
    """
    self.assertRaises(
        ValueError,
        lambda: list(extract.ngrams(self.spacy_doc, 0)),
    )
After Change
self.assertTrue(all(tok.pos_ == "NOUN" for tok in result))
def test_words_min_freq(self):
    """Verify that ``extract.words(..., min_freq=2)`` drops rare tokens.

    Rather than comparing against a hard-coded list of expected strings,
    the test tallies how often each token's ``lower_`` form appears in
    ``self.spacy_doc`` and asserts that every token returned by
    ``extract.words`` has a tally of at least 2.
    """
    # Reference frequencies, keyed by each token's ``lower_`` attribute.
    counts = collections.Counter(tok.lower_ for tok in self.spacy_doc)
    # All other filters are disabled so only ``min_freq`` is exercised.
    result = list(extract.words(
        self.spacy_doc,
        filter_stops=False,
        filter_punct=False,
        filter_nums=False,
        min_freq=2,
    ))
    self.assertTrue(all(counts[tok.lower_] >= 2 for tok in result))