1f0509fedfd578d939486d238f1d297f9e7e6279,textblob/classifiers.py,,_get_words_from_dataset,#Any#,45
Before Change
``words`` is either a string of a list of tokens.
"""
tokenizer = WordTokenizer()
all_words = []
for words, classification in dataset:
// Words may either be a string or an iterable
if isinstance(words, basestring):
all_words.extend(tokenizer.itokenize(words, include_punc=False))
else:
all_words.extend(words)
return set(all_words)
def basic_extractor(document, train_set):
After Change
return tokenizer.itokenize(words, include_punc=False)
else:
return (w for w in words)
all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
return set(all_words)
def basic_extractor(document, train_set):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances
Project Name: sloria/TextBlob
Commit Name: 1f0509fedfd578d939486d238f1d297f9e7e6279
Time: 2013-12-22
Author: sloria1@gmail.com
File Name: textblob/classifiers.py
Class Name:
Method Name: _get_words_from_dataset
Project Name: pantsbuild/pants
Commit Name: 73c3c787abbe1ca71b900f11bb880e1de09c6d2a
Time: 2020-09-13
Author: john.sirois@gmail.com
File Name: src/python/pants/engine/process.py
Class Name:
Method Name: find_binary
Project Name: OpenNMT/OpenNMT-py
Commit Name: f122269dc50b76a4656d2542709ffe4837144a24
Time: 2018-06-12
Author: vince62s@yahoo.com
File Name: onmt/utils/loss.py
Class Name:
Method Name: shards