1f0509fedfd578d939486d238f1d297f9e7e6279,textblob/classifiers.py,,_get_words_from_dataset,#Any#,45

Before Change


        ``words`` is either a string of a list of tokens.
    """
    tokenizer = WordTokenizer()
    all_words = []
    for words, classification in dataset:
        // Words may either be a string or an iterable
        if isinstance(words, basestring):
            all_words.extend(tokenizer.itokenize(words, include_punc=False))
        else:
            all_words.extend(words)
    return set(all_words)


def basic_extractor(document, train_set):

After Change


            return tokenizer.itokenize(words, include_punc=False)
        else:
            return (w for w in words)
    all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
    return set(all_words)


def basic_extractor(document, train_set):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances


Project Name: sloria/TextBlob
Commit Name: 1f0509fedfd578d939486d238f1d297f9e7e6279
Time: 2013-12-22
Author: sloria1@gmail.com
File Name: textblob/classifiers.py
Class Name:
Method Name: _get_words_from_dataset


Project Name: pantsbuild/pants
Commit Name: 73c3c787abbe1ca71b900f11bb880e1de09c6d2a
Time: 2020-09-13
Author: john.sirois@gmail.com
File Name: src/python/pants/engine/process.py
Class Name:
Method Name: find_binary


Project Name: OpenNMT/OpenNMT-py
Commit Name: f122269dc50b76a4656d2542709ffe4837144a24
Time: 2018-06-12
Author: vince62s@yahoo.com
File Name: onmt/utils/loss.py
Class Name:
Method Name: shards