6de331b6e4eff01ef9e86f991d7c1c820f2fe902,allennlp/data/tokenizers/word_tokenizer.py,WordTokenizer,tokenize,#WordTokenizer#Any#,68

Before Change


        
        words = self._word_splitter.split_words(text)
        filtered_words = self._word_filter.filter_words(words)
        stemmed_words = [self._word_stemmer.stem_word(word) for word in filtered_words]
        for start_token in self._start_tokens:
            stemmed_words.insert(0, Token(start_token, 0))
        for end_token in self._end_tokens:
            stemmed_words.append(Token(end_token, -1))
        return stemmed_words

    @classmethod
    def from_params(cls, params: Params) -> "WordTokenizer":
        word_splitter = WordSplitter.from_params(params.pop("word_splitter", {}))

After Change


        stemming or stopword removal, depending on the parameters given to the constructor.
        
        words = self._word_splitter.split_words(text)
        return self._filter_and_stem(words)

    @overrides
    def batch_tokenize(self, texts: List[str]) -> List[List[Token]]:
        batched_words = self._word_splitter.batch_split_words(texts)
[Image placeholder — alt text: "Italian Trulli"]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: allenai/allennlp
Commit Name: 6de331b6e4eff01ef9e86f991d7c1c820f2fe902
Time: 2018-01-22
Author: mattg@allenai.org
File Name: allennlp/data/tokenizers/word_tokenizer.py
Class Name: WordTokenizer
Method Name: tokenize


Project Name: PyThaiNLP/pythainlp
Commit Name: a9ebcc488db9c635c5524de2ba56cee569e91552
Time: 2019-04-17
Author: arthit@gmail.com
File Name: pythainlp/transliterate/royin.py
Class Name:
Method Name: romanize


Project Name: tyarkoni/pliers
Commit Name: 44574472fb477765de24e4f257246df5e838d591
Time: 2018-01-22
Author: tyarkoni@gmail.com
File Name: pliers/extractors/image.py
Class Name: FaceRecognitionFeatureExtractor
Method Name: _to_df