44f80b5872b1bb9679d15b8230c1731fd26ac527,torchnlp/text_encoders/static_tokenizer_encoder.py,StaticTokenizerEncoder,__init__,#StaticTokenizerEncoder#Any#Any#Any#Any#Any#,15
Before Change
self.lower = lower
self.tokenize = tokenize
self.append_eos = append_eos
self.tokens = Counter()
for text in sample:
self.tokens.update(self._preprocess(text))
After Change
if not isinstance(sample, list):
raise TypeError("Sample needs to be a list of strings.")
self.append_eos = append_eos
self.tokens = Counter()
self.tokenize = tokenize if tokenize else lambda x: x
for text in sample:
self.tokens.update(self.tokenize(text))
self.stoi = RESERVED_STOI.copy()
self.itos = RESERVED_ITOS[:]
for token, count in self.tokens.items():
if count >= min_occurrences:
self.itos.append(token)
self.stoi[token] = len(self.itos) - 1
@property
def vocab(self):
    """Return a list of tokens.

    Returns:
        The ``itos`` list stored on this instance, returned as-is
        (the same list object, not a copy).
    """
    return self.itos
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 6
Instances
Project Name: PetrochukM/PyTorch-NLP
Commit Name: 44f80b5872b1bb9679d15b8230c1731fd26ac527
Time: 2018-03-10
Author: petrochukm@gmail.com
File Name: torchnlp/text_encoders/static_tokenizer_encoder.py
Class Name: StaticTokenizerEncoder
Method Name: __init__
Project Name: home-assistant/home-assistant
Commit Name: d6abdc0d4efa8386e549a9c8bbacc52bf5c82f7e
Time: 2016-04-08
Author: jaharkes@cs.cmu.edu
File Name: homeassistant/util/yaml.py
Class Name:
Method Name: _ordered_dict
Project Name: anttttti/Wordbatch
Commit Name: 7170cdf9c6ed8beacd93738b0ec1c97cfbc23b6e
Time: 2018-04-12
Author: antti.puurula@yahoo.com
File Name: wordbatch/wordbatch.py
Class Name: WordBatch
Method Name: __init__
Project Name: biolab/orange3
Commit Name: 46598f5fd6b35acf43c155570e98148eb47780c0
Time: 2020-02-14
Author: tomaz.hocevar@fri.uni-lj.si
File Name: Orange/data/util.py
Class Name:
Method Name: get_unique_names_duplicates