959fcc4f5dd2972a1894041a3aff1505f61fcf93,tmtoolkit/preprocess.py,TMPreproc,tokenize,#TMPreproc#,154
Before Change
return self
def tokenize(self):
if not callable(self.tokenizer):
raise ValueError("tokenizer must be callable")
self._tokens = {dl: tuplize(self.tokenizer(txt)) for dl, txt in self.docs.items()}
return self
After Change
def tokenize(self):
self._invalidate_workers_tokens()
self._send_task_to_workers("tokenize")
return self
def generate_ngrams(self, n, join=True, join_str=" ", reassign_tokens=False):
//self._require_tokens()
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 2
Instances
Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: tokenize
Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: use_ngrams_as_tokens
Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: generate_ngrams
Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 6072b0695388307205e0954369f1932e23444993
Time: 2019-02-19
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess/_tmpreproc.py
Class Name: TMPreproc
Method Name: tokens_to_lowercase