959fcc4f5dd2972a1894041a3aff1505f61fcf93,tmtoolkit/preprocess.py,TMPreproc,tokenize,#TMPreproc#,154

Before Change


        return self

    def tokenize(self):
        """Tokenize every document in `self.docs` using `self.tokenizer`.

        The result is stored in `self._tokens` as a dict that maps each key of
        `self.docs` to `tuplize(self.tokenizer(text))`.
        NOTE(review): `tuplize` is defined outside this snippet; presumably it
        converts the tokenizer output to a tuple -- confirm.

        :return: this instance, so that calls can be chained
        :raises ValueError: if `self.tokenizer` is not callable
        """
        if not callable(self.tokenizer):
            raise ValueError("tokenizer must be callable")

        # Build the complete mapping first and assign it afterwards, so that
        # `self._tokens` is left untouched if tokenizing any document fails.
        tokens_per_doc = {}
        for doc_label, doc_text in self.docs.items():
            tokens_per_doc[doc_label] = tuplize(self.tokenizer(doc_text))
        self._tokens = tokens_per_doc

        return self

After Change



    def tokenize(self):
        """Run the "tokenize" task on the workers.

        First calls `self._invalidate_workers_tokens()`, then dispatches the
        task named "tokenize" through `self._send_task_to_workers()`.
        NOTE(review): both helpers are defined outside this snippet; presumably
        the first discards tokens previously obtained from the workers so that
        stale results are not reused -- confirm against their definitions.

        :return: this instance, so that calls can be chained
        """
        self._invalidate_workers_tokens()
        self._send_task_to_workers("tokenize")
        return self

    def generate_ngrams(self, n, join=True, join_str=" ", reassign_tokens=False):
        //self._require_tokens()
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 2

Instances


Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: tokenize


Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: use_ngrams_as_tokens


Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 959fcc4f5dd2972a1894041a3aff1505f61fcf93
Time: 2017-08-22
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess.py
Class Name: TMPreproc
Method Name: generate_ngrams


Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 6072b0695388307205e0954369f1932e23444993
Time: 2019-02-19
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess/_tmpreproc.py
Class Name: TMPreproc
Method Name: tokens_to_lowercase