994004cca3b3153cc9c5b2d1ff0f6c4ba1b3cd13,tmtoolkit/preprocess/_preprocworker.py,PreprocWorker,_task_lemmatize,#PreprocWorker#,303

Before Change


    def _task_lemmatize(self):
        self._update_docs_attr("text", self._get_docs_attr("lemma_", custom_attr=False))

        if "lemma" in self._std_attrs:
            self._std_attrs.pop(self._std_attrs.index("lemma"))

    def _task_expand_compound_tokens(self, split_chars=("-",), split_on_len=2, split_on_casechange=False):
        exptoks = expand_compounds(self._tokens, split_chars=split_chars, split_on_len=split_on_len,
                                   split_on_casechange=split_on_casechange, flatten=False)

After Change


        assert len(docs_lemmata) == len(self._tokens)
        for doc_tok, doc_lem in zip(self._tokens, docs_lemmata):
            assert len(doc_tok) == len(doc_lem)
            new_docs_lemmata.append([t if l.startswith("-") and l.endswith("-") else l
                                     for t, l in zip(doc_tok, doc_lem)])

        self._update_docs_attr("text", new_docs_lemmata)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 994004cca3b3153cc9c5b2d1ff0f6c4ba1b3cd13
Time: 2020-01-29
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess/_preprocworker.py
Class Name: PreprocWorker
Method Name: _task_lemmatize


Project Name: dpressel/mead-baseline
Commit Name: 2eb4c5f77bd8da9b1e23851b0acb84543e442953
Time: 2018-09-24
Author: dpressel@gmail.com
File Name: python/baseline/pytorch/classify/model.py
Class Name: WordClassifierModelBase
Method Name: create


Project Name: gboeing/osmnx
Commit Name: 248fa57af17b42deec9c818eb83aa71e63d50354
Time: 2020-06-08
Author: boeing@usc.edu
File Name: osmnx/io.py
Class Name:
Method Name: _convert_edge_attr_types