994004cca3b3153cc9c5b2d1ff0f6c4ba1b3cd13,tmtoolkit/preprocess/_preprocworker.py,PreprocWorker,_task_lemmatize,#PreprocWorker#,303
Before Change
def _task_lemmatize(self):
# Pre-change snapshot of the method; indentation was lost in this record, so the
# three statements below are the method body and the last line is the `if` body.
#
# Overwrite each document's "text" attribute with the values that
# `_get_docs_attr` returns for "lemma_" (requested with custom_attr=False).
# The lemma values are used as returned; the "After Change" snippet in this
# record instead keeps the original token whenever a lemma starts and ends with "-".
self._update_docs_attr("text", self._get_docs_attr("lemma_", custom_attr=False))
# Remove the "lemma" entry from `self._std_attrs` when it is present.
# NOTE(review): presumably because the lemmata now live in "text" -- confirm.
if "lemma" in self._std_attrs:
self._std_attrs.pop(self._std_attrs.index("lemma"))
def _task_expand_compound_tokens(self, split_chars=("-",), split_on_len=2, split_on_casechange=False):
exptoks = expand_compounds(self._tokens, split_chars=split_chars, split_on_len=split_on_len,
split_on_casechange=split_on_casechange, flatten=False)
After Change
assert len(docs_lemmata) == len(self._tokens)
for doc_tok, doc_lem in zip(self._tokens, docs_lemmata):
assert len(doc_tok) == len(doc_lem)
new_docs_lemmata.append([t if l.startswith("-") and l.endswith("-") else l
for t, l in zip(doc_tok, doc_lem)])
self._update_docs_attr("text", new_docs_lemmata)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances Project Name: WZBSocialScienceCenter/tmtoolkit
Commit Name: 994004cca3b3153cc9c5b2d1ff0f6c4ba1b3cd13
Time: 2020-01-29
Author: markus.konrad@wzb.eu
File Name: tmtoolkit/preprocess/_preprocworker.py
Class Name: PreprocWorker
Method Name: _task_lemmatize
Project Name: dpressel/mead-baseline
Commit Name: 2eb4c5f77bd8da9b1e23851b0acb84543e442953
Time: 2018-09-24
Author: dpressel@gmail.com
File Name: python/baseline/pytorch/classify/model.py
Class Name: WordClassifierModelBase
Method Name: create
Project Name: gboeing/osmnx
Commit Name: 248fa57af17b42deec9c818eb83aa71e63d50354
Time: 2020-06-08
Author: boeing@usc.edu
File Name: osmnx/io.py
Class Name:
Method Name: _convert_edge_attr_types