ba2af6efeb086391d286ee5d86635f723e806c15,fonduer/parser/parser.py,SimpleTokenizer,parse,#SimpleTokenizer#Any#Any#,30

Before Change


                                                 for x in words]))[:-1]
            text = " ".join(words)
            stable_id = construct_stable_id(document, "phrase", i, i)
            yield {
                "text": text,
                "words": words,
                "char_offsets": char_offsets,
                "stable_id": stable_id
            }
            i += 1


class OmniParser(UDFRunner):

After Change


            if not len(text.strip()):
                continue
            words = text.split()
            char_offsets = [0] + [int(_) for _ in np.cumsum([len(x) + 1
                                  for x in words])[:-1]]
            text = " ".join(words)
            stable_id = construct_stable_id(document, "phrase", i, i)
            yield {
                "text": text,
                "words": words,
                "pos_tags": [""] * len(words),
                "ner_tags": [""] * len(words),
                "lemmas": [""] * len(words),
                "dep_parents": [0] * len(words),
                "dep_labels": [""] * len(words),
                "char_offsets": char_offsets,
                "abs_char_offsets": char_offsets,
                "stable_id": stable_id
            }
            i += 1


class OmniParser(UDFRunner):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: HazyResearch/fonduer
Commit Name: ba2af6efeb086391d286ee5d86635f723e806c15
Time: 2018-05-17
Author: senwu@cs.stanford.edu
File Name: fonduer/parser/parser.py
Class Name: SimpleTokenizer
Method Name: parse


Project Name: tensorflow/datasets
Commit Name: 52ba3c53fdeb5806b3626b873eefabf8c065d9d4
Time: 2019-05-21
Author: adarob@google.com
File Name: tensorflow_datasets/text/squad.py
Class Name: Squad
Method Name: _generate_examples


Project Name: tensorflow/datasets
Commit Name: 3be1154314111df8cd327a321fe3d68e7b661c68
Time: 2020-05-29
Author: charlespatel07@gmail.com
File Name: tensorflow_datasets/text/pg19.py
Class Name: Pg19
Method Name: _generate_examples