7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88,nltk/tokenize/treebank.py,TreebankWordTokenizer,TreebankWordTokenizer_1,#,49

Before Change


    ]

    # List of contractions adapted from Robert MacIntyre's tokenizer.
    CONTRACTIONS2 = [re.compile(r"(?i)\b(can)(not)\b"),
                     re.compile(r"(?i)\b(d)('ye)\b"),
                     re.compile(r"(?i)\b(gim)(me)\b"),
                     re.compile(r"(?i)\b(gon)(na)\b"),
                     re.compile(r"(?i)\b(got)(ta)\b"),
                     re.compile(r"(?i)\b(lem)(me)\b"),
                     re.compile(r"(?i)\b(mor)('n)\b"),
                     re.compile(r"(?i)\b(wan)(na) ")]
    CONTRACTIONS3 = [re.compile(r"(?i) ('t)(is)\b"),
                     re.compile(r"(?i) ('t)(was)\b")]
    CONTRACTIONS4 = [re.compile(r"(?i)\b(whad)(dd)(ya)\b"),
                     re.compile(r"(?i)\b(wha)(t)(cha)\b")]

After Change


    # List of contractions adapted from Robert MacIntyre's tokenizer.
    _contractions = MacIntyreContractions()
    CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2))
    CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3))

    def tokenize(self, text, return_str=False):
        for regexp, substitution in self.STARTING_QUOTES:
            text = regexp.sub(substitution, text)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: nltk/nltk
Commit Name: 7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88
Time: 2017-05-03
Author: alvations@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: TreebankWordTokenizer_1


Project Name: matplotlib/matplotlib
Commit Name: 0b92b4f5530fee68432f13075a1ddc866748f9d1
Time: 2020-11-20
Author: anntzer.lee@gmail.com
File Name: examples/showcase/firefox.py
Class Name:
Method Name: svg_parse


Project Name: mozilla/bugbug
Commit Name: ef65ed0d5cba305de3f3761e929719a0515ce6f1
Time: 2019-01-23
Author: mcastelluccio@mozilla.com
File Name: bugbug/repository.py
Class Name:
Method Name: download_commits