7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88,nltk/tokenize/treebank.py,TreebankWordTokenizer,TreebankWordTokenizer_1,#,49

Before Change


                     re.compile(r"(?i)\b(lem)(me)\b"),
                     re.compile(r"(?i)\b(mor)('n)\b"),
                     re.compile(r"(?i)\b(wan)(na) ")]
    CONTRACTIONS3 = [re.compile(r"(?i) ('t)(is)\b"),
                     re.compile(r"(?i) ('t)(was)\b")]
    CONTRACTIONS4 = [re.compile(r"(?i)\b(whad)(dd)(ya)\b"),
                     re.compile(r"(?i)\b(wha)(t)(cha)\b")]

    def tokenize(self, text, return_str=False):

After Change



    # List of contractions adapted from Robert MacIntyre's tokenizer.
    _contractions = MacIntyreContractions()
    CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2))
    CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3))

    def tokenize(self, text, return_str=False):
        for regexp, substitution in self.STARTING_QUOTES:
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: nltk/nltk
Commit Name: 7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88
Time: 2017-05-03
Author: alvations@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: TreebankWordTokenizer_1


Project Name: streamlit/streamlit
Commit Name: 89ff0a812370c9bdc22b1d2297c09806f0d1375f
Time: 2019-11-19
Author: pnaomi@gmail.com
File Name: lib/streamlit/__init__.py
Class Name:
Method Name:


Project Name: acl-org/acl-anthology
Commit Name: 71bd2aa1128f242429ef8245b0c14e13672ed8d7
Time: 2019-05-08
Author: dchiang@nd.edu
File Name: bin/latex_to_unicode.py
Class Name:
Method Name: