7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88,nltk/tokenize/treebank.py,TreebankWordTokenizer,TreebankWordTokenizer_1,#,49
Before Change
re.compile(r"(?i)\b(lem)(me)\b"),
re.compile(r"(?i)\b(mor)('n)\b"),
re.compile(r"(?i)\b(wan)(na) ")]
CONTRACTIONS3 = [re.compile(r"(?i) ('t)(is)\b"),
re.compile(r"(?i) ('t)(was)\b")]
CONTRACTIONS4 = [re.compile(r"(?i)\b(whad)(dd)(ya)\b"),
re.compile(r"(?i)\b(wha)(t)(cha)\b")]
def tokenize(self, text, return_str=False):
After Change
# List of contractions adapted from Robert MacIntyre's tokenizer.
_contractions = MacIntyreContractions()
CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2))
CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3))
def tokenize(self, text, return_str=False):
for regexp, substitution in self.STARTING_QUOTES:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances Project Name: nltk/nltk
Commit Name: 7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88
Time: 2017-05-03
Author: alvations@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: TreebankWordTokenizer_1
Project Name: streamlit/streamlit
Commit Name: 89ff0a812370c9bdc22b1d2297c09806f0d1375f
Time: 2019-11-19
Author: pnaomi@gmail.com
File Name: lib/streamlit/__init__.py
Class Name:
Method Name:
Project Name: acl-org/acl-anthology
Commit Name: 71bd2aa1128f242429ef8245b0c14e13672ed8d7
Time: 2019-05-08
Author: dchiang@nd.edu
File Name: bin/latex_to_unicode.py
Class Name:
Method Name: