7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88,nltk/tokenize/treebank.py,TreebankWordTokenizer,TreebankWordTokenizer_1,#,49
Before Change
]
// List of contractions adapted from Robert MacIntyre's tokenizer.
CONTRACTIONS2 = [re.compile(r"(?i)\b(can)(not)\b"),
re.compile(r"(?i)\b(d)('ye)\b"),
re.compile(r"(?i)\b(gim)(me)\b"),
re.compile(r"(?i)\b(gon)(na)\b"),
re.compile(r"(?i)\b(got)(ta)\b"),
re.compile(r"(?i)\b(lem)(me)\b"),
re.compile(r"(?i)\b(mor)('n)\b"),
re.compile(r"(?i)\b(wan)(na) ")]
CONTRACTIONS3 = [re.compile(r"(?i) ('t)(is)\b"),
re.compile(r"(?i) ('t)(was)\b")]
CONTRACTIONS4 = [re.compile(r"(?i)\b(whad)(dd)(ya)\b"),
re.compile(r"(?i)\b(wha)(t)(cha)\b")]
After Change
// List of contractions adapted from Robert MacIntyre's tokenizer.
_contractions = MacIntyreContractions()
CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2))
CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3))
def tokenize(self, text, return_str=False):
for regexp, substitution in self.STARTING_QUOTES:
text = regexp.sub(substitution, text)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances Project Name: nltk/nltk
Commit Name: 7d3c51ba6059fc7ccadbd44c9c9961fb6700ef88
Time: 2017-05-03
Author: alvations@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: TreebankWordTokenizer_1
Project Name: pytorch/pytorch
Commit Name: 812339ca3d9b68b5f87f9c391aa918f068d5b004
Time: 2021-03-01
Author: benjamin.lefaudeux@gmail.com
File Name: torch/distributed/optim/zero_redundancy_optimizer.py
Class Name: ZeroRedundancyOptimizer
Method Name: __init__
Project Name: bashtage/linearmodels
Commit Name: 1345f1b1b41c047a95deff2442283025c3f6269f
Time: 2019-03-12
Author: kevin.k.sheppard@gmail.com
File Name: linearmodels/tests/panel/test_panel_ols.py
Class Name:
Method Name: