0bcc8da0344cddc9dfff82a788df519c19489500,nltk/tokenize/treebank.py,TreebankWordTokenizer,span_tokenize,#TreebankWordTokenizer#Any#,147
Before Change
// Convert converted quotes back to original double quotes
// Do this only if original text contains double quote(s)
if """ in text:
// Find double quotes and converted quotes
matched = [m.group() for m in re.finditer(r"[(``)(\"\")(")]+", text)]
// Replace converted quotes back to double quotes
tokens = [matched.pop(0) if tok in [""", "``", """"] else tok for tok in raw_tokens]
else:
tokens = raw_tokens
return align_tokens(tokens, text)
class TreebankWordDetokenizer(TokenizerI):
After Change
spans = []
for word_token in self.tokenize(text):
if word_token in ("``", """"):
orig_idx = text.find(word_token, ix)
quote_idx = text.find(""", ix)
if orig_idx < 0:
real_token = """
elif quote_idx < 0:
real_token = word_token
elif orig_idx < quote_idx:
real_token = word_token
else:
real_token = """
else:
real_token = word_token
ix = text.find(real_token, ix)
end = ix + len(real_token)
spans.append((ix, end))
ix = end
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 5
Instances
Project Name: nltk/nltk
Commit Name: 0bcc8da0344cddc9dfff82a788df519c19489500
Time: 2017-10-17
Author: lyyb46@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: span_tokenize
Project Name: shibing624/pycorrector
Commit Name: b3349272475868db067a4e01795e19d3b9e57c63
Time: 2018-03-07
Author: xuming624@qq.com
File Name: pycorrector/cn_spell.py
Class Name:
Method Name: correct
Project Name: NifTK/NiftyNet
Commit Name: 01c3a882833dc5031df2f4440717870b35e4833a
Time: 2017-05-29
Author: wenqi.li@ucl.ac.uk
File Name: utilities/constraints_classes.py
Class Name: ConstraintSearch
Method Name: list_subjects_potential
Project Name: acl-org/acl-anthology
Commit Name: fbf30ba6aadfecc94efa0d410612628507fa691f
Time: 2020-07-29
Author: post@cs.jhu.edu
File Name: bin/generate_crossref_doi_metadata.py
Class Name:
Method Name: main