0bcc8da0344cddc9dfff82a788df519c19489500,nltk/tokenize/treebank.py,TreebankWordTokenizer,span_tokenize,#TreebankWordTokenizer#Any#,147
Before Change
// Do this only if original text contains double quote(s)
if """ in text:
// Find double quotes and converted quotes
matched = [m.group() for m in re.finditer(r"[(``)(\"\")(")]+", text)]
// Replace converted quotes back to double quotes
tokens = [matched.pop(0) if tok in [""", "``", """"] else tok for tok in raw_tokens]
else:
tokens = raw_tokens
return align_tokens(tokens, text)
class TreebankWordDetokenizer(TokenizerI):
After Change
spans.append((ix, end))
ix = end
return spans
class TreebankWordDetokenizer(TokenizerI):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 6
Instances
Project Name: nltk/nltk
Commit Name: 0bcc8da0344cddc9dfff82a788df519c19489500
Time: 2017-10-17
Author: lyyb46@gmail.com
File Name: nltk/tokenize/treebank.py
Class Name: TreebankWordTokenizer
Method Name: span_tokenize
Project Name: snipsco/snips-nlu
Commit Name: d264e82050700d9aaed31c11dbd65f9dbd03e4d9
Time: 2017-04-25
Author: adrien.ball@snips.net
File Name: snips_nlu/tokenization.py
Class Name:
Method Name: tokenize
Project Name: facebookresearch/ParlAI
Commit Name: 028405190df7a8798b821d1fb1c28cb62f54d7d9
Time: 2017-05-12
Author: adamfisch15@gmail.com
File Name: parlai/agents/drqa/agents.py
Class Name: SimpleDictionaryAgent
Method Name: span_tokenize