0bcc8da0344cddc9dfff82a788df519c19489500,nltk/tokenize/treebank.py,TreebankWordTokenizer,span_tokenize,#TreebankWordTokenizer#Any#,147

Before Change



        # Convert converted quotes back to original double quotes
        # Do this only if original text contains double quote(s)
        if '"' in text:
            # Find double quotes and converted quotes
            matched = [m.group() for m in re.finditer(r'[(``)(\'\')(")]+', text)]
            
            # Replace converted quotes back to double quotes
            tokens = [matched.pop(0) if tok in ['"', "``", "''"] else tok for tok in raw_tokens]
        else:
            tokens = raw_tokens

        return align_tokens(tokens, text)


class TreebankWordDetokenizer(TokenizerI):

After Change



        spans = []
        for word_token in self.tokenize(text):
            if word_token in ("``", "''"):
                orig_idx = text.find(word_token, ix)
                quote_idx = text.find('"', ix)
                if orig_idx < 0:
                    real_token = '"'
                elif quote_idx < 0:
                    real_token = word_token
                elif orig_idx < quote_idx:
                    real_token = word_token
                else:
                    real_token = '"'
            else:
                real_token = word_token
            ix = text.find(real_token, ix)
            end = ix + len(real_token)
            spans.append((ix, end))
            ix = end

In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 5

Instances

Link

Project Name: nltk/nltk

Commit Name: 0bcc8da0344cddc9dfff82a788df519c19489500

Time: 2017-10-17

Author: lyyb46@gmail.com

File Name: nltk/tokenize/treebank.py

Class Name: TreebankWordTokenizer

Method Name: span_tokenize

Link

Project Name: shibing624/pycorrector

Commit Name: b3349272475868db067a4e01795e19d3b9e57c63

Time: 2018-03-07

Author: xuming624@qq.com

File Name: pycorrector/cn_spell.py

Class Name:

Method Name: correct

Link

Project Name: NifTK/NiftyNet

Commit Name: 01c3a882833dc5031df2f4440717870b35e4833a

Time: 2017-05-29

Author: wenqi.li@ucl.ac.uk

File Name: utilities/constraints_classes.py

Class Name: ConstraintSearch

Method Name: list_subjects_potential

Link

Project Name: acl-org/acl-anthology

Commit Name: fbf30ba6aadfecc94efa0d410612628507fa691f

Time: 2020-07-29

Author: post@cs.jhu.edu

File Name: bin/generate_crossref_doi_metadata.py

Class Name:

Method Name: main