4e144c9f842d7415d8be5bdbb5912d88ae32cced,pycorrector/seq2seq/corpus_reader.py,CGEDReader,read_tokens,#CGEDReader#Any#Any#,96
Before Change
dom_tree = minidom.parse(f)
docs = dom_tree.documentElement.getElementsByTagName("DOC")
for doc in docs:
if is_infer:
// Input the error text
sentence = doc.getElementsByTagName("TEXT")[0]. \
childNodes[0].data.strip()
else:
// Input the correct text
sentence = doc.getElementsByTagName("CORRECTION")[0]. \
childNodes[0].data.strip()
yield segment(sentence, cut_type="char")
After Change
// Input the correct text, which start with 0
if i % 2 == 1:
if line and len(line) > 5:
yield line.lower()[5:].strip().split()
i += 1
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_tokens
Project Name: biocore/scikit-bio
Commit Name: 49b10f8937fc4e8e58270189011a9a0e5927f0e4
Time: 2014-10-17
Author: jai.rideout@gmail.com
File Name: skbio/io/fasta.py
Class Name:
Method Name: _fasta_to_generator
Project Name: tensorflow/datasets
Commit Name: 52ba3c53fdeb5806b3626b873eefabf8c065d9d4
Time: 2019-05-21
Author: adarob@google.com
File Name: tensorflow_datasets/text/squad.py
Class Name: Squad
Method Name: _generate_examples