4e144c9f842d7415d8be5bdbb5912d88ae32cced,pycorrector/seq2seq/corpus_reader.py,CGEDReader,read_samples_by_string,#CGEDReader#Any#,80
Before Change
def read_samples_by_string(self, path):
with open(path, "r", encoding="utf-8") as f:
dom_tree = minidom.parse(f)
docs = dom_tree.documentElement.getElementsByTagName("DOC")
for doc in docs:
source_text = doc.getElementsByTagName("TEXT")[0]. \
childNodes[0].data.strip()
After Change
while True:
line_src = f.readline()
line_dst = f.readline()
if not line_src or len(line_src) < 5:
break
source = line_src.lower()[5:].strip().split()
target = line_dst.lower()[5:].strip().split()
yield source, target
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_samples_by_string
Project Name: chainer/chainer
Commit Name: 75231a39c212fc8066f99633698b7e59b2ce4efb
Time: 2019-10-13
Author: duaipp@gmail.com
File Name: onnx_chainer/onnx_helper.py
Class Name:
Method Name: is_support_non_standard_domain
Project Name: merenlab/anvio
Commit Name: e888f185c3877e7c34aa6fa7d65e684fe872e867
Time: 2018-12-16
Author: a.murat.eren@gmail.com
File Name: anvio/contigops.py
Class Name: GenbankToAnvio
Method Name: sanity_check