4e144c9f842d7415d8be5bdbb5912d88ae32cced,pycorrector/seq2seq/corpus_reader.py,CGEDReader,read_samples_by_string,#CGEDReader#Any#,80
Before Change
childNodes[0].data.strip()
target_text = doc.getElementsByTagName("CORRECTION")[0]. \
childNodes[0].data.strip()
source = segment(source_text, cut_type="char")
target = segment(target_text, cut_type="char")
yield source, target
def unknown_token(self):
After Change
if not line_src or len(line_src) < 5:
break
source = line_src.lower()[5:].strip().split()
target = line_dst.lower()[5:].strip().split()
yield source, target
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 2
Instances
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_samples_by_string
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_tokens
Project Name: PyThaiNLP/pythainlp
Commit Name: 6aa0eda33cae977d6e7d1053f2456049b0bbfc13
Time: 2016-12-27
Author: wannaphong@yahoo.com
File Name: test.py
Class Name:
Method Name: