52dd8f17b382dea2ddaf3b4054d7845c8c3b4f72,pycorrector/seq2seq/fce_reader.py,FCEReader,read_samples_by_string,#FCEReader#Any#,26
Before Change
self.UNKNOW_ID = self.token_2_id[FCEReader.UNKNOWN_TOKEN]
def read_samples_by_string(self, path):
    """Yield one ``(source, target)`` pair of token lists per token sequence.

    Token sequences come from ``self.read_tokens(path)``.  ``target`` always
    holds every token of the sequence unchanged.  ``source`` holds the same
    tokens unless ``self.config.enable_data_dropout`` is truthy, in which
    case each token is handled as follows:

    * a token found in ``FCEReader.REPLACEMENTS`` is swapped for its mapped
      value when ``random.random() < self.replacement_prob``;
    * otherwise, a token found in ``FCEReader.DROPOUT_TOKENS`` is omitted
      when ``random.random() < self.dropout_prob``;
    * otherwise the token is copied as-is.

    Args:
        path: Passed through unchanged to ``self.read_tokens``.

    Yields:
        ``(source, target)`` tuples of lists.
    """
    for tokens in self.read_tokens(path):
        source = []
        target = []
        for token in tokens:
            target.append(token)
            if self.config.enable_data_dropout:
                # Random dropout words from the input.  The dropout draw is
                # evaluated before the replacement draw, so the order in
                # which random numbers are consumed stays stable.
                dropout_token = (token in FCEReader.DROPOUT_TOKENS
                                 and random.random() < self.dropout_prob)
                replace_token = (token in FCEReader.REPLACEMENTS
                                 and random.random() < self.replacement_prob)
                if replace_token:
                    # Look up the current token; indexing with the whole
                    # `tokens` sequence raised TypeError (unhashable list).
                    source.append(FCEReader.REPLACEMENTS[token])
                elif not dropout_token:
                    source.append(token)
            else:
                source.append(token)
        yield source, target
def unknown_token(self):
    """Return ``FCEReader.UNKNOWN_TOKEN``."""
    return FCEReader.UNKNOWN_TOKEN
def read_tokens(self, path):
After Change
def read_samples_by_string(self, path):
    """Yield a ``(source, target)`` pair of token lists read from *path*.

    The first line of the file supplies the source sentence and the second
    line the target sentence.  Each line is lower-cased, its first five
    characters are discarded (presumably a fixed-width label prefix --
    confirm against the data format), and the remainder is stripped and
    split on whitespace.

    When ``self.config.enable_data_dropout`` is truthy the source tokens are
    perturbed one by one:

    * a token found in ``FCEReader.REPLACEMENTS`` is swapped for its mapped
      value when ``random.random() < self.replacement_prob``;
    * otherwise, a token found in ``FCEReader.DROPOUT_TOKENS`` is omitted
      when ``random.random() < self.dropout_prob``;
    * otherwise the token is kept.

    Args:
        path: Path of a UTF-8 text file.

    Yields:
        A ``(source, target)`` tuple of token lists.  Nothing is yielded
        when the file has fewer than two lines.
    """
    # NOTE(review): only the first source/target line pair is consumed; if
    # the file is meant to hold many pairs this needs a loop -- confirm.
    with open(path, "r", encoding="utf-8") as f:
        line_src = f.readline()
        line_dst = f.readline()
    if not (line_src and line_dst):
        # readline() returns "" at EOF, so a missing line means no sample.
        return

    source = line_src.lower()[5:].strip().split()
    target = line_dst.lower()[5:].strip().split()
    if self.config.enable_data_dropout:
        new_source = []
        for token in source:
            # Random dropout words from the input.  The dropout draw is
            # evaluated before the replacement draw, so the order in which
            # random numbers are consumed stays stable.
            dropout_token = (token in FCEReader.DROPOUT_TOKENS
                             and random.random() < self.dropout_prob)
            replace_token = (token in FCEReader.REPLACEMENTS
                             and random.random() < self.replacement_prob)
            if replace_token:
                # Look up the current token; indexing with the whole
                # `source` list raised TypeError (unhashable list).
                new_source.append(FCEReader.REPLACEMENTS[token])
            elif not dropout_token:
                new_source.append(token)
        source = new_source
    yield source, target
def unknown_token(self):
    """Return ``FCEReader.UNKNOWN_TOKEN``."""
    return FCEReader.UNKNOWN_TOKEN
def read_tokens(self, path):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 6
Instances
Project Name: shibing624/pycorrector
Commit Name: 52dd8f17b382dea2ddaf3b4054d7845c8c3b4f72
Time: 2018-03-29
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/fce_reader.py
Class Name: FCEReader
Method Name: read_samples_by_string
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_samples_by_string
Project Name: scipy/scipy
Commit Name: 18d079cad2d3bf82bd8276066f9b33b57c146e73
Time: 2015-05-11
Author: evgeni@burovski.me
File Name: tools/validate_runtests_log.py
Class Name:
Method Name: