def read_samples_by_string(self, path):
for tokens in self.read_tokens(path):
source = []
target = []
for token in tokens:
target.append(token)
if self.config.enable_data_dropout:
# Randomly drop words from the input
dropout_token = (token in FCEReader.DROPOUT_TOKENS and random.random() < self.dropout_prob)
replace_token = (token in FCEReader.REPLACEMENTS and random.random() < self.replacement_prob)
After Change
self.UNKNOWN_ID = self.token_2_id[FCEReader.UNKNOWN_TOKEN]
def read_samples_by_string(self, path):
with open(path, "r", encoding="utf-8") as f:
line_src = f.readline()
line_dst = f.readline()
if line_src and line_dst:
source = line_src.lower()[5:].strip().split()
target = line_dst.lower()[5:].strip().split()
if self.config.enable_data_dropout: