9fc989afc19ec56ccbf0b7b966d34bfdd254c651,parlai/tasks/opensubtitles/build.py,,create_fb_format,#Any#Any#,13
Before Change
conv_id = 0
for f in list:
if f[-3:] == ".gz":
dialog = ""
conv_id = conv_id + 1
with gzip.open(f, "r") as f1:
After Change
conv_id = 0
// find all the files.
for root, subfolder, files in os.walk(inpath):
for f in files:
if f.endswith(".gz"):
dialog = ""
conv_id = conv_id + 1
with gzip.open(os.path.join(root, f), "r") as f1:
// print(str(conv_id) + ": " + f)
words = ""
line_id = 1
turn_id = 1
for line in f1:
line = str(line)
if line.find("<s id="") != -1:
// new sentence
if len(words) > 0:
if (turn_id % 2) == 0:
dialog += str(line_id) + " " + words
else:
dialog += "\t" + words + "\n"
line_id += 1
turn_id = turn_id + 1
words = ""
else:
i1 = line.find("<w id="")
if i1 >= 0:
line = line[i1:]
word = line[line.find(">")+1:line.find("</w")]
words = words + " " + word.replace("\t", " ")
handle = ftrain
if (conv_id % 10) == 0:
handle = ftest
if (conv_id % 10) == 1:
handle = fvalid
handle.write(dialog + "\n")
ftrain.close()
fvalid.close()
ftest.close()
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: facebookresearch/ParlAI
Commit Name: 9fc989afc19ec56ccbf0b7b966d34bfdd254c651
Time: 2017-05-06
Author: ahm@fb.com
File Name: parlai/tasks/opensubtitles/build.py
Class Name:
Method Name: create_fb_format
Project Name: haotianteng/Chiron
Commit Name: 583deefa3a0d9d320293ea3194433956269a4a63
Time: 2018-05-07
Author: havens.teng@gmail.com
File Name: chiron/utils/extract_sig_ref.py
Class Name:
Method Name: extract
Project Name: tensorflow/datasets
Commit Name: 72dd67421c060a4ff991e0b7316deb1c429e0f23
Time: 2020-03-22
Author: thealmightylion.madhav@gmail.com
File Name: tensorflow_datasets/scripts/replace_fake_images.py
Class Name:
Method Name: rewrite_zip