9fc989afc19ec56ccbf0b7b966d34bfdd254c651,parlai/tasks/opensubtitles/build.py,,create_fb_format,#Any#Any#,13

Before Change


    
    conv_id = 0
    for f in list:
        if f[-3:] == ".gz":
            dialog = ""
            conv_id = conv_id + 1
            with gzip.open(f, "r") as f1:

After Change



    conv_id = 0
    # find all the files.
    for root, subfolder, files in os.walk(inpath):
        for f in files:
            if f.endswith(".gz"):
                dialog = ""
                conv_id = conv_id + 1
                with gzip.open(os.path.join(root, f), "r") as f1:
                    # print(str(conv_id) + ": " + f)
                    words = ""
                    line_id = 1
                    turn_id = 1
                    for line in f1:
                        line = str(line)
                        if line.find("<s id="") != -1:
                            # new sentence
                            if len(words) > 0:
                                if (turn_id % 2) == 0:
                                    dialog += str(line_id) + " " + words
                                else:
                                    dialog += "\t" + words + "\n"
                                    line_id += 1
                            turn_id = turn_id + 1
                            words = ""
                        else:
                            i1 = line.find("<w id="")
                            if i1 >= 0:
                                line = line[i1:]
                                word = line[line.find(">")+1:line.find("</w")]
                                words = words + " " + word.replace("\t", " ")
                handle = ftrain
                if (conv_id % 10) == 0:
                    handle = ftest
                if (conv_id % 10) == 1:
                    handle = fvalid
                handle.write(dialog + "\n")

    ftrain.close()
    fvalid.close()
    ftest.close()
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: facebookresearch/ParlAI
Commit Name: 9fc989afc19ec56ccbf0b7b966d34bfdd254c651
Time: 2017-05-06
Author: ahm@fb.com
File Name: parlai/tasks/opensubtitles/build.py
Class Name:
Method Name: create_fb_format


Project Name: haotianteng/Chiron
Commit Name: 583deefa3a0d9d320293ea3194433956269a4a63
Time: 2018-05-07
Author: havens.teng@gmail.com
File Name: chiron/utils/extract_sig_ref.py
Class Name:
Method Name: extract


Project Name: tensorflow/datasets
Commit Name: 72dd67421c060a4ff991e0b7316deb1c429e0f23
Time: 2020-03-22
Author: thealmightylion.madhav@gmail.com
File Name: tensorflow_datasets/scripts/replace_fake_images.py
Class Name:
Method Name: rewrite_zip