594f66417cf0a2abaead1ce6e5f15a2100441682,onmt/IO.py,ONMTDataset,__init__,#ONMTDataset#Any#Any#Any#Any#Any#,112

Before Change


        # the src tokens and their indices and potentially also the
        # src and tgt features and alignment information.
        if tgt_examples:
            examples = [join_dicts(src, tgt)
                        for src, tgt in zip(src_examples, tgt_examples)]
        else:
            examples = src_examples
        for i, example in enumerate(examples):
            example["indices"] = i

        if opt is None or opt.dynamic_dict:
            for example in examples:
                src = example["src"]
                src_vocab = torchtext.vocab.Vocab(Counter(src))
                self.src_vocabs.append(src_vocab)
                # mapping source tokens to indices in the dynamic dict
                src_map = torch.LongTensor([src_vocab.stoi[w] for w in src])
                example["src_map"] = src_map

                if "tgt" in example:
                    tgt = example["tgt"]
                    mask = torch.LongTensor(
                            [0] + [src_vocab.stoi[w] for w in tgt] + [0])
                    example["alignment"] = mask

        keys = examples[0].keys()
        fields = [(k, fields[k]) for k in keys]
        examples = [torchtext.data.Example.fromlist([ex[k] for k in keys],
                                                    fields)
                    for ex in examples]

        def filter_pred(example):
            """Tell whether ``example`` falls inside the configured length limits.

            The source side is checked first: ``example.src`` must be non-empty
            and no longer than ``opt.src_seq_length``. Only when that holds is
            ``example.tgt`` inspected, with the same rule applied against
            ``opt.tgt_seq_length``. ``opt`` is taken from the enclosing scope.

            Returns True when both sides are within bounds, False otherwise.
            """
            if not 0 < len(example.src) <= opt.src_seq_length:
                return False
            return 0 < len(example.tgt) <= opt.tgt_seq_length

After Change


        if self.type_ == "text":
            self.src_vocabs = []
            src_truncate = 0 if opt is None else opt.src_seq_length_trunc
            src_point = next(self._read_corpus_file(src_path, src_truncate))
            self.nfeatures = src_point[2]
            src_data = self._read_corpus_file(src_path, src_truncate)
            src_examples = self._construct_examples(src_data, "src")
        else:
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 9

Instances


Project Name: OpenNMT/OpenNMT-py
Commit Name: 594f66417cf0a2abaead1ce6e5f15a2100441682
Time: 2017-09-21
Author: srush@seas.harvard.edu
File Name: onmt/IO.py
Class Name: ONMTDataset
Method Name: __init__


Project Name: probcomp/bayeslite
Commit Name: 68a7d5553fa4cef09e0159ac5e100a6424172386
Time: 2015-09-24
Author: gremio@acm.org
File Name: src/metamodels/crosscat.py
Class Name: CrosscatMetamodel
Method Name: create_generator


Project Name: snipsco/snips-nlu
Commit Name: aae0ba842e293a63d6aaee45553712532054ef79
Time: 2017-04-12
Author: clement.doumouro@snips.ai
File Name: snips_nlu/slot_filler/data_augmentation.py
Class Name:
Method Name: get_noise_iterator