92849315a08ac89243ef2613e28ff56efecd21cb,python/baseline/reader.py,MultiFileParallelCorpusReader,build_vocabs,#MultiFileParallelCorpusReader#Any#,177

Before Change


    # Two possibilities here: either we have a single vocab file (e.g. vocab.bpe.32000), or we
    # generate the vocabs from the per-side files (each name plus the source/target suffix).
    def build_vocabs(self, files, **kwargs):
        """Build and filter the source and target vocabularies from ``files``.

        If ``files`` holds exactly one entry and that path exists on disk, both
        vocabularies are built from column 0 of that single file. Otherwise every
        entry is treated as a prefix: ``self.src_suffix`` / ``self.tgt_suffix`` is
        appended to obtain the source / target file names.

        :param files: Sequence of paths (single-file case) or path prefixes.
        :param kwargs: May contain ``min_f``, a dict. Its ``"tgt"`` entry
            (default ``-1``) is used to filter the target vocab; the remaining
            entries are used to filter the source vocabs. Presumably these are
            minimum-frequency thresholds -- confirm against ``_filter_vocab``.

        NOTE(review): this excerpt ends without a ``return`` statement; the rest
        of the method is not visible here.
        """
        if len(files) == 1 and os.path.exists(files[0]):
            # Single existing file: source and target vocabs both come from column 0 of it.
            src_vocab = _build_vocab_for_col(0, files, self.src_vectorizers)
            tgt_vocab = _build_vocab_for_col(0, files, {"tgt": self.tgt_vectorizer})
        else:
            # Otherwise derive one file per side by appending the side's suffix to each entry.
            src_vocab = _build_vocab_for_col(0, [f + self.src_suffix for f in files], self.src_vectorizers)
            tgt_vocab = _build_vocab_for_col(0, [f + self.tgt_suffix for f in files], {"tgt": self.tgt_vectorizer})
        min_f = kwargs.get("min_f", {})
        # pop() removes "tgt" from min_f in place, so it is not passed to the source filter below.
        # When the caller supplied min_f, it is the caller's dict that gets modified.
        tgt_min_f = {"tgt": min_f.pop("tgt", -1)}
        src_vocab = _filter_vocab(src_vocab, min_f)
        tgt_vocab = _filter_vocab(tgt_vocab, tgt_min_f)

After Change


            all_vects["tgt"] = self.tgt_vectorizer
            _vocab_allowed(all_vects)
            # Only read the vocab file once; both vocabs below are built from the same text.
            text = _read_from_col(0, listify(vocab_file))
            src_vocab = _build_vocab_for_col(None, None, self.src_vectorizers, text=text)
            tgt_vocab = _build_vocab_for_col(None, None, {"tgt": self.tgt_vectorizer}, text=text)
            return src_vocab, tgt_vocab["tgt"]
        src_vocab = _build_vocab_for_col(0, [f + self.src_suffix for f in files], self.src_vectorizers)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: dpressel/mead-baseline
Commit Name: 92849315a08ac89243ef2613e28ff56efecd21cb
Time: 2018-12-03
Author: blester125@users.noreply.github.com
File Name: python/baseline/reader.py
Class Name: MultiFileParallelCorpusReader
Method Name: build_vocabs


Project Name: tyarkoni/pliers
Commit Name: 51c953965cfec248402d25e64d28eb888ee1d00c
Time: 2018-01-19
Author: tyarkoni@gmail.com
File Name: pliers/extractors/image.py
Class Name: FaceRecognitionFeatureExtractor
Method Name: _to_df


Project Name: OpenMined/PySyft
Commit Name: 6c794ae3cc2f0c90408ac33ee64030d1fd85752f
Time: 2021-02-19
Author: koenlennartvanderveen@gmail.com
File Name: src/syft/core/node/common/plan/plan.py
Class Name: Plan
Method Name: execute