92849315a08ac89243ef2613e28ff56efecd21cb,python/baseline/reader.py,MultiFileParallelCorpusReader,build_vocabs,#MultiFileParallelCorpusReader#Any#,177
Before Change
// 2 possibilities here, either we have a vocab file, e.g. vocab.bpe.32000, or we are going to generate
// from each column
def build_vocabs(self, files, **kwargs):
if len(files) == 1 and os.path.exists(files[0]):
src_vocab = _build_vocab_for_col(0, files, self.src_vectorizers)
tgt_vocab = _build_vocab_for_col(0, files, {"tgt": self.tgt_vectorizer})
else:
src_vocab = _build_vocab_for_col(0, [f + self.src_suffix for f in files], self.src_vectorizers)
tgt_vocab = _build_vocab_for_col(0, [f + self.tgt_suffix for f in files], {"tgt": self.tgt_vectorizer})
min_f = kwargs.get("min_f", {})
tgt_min_f = {"tgt": min_f.pop("tgt", -1)}
src_vocab = _filter_vocab(src_vocab, min_f)
tgt_vocab = _filter_vocab(tgt_vocab, tgt_min_f)
After Change
all_vects["tgt"] = self.tgt_vectorizer
_vocab_allowed(all_vects)
// Only read the file once.
text = _read_from_col(0, listify(vocab_file))
src_vocab = _build_vocab_for_col(None, None, self.src_vectorizers, text=text)
tgt_vocab = _build_vocab_for_col(None, None, {"tgt": self.tgt_vectorizer}, text=text)
return src_vocab, tgt_vocab["tgt"]
src_vocab = _build_vocab_for_col(0, [f + self.src_suffix for f in files], self.src_vectorizers)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: dpressel/mead-baseline
Commit Name: 92849315a08ac89243ef2613e28ff56efecd21cb
Time: 2018-12-03
Author: blester125@users.noreply.github.com
File Name: python/baseline/reader.py
Class Name: MultiFileParallelCorpusReader
Method Name: build_vocabs
Project Name: tyarkoni/pliers
Commit Name: 51c953965cfec248402d25e64d28eb888ee1d00c
Time: 2018-01-19
Author: tyarkoni@gmail.com
File Name: pliers/extractors/image.py
Class Name: FaceRecognitionFeatureExtractor
Method Name: _to_df
Project Name: OpenMined/PySyft
Commit Name: 6c794ae3cc2f0c90408ac33ee64030d1fd85752f
Time: 2021-02-19
Author: koenlennartvanderveen@gmail.com
File Name: src/syft/core/node/common/plan/plan.py
Class Name: Plan
Method Name: execute