7b205efea6b504de04311bc55d109cd28d8f3e0c,OpenNMT/preprocess.py,,makeVocabulary,#Any#Any#,44
Before Change
reader = onmt.utils.FileReader.new(filename)
while True:
sent = reader.next()
if sent is None:
break
words, features, numFeatures = onmt.utils.Features.extract(sent)
After Change
onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD])
featuresVocabs = []
with open(filename) as f:
for sent in f.readlines():
words, features = onmt.utils.Features.extract(sent)
numFeatures = len(features)
if len(featuresVocabs) == 0 and numFeatures > 0:
for j in range(numFeatures):
featuresVocabs[j] = onmt.utils.Dict(
{onmt.Constants.PAD_WORD, onmt.Constants.UNK_WORD,
onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD})
else:
assert len(featuresVocabs) == numFeatures, (
"all sentences must have the same numbers of additional features")
for i in range(len(words)):
wordVocab.add(words[i])
for j in range(numFeatures):
featuresVocabs[j].add(features[j][i])
originalSize = wordVocab.size()
wordVocab = wordVocab.prune(size)
print("Created dictionary of size %d (pruned from %d)" %
(wordVocab.size(), originalSize))
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 5
Instances Project Name: pytorch/examples
Commit Name: 7b205efea6b504de04311bc55d109cd28d8f3e0c
Time: 2016-12-21
Author: alerer@fb.com
File Name: OpenNMT/preprocess.py
Class Name:
Method Name: makeVocabulary
Project Name: RaRe-Technologies/gensim
Commit Name: 4ea27ba44e2ce56f06c2294d02e59031707e1ca5
Time: 2014-04-19
Author: radimrehurek@seznam.cz
File Name: gensim/matutils.py
Class Name: MmReader
Method Name: __init__
Project Name: balakg/posewarp-cvpr2018
Commit Name: 2cc74a324cc84821f3fbb793630f7328e54bd87e
Time: 2017-05-08
Author: balakg@thousandeyes.csail.mit.edu
File Name: posewarp_test.py
Class Name:
Method Name: train