7b205efea6b504de04311bc55d109cd28d8f3e0c,OpenNMT/preprocess.py,,makeVocabulary,#Any#Any#,44

Before Change


             onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD})
    featuresVocabs = []

    reader = onmt.utils.FileReader.new(filename)

    while True:
        sent = reader.next()
        if sent is None:
            break

    words, features, numFeatures = onmt.utils.Features.extract(sent)

    if len(featuresVocabs) == 0 and numFeatures > 0:
        for j in range(numFeatures):
            featuresVocabs[j] = onmt.utils.Dict.new(
                    {onmt.Constants.PAD_WORD, onmt.Constants.UNK_WORD,
                     onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD})
    else:
        assert(len(featuresVocabs) == numFeatures,
               "all sentences must have the same numbers of additional features")

    for i in range(len(words)):
        wordVocab.add(word[i])

        for j in range(numFeatures):
            featuresVocabs[j].add(features[j][i])

    reader.close()

    originalSize = wordVocab.size()
    wordVocab = wordVocab.prune(size)
    print("Created dictionary of size %d (pruned from %d)" %

After Change


             onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD])
    featuresVocabs = []

    with open(filename) as f:
        for sent in f.readlines():
            words, features = onmt.utils.Features.extract(sent)
            numFeatures = len(features)

            if len(featuresVocabs) == 0 and numFeatures > 0:
                for j in range(numFeatures):
                    featuresVocabs[j] = onmt.utils.Dict(
                            {onmt.Constants.PAD_WORD, onmt.Constants.UNK_WORD,
                             onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD})
            else:
                assert len(featuresVocabs) == numFeatures, (
                    "all sentences must have the same numbers of additional features")

            for i in range(len(words)):
                wordVocab.add(words[i])

                for j in range(numFeatures):
                    featuresVocabs[j].add(features[j][i])

    originalSize = wordVocab.size()
    wordVocab = wordVocab.prune(size)
    print("Created dictionary of size %d (pruned from %d)" %
          (wordVocab.size(), originalSize))

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 9

Instances

Link

Project Name: OpenNMT/OpenNMT-py

Commit Name: 7b205efea6b504de04311bc55d109cd28d8f3e0c

Time: 2016-12-21

Author: alerer@fb.com

File Name: OpenNMT/preprocess.py

Class Name:

Method Name: makeVocabulary