61d16bbf3147208c426e001299dca496bf2b5248,gensim/models/word2vec.py,Word2Vec,scan_vocab,#Word2Vec#Any#Any#,479

Before Change


                min_reduce += 1

        total_words += sum(itervalues(vocab))
        logger.info("collected %i word types from a corpus of %i words and %i sentences" %
                    (len(vocab), total_words, sentence_no + 1))
        self.corpus_count = sentence_no + 1
        self.raw_vocab = vocab

    def scale_vocab(self, min_count=None, sample=None, dry_run=False):

After Change


        vocab = defaultdict(int)
        for sentence_no, sentence in enumerate(sentences):
            if sentence_no % progress_per == 0:
                logger.info("PROGRESS: at sentence //%i, processed %i words, keeping %i word types",
                            sentence_no, sum(itervalues(vocab)) + total_words, len(vocab))
            for word in sentence:
                vocab[word] += 1

            if self.max_vocab_size and len(vocab) > self.max_vocab_size:
[Image: Italian Trulli]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 3

Instances


Project Name: RaRe-Technologies/gensim
Commit Name: 61d16bbf3147208c426e001299dca496bf2b5248
Time: 2015-07-06
Author: radimrehurek@seznam.cz
File Name: gensim/models/word2vec.py
Class Name: Word2Vec
Method Name: scan_vocab


Project Name: RaRe-Technologies/gensim
Commit Name: 599b6aea7827c30b2ecb281c80d5171e7de273c9
Time: 2015-07-06
Author: radimrehurek@seznam.cz
File Name: gensim/models/word2vec.py
Class Name: Word2Vec
Method Name: scan_vocab


Project Name: MaybeShewill-CV/lanenet-lane-detection
Commit Name: 699898507179e8429792c137b5af404551d049e1
Time: 2018-05-31
Author: luoyao@baidu.com
File Name: tools/test_lanenet.py
Class Name:
Method Name: test_lanenet_batch