9021ea8b31871e760095086669651dd1f072fce0,gensim/models/fasttext.py,FastTextTrainables,init_ngrams_post_load,#FastTextTrainables#Any#Any#,843

Before Change


        vectors are discarded here to save space.

        
        all_ngrams = []
        wv.vectors = np.zeros((len(wv.vocab), wv.vector_size), dtype=REAL)

        for w, vocab in wv.vocab.items():
            all_ngrams += _compute_ngrams(w, wv.min_n, wv.max_n)
            wv.vectors[vocab.index] += np.array(wv.vectors_ngrams[vocab.index])

        all_ngrams = set(all_ngrams)
        wv.num_ngram_vectors = len(all_ngrams)
        ngram_indices = []
        for i, ngram in enumerate(all_ngrams):
            ngram_hash = _ft_hash(ngram)
            ngram_indices.append(len(wv.vocab) + ngram_hash % self.bucket)
            wv.ngrams[ngram] = i
        wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0)

        ngram_weights = wv.vectors_ngrams

After Change



        ngram_indices = []
        wv.num_ngram_vectors = 0
        for word in wv.vocab.keys():
            for ngram in _compute_ngrams(word, wv.min_n, wv.max_n):
                ngram_hash = _ft_hash(ngram) % self.bucket
                if ngram_hash in wv.hash2index:
                    continue
                wv.hash2index[ngram_hash] = len(ngram_indices)
                ngram_indices.append(len(wv.vocab) + ngram_hash)
        wv.num_ngram_vectors = len(ngram_indices)
        wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0)

        ngram_weights = wv.vectors_ngrams

        logger.info(
            "loading weights for %s words for fastText model from %s",
            len(wv.vocab), file_name
        )

        for w, vocab in wv.vocab.items():
            word_ngrams = _compute_ngrams(w, wv.min_n, wv.max_n)
            for word_ngram in word_ngrams:
                vec_idx = wv.hash2index[_ft_hash(word_ngram) % self.bucket]
                wv.vectors[vocab.index] += np.array(ngram_weights[vec_idx])

            wv.vectors[vocab.index] /= (len(word_ngrams) + 1)
        logger.info(
[image: "Italian Trulli" — placeholder alt text from the page template; not part of the code excerpt above]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: RaRe-Technologies/gensim
Commit Name: 9021ea8b31871e760095086669651dd1f072fce0
Time: 2018-03-01
Author: johannes.baiter@gmail.com
File Name: gensim/models/fasttext.py
Class Name: FastTextTrainables
Method Name: init_ngrams_post_load


Project Name: bokeh/bokeh
Commit Name: a6e3f06dff54fe4f34bf02a81e53852fa95b227d
Time: 2015-08-30
Author: nroth@dealnews.com
File Name: bokeh/charts/builder/bar_builder.py
Class Name: BarGlyph
Method Name: __dodge__


Project Name: bokeh/bokeh
Commit Name: 4ace574968a1001c80b1689239d767f9e4497d78
Time: 2015-08-14
Author: nroth@dealnews.com
File Name: bokeh/charts/builder/scatter_builder.py
Class Name: ScatterBuilder
Method Name: _yield_renderers