9021ea8b31871e760095086669651dd1f072fce0,gensim/models/fasttext.py,FastTextTrainables,init_ngrams_post_load,#FastTextTrainables#Any#Any#,843
Before Change
vectors are discarded here to save space.
all_ngrams = []
wv.vectors = np.zeros((len(wv.vocab), wv.vector_size), dtype=REAL)
for w, vocab in wv.vocab.items():
all_ngrams += _compute_ngrams(w, wv.min_n, wv.max_n)
wv.vectors[vocab.index] += np.array(wv.vectors_ngrams[vocab.index])
all_ngrams = set(all_ngrams)
wv.num_ngram_vectors = len(all_ngrams)
ngram_indices = []
for i, ngram in enumerate(all_ngrams):
ngram_hash = _ft_hash(ngram)
ngram_indices.append(len(wv.vocab) + ngram_hash % self.bucket)
wv.ngrams[ngram] = i
wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0)
ngram_weights = wv.vectors_ngrams
After Change
ngram_indices = []
wv.num_ngram_vectors = 0
for word in wv.vocab.keys():
for ngram in _compute_ngrams(word, wv.min_n, wv.max_n):
ngram_hash = _ft_hash(ngram) % self.bucket
if ngram_hash in wv.hash2index:
continue
wv.hash2index[ngram_hash] = len(ngram_indices)
ngram_indices.append(len(wv.vocab) + ngram_hash)
wv.num_ngram_vectors = len(ngram_indices)
wv.vectors_ngrams = wv.vectors_ngrams.take(ngram_indices, axis=0)
ngram_weights = wv.vectors_ngrams
logger.info(
"loading weights for %s words for fastText model from %s",
len(wv.vocab), file_name
)
for w, vocab in wv.vocab.items():
word_ngrams = _compute_ngrams(w, wv.min_n, wv.max_n)
for word_ngram in word_ngrams:
vec_idx = wv.hash2index[_ft_hash(word_ngram) % self.bucket]
wv.vectors[vocab.index] += np.array(ngram_weights[vec_idx])
wv.vectors[vocab.index] /= (len(word_ngrams) + 1)
logger.info(
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 6
Instances
Project Name: RaRe-Technologies/gensim
Commit Name: 9021ea8b31871e760095086669651dd1f072fce0
Time: 2018-03-01
Author: johannes.baiter@gmail.com
File Name: gensim/models/fasttext.py
Class Name: FastTextTrainables
Method Name: init_ngrams_post_load
Project Name: bokeh/bokeh
Commit Name: a6e3f06dff54fe4f34bf02a81e53852fa95b227d
Time: 2015-08-30
Author: nroth@dealnews.com
File Name: bokeh/charts/builder/bar_builder.py
Class Name: BarGlyph
Method Name: __dodge__
Project Name: bokeh/bokeh
Commit Name: 4ace574968a1001c80b1689239d767f9e4497d78
Time: 2015-08-14
Author: nroth@dealnews.com
File Name: bokeh/charts/builder/scatter_builder.py
Class Name: ScatterBuilder
Method Name: _yield_renderers