# Keep only the documents and bug ids found at the positions listed in `indexes`,
# so both lists stay aligned with each other.
kept_docs = [self.corpus[pos] for pos in indexes]
self.corpus = kept_docs
kept_ids = [self.bug_ids[pos] for pos in indexes]
self.bug_ids = kept_ids
# Train the embedding model on the filtered corpus.
self.w2vmodel = Word2Vec(
    self.corpus,
    size=100,
    min_count=5,
)
# Precompute normalised vectors; replace=True asks gensim to overwrite the raw
# vectors rather than keep a second copy (see gensim's init_sims documentation).
self.w2vmodel.init_sims(replace=True)
// word2vec.wmdistance calculates only the euclidean distance. To get the cosine distance,
// we're using the function with a few subtle changes. We compute the cosine distances
After Change
# Fit a 300-topic LSI model on corpus_tfidf, using self.dictionary as the
# id -> word mapping, and keep it on the instance.
lsi_model = models.LsiModel(corpus_tfidf, num_topics=300, id2word=self.dictionary)
self.lsi = lsi_model
# Apply the fitted model to the same corpus; the result is consumed below.
corpus_lsi = self.lsi[corpus_tfidf]
// Indexing the corpus
self.index = similarities.Similarity(
output_prefix="simdata.shdat", corpus=corpus_lsi, num_features=300