4ace4ef2fb1956ec4df46f78c9edd02154780913,bugbug/similarity.py,Word2VecWmdSimilarity,__init__,#Word2VecWmdSimilarity#Any#,226
Before Change
self.corpus.append(text_preprocess(get_text(bug)))
self.bug_ids.append(bug["id"])
indexes = list(range(len(self.corpus)))
random.shuffle(indexes)
self.corpus = [self.corpus[idx] for idx in indexes]
self.bug_ids = [self.bug_ids[idx] for idx in indexes]
self.w2vmodel = Word2Vec(self.corpus, size=100, min_count=5)
self.w2vmodel.init_sims(replace=True)
After Change
self.corpus.append([bug["id"], textual_features])
// Assigning unique integer ids to all words
self.dictionary = Dictionary(text for bug_id, text in self.corpus)
// Conversion to BoW
corpus_final = [self.dictionary.doc2bow(text) for bug_id, text in self.corpus]
// Initializing and applying the TF-IDF transformation model on the same corpus; the resulting corpus has the same dimensions
tfidf = models.TfidfModel(corpus_final)
corpus_tfidf = tfidf[corpus_final]
// Transform TF-IDF corpus to latent 300-D space via Latent Semantic Indexing
self.lsi = models.LsiModel(
corpus_tfidf, id2word=self.dictionary, num_topics=300
)
corpus_lsi = self.lsi[corpus_tfidf]
// Indexing the corpus
self.index = similarities.Similarity(
output_prefix="simdata.shdat", corpus=corpus_lsi, num_features=300
)
def get_similar_bugs(self, query, k=10):
query_summary = "{} {}".format(query["summary"], query["comments"][0]["text"])
query_summary = self.text_preprocess(query_summary)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 11
Instances
Project Name: mozilla/bugbug
Commit Name: 4ace4ef2fb1956ec4df46f78c9edd02154780913
Time: 2019-07-24
Author: cklyyung@users.noreply.github.com
File Name: bugbug/similarity.py
Class Name: Word2VecWmdSimilarity
Method Name: __init__
Project Name: mozilla/bugbug
Commit Name: 88b754261ee28f8e4143a573135a0f33da42d249
Time: 2019-07-29
Author: ayush.shridhar1506@gmail.com
File Name: bugbug/similarity.py
Class Name: Word2VecWmdSimilarity
Method Name: __init__
Project Name: fgnt/pb_bss
Commit Name: 32d73ccb0d80db3727775fb9928f5f151ddaaab5
Time: 2015-10-06
Author: cbj@mail.uni-paderborn.de
File Name: nt/speech_enhancement/mask_estimation.py
Class Name:
Method Name: simple_ideal_soft_mask