def __init__(self, cut_off=0.2, cleanup_urls=True):
    """Set up the instance and load every bug into the in-memory corpus.

    Args:
        cut_off: Value stored unchanged on ``self.cut_off``; how it is used
            is not visible in this block.
        cleanup_urls: Forwarded as-is to the parent class initializer.
    """
    super().__init__(cleanup_urls=cleanup_urls)
    self.cut_off = cut_off
    # Parallel lists: corpus[i] is the preprocessed text of the bug whose
    # id is stored at bug_ids[i].
    self.corpus = []
    self.bug_ids = []
    for bug_record in bugzilla.get_bugs():
        raw_text = self.get_text(bug_record)
        self.corpus.append(self.text_preprocess(raw_text))
        self.bug_ids.append(bug_record["id"])
After Change
# Convert every text in self.corpus into a bag-of-words vector. Each corpus
# entry is unpacked as a (bug_id, text) pair; only the text is needed here.
# NOTE(review): presumably self.dictionary is a gensim Dictionary - confirm.
corpus_final = [self.dictionary.doc2bow(text) for _bug_id, text in self.corpus]
# Initialize the TF-IDF model on the bag-of-words corpus and apply it to that
# same corpus; the resulting corpus has the same dimensions.
tfidf = models.TfidfModel(corpus_final)
corpus_tfidf = tfidf[corpus_final]
# Transform the TF-IDF corpus into a latent 300-D space via Latent Semantic Indexing
self.lsi = models.LsiModel(
corpus_tfidf, id2word=self.dictionary, num_topics=300