if self.use_wordids:
// get all distinct terms in this document, ignore unknown words
uniq_words = set(words).intersection(iterkeys(self.word2id))
// the following creates a unique list of words *in the same order*
// as they were in the input. When iterating over the documents,
// the (word, count) pairs will appear in the same order as they
// were in the input.
After Change
use_words.append(word)
counts[word] += 1
// construct a list of (wordIndex, wordFrequency) 2-tuples
doc = [(self.word2id[w], counts[w]) for w in use_words]
else:
word_freqs = Counter(words)
// construct a list of (word, wordFrequency) 2-tuples