if self.use_wordids:
    # Distinct terms of this document that are present in the vocabulary;
    # unknown words are ignored.
    # NOTE(review): the loop below tests membership against self.word2id
    # directly and never reads uniq_words -- confirm that later code still
    # needs it before removing this line.
    uniq_words = set(words).intersection(iterkeys(self.word2id))

    # The following creates a unique list of words *in the same order*
    # as they were in the input. When iterating over the documents,
    # the (word, count) pairs will appear in the same order as the words
    # first occurred in the input (bar duplicates).
    # If this was not needed, we might as well have used
    # use_words = set(words).
    use_words, counts = [], Counter()
    for word in words:
        if word not in self.word2id:
            # Unknown word: contributes neither to the order nor the counts.
            continue
        if word not in counts:
            # First occurrence: record its position in the input order.
            use_words.append(word)
        counts[word] += 1
    # construct a list of (wordIndex, wordFrequency) 2-tuples