// construct a list of (wordIndex, wordFrequency) 2-tuples
doc = [(self.word2id.get(w), words.count(w)) for w in use_words]
else:
uniq_words = set(words)
// construct a list of (word, wordFrequency) 2-tuples
doc = [(w, words.count(w)) for w in uniq_words]
// Return the document, then forget it and move on to the next one.
// Note that this way only one document is held in memory at a time, not the whole corpus.
return doc
After Change
// construct a list of (wordIndex, wordFrequency) 2-tuples
doc = [(self.word2id[w], counts[w]) for w in use_words]
else:
word_freqs = Counter(words)
// construct a list of (word, wordFrequency) 2-tuples
doc = list(word_freqs.items())
// return the document, then forget it and move on to the next one