# Running totals over the whole input: number of word IDs seen, and how many
# of them equal the dictionary's unknown-word ID.
num_words = 0
num_unks = 0
for line_num, line in enumerate(input_file):
    # Every line is unconditionally wrapped in sentence boundary markers.
    words = ["<s>"] + line.split() + ["</s>"]

    word_ids = dictionary.words_to_ids(words)
    num_unks += word_ids.count(dictionary.unk_id)
    num_words += len(word_ids)
    # Re-pack the IDs as a column: one single-element row per word.
    id_rows = [[word_id] for word_id in word_ids]
    word_ids = numpy.array(id_rows).astype("int64")

    # Same column layout for the per-word values from words_to_probs, cast to
    # Theano's configured float type.
    probs = dictionary.words_to_probs(words)
    prob_rows = [[prob] for prob in probs]
    probs = numpy.array(prob_rows).astype(theano.config.floatX)
After Change
for line_num, line in enumerate(input_file):
    # Tokenize on whitespace, then make sure the sentence is delimited by the
    # start/end markers without duplicating markers already present in the
    # input.
    words = line.split()
    # A blank or whitespace-only line splits to an empty list; guard the
    # index lookup so such a line becomes ["<s>", "</s>"] instead of raising
    # IndexError.
    if not words or words[0] != "<s>":
        words.insert(0, "<s>")
    # words is guaranteed non-empty here, so words[-1] is safe.
    if words[-1] != "</s>":
        words.append("</s>")
    word_ids = dictionary.words_to_ids(words)