def get_embeddings(vocab, word2vec, d_word):
    """Get embeddings for the words in vocab"""
    word_v_size = vocab.get_vocab_size("tokens")
    embeddings = np.zeros((word_v_size, d_word))
    for idx in range(word_v_size):  # kind of hacky
        word = vocab.get_token_from_index(idx)
        if word == "@@PADDING@@" or word == "@@UNKNOWN@@":
            continue
After Change
def get_embeddings(vocab, vec_file, d_word):
    """Get embeddings for the words in vocab"""
    word_v_size, unk_idx = vocab.get_vocab_size("tokens"), vocab.get_token_index(vocab._oov_token)
    embeddings = np.random.randn(word_v_size, d_word)  # was np.zeros((word_v_size, d_word))
    with open(vec_file) as vec_fh:
        for line in vec_fh:
            word, vec = line.split(" ", 1)