bidirectional=opt.brnn)
self.multiattn = nn.ModuleList(
    [onmt.modules.MultiHeadedAttention(8, self.hidden_size)
     for _ in range(self.layers)])
self.linear_out = nn.ModuleList(
    [BLinear(self.hidden_size, 2 * self.hidden_size)
     for _ in range(self.layers)])
self.linear_final = nn.ModuleList(
    [BLinear(2 * self.hidden_size, self.hidden_size)
     for _ in range(self.layers)])
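Before the change, each encoder layer's pieces live in three parallel ModuleLists. A minimal sketch of how one layer might be wired in the forward pass, assuming the standard transformer block: self-attention followed by a position-wise feed-forward built from the two BLinear layers. The attention call signature, the residual connections, and the ReLU are assumptions based on the usual design, not shown in this excerpt.

```python
def _layer_forward(self, i, x, mask):
    # Hypothetical wiring of layer i; names outside this excerpt are assumed.
    # Self-attention sub-layer with a residual connection.
    attn_out = self.multiattn[i](x, x, x, mask=mask)
    x = x + attn_out
    # Position-wise feed-forward: expand to 2*hidden, then project back.
    hidden = torch.relu(self.linear_out[i](x))
    return x + self.linear_final[i](hidden)
```

The change below replaces this hand-rolled layout with one TransformerEncoder module per layer.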
After Change
self.word_vec_size = opt.word_vec_size
if self.positional_encoding:
    self.pe = make_positional_encodings(opt.word_vec_size, 5000).cuda()
if self.encoder_layer == "transformer":
    self.transformer = nn.ModuleList(
        [TransformerEncoder(self.hidden_size, opt)
         for _ in range(opt.layers)])
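The excerpt only shows the call site of make_positional_encodings. A minimal sketch of what it could compute, assuming the sinusoidal encodings from "Attention Is All You Need" and an even word_vec_size; the (max_len, 1, dim) output shape is likewise an assumption:

```python
import math
import torch

def make_positional_encodings(dim, max_len):
    # pe[pos, 2i]   = sin(pos / 10000^(2i/dim))
    # pe[pos, 2i+1] = cos(pos / 10000^(2i/dim))
    pe = torch.zeros(max_len, dim)
    position = torch.arange(0, max_len).unsqueeze(1).float()
    div_term = torch.exp(torch.arange(0, dim, 2).float()
                         * -(math.log(10000.0) / dim))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe.unsqueeze(1)  # add a middle axis so it broadcasts over batch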