return fc_feats, att_feats, p_att_feats
def _forward(self, fc_feats, att_feats, seq, att_masks=None):
att_feats, att_masks = self.clip_att(att_feats, att_masks)
batch_size = fc_feats.size(0)
state = self.init_hidden(batch_size)
# outputs = []
outputs = fc_feats.new_zeros(batch_size, seq.size(1) - 1, self.vocab_size+1)
fc_feats, att_feats, p_att_feats = self._prepare_feature(fc_feats, att_feats, att_masks)
for i in range(seq.size(1) - 1):
if self.training and i >= 1 and self.ss_prob > 0.0: # otherwise no need to sample
# NOTE(review): diff artifact ("After Change") — the lines below are the post-change
# duplicates of the setup at L7-L9 above; reconcile into a single version.
outputs = fc_feats.new_zeros(batch_size, seq.size(1) - 1, self.vocab_size+1)
# Prepare the features
p_fc_feats, p_att_feats, pp_att_feats, p_att_masks = self._prepare_feature(fc_feats, att_feats, att_masks)
# pp_att_feats is used for attention; it is cached in advance to reduce computation cost
for i in range(seq.size(1) - 1):