self.vocab_size = len(tgt_dict)
self.beam_size = beam_size
self.minlen = minlen
max_decoder_len = min(m.max_decoder_positions() for m in self.models)
max_decoder_len -= 1 // we define maxlen not including the EOS marker
self.maxlen = max_decoder_len if maxlen is None else min(maxlen, max_decoder_len)
self.stop_early = stop_early
self.normalize_scores = normalize_scores
self.len_penalty = len_penalty
After Change
self.vocab_size = len(tgt_dict)
self.beam_size = beam_size
// the max beam size is the dictionary size - 1, since we never select pad
self.beam_size = min(beam_size, self.vocab_size - 1)
self.max_len_a = max_len_a
self.max_len_b = max_len_b
self.min_len = min_len
self.stop_early = stop_early