# the src tokens and their indices and potentially also the
# src and tgt features and alignment information.
# Merge each source example dict with its target counterpart when targets
# are supplied; otherwise the source dicts are used (and mutated) directly.
if tgt_examples:
    examples = [join_dicts(src, tgt)
                for src, tgt in zip(src_examples, tgt_examples)]
else:
    examples = src_examples

# Record every example's position in the list under the "indices" key.
for i, example in enumerate(examples):
    example["indices"] = i

if opt is None or opt.dynamic_dict:
    for example in examples:
        src = example["src"]
        # One vocabulary per example, built only from that example's
        # source tokens; kept on self.src_vocabs in example order.
        src_vocab = torchtext.vocab.Vocab(Counter(src))
        self.src_vocabs.append(src_vocab)
        # Map source tokens to indices in the dynamic dict.
        src_map = torch.LongTensor([src_vocab.stoi[w] for w in src])
        example["src_map"] = src_map

        if "tgt" in example:
            tgt = example["tgt"]
            # Target tokens looked up in the per-example source vocab,
            # with a literal 0 added at each end.
            # NOTE(review): the 0s presumably line up with begin/end
            # tokens added to tgt elsewhere -- confirm.
            mask = torch.LongTensor(
                [0] + [src_vocab.stoi[w] for w in tgt] + [0])
            example["alignment"] = mask

# Convert the dict examples into torchtext Examples. The key set (and its
# order) is taken from the first example, so every example is expected to
# carry the same keys; an empty `examples` list raises IndexError here.
keys = examples[0].keys()
fields = [(k, fields[k]) for k in keys]
examples = [torchtext.data.Example.fromlist([ex[k] for k in keys], fields)
            for ex in examples]
def filter_pred(example):
    """Return True when ``example`` is within the configured length limits.

    Both ``example.src`` and ``example.tgt`` must be non-empty and no longer
    than ``opt.src_seq_length`` / ``opt.tgt_seq_length`` respectively.
    ``opt`` is read from the enclosing scope, not passed in.

    The target side is only inspected when the source side passes, so an
    example rejected on its source length never has ``example.tgt`` read.

    NOTE(review): this dereferences ``opt`` unconditionally, so it must only
    be used when ``opt`` is not None -- confirm at the call site.
    """
    return (0 < len(example.src) <= opt.src_seq_length
            and 0 < len(example.tgt) <= opt.tgt_seq_length)