from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
# Fields describe how raw text becomes tensors: spaCy tokenization,
# <sos>/<eos> markers wrapped around every sentence, and lowercasing.
SRC = Field(
    tokenize="spacy",
    tokenizer_language="de",
    init_token="<sos>",
    eos_token="<eos>",
    lower=True,
)
TRG = Field(
    tokenize="spacy",
    tokenizer_language="en",
    init_token="<sos>",
    eos_token="<eos>",
    lower=True,
)

# Load the Multi30k German->English parallel corpus, picking the .de side
# for SRC and the .en side for TRG, split into train/validation/test.
train_data, valid_data, test_data = Multi30k.splits(
    exts=(".de", ".en"),
    fields=(SRC, TRG),
)
# ----------------------------------------------------------------------------
# Now that we've defined ``train_data``, we can see an extremely useful
# feature of ``torchtext``'s ``Field``: the ``build_vocab`` method
# now allows us to create the vocabulary associated with each language.
# Build each language's vocabulary from the training split only; tokens
# appearing fewer than 2 times are mapped to the unknown token.
for language_field in (SRC, TRG):
    language_field.build_vocab(train_data, min_freq=2)
# ----------------------------------------------------------------------------
# Once these lines of code have been run, ``SRC.vocab.stoi`` will be a