133e5b6a84e126cbcfbc5c11eeb6286185dbec2f,beginner_source/torchtext_translation_tutorial.py,,,#,58
Before Change
eos_token = "<eos>",
lower = True)
# Call Multi30k.splits with the ".de"/".en" extensions and the (SRC, TRG)
# fields, unpacking its three results into train, validation and test data.
# SRC and TRG are defined before this excerpt (their definitions are not
# fully visible here).
train_data, valid_data, test_data = Multi30k.splits(exts = (".de", ".en"),
fields = (SRC, TRG))
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now that we've defined ``train_data``, we can see an extremely useful
// feature of ``torchtext``'s ``Field``: the ``build_vocab`` method
After Change
from torchtext.utils import download_from_url, extract_archive
import io
# Base URL that every archive name below is appended to; it points at the
# task1/raw directory of the multi30k/dataset repository on GitHub.
url_base = "https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/"
# Archive names per split, each pair ordered as (".de" file, ".en" file).
train_urls = ("train.de.gz", "train.en.gz")
val_urls = ("val.de.gz", "val.en.gz")
test_urls = ("test_2016_flickr.de.gz", "test_2016_flickr.en.gz")
# For each archive name: download url_base + name, pass the download result
# to extract_archive, and keep element [0] of what it returns. Each list
# therefore has one entry per name, in the same (".de", ".en") order.
# NOTE(review): presumably extract_archive returns a list of extracted file
# paths and [0] is the single file inside each .gz - confirm against the
# torchtext.utils documentation.
train_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in train_urls]
val_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in val_urls]
test_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in test_urls]
# Build a tokenizer via get_tokenizer("spacy", language="de").
# NOTE(review): get_tokenizer is not imported in the visible excerpt; it must
# be brought into scope elsewhere in the file.
de_tokenizer = get_tokenizer("spacy", language="de")
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances Project Name: pytorch/tutorials
Commit Name: 133e5b6a84e126cbcfbc5c11eeb6286185dbec2f
Time: 2020-12-02
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: beginner_source/torchtext_translation_tutorial.py
Class Name:
Method Name:
Project Name: pytorch/text
Commit Name: f34e4fbad1b40627dfdc92c6eaf56969cba77c06
Time: 2019-11-25
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: test/data/test_builtin_datasets.py
Class Name: TestDataset
Method Name: test_penntreebank
Project Name: pytorch/text
Commit Name: f34e4fbad1b40627dfdc92c6eaf56969cba77c06
Time: 2019-11-25
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: test/data/test_builtin_datasets.py
Class Name: TestDataset
Method Name: test_wikitext2