f34e4fbad1b40627dfdc92c6eaf56969cba77c06,test/data/test_builtin_datasets.py,TestDataset,test_penntreebank,#TestDataset#,35
Before Change
@slow
def test_penntreebank(self):
// Smoke test to ensure the Penn Treebank dataset works properly
TEXT = data.Field(lower=True, batch_first=True)
ds = PennTreebank
train, valid, test = ds.splits(TEXT)
TEXT.build_vocab(train)
train_iter, valid_iter, test_iter = data.BPTTIterator.splits(
(train, valid, test), batch_size=3, bptt_len=30)
train_iter, valid_iter, test_iter = ds.iters(batch_size=4,
After Change
self.assertEqual(len(test_dataset), 82114)
self.assertEqual(len(valid_dataset), 73339)
vocab = train_dataset.get_vocab()
tokens_ids = [vocab[token] for token in "the player characters rest".split()]
self.assertEqual(tokens_ids, [2, 2550, 3344, 1125])
// Delete the dataset after we're done to save disk space on CI
datafile = os.path.join(self.project_root, ".data", "ptb.train.txt")
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 9
Instances
Project Name: pytorch/text
Commit Name: f34e4fbad1b40627dfdc92c6eaf56969cba77c06
Time: 2019-11-25
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: test/data/test_builtin_datasets.py
Class Name: TestDataset
Method Name: test_penntreebank
Project Name: pytorch/tutorials
Commit Name: 133e5b6a84e126cbcfbc5c11eeb6286185dbec2f
Time: 2020-12-02
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: beginner_source/torchtext_translation_tutorial.py
Class Name:
Method Name:
Project Name: pytorch/text
Commit Name: f34e4fbad1b40627dfdc92c6eaf56969cba77c06
Time: 2019-11-25
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: test/data/test_builtin_datasets.py
Class Name: TestDataset
Method Name: test_wikitext2