def test_wikitext2(self):
# Smoke test to ensure WikiText2 works properly.
ds = WikiText2
TEXT = data.Field(lower=True, batch_first=True)
train, valid, test = ds.splits(TEXT)
TEXT.build_vocab(train)
train_iter, valid_iter, test_iter = data.BPTTIterator.splits(
(train, valid, test), batch_size=3, bptt_len=30)
train_iter, valid_iter, test_iter = ds.iters(batch_size=4,
After Change
self.assertEqual(len(valid_dataset), 214417)
vocab = train_dataset.get_vocab()
tokens_ids = [vocab[token] for token in "the player characters rest".split()]
self.assertEqual(tokens_ids, [2, 286, 503, 700])
# Delete the dataset after we're done to save disk space on CI