2c4a6e537126f4123de7c97f30587310d3712c06,tests/data/tokenizers/character_tokenizer_test.py,TestCharacterTokenizer,test_splits_into_characters,#TestCharacterTokenizer#,7

Before Change


    def test_splits_into_characters(self):
        tokenizer = CharacterTokenizer(start_tokens=["<S1>", "<S2>"], end_tokens=["</S2>", "</S1>"])
        sentence = "A, small sentence."
        tokens, _ = tokenizer.tokenize(sentence)
        expected_tokens = ["<S1>", "<S2>", "A", ",", " ", "s", "m", "a", "l", "l", " ", "s", "e",
                           "n", "t", "e", "n", "c", "e", ".", "</S2>", "</S1>"]
        assert tokens == expected_tokens

After Change


    def test_splits_into_characters(self):
        tokenizer = CharacterTokenizer(start_tokens=["<S1>", "<S2>"], end_tokens=["</S2>", "</S1>"])
        sentence = "A, small sentence."
        tokens = [t.text for t in tokenizer.tokenize(sentence)]
        expected_tokens = ["<S1>", "<S2>", "A", ",", " ", "s", "m", "a", "l", "l", " ", "s", "e",
                           "n", "t", "e", "n", "c", "e", ".", "</S2>", "</S1>"]
        assert tokens == expected_tokens
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 8

Instances


Project Name: allenai/allennlp
Commit Name: 2c4a6e537126f4123de7c97f30587310d3712c06
Time: 2017-09-13
Author: mattg@allenai.org
File Name: tests/data/tokenizers/character_tokenizer_test.py
Class Name: TestCharacterTokenizer
Method Name: test_splits_into_characters


Project Name: allenai/allennlp
Commit Name: 2c4a6e537126f4123de7c97f30587310d3712c06
Time: 2017-09-13
Author: mattg@allenai.org
File Name: tests/data/tokenizers/word_tokenizer_test.py
Class Name: TestWordTokenizer
Method Name: test_stems_and_filters_correctly


Project Name: allenai/allennlp
Commit Name: 2c4a6e537126f4123de7c97f30587310d3712c06
Time: 2017-09-13
Author: mattg@allenai.org
File Name: tests/data/tokenizers/word_tokenizer_test.py
Class Name: TestWordTokenizer
Method Name: test_passes_through_correctly


Project Name: allenai/allennlp
Commit Name: 2c4a6e537126f4123de7c97f30587310d3712c06
Time: 2017-09-13
Author: mattg@allenai.org
File Name: tests/data/tokenizers/character_tokenizer_test.py
Class Name: TestCharacterTokenizer
Method Name: test_handles_byte_encoding