# Two sample utterances used as tensorizer input.
s1 = "I want some coffee"
s2 = "Turn it up"
# Wrap each utterance in types.Text under the "text" column.
rows = [{"text": types.Text(sentence)} for sentence in (s1, s2)]
# Expected values are the Unicode code points of every character.
expected = [list(map(ord, sentence)) for sentence in (s1, s2)]
# Lazy: numberize() runs only when `tensors` is iterated.
tensors = (tensorizer.numberize(sample) for sample in rows)
After Change
# Inputs cover plain ASCII text and multi-byte (non-ASCII) text.
s1 = "I want some coffee"
s2 = "Turn it up"
s3 = "我不会说中文"
rows = [{"text": s1}, {"text": s2}, {"text": s3}]
# Expected values are the raw UTF-8 byte values of each string.
expected = [list(text.encode("utf-8")) for text in (s1, s2, s3)]
tensors = [tensorizer.numberize(row) for row in rows]
# Each numberized row is checked against a (byte values, length) pair.
# The loop variable deliberately avoids the name `bytes`, which would
# shadow the builtin type.
self.assertEqual(
    [(byte_values, len(byte_values)) for byte_values in expected],
    tensors,
)
def test_create_word_character_tensors(self):
tensorizer = WordCharacterTensorizer(text_column="text")