# Combine the entries of `arrays` into one ArrayEncodedOutput: start from the
# first entry's fields, then overwrite the per-entry fields with batched ones.
# Assumes the entries are namedtuple-like (they expose _asdict()) — TODO confirm.
kwargs = arrays[0]._asdict()
# `tokens` stays a plain Python list with one element per entry.
kwargs["tokens"] = [arr.tokens for arr in arrays]
# np.stack joins along a new leading axis (axis 0) and raises ValueError
# unless every entry's array has the same shape.
kwargs["token_ids"] = np.stack([arr.token_ids for arr in arrays], 0)
kwargs["mask"] = np.stack([arr.mask for arr in arrays], 0)
# Any field not overwritten above keeps the value taken from arrays[0].
yield ArrayEncodedOutput(**kwargs)
def text_to_tokens_mask(self, pair, Y=None, context=None):
out_gen = self._text_to_ids(pair, pad_token=self.config.pad_token)
After Change
# Encode `pair` through the parent class's _text_to_ids and keep only the
# first item that iterator produces.
arrays.append(next(super()._text_to_ids(pair, Y=Y)))
# Seed the output fields from the first entry; fields not overwritten below
# keep the value taken from arrays[0].
# Assumes the entries are namedtuple-like (they expose _asdict()) — TODO confirm.
kwargs = arrays[0]._asdict()
# NOTE(review): max_len is not read anywhere in the visible lines and is not
# passed to padded_stack — confirm whether it is used further down or is dead.
max_len = max([len(arr.token_ids) for arr in arrays])
# `tokens` stays a plain Python list with one element per entry.
kwargs["tokens"] = [arr.tokens for arr in arrays]
# padded_stack is defined outside this view; presumably it pads the entries to
# a common length before stacking them — verify against its definition.
kwargs["token_ids"] = padded_stack([arr.token_ids for arr in arrays])
kwargs["mask"] = padded_stack([arr.mask for arr in arrays])
yield ArrayEncodedOutput(**kwargs)