# NOTE: this would not be the case if multiple context spans make up the same token
if char_loc == -1:
tokenized_context.append(default_context)
elif token in ["\n"]:
tokenized_context.append(context_by_char_loc[current_char_loc][1])
else:
if char_loc > context_by_char_loc[current_char_loc][0]:
# After Change  (diff-tool artifact — not original source; safe to remove)
# TODO: this is a workaround that has no guarantees of being correct
raise ValueError("Context cannot be fully matched as it appears to not cover the end of the sequence for token {}".format(token))
if token.strip() not in context_by_char_loc[current_char_loc][2]:
warnings.warn("subtoken: {} has matched up with the context for token: {}".format(repr(token), repr(context_by_char_loc[current_char_loc][2])))
tokenized_context.append(context_by_char_loc[current_char_loc][1])
assert len(tokenized_context) == len(encoded_output.token_ends)