Before Change

def span_tokenize(self, text):
    text = normalize_text(text)
    return [m.span() for m in self._regexp.finditer(text)]

def add_to_dict(self, tokens):
    """Builds dictionary from the list of provided tokens."""
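For reference, the before version can be exercised standalone. normalize_text and self._regexp are not defined in this snippet, so the stand-ins below (a strip-only normalizer and a \S+ pattern) are assumptions, not the project's actual definitions:

import re

_regexp = re.compile(r"\S+")  # assumption: real pattern not shown in the snippet

def normalize_text(text):
    # assumption: stand-in for the undefined normalize_text helper
    return text.strip()

def span_tokenize(text):
    # m.span() gives the (start, end) character offsets of each regex match,
    # measured against the normalized copy of the text
    text = normalize_text(text)
    return [m.span() for m in _regexp.finditer(text)]

print(span_tokenize("Hello, world!"))
# [(0, 6), (7, 13)]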
After Change
def span_tokenize(self, text):
    tokens = NLP.tokenizer(text)
    return [(t.idx, t.idx + len(t.text)) for t in tokens]

def add_to_dict(self, tokens):
    """Builds dictionary from the list of provided tokens."""
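A minimal runnable sketch of the after version, assuming NLP is a loaded spaCy pipeline (a blank English pipeline stands in here); NLP.tokenizer(text) yields Token objects whose .idx is the character offset into the original input:

import spacy

NLP = spacy.blank("en")  # assumption: NLP is a loaded spaCy pipeline

def span_tokenize(text):
    # Each spaCy Token carries .idx, its character offset in the input,
    # so (t.idx, t.idx + len(t.text)) is the token's character span
    tokens = NLP.tokenizer(text)
    return [(t.idx, t.idx + len(t.text)) for t in tokens]

print(span_tokenize("Hello, world!"))
# [(0, 5), (5, 6), (7, 12), (12, 13)]

Unlike the before version, whose spans index into the normalized copy of the text, these offsets point directly into the caller's original string.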