# Tokenize the input text, then rebuild a Doc from the token texts together
# with the externally supplied dependency heads.
tokens = en_tokenizer(text)
words = [token.text for token in tokens]
doc = get_doc(tokens.vocab, words=words, heads=heads)

# Snapshot every entity as a (start, end, label, lemma) tuple up front, where
# start is the first token's `idx` and end is the last token's `idx` plus its
# length, so the merge loop below works from this list rather than from
# `doc.ents` directly.
ents = [
    (ent[0].idx, ent[-1].idx + len(ent[-1]), ent.label_, ent.lemma_)
    for ent in doc.ents
]

# Merge each recorded entity; the entity label is reused as both the tag and
# the entity type. The full tuple is attached to the assertion so a failing
# merge can be identified.
for start, end, label, lemma in ents:
    merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label)
    assert merged is not None, (start, end, label, lemma)
After Change
# Rebuild a Doc from the token texts and the externally supplied heads.
words = [token.text for token in tokens]
doc = get_doc(tokens.vocab, words=words, heads=heads)

# Before merging, the head of token 4 is the token at index 1.
assert doc[4].head.i == 1

# Attributes handed to the retokenizer for the merged span.
attrs = {"tag": "NP", "lemma": "tool", "ent_type": "O"}
with doc.retokenize() as retokenizer:
    # Merge the span covering tokens 2, 3 and 4.
    retokenizer.merge(doc[2:5], attrs=attrs)

# After the merge, the token at index 2 must have the token at index 1 as head.
assert doc[2].head.i == 1
# Longer example sentence bound to `text`; the code that consumes it lies
# beyond the visible part of the file.
text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript."