5651a0d052bcfd160b187828aa3d8d90652929fe,spacy/tests/doc/test_span_merge.py,,test_span_np_merges,#Any#,69

Before Change


    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads)

    ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents]
    for start, end, label, lemma in ents:
        merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label)
        assert merged is not None, (start, end, label, lemma)

    text = "One test with entities like New York City so the ents list is not void"
    heads = [1, 11, -1, -1, -1, 1, 1, -3, 4, 2, 1, 1, 0, -1, -2]
    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads)

After Change


    heads = [1, 11, -1, -1, -1, 1, 1, -3, 4, 2, 1, 1, 0, -1, -2]
    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads)
    with doc.retokenize() as retokenizer:
        for ent in doc.ents:
            retokenizer.merge(ent)


def test_spans_entity_merge(en_tokenizer):
    # fmt: off
    text = "Stewart Lee is a stand up comedian who lives in England and loves Joe Pasquale.\n"
    heads = [1, 1, 0, 1, 2, -1, -4, 1, -2, -1, -1, -3, -10, 1, -2, -13, -1]
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 19

Instances


Project Name: explosion/spaCy
Commit Name: 5651a0d052bcfd160b187828aa3d8d90652929fe
Time: 2019-02-15
Author: ines@ines.io
File Name: spacy/tests/doc/test_span_merge.py
Class Name:
Method Name: test_span_np_merges


Project Name: explosion/spaCy
Commit Name: 5651a0d052bcfd160b187828aa3d8d90652929fe
Time: 2019-02-15
Author: ines@ines.io
File Name: spacy/pipeline/functions.py
Class Name:
Method Name: merge_noun_chunks


Project Name: explosion/spaCy
Commit Name: 5651a0d052bcfd160b187828aa3d8d90652929fe
Time: 2019-02-15
Author: ines@ines.io
File Name: spacy/pipeline/functions.py
Class Name:
Method Name: merge_entities