# End-to-end check: parse one HTML document, extract Person mentions,
# verify counts, then exercise mention deletion via the document relationship.
PARALLEL = 4  # number of worker processes for parsing / extraction
max_docs = 1
session = Meta.init(CONN_STRING).Session()
docs_path = "tests/data/pure_html/lincoln_short.html"
logger.info("Parsing...")
doc_preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
corpus_parser = Parser(session, structural=True, lingual=True)
corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
assert session.query(Document).count() == max_docs
assert session.query(Sentence).count() == 503
docs = session.query(Document).order_by(Document.name).all()
# Mention Extraction
Person = mention_subclass("Person")
person_ngrams = MentionNgrams(n_max=3)
person_matcher = PersonMatcher()
mention_extractor = MentionExtractor(
    session, [Person], [person_ngrams], [person_matcher]
)
mention_extractor.apply(docs, parallelism=PARALLEL)
assert session.query(Person).count() == 118
mentions = session.query(Person).all()
# Unigram mentions and the n_max=3 cap from MentionNgrams.
assert len([x for x in mentions if x.context.get_num_words() == 1]) == 49
assert len([x for x in mentions if x.context.get_num_words() > 3]) == 0
# Test that mentions can be removed one-by-one through the document's
# relationship collection. max_docs == 1, so docs[0] is the only document.
doc = docs[0]
# Iterate a copy (the [:] slice) so removal from the live list is safe.
for mention in doc.persons[:]:
    doc.persons.remove(mention)
assert len(doc.persons) == 0