5d4a480c48a242c1d8051be502ecb98a9b273e9c,tests/candidates/test_candidates.py,,test_ngram_split,#Any#,33

Before Change


def test_ngram_split(caplog):
    """Test extracting candidates from mentions from documents."""
    caplog.set_level(logging.INFO)
    PARALLEL = 1
    max_docs = 1
    session = Meta.init("postgres://localhost:5432/" + DB).Session()

    docs_path = "tests/data/html/112823.html"
    pdf_path = "tests/data/pdf/112823.pdf"

    # Parsing
    logger.info("Parsing...")
    doc_preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
    corpus_parser = Parser(
        structural=True, lingual=True, visual=True, pdf_path=pdf_path
    )
    corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)
    assert session.query(Document).count() == max_docs
    assert session.query(Sentence).count() == 828
    sents = session.query(Sentence).all()
    # Doesn't matter which sentence we grab, since we are overwriting the

After Change



    assert len(result) == 2
    assert result[0].get_span() == "New-"
    assert result[1].get_span() == "New"

    # When a text starts with a split_token.
    sent.text = "-Text"
    sent.words = ["-Text"]

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances

Link

Project Name: HazyResearch/fonduer

Commit Name: 5d4a480c48a242c1d8051be502ecb98a9b273e9c

Time: 2018-08-21

Author: hiromu.hota@hal.hitachi.com

File Name: tests/candidates/test_candidates.py

Class Name:

Method Name: test_ngram_split

Link

Project Name: HazyResearch/fonduer

Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7

Time: 2020-02-14

Author: hiromu.hota@hal.hitachi.com

File Name: tests/candidates/test_candidates.py

Class Name:

Method Name: test_multimodal_cand

Link

Project Name: HazyResearch/fonduer

Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7

Time: 2020-02-14

Author: hiromu.hota@hal.hitachi.com

File Name: tests/candidates/test_candidates.py

Class Name:

Method Name: test_ngrams