e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_parse_md_paragraphs,#Any#,178

Before Change


def test_parse_md_paragraphs(caplog):
    """Unit test of Paragraph parsing.

    Parses the ``md_para`` HTML document (with its companion PDF) through the
    full ``Parser`` pipeline, then checks the stored document's name and the
    number of figures it contains.

    :param caplog: pytest log-capture fixture; its level is set to INFO.
    """
    caplog.set_level(logging.INFO)
    # NOTE(review): ATTRIBUTE is presumably the test database name defined at
    # module level -- confirm against the top of the file.
    session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()

    PARALLEL = 1
    max_docs = 1
    docs_path = "tests/data/html_simple/md_para.html"
    pdf_path = "tests/data/pdf_simple/md_para.pdf"

    # Preprocessor for the Docs
    preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

    # Create a Parser and parse the md document
    parser = Parser(
        structural=True, tabular=True, lingual=True, visual=True, pdf_path=pdf_path
    )
    parser.apply(preprocessor, parallelism=PARALLEL)

    # Grab the document (first one when ordered by name)
    doc = session.query(Document).order_by(Document.name).all()[0]
    assert doc.name == "md_para"

    # Check that doc has the expected number of figures
    assert len(doc.figures) == 6

After Change



    # Preprocessor for the Docs
    preprocessor = HTMLDocPreprocessor(docs_path)
    # Take the first (doc, text) pair yielded for the file; no Parser run or
    # database query is involved in this version of the snippet.
    doc, text = next(preprocessor.parse_file(docs_path, "md_para"))

    # Check that doc has a name
    assert doc.name == "md_para"
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 13

Instances


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_md_paragraphs


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_simple_tokenizer


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_document_diseases