e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_simple_tokenizer,#Any#,267

Before Change


    session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()

    // SpaCy on Mac has an issue with parallel parsing
    if os.name == "posix":
        PARALLEL = 1
    else:
        PARALLEL = 2  // Travis only gives 2 cores

    max_docs = 2
    docs_path = "tests/data/html_simple/"
    pdf_path = "tests/data/pdf_simple/"

    // Preprocessor for the Docs
    preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

    parser = Parser(structural=True, lingual=False, visual=True, pdf_path=pdf_path)
    parser.apply(preprocessor, parallelism=PARALLEL)

    doc = session.query(Document).order_by(Document.name).all()[1]

    logger.info("Doc: {}".format(doc))

After Change



    // Preprocessor for the Docs
    preprocessor = HTMLDocPreprocessor(docs_path)
    doc, text = next(preprocessor.parse_file(docs_path, "md"))

    // Check that doc has a name
    assert doc.name == "md"
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 11

Instances


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_simple_tokenizer


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_document_diseases


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_style