e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_parse_style,#Any#,429

Before Change


    Test style tag parsing.
    caplog.set_level(logging.INFO)
    logger = logging.getLogger(__name__)
    session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()

    // SpaCy on Mac has an issue with parallel parsing
    if os.name == "posix":
        PARALLEL = 1
    else:
        PARALLEL = 2  // Travis only gives 2 cores

    max_docs = 1
    docs_path = "tests/data/html_extended/ext_diseases.html"
    pdf_path = "tests/data/pdf_extended/ext_diseases.pdf"

    // Preprocessor for the Docs
    preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)

    // Create a Parser and parse the HTML document
    parser = Parser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
    parser.apply(preprocessor, parallelism=PARALLEL)

    // Grab the document
    doc = session.query(Document).order_by(Document.name).all()[0]

    // Grab the sentences parsed by the Parser
    sentences = list(session.query(Sentence).order_by(Sentence.position).all())

    logger.warning("Doc: {}".format(doc))
    for i, sentence in enumerate(sentences):
        logger.warning("    Sentence[{}]: {}".format(i, sentence.html_attrs))

After Change


    parser_udf = get_parser_udf(
        structural=True, lingual=True, visual=True, pdf_path=pdf_path
    )
    for _ in parser_udf.apply((doc, text)):
        pass

    // Grab the sentences parsed by the Parser
    sentences = doc.sentences

    logger.warning("Doc: {}".format(doc))
    for i, sentence in enumerate(sentences):
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 15

Instances


Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_style


Project Name: HazyResearch/fonduer
Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/candidates/test_candidates.py
Class Name:
Method Name: test_ngrams


Project Name: HazyResearch/fonduer
Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/candidates/test_candidates.py
Class Name:
Method Name: test_mention_longest_match