e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_parse_md_paragraphs,#Any#,178
Before Change
def test_parse_md_paragraphs(caplog):
Unit test of Paragraph parsing.
caplog.set_level(logging.INFO)
session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()
PARALLEL = 1
max_docs = 1
docs_path = "tests/data/html_simple/md_para.html"
pdf_path = "tests/data/pdf_simple/md_para.pdf"
// Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
// Create a Parser and parse the md document
parser = Parser(
structural=True, tabular=True, lingual=True, visual=True, pdf_path=pdf_path
)
parser.apply(preprocessor, parallelism=PARALLEL)
// Grab the document
doc = session.query(Document).order_by(Document.name).all()[0]
assert doc.name == "md_para"
// Check that doc has a figure
assert len(doc.figures) == 6
After Change
// Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path)
doc, text = next(preprocessor.parse_file(docs_path, "md_para"))
// Check that doc has a name
assert doc.name == "md_para"
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 13
Instances
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_md_paragraphs
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_simple_tokenizer
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_document_diseases