e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_simple_tokenizer,#Any#,267
Before Change
session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()
// SpaCy on Mac has issues with parallel parsing
if os.name == "posix":
PARALLEL = 1
else:
PARALLEL = 2 // Travis only gives 2 cores
max_docs = 2
docs_path = "tests/data/html_simple/"
pdf_path = "tests/data/pdf_simple/"
// Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
parser = Parser(structural=True, lingual=False, visual=True, pdf_path=pdf_path)
parser.apply(preprocessor, parallelism=PARALLEL)
doc = session.query(Document).order_by(Document.name).all()[1]
logger.info("Doc: {}".format(doc))
After Change
// Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path)
doc, text = next(preprocessor.parse_file(docs_path, "md"))
// Check that doc has a name
assert doc.name == "md"
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 11
Instances
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_simple_tokenizer
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_document_diseases
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_style