e14c08dd732e73cbe2b3e249cba2632663abdd27,tests/parser/test_parser.py,,test_parse_style,#Any#,429
Before Change
Test style tag parsing.
caplog.set_level(logging.INFO)
logger = logging.getLogger(__name__)
session = Meta.init("postgres://localhost:5432/" + ATTRIBUTE).Session()
// SpaCy on mac has issues with parallel parsing
if os.name == "posix":
PARALLEL = 1
else:
PARALLEL = 2 // Travis only gives 2 cores
max_docs = 1
docs_path = "tests/data/html_extended/ext_diseases.html"
pdf_path = "tests/data/pdf_extended/ext_diseases.pdf"
// Preprocessor for the Docs
preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
// Create a Parser and parse the md document
parser = Parser(structural=True, lingual=True, visual=True, pdf_path=pdf_path)
parser.apply(preprocessor, parallelism=PARALLEL)
// Grab the document
doc = session.query(Document).order_by(Document.name).all()[0]
// Grab the sentences parsed by the Parser
sentences = list(session.query(Sentence).order_by(Sentence.position).all())
logger.warning("Doc: {}".format(doc))
for i, sentence in enumerate(sentences):
logger.warning(" Sentence[{}]: {}".format(i, sentence.html_attrs))
After Change
parser_udf = get_parser_udf(
structural=True, lingual=True, visual=True, pdf_path=pdf_path
)
for _ in parser_udf.apply((doc, text)):
pass
// Grab the sentences parsed by the Parser
sentences = doc.sentences
logger.warning("Doc: {}".format(doc))
for i, sentence in enumerate(sentences):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 15
Instances
Project Name: HazyResearch/fonduer
Commit Name: e14c08dd732e73cbe2b3e249cba2632663abdd27
Time: 2018-08-29
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_style
Project Name: HazyResearch/fonduer
Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/candidates/test_candidates.py
Class Name:
Method Name: test_ngrams
Project Name: HazyResearch/fonduer
Commit Name: 3252f2117a4b693ca001613b13c28cc2d8cd9eb7
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/candidates/test_candidates.py
Class Name:
Method Name: test_mention_longest_match