87ed32ebed9b436c4dc2f9142241547bafc5ca48,tests/parser/test_parser.py,,test_parser_skips_and_flattens,#,752
Before Change
// Test if a parser skips comments
doc = Document(id=1, name="test", stable_id="1::document:0:0")
doc.text = "<html><body>Hello!<!-- comment --></body></html>"
for _ in parser_udf.apply(doc):
pass
assert doc.sentences[0].text == "Hello!"
// Test if a parser skips blacklisted elements
doc = Document(id=2, name="test2", stable_id="2::document:0:0")
After Change
// Test if a parser skips comments
doc = Document(id=1, name="test", stable_id="1::document:0:0")
doc.text = "<html><body>Hello!<!-- comment --></body></html>"
doc = parser_udf.apply(doc)
assert doc.sentences[0].text == "Hello!"
// Test if a parser skips blacklisted elements
doc = Document(id=2, name="test2", stable_id="2::document:0:0")
In pattern: SUPERPATTERN
Frequency: 19
Non-data size: 5
Instances Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parser_skips_and_flattens
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_csv_doc_preprocessor
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_document_diseases
Project Name: HazyResearch/fonduer
Commit Name: 506a594e8e5fd755bc02ea2c5fe6437b8b682057
Time: 2018-08-14
Author: lwhsiao@stanford.edu
File Name: fonduer/utils/udf.py
Class Name: UDFRunner
Method Name: apply_st
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_text_doc_preprocessor
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_wo_tabular
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_spacy_japanese
Project Name: HazyResearch/fonduer
Commit Name: 506a594e8e5fd755bc02ea2c5fe6437b8b682057
Time: 2018-08-14
Author: lwhsiao@stanford.edu
File Name: fonduer/utils/udf.py
Class Name: UDF
Method Name: run
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_simple_parser
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_warning_on_missing_pdf
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_spacy_chinese
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_spacy_german
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_warning_on_incorrect_filename
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_md_details
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_table_span
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_multi_sections
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_md_paragraphs
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_parse_style
Project Name: HazyResearch/fonduer
Commit Name: 87ed32ebed9b436c4dc2f9142241547bafc5ca48
Time: 2020-02-14
Author: hiromu.hota@hal.hitachi.com
File Name: tests/parser/test_parser.py
Class Name:
Method Name: test_tsv_doc_preprocessor