5d4a480c48a242c1d8051be502ecb98a9b273e9c,tests/candidates/test_candidates.py,,test_ngram_split,#Any#,33
Before Change
pdf_path = "tests/data/pdf/112823.pdf"
// Parsing
logger.info("Parsing...")
doc_preprocessor = HTMLDocPreprocessor(docs_path, max_docs=max_docs)
corpus_parser = Parser(
structural=True, lingual=True, visual=True, pdf_path=pdf_path
)
After Change
// When more than one split_token appears.
sent.text = "New/Text-Word"
sent.words = ["New/Text-Word"]
result = list(ngrams.apply(sent))
assert len(result) == 3
assert result[0].get_span() == "New/Text-Word"
assert result[1].get_span() == "New"
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 2
Instances
Project Name: HazyResearch/fonduer
Commit Name: 5d4a480c48a242c1d8051be502ecb98a9b273e9c
Time: 2018-08-21
Author: hiromu.hota@hal.hitachi.com
File Name: tests/candidates/test_candidates.py
Class Name:
Method Name: test_ngram_split
Project Name: NifTK/NiftyNet
Commit Name: 61a0663a45beb2e59f6a549c51d55940c80734e3
Time: 2018-03-05
Author: rmaplue@ucl.ac.uk
File Name: niftynet/io/image_as_nibabel.py
Class Name:
Method Name:
Project Name: merenlab/anvio
Commit Name: 5befe65a4be3a5a728696777cd8a6a71e3b7ac19
Time: 2019-08-01
Author: quentin.clayssen@gmail.com
File Name: anvio/taxoestimation.py
Class Name: SCGsdiamond
Method Name: predict_from_SCGs_dict_multiseq