1c0cd03712f95fb5966968abc2a39313175594c6,src/fonduer/parser/preprocessors/html_doc_preprocessor.py,HTMLDocPreprocessor,parse_file,#HTMLDocPreprocessor#Any#Any#,13
Before Change
stable_id=stable_id,
text=str(text),
meta={"file_name": file_name},
), str(text)
def _can_read(self, fpath):
return fpath.lower().endswith("html") // includes both .html and .xhtml
After Change
with codecs.open(fp, encoding=self.encoding) as f:
soup = BeautifulSoup(f, "lxml")
all_html_elements = soup.find_all("html")
if len(all_html_elements) != 1:
raise NotImplementedError("Expecting one html element per html file")
text = all_html_elements[0]
name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
stable_id = self.get_stable_id(name)
yield Document(
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: HazyResearch/fonduer
Commit Name: 1c0cd03712f95fb5966968abc2a39313175594c6
Time: 2018-09-07
Author: jrausch@inf.ethz.ch
File Name: src/fonduer/parser/preprocessors/html_doc_preprocessor.py
Class Name: HTMLDocPreprocessor
Method Name: parse_file
Project Name: BindsNET/bindsnet
Commit Name: e600cf64a4a25aad39c142f92417a3d435b55c92
Time: 2019-06-03
Author: jessehagenaars@gmail.com
File Name: bindsnet/pipeline/base_pipeline.py
Class Name:
Method Name: recursive_to
Project Name: pyprob/pyprob
Commit Name: ba5939a016a6ee4e0d29d2f9f711e853d866eb7a
Time: 2019-02-23
Author: atilimgunes.baydin@gmail.com
File Name: pyprob/distributions/empirical.py
Class Name: Empirical
Method Name: copy