1c0cd03712f95fb5966968abc2a39313175594c6,src/fonduer/parser/preprocessors/html_doc_preprocessor.py,HTMLDocPreprocessor,parse_file,#HTMLDocPreprocessor#Any#Any#,13

Before Change


                    stable_id=stable_id,
                    text=str(text),
                    meta={"file_name": file_name},
                ), str(text)

    def _can_read(self, fpath):
        return fpath.lower().endswith("html")  // includes both .html and .xhtml

After Change


        with codecs.open(fp, encoding=self.encoding) as f:
            soup = BeautifulSoup(f, "lxml")
            all_html_elements = soup.find_all("html")
            if len(all_html_elements) != 1:
                raise NotImplementedError("Expecting one html element per html file")
            text = all_html_elements[0]
            name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
            stable_id = self.get_stable_id(name)
            yield Document(
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: HazyResearch/fonduer
Commit Name: 1c0cd03712f95fb5966968abc2a39313175594c6
Time: 2018-09-07
Author: jrausch@inf.ethz.ch
File Name: src/fonduer/parser/preprocessors/html_doc_preprocessor.py
Class Name: HTMLDocPreprocessor
Method Name: parse_file


Project Name: BindsNET/bindsnet
Commit Name: e600cf64a4a25aad39c142f92417a3d435b55c92
Time: 2019-06-03
Author: jessehagenaars@gmail.com
File Name: bindsnet/pipeline/base_pipeline.py
Class Name:
Method Name: recursive_to


Project Name: pyprob/pyprob
Commit Name: ba5939a016a6ee4e0d29d2f9f711e853d866eb7a
Time: 2019-02-23
Author: atilimgunes.baydin@gmail.com
File Name: pyprob/distributions/empirical.py
Class Name: Empirical
Method Name: copy