fc44c71d4aa2e8a5ef60effd4bfe7fb82dffbf17,src/pudl/extract/excel.py,GenericExtractor,load_excel_file,#GenericExtractor#Any#,243

Before Change


        else:
            logger.debug("Grabing new file.")

        if p.name != xlsx_filename:
            zf = zipfile.ZipFile(p)
            excel_file = pd.ExcelFile(zf.read(xlsx_filename))
        else:
            excel_file = pd.ExcelFile(p)
        self._file_cache[xlsx_filename] = excel_file
        return excel_file

    def excel_filename(self, page, **partition):

After Change


            pd.ExcelFile instance with the parsed excel spreadsheet frame
        
        xlsx_filename = self.excel_filename(page, **partition)
        if xlsx_filename not in self._file_cache:
            excel_file = None
            try:
                # eia860m exports its resources as raw xlsx files that are not
                # embedded in zip archives. To support this, we first try to
                # retrieve the resource directly. If that fails, we attempt to
                # open the zip archive and locate the xlsx file inside it.

                # TODO(rousik): if possible, it would be useful to normalize
                # eia860m by zipping its xlsx files. Then we could simplify this code.
                res = self.ds.get_unique_resource(
                    self._dataset_name, name=xlsx_filename)
                excel_file = pd.ExcelFile(res)
            except KeyError:
                zf = self.ds.get_zipfile_resource(self._dataset_name, **partition)
                excel_file = pd.ExcelFile(zf.read(xlsx_filename))
            finally:
                self._file_cache[xlsx_filename] = excel_file
        # TODO(rousik): this _file_cache could be replaced with a @cache or @memoize decorator
        return self._file_cache[xlsx_filename]

    def excel_filename(self, page, **partition):
        
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: catalyst-cooperative/pudl
Commit Name: fc44c71d4aa2e8a5ef60effd4bfe7fb82dffbf17
Time: 2021-01-06
Author: rousik@gmail.com
File Name: src/pudl/extract/excel.py
Class Name: GenericExtractor
Method Name: load_excel_file


Project Name: tensorflow/datasets
Commit Name: 9ea53fa7a5c1a3594a5a5019e299a34f38290851
Time: 2020-06-26
Author: sharanramjee@gmail.com
File Name: tensorflow_datasets/core/download/kaggle.py
Class Name: KaggleCompetitionDownloader
Method Name: download_competition


Project Name: tensorflow/datasets
Commit Name: db887bdd5236d8d34f0dd3625c7f1026e511b640
Time: 2020-04-16
Author: cs17btech11040@iith.ac.in
File Name: tensorflow_datasets/core/download/kaggle.py
Class Name: KaggleCompetitionDownloader
Method Name: download_file