f967bd87424bbc50f475d5959994a5743ae2af0e,src/pudl/convert/epacems_to_parquet.py,,epacems_to_parquet,#Any#Any#Any#Any#Any#Any#Any#,168
Before Change
data_path = pathlib.Path(datapkg_dir, "data")
// double check that all of the years you are asking for are actually in the datapackage
_verify_cems_args(data_path, epacems_years, epacems_states)
for file in data_path.iterdir():
if "epacems" in file.name:
df_name = file.name[:file.name.find(".")]
year = int(df_name[25:29])
state = df_name[30:].upper()
// only convert the years and states that you actually want
if year in epacems_years and state in epacems_states:
df = pd.read_csv(
file, dtype=in_types, parse_dates=["operating_datetime_utc"]
).assign(year=year)
logger.info(
f"Converted {len(df)} records for {year} and {state}."
)
pq.write_to_dataset(
pa.Table.from_pandas(
df, preserve_index=False, schema=schema),
root_path=str(out_dir), partition_cols=list(partition_cols),
compression=compression)
def parse_command_line(argv):
Parse command line arguments. See the -h option.
After Change
// Verify that all the requested data files are present:
epacems_years = list(epacems_years)
epacems_years.sort()
epacems_states = list(epacems_states)
epacems_states.sort()
for year in epacems_years:
for state in epacems_states:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances Project Name: catalyst-cooperative/pudl
Commit Name: f967bd87424bbc50f475d5959994a5743ae2af0e
Time: 2019-12-28
Author: zane.selvans@catalyst.coop
File Name: src/pudl/convert/epacems_to_parquet.py
Class Name:
Method Name: epacems_to_parquet
Project Name: vatlab/SoS
Commit Name: c8788d2eedcdb2671289d7d47a41b8fdcb0294f1
Time: 2017-09-11
Author: ben.bog@gmail.com
File Name: src/sos/sos_executor.py
Class Name: Base_Executor
Method Name: resolve_dangling_targets
Project Name: vatlab/SoS
Commit Name: ff5bbede0431e5296811dc57432a33a9a68942d0
Time: 2016-09-22
Author: ben.bog@gmail.com
File Name: pysos/dag.py
Class Name: SoS_DAG
Method Name: build