pt = models.PUDLBase.metadata.tables
// Most of the fields we want come direclty from Fuel Receipts & Costs
frc_tbl = pt["fuel_receipts_costs_eia923"]
frc_select = sa.sql.select([frc_tbl, ])
frc_df = pd.read_sql(frc_select, pudl_engine)
// Need a year column to merge with EIA860 data which is annual.
frc_df["year"] = pd.to_datetime(frc_df["report_date"]).dt.year
// Need to re-integrate the MSHA coalmine info:
cmi_tbl = pt["coalmine_info_eia923"]
cmi_select = sa.sql.select([cmi_tbl, ])
cmi_df = pd.read_sql(cmi_select, pudl_engine)
out_df = pd.merge(frc_df, cmi_df,
how="left",
left_on="coalmine_id",
right_on="id")
pu_eia = plants_utils_eia_df(pudl_engine)
out_df = pd.merge(out_df, pu_eia, how="left", on=["plant_id", "year"])
// Sadly b/c we"re depending on 860 for Operator/Plant mapping,
// we only get 2011 and later
out_df = out_df.dropna(subset=["operator_id", "operator_name"])
cols_to_drop = ["fuel_receipt_id",
"coalmine_id",
"id",
"year"]
out_df = out_df.drop(cols_to_drop, axis=1)
// Calculate a few totals that are commonly needed:
out_df["total_heat_content_mmbtu"] = \
out_df["average_heat_content"] * out_df["fuel_quantity"]
out_df["total_fuel_cost"] = \
out_df["total_heat_content_mmbtu"] * out_df["fuel_cost_per_mmbtu"]
// There are a couple of bad rows with no specified fuel.
out_df = out_df.dropna(subset=["fuel_group"])
// Add a simplified fuel category (this should really happen at ingest)
out_df["fuel_pudl"] = out_df.fuel_group.replace(
to_replace=["Petroleum", "Natural Gas", "Other Gas", "Coal",
"Petroleum Coke"],
value=["oil", "gas", "gas", "coal", "petcoke"])
// Clean up the types of a few columns...
out_df["plant_id"] = out_df.plant_id.astype(int)
out_df["plant_id_pudl"] = out_df.plant_id_pudl.astype(int)
out_df["operator_id"] = out_df.operator_id.astype(int)
out_df["util_id_pudl"] = out_df.util_id_pudl.astype(int)