bf_df: a pandas dataframe.
pudl_engine = pudl.db_connect_pudl(testing=testing)
bf_eia923_tbl = pt["boiler_fuel_eia923"]
bf_eia923_select = sa.sql.select([bf_eia923_tbl, ])
if start_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date >= start_date
)
if end_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date <= end_date
)
bf_df = pd.read_sql(bf_eia923_select, pudl_engine)
// The total heat content is also useful in its own right, and we"ll keep it
// around. Also needed to calculate average heat content per unit of fuel.
bf_df["total_heat_content_mmbtu"] = bf_df["fuel_qty_consumed"] * \
bf_df["fuel_mmbtu_per_unit"]
// Create a date index for grouping based on freq
by = ["plant_id", "boiler_id", "fuel_type_simple"]
if freq is not None:
// In order to calculate the weighted average sulfur
// content and ash content we need to calculate these totals.
bf_df["total_sulfur_content"] = bf_df["fuel_qty_consumed"] * \
bf_df["sulfur_content_pct"]
bf_df["total_ash_content"] = bf_df["fuel_qty_consumed"] * \
bf_df["ash_content_pct"]
bf_df = bf_df.set_index(pd.DatetimeIndex(bf_df.report_date))
by = by + [pd.Grouper(freq=freq)]
bf_gb = bf_df.groupby(by=by)
// Sum up these totals within each group, and recalculate the per-unit
// values (weighted in this case by fuel_qty_consumed)
bf_df = bf_gb.agg({"total_heat_content_mmbtu": np.sum,
"fuel_qty_consumed": np.sum,
"total_sulfur_content": np.sum,
"total_ash_content": np.sum})
bf_df["fuel_mmbtu_per_unit"] = \
bf_df["total_heat_content_mmbtu"] / bf_df["fuel_qty_consumed"]
bf_df["sulfur_content_pct"] = \
bf_df["total_sulfur_content"] / bf_df["fuel_qty_consumed"]
bf_df["ash_content_pct"] = \
bf_df["total_ash_content"] / bf_df["fuel_qty_consumed"]
bf_df = bf_df.reset_index()
bf_df = bf_df.drop(["total_ash_content", "total_sulfur_content"],
axis=1)
// Grab some basic plant & utility information to add.
pu_eia = plants_utils_eia(start_date=start_date,
end_date=end_date,
testing=False)
out_df = analysis.merge_on_date_year(bf_df, pu_eia, on=["plant_id"])
if freq is None:
out_df = out_df.drop(["id"], axis=1)
out_df = out_df.dropna(subset=[
"plant_id",
"plant_id_pudl",
"operator_id",
"util_id_pudl",
"boiler_id",
])
first_cols = [
"report_date",
"plant_id",
"plant_id_pudl",
"plant_name",
"operator_id",
"util_id_pudl",
"operator_name",
"boiler_id",
]
// Re-arrange the columns for easier readability:
out_df = organize_cols(out_df, first_cols)
out_df["operator_id"] = out_df.operator_id.astype(int)
out_df["util_id_pudl"] = out_df.util_id_pudl.astype(int)
out_df["plant_id_pudl"] = out_df.plant_id_pudl.astype(int)
return(out_df)