mcoe_out = mcoe_out[mcoe_out.heat_rate_mmbtu_mwh >= min_heat_rate]
if min_fuel_cost_per_mwh is not None:
mcoe_out = mcoe_out[mcoe_out.fuel_cost_per_mwh > min_fuel_cost_per_mwh]
if min_cap_fact is not None:
mcoe_out = mcoe_out[mcoe_out.capacity_factor >= min_cap_fact]
if max_cap_fact is not None:
mcoe_out = mcoe_out[mcoe_out.capacity_factor <= max_cap_fact]
return mcoe_out
After Change
if x in pudl_out.fuel_cost().columns and x not in merge_cols]
# Combine the fuel cost table (with drop_cols removed) and the generators
# table. "report_date" is left out of the join keys; presumably
# merge_on_date_year aligns the dates itself -- confirm in pudl.helpers.
# NOTE(review): this was described as starting from the generators table so
# that all of the generators are present, but the call passes how="inner";
# if that has the usual merge meaning, unmatched generators are not kept.
mcoe_out = pudl.helpers.merge_on_date_year(
    pudl_out.fuel_cost().drop(drop_cols, axis=1),
    pudl_out.gens_eia860(),
    on=[col for col in merge_cols if col != "report_date"],
    how="inner",
)
# Bring together the fuel cost and capacity factor dataframes, which
# also include heat rate information.
# Only the three key columns plus capacity_factor and net_generation_mwh are
# taken from the capacity factor table, and the outer join keeps records that
# appear on either side.
mcoe_out = pd.merge(
    mcoe_out,
    pudl_out.capacity_factor(
        min_cap_fact=min_cap_fact,
        max_cap_fact=max_cap_fact,
    )[
        [
            "report_date",
            "plant_id_eia",
            "generator_id",
            "capacity_factor",
            "net_generation_mwh",
        ]
    ],
    on=["report_date", "plant_id_eia", "generator_id"],
    how="outer",
)
# Bring the PUDL Unit IDs into the output dataframe so we can see how
# the generators are really grouped.
# Repeated rows in the selected boiler-generator association columns are
# removed before the merge, which is requested as a left join on plant and
# generator id.
mcoe_out = pudl.helpers.merge_on_date_year(
    mcoe_out,
    pudl_out.bga()[
        ["report_date", "plant_id_eia", "unit_id_pudl", "generator_id"]
    ].drop_duplicates(),
    how="left",
    on=["plant_id_eia", "generator_id"],
)
# Instead of getting the total MMBTU through this multiplication... we
# could also calculate the total fuel consumed on a per-unit basis, from
# the boiler_fuel table, and then determine what proportion should be
# distributed to each generator based on its heat-rate and net generation.
mcoe_out["total_mmbtu"] = (
    mcoe_out.net_generation_mwh * mcoe_out.heat_rate_mmbtu_mwh
)
# Total fuel cost follows from the total heat input just computed.
mcoe_out["total_fuel_cost"] = (
    mcoe_out.total_mmbtu * mcoe_out.fuel_cost_per_mmbtu
)
# Identifier columns handed to organize_cols (presumably so that they lead
# the output -- confirm in pudl.helpers), followed by a sort on plant, unit,
# generator and report date.
leading_columns = [
    "report_date",
    "plant_id_eia",
    "plant_id_pudl",
    "unit_id_pudl",
    "generator_id",
    "plant_name_eia",
    "utility_id_eia",
    "utility_id_pudl",
    "utility_name_eia",
]
sort_keys = ["plant_id_eia", "unit_id_pudl", "generator_id", "report_date"]
mcoe_out = pudl.helpers.organize_cols(mcoe_out, leading_columns)
mcoe_out = mcoe_out.sort_values(by=sort_keys)
// Filter the output based on the range of validity supplied by the user:
mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ["heat_rate_mmbtu_mwh"],
lb=min_heat_rate, ub=None)
mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ["fuel_cost_per_mwh"],
lb=min_fuel_cost_per_mwh, ub=None)
mcoe_out = pudl.helpers.oob_to_nan(mcoe_out, ["capacity_factor"],
lb=min_cap_fact, ub=max_cap_fact)
return mcoe_out