# All generators from the Boiler Generator Association table (860).
# Keep one row per unique (plant_id_eia, plant_id_pudl, generator_id)
# combination; drop_duplicates keeps the first occurrence, so any other
# columns carry the values from that first row only.
gens8 = bga8.drop_duplicates(
    subset=["plant_id_eia", "plant_id_pudl", "generator_id"])
# All generators from the generation table (923)
gens9 = g9_summed.drop_duplicates(
subset=["plant_id_eia", "plant_id_pudl", "generator_id",
After Change
# Find all the generator records that were ever missing a boiler, i.e. the
# rows whose "boiler_generator_assn" flag is False.
# NOTE(review): assumes "boiler_generator_assn" is a boolean column, so that
# `~` is a logical NOT -- confirm against where `gens` is built.
unassociated_generators = gens[~gens["boiler_generator_assn"]]

# Create a list of plants with unassociated generators, by year: keep one row
# per (id_col, report_date), drop the generator-level columns, and tag those
# plant-years as being unassociated.
# NOTE(review): de-duplication is keyed on id_col, but the merge below is
# keyed on both plant_id_eia and plant_id_pudl -- confirm that this is the
# intended behavior when one id_col value spans several values of the other
# plant ID.
unassociated_plants = (
    unassociated_generators
    .drop_duplicates(subset=[id_col, "report_date"])
    .drop(["generator_id", "boiler_id", "boiler_generator_assn"], axis=1)
    .assign(plant_assn=False)
)

# Merge the plant association flag back in to the generators. This is a left
# merge, so every generator row is kept; rows belonging to a plant-year with
# no unassociated generators get NaN in "plant_assn".
gens = pd.merge(
    gens,
    unassociated_plants,
    how="left",
    on=["plant_id_eia", "plant_id_pudl", "report_date"],
)

# Tag the rest of the generators as being part of a plant association...
# This may or may not be true. Need to filter out partially associated
# plants in the next step.
# The merge leaves a mix of False and NaN (object dtype), so after filling,
# cast to bool to get a genuine boolean column that is safe to use as a mask.
gens["plant_assn"] = gens["plant_assn"].fillna(value=True).astype(bool)

# Using the associated plants, extract the generator/boiler combos
# that represent complete plants at any time to preserve
# associations (i.e. if a coal plant had its boilers and generators
# fully associated in the bga table in 2011 and then adds a
# combined cycle plant the coal boiler/gen combo will be saved).

# Remove the report_date:
gens_complete = gens.drop("report_date", axis=1)

# Select only those generators tagged as being part of a complete plant:
gens_complete = gens_complete[gens_complete["plant_assn"]]
gens_complete = gens_complete.drop_duplicates(subset=["plant_id_eia",