30703a7036f9bfd58ba23c2872887f1820ef928e,pudl/outputs.py,,frc_eia923_df,#Any#,200

Before Change


    // we only have the 860 data integrated for 2011 forward right now.
    plants_eia860 = pd.read_sql(
        """SELECT * FROM plants_eia860""", pudl_engine)
    plants_eia860 = plants_eia860[["year", "plant_id", "operator_id"]]

    // For the PUDL Utility & Plant IDs, as well as utility & plant names:
    utils_eia = pd.read_sql("""SELECT * FROM utilities_eia""", pudl_engine)

After Change



    // Need to re-integrate the MSHA coalmine info:
    cmi_tbl = pt["coalmine_info_eia923"]
    cmi_select = sa.sql.select([cmi_tbl, ])
    cmi_df = pd.read_sql(cmi_select, pudl_engine)

    out_df = pd.merge(frc_df, cmi_df,
                      how="left",
                      left_on="coalmine_id",
                      right_on="id")
    pu_eia = plants_utils_eia_df(pudl_engine)
    out_df = pd.merge(out_df, pu_eia, how="left", on=["plant_id", "year"])

    // Sadly b/c we're depending on 860 for Operator/Plant mapping,
    // we only get 2011 and later
    out_df = out_df.dropna(subset=["operator_id", "operator_name"])
    cols_to_drop = ["fuel_receipt_id",
                    "coalmine_id",
                    "id",
                    "year"]
    out_df = out_df.drop(cols_to_drop, axis=1)

    // Calculate a few totals that are commonly needed:
    out_df["total_heat_content_mmbtu"] = \
        out_df["average_heat_content"] * out_df["fuel_quantity"]
    out_df["total_fuel_cost"] = \
        out_df["total_heat_content_mmbtu"] * out_df["fuel_cost_per_mmbtu"]

    // There are a couple of bad rows with no specified fuel.
    out_df = out_df.dropna(subset=["fuel_group"])
    // Add a simplified fuel category (this should really happen at ingest)
    out_df["fuel_pudl"] = out_df.fuel_group.replace(
        to_replace=["Petroleum", "Natural Gas", "Other Gas", "Coal",
                    "Petroleum Coke"],
        value=["oil", "gas", "gas", "coal", "petcoke"])

    // Clean up the types of a few columns...
    out_df["plant_id"] = out_df.plant_id.astype(int)
    out_df["plant_id_pudl"] = out_df.plant_id_pudl.astype(int)
    out_df["operator_id"] = out_df.operator_id.astype(int)
    out_df["util_id_pudl"] = out_df.util_id_pudl.astype(int)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: catalyst-cooperative/pudl
Commit Name: 30703a7036f9bfd58ba23c2872887f1820ef928e
Time: 2017-09-25
Author: zane.selvans@catalyst.coop
File Name: pudl/outputs.py
Class Name:
Method Name: frc_eia923_df


Project Name: sahana/eden
Commit Name: ef2c9d1e1c83223c279faa8e2b1fe2df5219c7f9
Time: 2019-08-18
Author: fran@aidiq.com
File Name: modules/s3db/dc.py
Class Name: DataCollectionTemplateModel
Method Name: dc_question_onaccept


Project Name: sahana/eden
Commit Name: 653f76aa574cb2dddb9053eef0f8d815156cb168
Time: 2019-09-27
Author: fran@aidiq.com
File Name: modules/templates/CCC/controllers.py
Class Name: register
Method Name: __call__