30703a7036f9bfd58ba23c2872887f1820ef928e,pudl/outputs.py,,frc_eia923_df,#Any#,200

Before Change


    out_df = pd.merge(out_df, plants_eia860,
                      how="left",
                      on=["plant_id", "year"])
    out_df = pd.merge(out_df, utils_eia, how="left", on="operator_id")
    out_df = pd.merge(out_df, plants_eia, how="left", on="plant_id")

    // Sadly b/c we"re depending on 860 for Operator/Plant mapping,
    // we only get 2011 and later
    out_df = out_df.dropna(subset=["operator_id", "operator_name"])

After Change


    pt = models.PUDLBase.metadata.tables
    // Most of the fields we want come direclty from Fuel Receipts & Costs
    frc_tbl = pt["fuel_receipts_costs_eia923"]
    frc_select = sa.sql.select([frc_tbl, ])
    frc_df = pd.read_sql(frc_select, pudl_engine)

    // Need a year column to merge with EIA860 data which is annual.
    frc_df["year"] = pd.to_datetime(frc_df["report_date"]).dt.year

    // Need to re-integrate the MSHA coalmine info:
    cmi_tbl = pt["coalmine_info_eia923"]
    cmi_select = sa.sql.select([cmi_tbl, ])
    cmi_df = pd.read_sql(cmi_select, pudl_engine)

    out_df = pd.merge(frc_df, cmi_df,
                      how="left",
                      left_on="coalmine_id",
                      right_on="id")
    pu_eia = plants_utils_eia_df(pudl_engine)
    out_df = pd.merge(out_df, pu_eia, how="left", on=["plant_id", "year"])

    // Sadly b/c we"re depending on 860 for Operator/Plant mapping,
    // we only get 2011 and later
    out_df = out_df.dropna(subset=["operator_id", "operator_name"])
    cols_to_drop = ["fuel_receipt_id",
                    "coalmine_id",
                    "id",
                    "year"]
    out_df = out_df.drop(cols_to_drop, axis=1)

    // Calculate a few totals that are commonly needed:
    out_df["total_heat_content_mmbtu"] = \
        out_df["average_heat_content"] * out_df["fuel_quantity"]
    out_df["total_fuel_cost"] = \
        out_df["total_heat_content_mmbtu"] * out_df["fuel_cost_per_mmbtu"]

    // There are a couple of bad rows with no specified fuel.
    out_df = out_df.dropna(subset=["fuel_group"])
    // Add a simplified fuel category (this should really happen at ingest)
    out_df["fuel_pudl"] = out_df.fuel_group.replace(
        to_replace=["Petroleum", "Natural Gas", "Other Gas", "Coal",
                    "Petroleum Coke"],
        value=["oil", "gas", "gas", "coal", "petcoke"])

    // Clean up the types of a few columns...
    out_df["plant_id"] = out_df.plant_id.astype(int)
    out_df["plant_id_pudl"] = out_df.plant_id_pudl.astype(int)
    out_df["operator_id"] = out_df.operator_id.astype(int)
    out_df["util_id_pudl"] = out_df.util_id_pudl.astype(int)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: catalyst-cooperative/pudl
Commit Name: 30703a7036f9bfd58ba23c2872887f1820ef928e
Time: 2017-09-25
Author: zane.selvans@catalyst.coop
File Name: pudl/outputs.py
Class Name:
Method Name: frc_eia923_df


Project Name: J535D165/recordlinkage
Commit Name: 4a24e4e36ee175aef54b92eb03e08a2be7811a96
Time: 2018-03-10
Author: jonathandebruinhome@gmail.com
File Name: recordlinkage/datasets/febrl.py
Class Name:
Method Name: _febrl_links


Project Name: Ghadjeres/DeepBach
Commit Name: a8d39d5144c12bbe6e1fe95cd11726e8edd2cb9d
Time: 2017-07-12
Author: bohumir.zamecnik@gmail.com
File Name: models_zoo.py
Class Name:
Method Name: deepBach