590028f40e74f82c3d00f0bc48b4cf415c97bfce,pudl/transform/ferc1.py,,fuel,#Any#Any#,428

Before Change


    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Standardize plant_name capitalization and remove leading/trailing white
    // space -- necessary b/c plant_name is part of many foreign keys.
    fuel_ferc1_df = pudl.helpers.strip_lower(fuel_ferc1_df, ["plant_name"])

    // Take the messy free-form fuel & fuel_unit fields, and do our best to
    // map them to some canonical categories... this is necessarily imperfect:
    fuel_ferc1_df.fuel = pudl.helpers.cleanstrings(fuel_ferc1_df.fuel,
                                                   pc.ferc1_fuel_strings,
                                                   unmapped="")

    fuel_ferc1_df.fuel_unit = \
        pudl.helpers.cleanstrings(fuel_ferc1_df.fuel_unit,
                                  pc.ferc1_fuel_unit_strings,
                                  unmapped="")

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // PERFORM UNIT CONVERSIONS ////////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    // Fuel cost per kWh is a per-unit value that doesn't make sense to report
    // for a single fuel that may be only a small part of the fuel consumed.
    fuel_ferc1_df.drop("fuel_cost_kwh", axis=1, inplace=True)

    // This is heat rate, but as it's based only on the heat content of a given
    // fuel which may only be a small portion of the overall fuel consumption,
    // it doesn't make any sense here.  Drop it.
    fuel_ferc1_df.drop("fuel_generaton", axis=1, inplace=True)

    // Convert from BTU/unit of fuel to 1e6 BTU/unit.
    fuel_ferc1_df["fuel_avg_mmbtu_per_unit"] = fuel_ferc1_df["fuel_avg_heat"] / 1e6
    fuel_ferc1_df.drop("fuel_avg_heat", axis=1, inplace=True)

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // RENAME COLUMNS TO MATCH PUDL DB //////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    fuel_ferc1_df.rename(columns={
        // FERC 1 DB Name      PUDL DB Name
        "respondent_id": "utility_id_ferc1",
        "fuel": "fuel_type_code_pudl",
        "fuel_avg_mmbtu_per_unit": "fuel_mmbtu_per_unit",
        "fuel_quantity": "fuel_qty_burned",
        "fuel_cost_burned": "fuel_cost_per_unit_burned",
        "fuel_cost_delvd": "fuel_cost_per_unit_delivered",
        "fuel_cost_btu": "fuel_cost_per_mmbtu"},
        inplace=True)

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // CORRECT DATA ENTRY ERRORS //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

After Change



    
    // grab table from dictionary of dfs, clean it up a bit
    fuel_ferc1_df = (
        _clean_cols(ferc1_raw_dfs["fuel_ferc1"], "f1_fuel").
        // Standardize plant_name capitalization and remove leading/trailing
        // white space -- necessary b/c plant_name is part of many foreign keys.
        pipe(strip_lower, ["plant_name"]).
        // Take the messy free-form fuel & fuel_unit fields, and do our best to
        // map them to some canonical categories... this is necessarily
        // imperfect:
        pipe(cleanstrings, ["fuel", "fuel_unit"],
             [pc.ferc1_fuel_strings, pc.ferc1_fuel_unit_strings],
             unmapped="").
        // Fuel cost per kWh is a per-unit value that doesn't make sense to
        // report for a single fuel that may be only a small part of the fuel
        // consumed. "fuel_generaton" is heat rate, but as it's based only on
        // the heat content of a given fuel which may only be a small portion of
        // the overall fuel consumption, it doesn't make any sense here. Drop
        // both columns.
        drop(["fuel_cost_kwh", "fuel_generaton"], axis=1).
        // Convert from BTU/unit of fuel to 1e6 BTU/unit.
        assign(fuel_avg_mmbtu_per_unit=lambda x: x.fuel_avg_heat / 1e6).
        drop("fuel_avg_heat", axis=1).
        // Rename the columns to match our DB definitions
        rename(columns={
            // FERC 1 DB Name      PUDL DB Name
            "respondent_id": "utility_id_ferc1",
            "fuel": "fuel_type_code_pudl",
            "fuel_avg_mmbtu_per_unit": "fuel_mmbtu_per_unit",
            "fuel_quantity": "fuel_qty_burned",
            "fuel_cost_burned": "fuel_cost_per_unit_burned",
            "fuel_cost_delvd": "fuel_cost_per_unit_delivered",
            "fuel_cost_btu": "fuel_cost_per_mmbtu"})
    )

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // CORRECT DATA ENTRY ERRORS //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: catalyst-cooperative/pudl
Commit Name: 590028f40e74f82c3d00f0bc48b4cf415c97bfce
Time: 2019-06-13
Author: zane.selvans@catalyst.coop
File Name: pudl/transform/ferc1.py
Class Name:
Method Name: fuel


Project Name: has2k1/plotnine
Commit Name: 3c4c60ef5dce4695ebe29f2680058310daef77b9
Time: 2015-04-20
Author: has2k1@gmail.com
File Name: ggplot/layer.py
Class Name: layer
Method Name: compute_aesthetics


Project Name: EpistasisLab/tpot
Commit Name: 2ab8c1444facbd46df8767a5badda5b9f1a50c29
Time: 2016-08-01
Author: supacoofoo@gmail.com
File Name: tpot/tpot.py
Class Name: TPOT
Method Name: predict