f70e71d5c7fdc8e25391e54e74c3402fb323ad5c,examples/plot_employee_salaries.py,,,#,45

Before Change


// we then get the data, and define a target column we will try to predict,
// as well as a dirty column we will encode with the different methods.
// the rest will have a standard encoding
data_path = fetching.get_data_dir()
fetching.fetch_employee_salaries()
data_file = os.path.join(data_path, "employee_salaries", "rows.csv")
df = pd.read_csv(data_file).astype(str)
df["Current Annual Salary"] = [float(s[1:]) for s
                               in df["Current Annual Salary"]]
df["Year First Hired"] = [int(s.split("/")[-1])

After Change


from dirty_cat.datasets import fetch_employee_salaries

description = fetch_employee_salaries()
df = pd.read_csv(description["path"]).astype(str)

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// and carry out some basic preprocessing:

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 3

Instances

Link

Project Name: dirty-cat/dirty_cat

Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c

Time: 2018-06-06

Author: pierreglaser@msn.com

File Name: examples/plot_employee_salaries.py

Class Name:

Method Name:

Link

Project Name: deepchem/deepchem

Commit Name: fde6f3c658ccaadede04e07cb493f56744ae3511

Time: 2020-11-02

Author: peastman@stanford.edu

File Name: deepchem/molnet/load_function/cell_counting_datasets.py

Class Name:

Method Name:

Link

Project Name: dirty-cat/dirty_cat

Commit Name: 20071121a01cc99ba1b5fd735e039c2517f08576

Time: 2018-06-06

Author: gael.varoquaux@normalesup.org

File Name: examples/plot_investigating_dirty_categories.py

Class Name:

Method Name: