f70e71d5c7fdc8e25391e54e74c3402fb323ad5c,examples/plot_employee_salaries.py,,,#,45

Before Change


// We then get the data and define a target column that we will try to predict,
// as well as a dirty column that we will encode with the different methods.
// The rest of the columns will have a standard encoding.
# Snapshot of the example script before the change: locate the CSV, load it,
# and derive the numeric salary and hire-year columns by hand.
# `fetching` is a project helper defined outside this snippet; the call to
# fetch_employee_salaries() presumably makes rows.csv available on disk
# (its return value is ignored here) -- confirm against the helper.
data_path = fetching.get_data_dir()
fetching.fetch_employee_salaries()
data_file = os.path.join(data_path, "employee_salaries", "rows.csv")
# Cast every column to str so the parsing below can use plain string operations.
df = pd.read_csv(data_file).astype(str)
# Drop the first character of each salary string, then convert to float
# (presumably a leading currency symbol -- TODO confirm against the CSV).
df["Current Annual Salary"] = [float(s[1:]) for s
                               in df["Current Annual Salary"]]
# Keep the last "/"-separated field of the hire date as an int
# (assumes the year is the final component of the date string -- TODO confirm).
df["Year First Hired"] = [int(s.split("/")[-1])
                          for s in df["Date First Hired"]]

# The prediction target is the salary column, taken as a flattened array.
target_column = "Current Annual Salary"
y = df[target_column].values.ravel()

After Change


import pandas as pd
from dirty_cat.datasets import fetch_employee_salaries

# The fetcher's return value is indexed with "path" to obtain the CSV location
# (the rest of its contents are not used in this snippet).
description = fetch_employee_salaries()
# Cast every column to str so the preprocessing can use string operations.
df = pd.read_csv(description["path"]).astype(str)

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// and carry out some basic preprocessing:
# Strip "$" characters from both ends of each salary string, then cast the
# column to float.
df["Current Annual Salary"] = df["Current Annual Salary"].str.strip("$").astype(
    float)
# Parse the hire dates into datetimes (overwriting the string column), then
# take the year of each parsed date as a new column.
df["Date First Hired"] = pd.to_datetime(df["Date First Hired"])
df["Year First Hired"] = df["Date First Hired"].apply(lambda x: x.year)

# The prediction target is the salary column, taken as a flattened array.
target_column = "Current Annual Salary"
y = df[target_column].values.ravel()
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: dirty-cat/dirty_cat
Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c
Time: 2018-06-06
Author: pierreglaser@msn.com
File Name: examples/plot_employee_salaries.py
Class Name:
Method Name:


Project Name: arraiy/torchgeometry
Commit Name: 206798edabf99a8ee4bb03ffba25968d2057bb18
Time: 2020-07-30
Author: anguelos.nicolaou@gmail.com
File Name: setup.py
Class Name:
Method Name:


Project Name: catalyst-team/catalyst
Commit Name: 1ef3ad90a3423ed15ca41e0ea4e81012ebe84a9f
Time: 2020-08-11
Author: scitator@gmail.com
File Name: catalyst/data/scripts/project_embeddings.py
Class Name:
Method Name: main