# Score one column described by `switch`: translate each category in
# X[column] into the value stored for it under switch["woe"], then
# optionally handle unknown categories and add multiplicative noise.
column = switch.get("col")
woe_by_category = switch.get("woe")

# Score the column.
# A single vectorized lookup replaces the former per-category loop, which did
# one full-column comparison and one masked assignment for every category.
# Categories absent from the table come out as NaN. The result keeps X's own
# index (and the column name), so it stays aligned with X row for row even
# when X does not carry a default RangeIndex.
# The float cast keeps the dtype the original NaN-initialised Series had,
# including for an empty table or a categorical input column.
# NOTE(review): unlike the `==` mask, `map` may match a NaN key against NaN
# cells -- confirm the table never contains a NaN key.
transformed_column = X[column].map(woe_by_category).astype(float)

# Replace missing values only in the computed columns
if self.impute_missing:
    if self.handle_unknown == "impute":
        # Unknown categories get a neutral score of 0.
        transformed_column = transformed_column.fillna(0)
    elif self.handle_unknown == "error":
        # Any remaining NaN means a category that was not in the table.
        if transformed_column.isnull().any():
            raise ValueError("Unexpected categories found in column %s" % column)

# Randomization is meaningful only for training data -> we do it only if y is present
if self.randomized and y is not None:
    random_state_generator = check_random_state(self.random_state)
    # One multiplicative factor per row, drawn from N(1, sigma).
    noise = random_state_generator.normal(1., self.sigma, transformed_column.shape[0])
    transformed_column = transformed_column * noise