
Before Change

// example.

# Names of the columns kept for this example; X is narrowed to exactly these.
subset_feature = ["embarked", "sex", "pclass", "age", "fare"]
# NOTE(review): presumably X is a pandas DataFrame, so indexing with a list
# selects columns — confirm against the code that loads X.
X = X[subset_feature]

// Then, we introspect the information regarding each column data type.

After Change

    ("num", numeric_transformer, selector(dtype_exclude="category")),
    ("cat", categorical_transformer, selector(dtype_include="category"))
# Chain the column-wise preprocessing with a logistic-regression classifier so
# that both steps are fitted and applied together as a single estimator.
clf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression()),
    ]
)

# Fit on the training split, then report the score on the held-out split.
clf.fit(X_train, y_train)
print(f"model score: {clf.score(X_test, y_test):.3f}")

// The resulting score is not exactly the same as the one from the previous
// pipeline because the dtype-based selector treats the ``pclass`` column as
// a numeric feature instead of a categorical feature as previously:




// Using the prediction pipeline in a grid search
// Grid search can also be performed on the different preprocessing steps
// defined in the ``ColumnTransformer`` object, together with the classifier's
// hyperparameters as part of the ``Pipeline``.
// We will search for both the imputer strategy of the numeric preprocessing
// and the regularization parameter of the logistic regression using
// :class:`sklearn.model_selection.GridSearchCV`.

# Search space for the grid search. Keys use the ``<step>__<param>`` naming so
# that each value list is routed to the matching nested pipeline component:
# the imputation strategy of the numeric preprocessing, and the ``C``
# parameter of the classifier step.
param_grid = {
    "preprocessor__num__imputer__strategy": ["mean", "median"],
    "classifier__C": [0.1, 1.0, 10, 100],
}

# Cross-validated search (cv=10) over every combination in ``param_grid``,
# using the full preprocessing + classifier pipeline as the estimator.
grid_search = GridSearchCV(clf, param_grid, cv=10)

# Calling "fit" triggers the cross-validated search for the best
# hyper-parameters combination:
grid_search.fit(X_train, y_train)

# Print the label followed by the selected combination; the label line is
# kept on its own so the existing output is unchanged.
print("Best params:")
print(grid_search.best_params_)

// The internal cross-validation score obtained with those parameters is:
print(f"Internal CV score: {grid_search.best_score_:.3f}")

// We can also introspect the top grid search results as a pandas dataframe:
import pandas as pd

# Load the raw search results into a DataFrame and order the rows so the
# highest mean test score comes first.
cv_results = pd.DataFrame(grid_search.cv_results_).sort_values(
    "mean_test_score", ascending=False
)
cv_results[["mean_test_score", "std_test_score",
