9d677b56f4244eb08f1c7e2061a1e86fd0aee748,examples/compose/plot_column_transformer_mixed_types.py,,,#,38

Before Change


// example.

# Restrict X to the five columns named in subset_feature.
subset_feature = ["embarked", "sex", "pclass", "age", "fare"]
X = X[subset_feature]

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Then, we introspect the information regarding each column data type.

After Change


    ("num", numeric_transformer, selector(dtype_exclude="category")),
    ("cat", categorical_transformer, selector(dtype_include="category"))
])
# Chain the column-wise preprocessor with a logistic regression classifier.
clf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression()),
    ]
)

# Fit on (X_train, y_train), then print the score on (X_test, y_test) with
# three decimals. An f-string is used for consistency with the other prints
# in this example; the rendered text is unchanged.
clf.fit(X_train, y_train)
print(f"model score: {clf.score(X_test, y_test):.3f}")

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The resulting score is not exactly the same as the one from the previous
// pipeline because the dtype-based selector treats the ``pclass`` column as
// a numeric feature instead of a categorical feature as previously:

# Same selector as the one given to the "num" transformer: evaluate it on
# X_train to inspect what it selects.
selector(dtype_exclude="category")(X_train)

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

# Same selector as the one given to the "cat" transformer: evaluate it on
# X_train to inspect what it selects.
selector(dtype_include="category")(X_train)

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Using the prediction pipeline in a grid search
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Grid search can also be performed on the different preprocessing steps
// defined in the ``ColumnTransformer`` object, together with the classifier's
// hyperparameters as part of the ``Pipeline``.
// We will search for both the imputer strategy of the numeric preprocessing
// and the regularization parameter of the logistic regression using
// :class:`sklearn.model_selection.GridSearchCV`.

# Candidate values for the two searched settings: the imputer strategy of the
# numeric preprocessing and the classifier's C.
param_grid = {
    "preprocessor__num__imputer__strategy": ["mean", "median"],
    "classifier__C": [0.1, 1.0, 10, 100],
}

# Search over param_grid around the full pipeline with cv=10; the bare name
# on the last line is kept so the object is displayed as the cell output.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10)
grid_search

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Calling "fit" triggers the cross-validated search for the best
// hyper-parameters combination:
//
# Run the search on (X_train, y_train), then print the hyper-parameter
# combination exposed as ``best_params_``.
grid_search.fit(X_train, y_train)

# Plain string literal: there is nothing to interpolate, so no f-prefix.
print("Best params:")
print(grid_search.best_params_)

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The internal cross-validation score obtained by those parameters is:
# Printed with three decimals, like the "model score" line of this example.
print(f"Internal CV score: {grid_search.best_score_:.3f}")

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// We can also introspect the top grid search results as a pandas dataframe:
import pandas as pd

# Load the raw search results into a dataframe, highest mean test score first.
cv_results = pd.DataFrame(grid_search.cv_results_).sort_values(
    "mean_test_score", ascending=False
)
# Display the first five rows, restricted to the test-score statistics and
# the two searched parameters.
cv_results.head(5)[
    [
        "mean_test_score",
        "std_test_score",
        "param_preprocessor__num__imputer__strategy",
        "param_classifier__C",
    ]
]
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: scikit-learn/scikit-learn
Commit Name: 9d677b56f4244eb08f1c7e2061a1e86fd0aee748
Time: 2020-05-16
Author: olivier.grisel@gmail.com
File Name: examples/compose/plot_column_transformer_mixed_types.py
Class Name:
Method Name:


Project Name: biolab/orange3
Commit Name: 8dc523d9bd92011caca608833221916d9e1b8130
Time: 2014-02-18
Author: ales.erjavec@fri.uni-lj.si
File Name: Orange/classification/logistic_regression.py
Class Name: LogisticRegressionLearner
Method Name: fit


Project Name: mne-tools/mne-python
Commit Name: bcabf319eab3dcd3902125da344d13eafd1669a6
Time: 2018-09-27
Author: alexandre.gramfort@m4x.org
File Name: mne/decoding/tests/test_base.py
Class Name:
Method Name: test_cross_val_multiscore