a49752375d5775b1f0e6393a811c937332dccb18,examples/compose/plot_column_transformer.py,,,#,83

Before Change


        return features


# Classification pipeline: extract subject and body from each post, build
# weighted features from both columns, then fit a LinearSVC classifier.
# verbose=True is passed to the outer Pipeline.
pipeline = Pipeline([
    # Extract the subject & body
    ("subjectbody", SubjectBodyExtractor()),

    # Use ColumnTransformer to combine the features from subject and body
    ("union", ColumnTransformer(
        [
            # Pulling features from the post's subject line (first column)
            ("subject", TfidfVectorizer(min_df=50), 0),

            # Pipeline for standard bag-of-words model for body (second column)
            ("body_bow", Pipeline([
                ("tfidf", TfidfVectorizer()),
                ("best", TruncatedSVD(n_components=50)),
            ]), 1),

            # Pipeline for pulling ad hoc features from post's body
            ("body_stats", Pipeline([
                ("stats", TextStats()),  # returns a list of dicts
                ("vect", DictVectorizer()),  # list of dicts -> feature matrix
            ]), 1),
        ],

        # Weight components in ColumnTransformer; keys must match the
        # transformer names declared in the list above.
        transformer_weights={
            "subject": 0.8,
            "body_bow": 0.5,
            "body_stats": 1.0,
        }
    )),

    # Use an SVC classifier on the combined features
    ("svc", LinearSVC(dual=False)),
], verbose=True)

# Limit the list of categories to make running this example faster.
categories = ["alt.atheism", "talk.religion.misc"]
X_train, y_train = fetch_20newsgroups(random_state=1,

After Change


# Each feature comprises meta information about that post, such as the subject,
# and the body of the news post.

print(X_train[0])

##############################################################################
# Creating transformers
# ---------------------
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 6

Instances


Project Name: scikit-learn/scikit-learn
Commit Name: a49752375d5775b1f0e6393a811c937332dccb18
Time: 2020-05-17
Author: jliu176@gmail.com
File Name: examples/compose/plot_column_transformer.py
Class Name:
Method Name:


Project Name: dmbee/seglearn
Commit Name: b1de080823e41b921bec2949db2b6c3cb1f1d5ef
Time: 2018-03-11
Author: david.mo.burns@gmail.com
File Name: examples/plot_feature_rep.py
Class Name:
Method Name:


Project Name: scikit-multiflow/scikit-multiflow
Commit Name: cd37c1e406d5f1742077ff228a4944846fff339c
Time: 2017-07-24
Author: guilhermekmatsumoto@gmail.com
File Name: skmultiflow/demos/_test_knn.py
Class Name:
Method Name: demo


Project Name: dmbee/seglearn
Commit Name: b1de080823e41b921bec2949db2b6c3cb1f1d5ef
Time: 2018-03-11
Author: david.mo.burns@gmail.com
File Name: examples/plot_scoring.py
Class Name:
Method Name: