a49752375d5775b1f0e6393a811c937332dccb18,examples/compose/plot_column_transformer.py,,,#,83

Before Change


        return features


# Text-classification pipeline: extract subject/body, build weighted
# per-column features with a ColumnTransformer, then fit a classifier.
# NOTE: comments must use `#`; `//` is floor division in Python and a line
# starting with it is a syntax error.
pipeline = Pipeline([
    # Extract the subject & body
    ("subjectbody", SubjectBodyExtractor()),

    # Use ColumnTransformer to combine the features from subject and body
    ("union", ColumnTransformer(
        [
            # Pulling features from the post's subject line (first column)
            ("subject", TfidfVectorizer(min_df=50), 0),

            # Pipeline for standard bag-of-words model for body (second column)
            ("body_bow", Pipeline([
                ("tfidf", TfidfVectorizer()),
                ("best", TruncatedSVD(n_components=50)),
            ]), 1),

            # Pipeline for pulling ad hoc features from post's body
            ("body_stats", Pipeline([
                ("stats", TextStats()),  # returns a list of dicts
                ("vect", DictVectorizer()),  # list of dicts -> feature matrix
            ]), 1),
        ],

        # weight components in ColumnTransformer
        transformer_weights={
            "subject": 0.8,
            "body_bow": 0.5,
            "body_stats": 1.0,
        }
    )),

    # Use a SVC classifier on the combined features
    ("svc", LinearSVC(dual=False)),
], verbose=True)

# Limit the list of categories to make running this example faster.
categories = ["alt.atheism", "talk.religion.misc"]

# Train and test splits are fetched with identical settings so both go
# through the same preprocessing; return_X_y=True yields a (data, target)
# pair that is unpacked directly.
X_train, y_train = fetch_20newsgroups(random_state=1,
                                      subset="train",
                                      categories=categories,
                                      remove=("footers", "quotes"),
                                      return_X_y=True)
X_test, y_test = fetch_20newsgroups(random_state=1,
                                    subset="test",
                                    categories=categories,
                                    remove=("footers", "quotes"),
                                    return_X_y=True)

# Fit on the training split, then report per-class metrics on the test split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

After Change


# Each feature comprises meta information about that post, such as the subject,
# and the body of the news post.

print(X_train[0])

##############################################################################
# Creating transformers
# ---------------------
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: scikit-learn/scikit-learn
Commit Name: a49752375d5775b1f0e6393a811c937332dccb18
Time: 2020-05-17
Author: jliu176@gmail.com
File Name: examples/compose/plot_column_transformer.py
Class Name:
Method Name:


Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 95b8e2a603f66cc3091e3266a717c0f206be3e95
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name:


Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 3b49ffd98696ad697cf2b9685e581459d51ea0b1
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name: