a49752375d5775b1f0e6393a811c937332dccb18,examples/compose/plot_column_transformer.py,,,#,83
Before Change
return features
# Text-classification pipeline: split each post into subject/body columns,
# build a weighted feature matrix from those columns, then fit a linear SVC.
pipeline = Pipeline([
    # Extract the subject & body
    ("subjectbody", SubjectBodyExtractor()),
    # Use ColumnTransformer to combine the features from subject and body
    ("union", ColumnTransformer(
        [
            # Pulling features from the post's subject line (first column)
            ("subject", TfidfVectorizer(min_df=50), 0),
            # Pipeline for standard bag-of-words model for body (second column)
            ("body_bow", Pipeline([
                ("tfidf", TfidfVectorizer()),
                ("best", TruncatedSVD(n_components=50)),
            ]), 1),
            # Pipeline for pulling ad hoc features from post's body
            ("body_stats", Pipeline([
                ("stats", TextStats()),  # returns a list of dicts
                ("vect", DictVectorizer()),  # list of dicts -> feature matrix
            ]), 1),
        ],
        # weight components in ColumnTransformer
        transformer_weights={
            "subject": 0.8,
            "body_bow": 0.5,
            "body_stats": 1.0,
        }
    )),
    # Use a SVC classifier on the combined features
    ("svc", LinearSVC(dual=False)),
], verbose=True)
# Limit the list of categories to make running this example faster.
categories = ["alt.atheism", "talk.religion.misc"]
X_train, y_train = fetch_20newsgroups(random_state=1,
After Change
# Each feature comprises meta information about that post, such as the subject,
# and the body of the news post.
print(X_train[0])

##############################################################################
# Creating transformers
# ---------------------
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 6
Instances Project Name: scikit-learn/scikit-learn
Commit Name: a49752375d5775b1f0e6393a811c937332dccb18
Time: 2020-05-17
Author: jliu176@gmail.com
File Name: examples/compose/plot_column_transformer.py
Class Name:
Method Name:
Project Name: dmbee/seglearn
Commit Name: b1de080823e41b921bec2949db2b6c3cb1f1d5ef
Time: 2018-03-11
Author: david.mo.burns@gmail.com
File Name: examples/plot_feature_rep.py
Class Name:
Method Name:
Project Name: scikit-multiflow/scikit-multiflow
Commit Name: cd37c1e406d5f1742077ff228a4944846fff339c
Time: 2017-07-24
Author: guilhermekmatsumoto@gmail.com
File Name: skmultiflow/demos/_test_knn.py
Class Name:
Method Name: demo
Project Name: dmbee/seglearn
Commit Name: b1de080823e41b921bec2949db2b6c3cb1f1d5ef
Time: 2018-03-11
Author: david.mo.burns@gmail.com
File Name: examples/plot_scoring.py
Class Name:
Method Name: