a49752375d5775b1f0e6393a811c937332dccb18,examples/compose/plot_column_transformer.py,,,#,83
Before Change
return features
# Classification pipeline: extract the subject and body of each post, build
# features from each column, combine them with per-transformer weights, and
# train a linear SVC on the result.
pipeline = Pipeline([
    # Extract the subject & body
    ("subjectbody", SubjectBodyExtractor()),

    # Use ColumnTransformer to combine the features from subject and body
    ("union", ColumnTransformer(
        [
            # Pulling features from the post's subject line (first column)
            ("subject", TfidfVectorizer(min_df=50), 0),

            # Pipeline for standard bag-of-words model for body (second column)
            ("body_bow", Pipeline([
                ("tfidf", TfidfVectorizer()),
                ("best", TruncatedSVD(n_components=50)),
            ]), 1),

            # Pipeline for pulling ad hoc features from post's body
            ("body_stats", Pipeline([
                ("stats", TextStats()),  # returns a list of dicts
                ("vect", DictVectorizer()),  # list of dicts -> feature matrix
            ]), 1),
        ],

        # weight components in ColumnTransformer
        transformer_weights={
            "subject": 0.8,
            "body_bow": 0.5,
            "body_stats": 1.0,
        }
    )),

    # Use a SVC classifier on the combined features
    ("svc", LinearSVC(dual=False)),
], verbose=True)

# limit the list of categories to make running this example faster.
categories = ["alt.atheism", "talk.religion.misc"]

# Load the train and test splits with identical settings so that both are
# drawn from the same categories and have the same parts of each post removed.
X_train, y_train = fetch_20newsgroups(random_state=1,
                                      subset="train",
                                      categories=categories,
                                      remove=("footers", "quotes"),
                                      return_X_y=True)
X_test, y_test = fetch_20newsgroups(random_state=1,
                                    subset="test",
                                    categories=categories,
                                    remove=("footers", "quotes"),
                                    return_X_y=True)

# Fit on the training split, then report per-class metrics on the test split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))
After Change
# Each feature comprises meta information about that post, such as the subject,
# and the body of the news post.
print(X_train[0])

##############################################################################
# Creating transformers
# ---------------------
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances Project Name: scikit-learn/scikit-learn
Commit Name: a49752375d5775b1f0e6393a811c937332dccb18
Time: 2020-05-17
Author: jliu176@gmail.com
File Name: examples/compose/plot_column_transformer.py
Class Name:
Method Name:
Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 95b8e2a603f66cc3091e3266a717c0f206be3e95
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name:
Project Name: nishitpatel01/Fake_News_Detection
Commit Name: 3b49ffd98696ad697cf2b9685e581459d51ea0b1
Time: 2017-12-03
Author: nkp3@illinois.edu
File Name: FeatureSelection.py
Class Name:
Method Name: