4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a,deepchem/models/tests/test_api.py,TestModelAPI,test_singletask_sklearn_rf_ECFP_regression_sharded_API,#TestModelAPI#,136
Before Change
Test of singletask RF ECFP regression API: sharded edition.
splittype = "scaffold"
featurizer = CircularFingerprint(size=1024)
model_params = {}
tasks = ["label"]
task_type = "regression"
task_types = {task: task_type for task in tasks}
input_file = os.path.join(
self.current_dir, "../../../datasets/pdbbind_core_df.pkl.gz")
loader = DataLoader(tasks=tasks,
smiles_field=self.smiles_field,
featurizer=featurizer,
verbosity="low")
dataset = loader.featurize(input_file, self.data_dir)
splitter = ScaffoldSplitter()
train_dataset, test_dataset = splitter.train_test_split(
dataset, self.train_dir, self.test_dir)
input_transformers = []
output_transformers = [
NormalizationTransformer(transform_y=True, dataset=train_dataset)]
transformers = input_transformers + output_transformers
for dataset in [train_dataset, test_dataset]:
for transformer in transformers:
transformer.transform(dataset)
// We set shard size above to force the creation of multiple shards of the data.
// pdbbind_core has ~200 examples.
model_params["data_shape"] = train_dataset.get_data_shape()
regression_metrics = [Metric(metrics.r2_score),
Metric(metrics.mean_squared_error),
Metric(metrics.mean_absolute_error)]
model = SklearnModel(tasks, task_types, model_params, self.model_dir,
mode="regression",
model_instance=RandomForestRegressor())
// Fit trained model
model.fit(train_dataset)
model.save()
After Change
Metric(metrics.mean_squared_error),
Metric(metrics.mean_absolute_error)]
sklearn_model = RandomForestRegressor()
model = SklearnModel(sklearn_model, self.model_dir)
// Fit trained model
model.fit(train_dataset)
model.save()
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 12
Instances
Project Name: deepchem/deepchem
Commit Name: 4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a
Time: 2016-09-19
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_api.py
Class Name: TestModelAPI
Method Name: test_singletask_sklearn_rf_ECFP_regression_sharded_API
Project Name: deepchem/deepchem
Commit Name: 4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a
Time: 2016-09-19
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_api.py
Class Name: TestModelAPI
Method Name: test_singletask_sklearn_rf_ECFP_regression_sharded_API
Project Name: deepchem/deepchem
Commit Name: 4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a
Time: 2016-09-19
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_api.py
Class Name: TestModelAPI
Method Name: test_singletask_sklearn_rf_ECFP_regression_API
Project Name: deepchem/deepchem
Commit Name: 4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a
Time: 2016-09-19
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_api.py
Class Name: TestModelAPI
Method Name: test_singletask_sklearn_rf_user_specified_regression_API
Project Name: deepchem/deepchem
Commit Name: 4c14b496bb5d1a4dfb92a3f32d3478c9233bcc8a
Time: 2016-09-19
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_api.py
Class Name: TestModelAPI
Method Name: test_singletask_sklearn_rf_RDKIT_descriptor_regression_API