6b91fba9eb6e822f6d2035a6487a97e4520793fe,deepchem/molnet/load_function/uv_datasets.py,,gen_uv,#Any#Any#Any#Any#Any#Any#,35
Before Change
// Featurizing datasets
featurizer = None
loader = deepchem.data.UserCSVLoader(tasks=UV_tasks, featurizer=featurizer)
logger.info("Featurizing train datasets...")
train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
logger.info("Train dataset featurization complete.")
logger.info("Featurizing validation datasets...")
valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
logger.info("Validation dataset featurization complete.")
logger.info("Featurizing test datasets....")
test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
logger.info("Test dataset featurization complete.")
// TODO: Add missing entries removal
// Shuffle the training data
logger.info("Shuffling the training dataset")
train_dataset.sparse_shuffle()
// Apply transformations
logger.info("Starting transformations")
transformers = get_transformers(train_dataset)
for transformer in transformers:
logger.info("Performing transformations with {}".format(transformer.__class__.__name__))
logger.info("Transforming the training dataset...")
train_dataset = transformer.transform(train_dataset)
logger.info("Training dataset transformation complete.")
logger.info("Transforming the validation dataset...")
valid_dataset = transformer.transform(valid_dataset)
logger.info("Validation dataset transformation complete.")
logger.info("Transforming the test dataset...")
test_dataset = transformer.transform(test_dataset)
logger.info("Test dataset transformation complete.")
logger.info("Transformations complete.")
logger.info("Moving datasets to corresponding directories")
After Change
// Featurizing datasets
logger.info("About to featurize UV dataset.")
featurizer = deepchem.feat.UserDefinedFeaturizer(merck_descriptors)
loader = deepchem.data.UserCSVLoader(
tasks=UV_tasks, id_field="Molecule", featurizer=featurizer)
logger.info("Featurizing train datasets...")
train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
logger.info("Featurizing validation datasets...")
valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
logger.info("Featurizing test datasets....")
test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
// Missing entry removal
logger.info("Removing missing entries from dataset.")
remove_missing_entries(train_dataset)
remove_missing_entries(valid_dataset)
remove_missing_entries(test_dataset)
// Shuffle the training data
logger.info("Shuffling the training dataset")
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 11
Instances
Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/uv_datasets.py
Class Name:
Method Name: gen_uv
Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/kinase_datasets.py
Class Name:
Method Name: gen_kinase
Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/uv_datasets.py
Class Name:
Method Name: gen_uv
Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/factors_datasets.py
Class Name:
Method Name: gen_factors