6b91fba9eb6e822f6d2035a6487a97e4520793fe,deepchem/molnet/load_function/uv_datasets.py,,gen_uv,#Any#Any#Any#Any#Any#Any#,35
 
Before Change
  // Featurizing datasets
  featurizer = None
  loader = deepchem.data.UserCSVLoader(tasks=UV_tasks, featurizer=featurizer)
  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
  logger.info("Train dataset featurization complete.")
  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
  logger.info("Validation dataset featurization complete.")
  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
  logger.info("Test dataset featurization complete.")
  // TODO: Add missing entries removal
  // Shuffle the training data
  logger.info("Shuffling the training dataset")
  train_dataset.sparse_shuffle()
  // Apply transformations
  logger.info("Starting transformations")
  transformers = get_transformers(train_dataset)
  for transformer in transformers:
    logger.info("Performing transformations with {}".format(transformer.__class__.__name__))
    logger.info("Transforming the training dataset...")
    train_dataset = transformer.transform(train_dataset)
    logger.info("Training dataset transformation complete.")
    logger.info("Transforming the validation dataset...")
    valid_dataset = transformer.transform(valid_dataset)
    logger.info("Validation dataset transformation complete.")
    logger.info("Transforming the test dataset...")
    test_dataset = transformer.transform(test_dataset)
    logger.info("Test dataset transformation complete.")
  logger.info("Transformations complete.")
  logger.info("Moving datasets to corresponding directories")
After Change
  // Featurizing datasets
  logger.info("About to featurize UV dataset.")
  featurizer = deepchem.feat.UserDefinedFeaturizer(merck_descriptors)
  loader = deepchem.data.UserCSVLoader(
    tasks=UV_tasks, id_field="Molecule", featurizer=featurizer)
  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
  // Missing entry removal
  logger.info("Removing missing entries from dataset.")
  remove_missing_entries(train_dataset)
  remove_missing_entries(valid_dataset)
  remove_missing_entries(test_dataset)
  // Shuffle the training data
  logger.info("Shuffling the training dataset")

In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 11
Instances
 Project Name: deepchem/deepchem
 Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
 Time: 2018-09-30
 Author: vsomnath@student.ethz.ch
 File Name: deepchem/molnet/load_function/uv_datasets.py
 Class Name: 
 Method Name: gen_uv
 Project Name: deepchem/deepchem
 Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
 Time: 2018-09-30
 Author: vsomnath@student.ethz.ch
 File Name: deepchem/molnet/load_function/kinase_datasets.py
 Class Name: 
 Method Name: gen_kinase
 Project Name: deepchem/deepchem
 Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
 Time: 2018-09-30
 Author: vsomnath@student.ethz.ch
 File Name: deepchem/molnet/load_function/uv_datasets.py
 Class Name: 
 Method Name: gen_uv
 Project Name: deepchem/deepchem
 Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
 Time: 2018-09-30
 Author: vsomnath@student.ethz.ch
 File Name: deepchem/molnet/load_function/factors_datasets.py
 Class Name: 
 Method Name: gen_factors