6b91fba9eb6e822f6d2035a6487a97e4520793fe,deepchem/molnet/load_function/uv_datasets.py,,gen_uv,#Any#Any#Any#Any#Any#Any#,35

Before Change



  # Featurizing datasets

  featurizer = None
  loader = deepchem.data.UserCSVLoader(tasks=UV_tasks, featurizer=featurizer)

  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
  logger.info("Train dataset featurization complete.")

  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
  logger.info("Validation dataset featurization complete.")

  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
  logger.info("Test dataset featurization complete.")

  # TODO: Add missing entries removal

  # Shuffle the training data
  logger.info("Shuffling the training dataset")
  train_dataset.sparse_shuffle()

  # Apply transformations
  logger.info("Starting transformations")
  transformers = get_transformers(train_dataset)

  for transformer in transformers:
    logger.info("Performing transformations with {}".format(transformer.__class__.__name__))

    logger.info("Transforming the training dataset...")
    train_dataset = transformer.transform(train_dataset)
    logger.info("Training dataset transformation complete.")

    logger.info("Transforming the validation dataset...")
    valid_dataset = transformer.transform(valid_dataset)
    logger.info("Validation dataset transformation complete.")

    logger.info("Transforming the test dataset...")
    test_dataset = transformer.transform(test_dataset)
    logger.info("Test dataset transformation complete.")

  logger.info("Transformations complete.")
  logger.info("Moving datasets to corresponding directories")

After Change



  # Featurizing datasets
  logger.info("About to featurize UV dataset.")
  featurizer = deepchem.feat.UserDefinedFeaturizer(merck_descriptors)
  loader = deepchem.data.UserCSVLoader(
    tasks=UV_tasks, id_field="Molecule", featurizer=featurizer)

  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)

  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)

  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)

  # Missing entry removal
  logger.info("Removing missing entries from dataset.")
  remove_missing_entries(train_dataset)
  remove_missing_entries(valid_dataset)
  remove_missing_entries(test_dataset)

  # Shuffle the training data
  logger.info("Shuffling the training dataset")
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 11

Instances


Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/uv_datasets.py
Class Name:
Method Name: gen_uv


Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/kinase_datasets.py
Class Name:
Method Name: gen_kinase


Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/uv_datasets.py
Class Name:
Method Name: gen_uv


Project Name: deepchem/deepchem
Commit Name: 6b91fba9eb6e822f6d2035a6487a97e4520793fe
Time: 2018-09-30
Author: vsomnath@student.ethz.ch
File Name: deepchem/molnet/load_function/factors_datasets.py
Class Name:
Method Name: gen_factors