343cb214a384df7061e0e4300bc376ae4f66b5ef,deepchem/models/tests/test_overfit.py,TestOverfit,test_siamese_singletask_classification_overfit,#TestOverfit#,594
Before Change
"""Test siamese singletask model overfits tiny data."""
np.random.seed(123)
tf.set_random_seed(123)
g = tf.Graph()
sess = tf.Session(graph=g)
n_tasks = 1
n_feat = 75
max_depth = 4
n_pos = 6
n_neg = 4
test_batch_size = 10
n_train_trials = 80
support_batch_size = n_pos + n_neg
# Load mini classification dataset.
featurizer = dc.feat.ConvMolFeaturizer()
tasks = ["outcome"]
input_file = os.path.join(self.current_dir, "example_classification.csv")
loader = dc.data.CSVLoader(
    tasks=tasks, smiles_field="smiles", featurizer=featurizer)
dataset = loader.featurize(input_file)
classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)
with g.as_default():
  support_model = dc.nn.SequentialSupportGraph(n_feat)
  # Add layers
  # output will be (n_atoms, 64)
  support_model.add(dc.nn.GraphConv(64, activation="relu"))
  # Need to add batch-norm separately to test/support due to differing
  # shapes.
  # output will be (n_atoms, 64)
  support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
  # output will be (n_atoms, 64)
  support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
  support_model.add(dc.nn.GraphPool())
  support_model.add_test(dc.nn.GraphGather(test_batch_size))
  support_model.add_support(dc.nn.GraphGather(support_batch_size))

  with self.test_session() as sess:
    model = dc.models.SupportGraphClassifier(
        sess, support_model, test_batch_size=test_batch_size,
        support_batch_size=support_batch_size, learning_rate=1e-3)
    # Fit trained model. Dataset has 6 positives and 4 negatives, so set
    # n_pos/n_neg accordingly.
    model.fit(dataset, n_episodes_per_epoch=n_train_trials, n_pos=n_pos,
              n_neg=n_neg)
    model.save()
    # Eval model on train. Dataset has 6 positives and 4 negatives, so set
    # n_pos/n_neg accordingly. Note that support is *not* excluded (so we can
    # measure whether the model has memorized the support). Replacement is
    # turned off to ensure that the support contains the full training set.
    # This checks that the model can memorize the provided support.
    scores, _ = model.evaluate(dataset, classification_metric, n_trials=5,
                               n_pos=n_pos, n_neg=n_neg,
                               exclude_support=False)
    # Measure performance on 0-th task.
    assert scores[0] > .9
def test_attn_lstm_singletask_classification_overfit(self):
  """Test attn lstm singletask overfits tiny data."""
  np.random.seed(123)
  tf.set_random_seed(123)
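For context, fit and evaluate above sample episodes of n_pos positives and n_neg negatives from the dataset. A minimal sketch of that sampling step (a hypothetical helper for illustration only, not the DeepChem API):

import numpy as np

def sample_support(y, n_pos, n_neg, rng=np.random):
  # Hypothetical illustration of episodic support sampling: draw n_pos
  # positives and n_neg negatives without replacement. With n_pos=6 and
  # n_neg=4 this covers the full 10-compound dataset, which is what the
  # memorization check above relies on.
  pos = rng.choice(np.where(y == 1)[0], size=n_pos, replace=False)
  neg = rng.choice(np.where(y == 0)[0], size=n_neg, replace=False)
  return np.concatenate([pos, neg])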
After Change
classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)
support_model = dc.nn.SequentialSupportGraph(n_feat)
# Add layers
# output will be (n_atoms, 64)
support_model.add(dc.nn.GraphConv(64, n_feat, activation="relu"))
# Need to add batch-norm separately to test/support due to differing
# shapes.
# output will be (n_atoms, 64)
support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
# output will be (n_atoms, 64)
support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
support_model.add(dc.nn.GraphPool())
support_model.add_test(dc.nn.GraphGather(test_batch_size))
support_model.add_support(dc.nn.GraphGather(support_batch_size))
model = dc.models.SupportGraphClassifier(
    support_model,
    test_batch_size=test_batch_size,
    support_batch_size=support_batch_size,
    learning_rate=1e-3)
# Fit trained model. Dataset has 6 positives and 4 negatives, so set
# n_pos/n_neg accordingly.
model.fit(
    dataset, n_episodes_per_epoch=n_train_trials, n_pos=n_pos, n_neg=n_neg)
model.save()
# Eval model on train. Dataset has 6 positives and 4 negatives, so set
# n_pos/n_neg accordingly. Note that support is *not* excluded (so we can
# measure whether the model has memorized the support). Replacement is
# turned off to ensure that the support contains the full training set.
# This checks that the model can memorize the provided support.
scores, _ = model.evaluate(
    dataset,
    classification_metric,
    n_trials=5,
    n_pos=n_pos,
    n_neg=n_neg,
    exclude_support=False)
############################################################ DEBUG
# TODO(rbharath): Check if something went wrong here...
# Measure performance on 0-th task.
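Summarizing the API changes this diff captures (a side-by-side restatement of the code above, not additional API): dc.models.SupportGraphClassifier no longer takes a TensorFlow session as its first argument, dc.nn.GraphConv now takes n_feat as a second positional argument, and the caller no longer manages a tf.Graph/tf.Session pair.

# Before: caller builds and passes the TensorFlow session explicitly.
g = tf.Graph()
sess = tf.Session(graph=g)
with g.as_default():
  support_model.add(dc.nn.GraphConv(64, activation="relu"))
  model = dc.models.SupportGraphClassifier(
      sess, support_model, test_batch_size=test_batch_size,
      support_batch_size=support_batch_size, learning_rate=1e-3)

# After: the model manages its own session; GraphConv is given n_feat.
support_model.add(dc.nn.GraphConv(64, n_feat, activation="relu"))
model = dc.models.SupportGraphClassifier(
    support_model, test_batch_size=test_batch_size,
    support_batch_size=support_batch_size, learning_rate=1e-3)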
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 31
Instances
Project Name: deepchem/deepchem
Commit Name: 343cb214a384df7061e0e4300bc376ae4f66b5ef
Time: 2017-02-20
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_overfit.py
Class Name: TestOverfit
Method Name: test_siamese_singletask_classification_overfit
Project Name: deepchem/deepchem
Commit Name: 343cb214a384df7061e0e4300bc376ae4f66b5ef
Time: 2017-02-20
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_overfit.py
Class Name: TestOverfit
Method Name: test_attn_lstm_singletask_classification_overfit
Project Name: deepchem/deepchem
Commit Name: 343cb214a384df7061e0e4300bc376ae4f66b5ef
Time: 2017-02-20
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/tests/test_overfit.py
Class Name: TestOverfit
Method Name: test_residual_lstm_singletask_classification_overfit