04f4305931bedd43ca76190b25de120239d2dfe6,deepchem/splits/tests/test_splitter.py,TestSplitter,test_singletask_stratified_column_indices_mask,#TestSplitter#,334

Before Change


    w[:n_positives // 2] = 0

    stratified_splitter = dc.splits.RandomStratifiedSplitter()
    column_indices = stratified_splitter.get_task_split_indices(
        y, w, frac_split=.5)

    split_index = column_indices[0]
    # There are 10 nonzero actives.
    # The split index should partition this into half, so expect 5
    w_present = (w != 0)
    y_present = y * w_present

After Change


    n_tasks = 1

    # Test case where some weights are zero (i.e. masked)
    X = np.ones(n_samples)
    y = np.zeros((n_samples, n_tasks))
    y[:n_positives] = 1
    w = np.ones((n_samples, n_tasks))
    # Set half the positives to have zero weight
    w[:n_positives // 2] = 0
    dataset = dc.data.NumpyDataset(X, y, w)

    stratified_splitter = dc.splits.RandomStratifiedSplitter()
    train, valid, test = stratified_splitter.split(dataset, 0.5, 0, 0.5)

    # There are 10 nonzero actives.
    # The split index should partition this into half, so expect 5
    w_present = (w != 0)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 11

Instances


Project Name: deepchem/deepchem
Commit Name: 04f4305931bedd43ca76190b25de120239d2dfe6
Time: 2020-09-28
Author: peastman@stanford.edu
File Name: deepchem/splits/tests/test_splitter.py
Class Name: TestSplitter
Method Name: test_singletask_stratified_column_indices_mask


Project Name: deepchem/deepchem
Commit Name: 04f4305931bedd43ca76190b25de120239d2dfe6
Time: 2020-09-28
Author: peastman@stanford.edu
File Name: deepchem/splits/tests/test_splitter.py
Class Name: TestSplitter
Method Name: test_singletask_stratified_column_indices


Project Name: deepchem/deepchem
Commit Name: 04f4305931bedd43ca76190b25de120239d2dfe6
Time: 2020-09-28
Author: peastman@stanford.edu
File Name: deepchem/splits/tests/test_splitter.py
Class Name: TestSplitter
Method Name: test_multitask_stratified_column_indices


Project Name: deepchem/deepchem
Commit Name: 04f4305931bedd43ca76190b25de120239d2dfe6
Time: 2020-09-28
Author: peastman@stanford.edu
File Name: deepchem/splits/tests/test_splitter.py
Class Name: TestSplitter
Method Name: test_multitask_stratified_column_indices_masked