862f99942ce1eefe93f0cfd1bcf3ade031679cd4,pmlb/dataset_lists.py,,,#,26

Before Change


    "712_chscase_geyser1"
]

# Hard-coded names of the datasets listed as having metadata.
# Order is preserved exactly as originally written.
datasets_with_metadata = [
    "molecular_biology_promoters", "car", "connect_4", "dna",
    "542_pollution", "560_bodyfat", "poker", "USCrime",
    "pollen", "chess", "penguins", "bupa",
    "movement_libras", "adult", "waveform_21", "waveform_40",
    "saheart", "wine_quality_white", "wine_quality_red", "irish",
    "mushroom",
]

After Change



# Load the tab-separated summary table and derive the dataset name lists
# from its "task" and "dataset" columns.
# NOTE: the path is relative, so it resolves against the current working
# directory at import time.
df_summary = pandas.read_csv("pmlb/all_summary_stats.tsv", sep="\t")
# The query expression embeds a double-quoted string literal, so the outer
# Python string must use single quotes; nesting double quotes inside a
# double-quoted string is a syntax error.
regression_dataset_names = df_summary.query('task=="regression"')["dataset"].tolist()
classification_dataset_names = df_summary.query('task=="classification"')["dataset"].tolist()
# Combined list: regression names first, then classification names.
dataset_names = regression_dataset_names + classification_dataset_names

def get_datasets_with_metadata(dataset_names, local_cache_dir = "datasets/"):
    assert (local_cache_dir != None)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 5

Instances


Project Name: EpistasisLab/penn-ml-benchmarks
Commit Name: 862f99942ce1eefe93f0cfd1bcf3ade031679cd4
Time: 2020-09-03
Author: grixor@gmail.com
File Name: pmlb/dataset_lists.py
Class Name:
Method Name:


Project Name: Pinafore/qb
Commit Name: 9d22742baeeb608bac5594c3ece96f62f0734dce
Time: 2017-05-13
Author: sjtufs@gmail.com
File Name: qanta/buzzer/trainer.py
Class Name: Trainer
Method Name: test


Project Name: googledatalab/pydatalab
Commit Name: 04c749f7175fd3e180b1a27884e5a65622cc36be
Time: 2017-07-05
Author: brandondutra@google.com
File Name: solutionbox/code_free_ml/test_mltoolbox/test_training.py
Class Name: TestSpecialCharacters
Method Name: testCommaQuote


Project Name: oddt/oddt
Commit Name: e626254b74ecb6dc71396c1b35237b53a5e35163
Time: 2017-08-23
Author: maciek@wojcikowski.pl
File Name: oddt/datasets.py
Class Name: pdbbind
Method Name: __init__