4de234b54989f65a8c383deed7823ebe42abd704,perf_tests/batch_vs_single.py,,,#,13

Before Change


    for additional_x_vars in [0, 10, 40]:
        # A lower fraction means more ties.
        # The original rossi dataset has 0.113.
        for fraction in np.linspace(0.01, 0.99, 8):
            n = n_copies * ROSSI_ROWS
            print(n_copies, additional_x_vars, fraction)

            df = pd.concat([ROSSI] * n_copies)
            n_unique_durations = int(df.shape[0] * fraction) + 1
            unique_durations = np.round(np.random.exponential(10, size=n_unique_durations), 5)

            df["week"] = np.tile(unique_durations, int(np.ceil(1 / fraction)))[: df.shape[0]]

            for i in range(additional_x_vars):
                df["%i" % i] = np.random.randn(n)

            batch_results = []
            for _ in range(3):
                cph_batch = CoxPHFitter()
                start_time = time()
                cph_batch.fit(df, "week", "arrest", batch_mode=True)
                batch_results.append(time() - start_time)

            single_results = []
            for _ in range(3):
                cph_single = CoxPHFitter()
                start_time = time()
                cph_single.fit(df, "week", "arrest", batch_mode=False)
                single_results.append(time() - start_time)

            batch_time = min(batch_results)
            single_time = min(single_results)
            print({"batch": batch_time, "single": single_time})
            results[(n, fraction, df.shape[1] - 2)] = {"batch": batch_time, "single": single_time}

# Turn the timing dict into a flat table: one row per key tuple, with the
# three key components exposed as the N / frac / N_vars columns.
results = (
    pd.DataFrame(results)
    .transpose()
    .sort_index()
    .reset_index()
    .rename(columns={"level_0": "N", "level_1": "frac", "level_2": "N_vars"})
)
# ratio is batch time divided by single time for the same row.
results["ratio"] = results["batch"].div(results["single"])

After Change


# Benchmark sweep: time CoxPHFitter.fit with batch_mode=True versus
# batch_mode=False across dataset size, number of extra covariates, and the
# fraction of unique durations (a lower fraction produces more tied durations).

# perf_counter is a monotonic, high-resolution clock intended for measuring
# short durations; time() can jump if the system clock is adjusted.
from time import perf_counter

# Keyed by (n_rows, fraction, n_columns - 2); each value holds the fastest of
# three "batch" and "single" fit times, in seconds.
results = {}


for n_copies, additional_x_vars, fraction in product(
    [1, 3, 6, 10, 50, 500, 2500], [0, 10, 20], np.logspace(-6, np.log10(0.99), 8)
):
    try:
        print(n_copies, additional_x_vars, fraction)

        # NOTE(review): ROSSI_ROWS is presumably the row count of ROSSI, so n
        # should equal df.shape[0] -- confirm where it is defined.
        n = n_copies * ROSSI_ROWS
        df = pd.concat([ROSSI] * n_copies)
        n_unique_durations = int(df.shape[0] * fraction) + 1
        unique_durations = np.round(np.random.exponential(10, size=n_unique_durations), 5)

        # Repeat the unique durations often enough to cover every row, then
        # trim to the row count.
        df["week"] = np.tile(unique_durations, int(np.ceil(1 / fraction)))[: df.shape[0]]

        # Append standard-normal noise columns named "0", "1", ...
        for i in range(additional_x_vars):
            df["%i" % i] = np.random.randn(n)

        batch_results = []
        for _ in range(3):
            cph_batch = CoxPHFitter()
            start_time = perf_counter()
            cph_batch.fit(df, "week", "arrest", batch_mode=True)
            batch_results.append(perf_counter() - start_time)

        single_results = []
        for _ in range(3):
            cph_single = CoxPHFitter()
            start_time = perf_counter()
            cph_single.fit(df, "week", "arrest", batch_mode=False)
            single_results.append(perf_counter() - start_time)

        # Keep the fastest of the three runs for each mode.
        batch_time = min(batch_results)
        single_time = min(single_results)
        timing = {"batch": batch_time, "single": single_time}
        print(timing)
        # NOTE(review): the "- 2" presumably excludes the "week" and "arrest"
        # columns from the variable count -- confirm against ROSSI's schema.
        results[(n, fraction, df.shape[1] - 2)] = timing
    except KeyboardInterrupt:
        # Ctrl-C stops the sweep early; results gathered so far are kept.
        break

# Turn the timing dict into a flat table: one row per key tuple, with the
# three key components exposed as the N / frac / N_vars columns.
results = (
    pd.DataFrame(results)
    .transpose()
    .sort_index()
    .reset_index()
    .rename(columns={"level_0": "N", "level_1": "frac", "level_2": "N_vars"})
)
# ratio is batch time divided by single time for the same row.
results["ratio"] = results["batch"].div(results["single"])
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: CamDavidsonPilon/lifelines
Commit Name: 4de234b54989f65a8c383deed7823ebe42abd704
Time: 2020-07-09
Author: cam.davidson.pilon@gmail.com
File Name: perf_tests/batch_vs_single.py
Class Name:
Method Name:


Project Name: deepchem/deepchem
Commit Name: 5273d873b9154ad706bfdfcf4fc8e2354fd425c0
Time: 2016-01-16
Author: bharath.ramsundar@gmail.com
File Name: deepchem/models/__init__.py
Class Name: Model
Method Name: predict


Project Name: has2k1/plotnine
Commit Name: f91ee4a6cdeb4548d0e71ab42dae101166eedaf4
Time: 2014-02-27
Author: greg@yhatHQ.com
File Name: ggplot/components/colors.py
Class Name:
Method Name: color_gen