f16992b25bb153df3ab87c5111db2a101cf68c73,bugbug/models/testselect.py,TestSelectModel,train_test_split,#TestSelectModel#Any#Any#,91

Before Change


    # To properly test the performance of our model, we need to split the data
    # according to time: we train on older pushes and evaluate on newer pushes.
    def train_test_split(self, X, y):
        """Split X and y by time into a training part and an evaluation part.

        The scheduling history is walked once to count how many entries
        belong to each push, keyed by the first revision of the entry. The
        first 90% of the pushes (in the order they first appear) make up the
        training set; the number of entries they account for is the index at
        which X and y are cut.

        NOTE(review): this presumes the rows of X and y follow the same order
        as the scheduling history -- confirm against the caller.

        Returns:
            A tuple (X_train, X_test, y_train, y_test).
        """
        jobs_per_push = OrderedDict()
        history = test_scheduling.get_test_scheduling_history(self.granularity)
        for entry in history:
            first_rev = entry["revs"][0]
            job_name = entry["name"]

            # At "label" granularity only entries named "test-*" are counted;
            # at any other granularity every entry is counted.
            if self.granularity != "label" or job_name.startswith("test-"):
                jobs_per_push[first_rev] = jobs_per_push.get(first_rev, 0) + 1

        train_push_len = math.floor(0.9 * len(jobs_per_push))
        training_counts = list(jobs_per_push.values())[:train_push_len]
        train_len = sum(training_counts)
        print(
            f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
        )

        # Same cut point for features and labels: [0, train_len) / [train_len, end).
        train_part = slice(None, train_len)
        test_part = slice(train_len, None)
        return X[train_part], X[test_part], y[train_part], y[test_part]

After Change


    def train_test_split(self, X, y):
        pushes, train_push_len = self.get_pushes()
        train_len = sum(
            len(push["failures"]) + len(push["passes"])
            for push in pushes[:train_push_len]
        )
        print(
            f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 10

Instances


Project Name: mozilla/bugbug
Commit Name: f16992b25bb153df3ab87c5111db2a101cf68c73
Time: 2020-04-09
Author: mcastelluccio@mozilla.com
File Name: bugbug/models/testselect.py
Class Name: TestSelectModel
Method Name: train_test_split


Project Name: mathics/Mathics
Commit Name: d3ef32acab314fc2c145d890b0a23e57ac9dd2f8
Time: 2016-09-01
Author: Bernhard.Liebl@gmx.org
File Name: mathics/builtin/colors.py
Class Name:
Method Name: convert


Project Name: mozilla/bugbug
Commit Name: f16992b25bb153df3ab87c5111db2a101cf68c73
Time: 2020-04-09
Author: mcastelluccio@mozilla.com
File Name: bugbug/models/testselect.py
Class Name: TestSelectModel
Method Name: train_test_split


Project Name: probcomp/bayeslite
Commit Name: 68a7d5553fa4cef09e0159ac5e100a6424172386
Time: 2015-09-24
Author: gremio@acm.org
File Name: src/metamodels/crosscat.py
Class Name: CrosscatMetamodel
Method Name: create_generator