f16992b25bb153df3ab87c5111db2a101cf68c73,bugbug/models/testselect.py,TestSelectModel,train_test_split,#TestSelectModel#Any#Any#,91

Before Change


    # To properly test the performance of our model, we need to split the data
    # according to time: we train on older pushes and evaluate on newer pushes.
    def train_test_split(self, X, y):
        """Split X and y into a leading training part and a trailing test part.

        The split index is the total number of counted history entries that
        belong to the first 90% of pushes (rounded down) found in the test
        scheduling history for ``self.granularity``.

        Args:
            X: Sliceable collection of samples.
            y: Sliceable collection of targets, split at the same index as X.

        Returns:
            A tuple ``(X_train, X_test, y_train, y_test)``.
        """
        # Number of counted history entries per push, keyed by the first
        # revision of each entry; the OrderedDict keeps first-seen order.
        pushes = OrderedDict()
        for test_data in test_scheduling.get_test_scheduling_history(self.granularity):
            rev = test_data["revs"][0]
            name = test_data["name"]

            # At "label" granularity, only entries named "test-..." are counted.
            if self.granularity == "label" and not name.startswith("test-"):
                continue

            if rev in pushes:
                pushes[rev] += 1
            else:
                pushes[rev] = 1

        # The first 90% of the pushes (rounded down) make up the training set.
        train_push_len = math.floor(0.9 * len(pushes))
        train_pushes = list(pushes.values())[:train_push_len]
        # Total number of entries contributed by the training pushes.
        train_len = sum(count for count in train_pushes)
        print(
            f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
        )
        # NOTE(review): assumes the rows of X and y are in the same order as
        # the scheduling history, oldest first -- confirm against the caller.
        return X[:train_len], X[train_len:], y[:train_len], y[train_len:]

After Change


    # To properly test the performance of our model, we need to split the data
    # according to time: we train on older pushes and evaluate on newer pushes.
    def train_test_split(self, X, y):
        """Split X and y into a leading training part and a trailing test part.

        The split index is the number of entries ("failures" plus "passes")
        in the first ``train_push_len`` pushes returned by ``self.get_pushes()``.

        Args:
            X: Sliceable collection of samples.
            y: Sliceable collection of targets, split at the same index as X.

        Returns:
            A tuple ``(X_train, X_test, y_train, y_test)``.
        """
        # get_pushes() supplies both the push sequence and the number of
        # leading pushes that belong to the training set.
        pushes, train_push_len = self.get_pushes()
        # Each training push contributes one entry per failure and per pass.
        train_len = sum(
            len(push["failures"]) + len(push["passes"])
            for push in pushes[:train_push_len]
        )
        print(
            f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
        )
        # NOTE(review): assumes the rows of X and y are in the same order as
        # the pushes returned by get_pushes() -- confirm against the caller.
        return X[:train_len], X[train_len:], y[:train_len], y[train_len:]

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 11

Instances

Link

Project Name: mozilla/bugbug

Commit Name: f16992b25bb153df3ab87c5111db2a101cf68c73

Time: 2020-04-09

Author: mcastelluccio@mozilla.com

File Name: bugbug/models/testselect.py

Class Name: TestSelectModel

Method Name: train_test_split

Link

Project Name: RaRe-Technologies/gensim

Commit Name: 8e70d6ea0e250736a6cb298eaec6a1dc572fd467

Time: 2010-02-21

Author: piskvorky@92d0401f-a546-4972-9173-107b360ed7e5

File Name: matutils.py

Class Name: MmWriter

Method Name: writeTfidf

Link

Project Name: RaRe-Technologies/gensim

Commit Name: 67fb64104da2ac16850d6a0d3cc2098086b47bbb

Time: 2010-02-21

Author: radimrehurek@seznam.cz

File Name: matutils.py

Class Name: MmWriter

Method Name: writeTfidf