1a19396d3daea14a189c4335e5df9f4072a764cc,dirty_cat/similarity_encoder.py,SimilarityEncoder,fit,#SimilarityEncoder#Any#Any#,143

Before Change


        n_samples, n_features = X.shape

        if self.categories in [None, "auto"]:
            self._label_encoders_ = [LabelEncoder() for _ in range(n_features)]

        if self.categories == "most_frequent":
            self.categories_ = []

        for i in range(n_features):
            Xi = X[:, i]
            if self.categories in [None, "auto"]:
                le = self._label_encoders_[i]

            if self.categories == "most_frequent":
                self.categories_.append(self.get_most_frequent(Xi))
            elif self.categories == "auto":
                le.fit(Xi)
            else:
                if self.handle_unknown == "error":
                    valid_mask = np.in1d(Xi, self.categories[i])
                    if not np.all(valid_mask):
                        diff = np.unique(Xi[~valid_mask])
                        msg = ("Found unknown categories {0} in column {1}"
                               " during fit".format(diff, i))
                        raise ValueError(msg)
                self.categories_.append(np.array(self.categories[i],
                                                 dtype=object))
                
        if self.categories == "auto":
            self.categories_ = [le.classes_ for le in self._label_encoders_]

        return self

After Change


        for i in range(n_features):
            Xi = X[:, i]
            if self.categories == "auto":
                self.categories_.append(np.unique(Xi))
            elif self.categories == "most_frequent":
                self.categories_.append(self.get_most_frequent(Xi))
            else:
                if self.handle_unknown == "error":
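[Extraction residue, apparently image alt text: "Italian Trulli" — not part of the code snippet, which is truncated above]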
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: dirty-cat/dirty_cat
Commit Name: 1a19396d3daea14a189c4335e5df9f4072a764cc
Time: 2018-11-08
Author: maxime.cuny@inria.fr
File Name: dirty_cat/similarity_encoder.py
Class Name: SimilarityEncoder
Method Name: fit


Project Name: automl/auto-sklearn
Commit Name: dfd23ba635f59f0dbc2c0cdf04445e5f6eda3c66
Time: 2014-12-11
Author: feurerm@informatik.uni-freiburg.de
File Name: AutoSklearn/autosklearn.py
Class Name: AutoSklearnClassifier
Method Name: fit


Project Name: biolab/orange3
Commit Name: 3721aada353b474d5b5ce726fa65de4968fb6b5e
Time: 2013-06-02
Author: jure.zbontar@gmail.com
File Name: Orange/classification/logistic.py
Class Name:
Method Name: