de074e29f36c33d5a8627f9e8ab92f1e0fd46d82,AutoSklearn/implementations/OneHotEncoder.py,OneHotEncoder,_transform,#OneHotEncoder#Any#,228

Before Change


    def _transform(self, X):
        """Encode the categorical columns of X as a one-hot matrix.

        Assumes X contains only categorical features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Non-negative integer category codes. It is converted to a dense
            integer array by ``check_arrays`` before encoding.

        Returns
        -------
        out : CSR sparse matrix if ``self.sparse`` is true, else a dense array
            One row per sample. When ``self.n_values == "auto"`` only the
            columns listed in ``self.active_features_`` are kept.

        Raises
        ------
        ValueError
            If X contains a negative value, or if its number of columns
            differs from ``len(self.feature_indices_)``.
        """
        # ``int`` is what the removed ``np.int`` alias used to resolve to.
        X = check_arrays(X, sparse_format="dense", dtype=int)[0]
        if np.any(X < 0):
            raise ValueError("X needs to contain only non-negative integers.")
        n_samples, n_features = X.shape

        indices = self.feature_indices_
        if n_features != len(indices):
            raise ValueError("X has different shape than during fitting."
                             " Expected %d, got %d."
                             % (len(indices), n_features))

        # Disabled offset-based bounds check and column computation, kept for
        # reference; the per-feature lookup loop below is used instead.
        # if (np.max(X, axis=0) >= self.n_values_).any():
        #     raise ValueError("Feature out of bounds. Try setting n_values.")

        # column_indices = (X + indices[:-1]).ravel()

        # Row index of every entry, feature-major: all samples for the first
        # feature, then all samples for the second, and so on. This matches
        # the order in which column_indices is filled below.
        row_indices = np.tile(np.arange(n_samples, dtype=np.int32),
                              n_features)

        column_indices = []
        max_n_features = 0
        for idx in range(n_features):
            # TODO
            # NOTE(review): indices[idx] is indexed by category value, so a
            # value it does not contain presumably raises here - confirm.
            indices_idx = indices[idx]
            feature_columns = [indices_idx[value] for value in X[:, idx]]
            column_indices.extend(feature_columns)
            # Running maximum over this feature's columns only; equivalent to
            # rescanning the whole accumulated list, but linear overall.
            max_n_features = max(max_n_features, max(feature_columns))
        # The highest index we find is zero-based...
        max_n_features += 1

        data = np.ones(n_samples * n_features)
        out = sparse.coo_matrix((data, (row_indices, column_indices)),
                                shape=(n_samples, max_n_features),
                                dtype=self.dtype).tocsr()
        if self.n_values == "auto":
            out = out[:, self.active_features_]

        return out if self.sparse else out.toarray()

    def transform(self, X):
        Transform X using one-hot encoding.

After Change


            column_indices_idx = [feature_indices_idx.get(x, offset)
                                  for x in X[:,idx]]
            data_idx = [1 if feature_indices_idx.get(x) is not None else 0
                        for x in X[:, idx]]

            column_indices.extend(column_indices_idx)
            data.extend(data_idx)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances


Project Name: automl/auto-sklearn
Commit Name: de074e29f36c33d5a8627f9e8ab92f1e0fd46d82
Time: 2014-12-15
Author: feurerm@informatik.uni-freiburg.de
File Name: AutoSklearn/implementations/OneHotEncoder.py
Class Name: OneHotEncoder
Method Name: _transform


Project Name: pyannote/pyannote-audio
Commit Name: 89da05ea9d6de97da9bd21949a26ceb0042ef361
Time: 2017-07-19
Author: hbredin@users.noreply.github.com
File Name: pyannote/audio/features/with_librosa.py
Class Name: LibrosaFeatureExtractor
Method Name: __call__


Project Name: pyannote/pyannote-audio
Commit Name: 89da05ea9d6de97da9bd21949a26ceb0042ef361
Time: 2017-07-19
Author: hbredin@users.noreply.github.com
File Name: pyannote/audio/features/utils.py
Class Name: RawAudio
Method Name: __call__