def _transform(self, X):
    """One-hot encode ``X`` with the per-feature mappings in ``feature_indices_``.

    Assumes X contains only categorical features.

    Parameters
    ----------
    X : array-like
        Input passed through ``check_arrays`` (dense, integer dtype) and
        then unpacked as ``(n_samples, n_features)``.

    Returns
    -------
    out : sparse matrix or array
        CSR matrix when ``self.sparse`` is truthy, otherwise the result of
        calling ``toarray()`` on it. When ``self.n_values == "auto"`` only
        the columns listed in ``self.active_features_`` are kept.

    Raises
    ------
    ValueError
        If ``X`` contains a negative entry, or if its number of columns
        differs from ``len(self.feature_indices_)``.
    """
    # Builtin ``int`` is what the removed ``np.int`` alias pointed to.
    X = check_arrays(X, sparse_format="dense", dtype=int)[0]
    if np.any(X < 0):
        raise ValueError("X needs to contain only non-negative integers.")
    n_samples, n_features = X.shape

    indices = self.feature_indices_
    if n_features != len(indices):
        raise ValueError("X has different shape than during fitting."
                         " Expected %d, got %d."
                         % (len(indices), n_features))

    # Disabled in the original source (kept for reference): the vectorized
    # bounds check and the offset-based column computation.
    # if (np.max(X, axis=0) >= self.n_values_).any():
    #     raise ValueError("Feature out of bounds. Try setting n_values.")
    # column_indices = (X + indices[:-1]).ravel()

    # Row ids repeat once per feature, matching the feature-major order in
    # which ``column_indices`` is filled below.
    row_indices = np.tile(np.arange(n_samples, dtype=np.int32),
                          n_features)

    column_indices = []
    max_n_features = 0
    for idx in range(n_features):
        # TODO (left open in the original source).
        # Presumably a mapping from category value to output column; a
        # value missing from it raises whatever that lookup raises.
        value_to_column = indices[idx]
        feature_columns = [value_to_column[value] for value in X[:, idx]]
        column_indices.extend(feature_columns)
        # Only the newly added columns can raise the running maximum, so
        # there is no need to rescan the whole accumulated list.
        max_n_features = max(max_n_features, max(feature_columns))

    # The highest index we find is zero-based...
    max_n_features += 1

    # NOTE(review): the output width comes from the largest column index
    # seen in this call, not from a fitted attribute -- confirm intended.
    data = np.ones(n_samples * n_features)
    out = sparse.coo_matrix((data, (row_indices, column_indices)),
                            shape=(n_samples, max_n_features),
                            dtype=self.dtype).tocsr()

    if self.n_values == "auto":
        out = out[:, self.active_features_]

    return out if self.sparse else out.toarray()
def transform(self, X):
Transform X using one-hot encoding.
After Change
column_indices_idx = [feature_indices_idx.get(x, offset)
for x in X[:,idx]]
data_idx = [1 if feature_indices_idx.get(x) is not None else 0
for x in X[:, idx]]
column_indices.extend(column_indices_idx)
data.extend(data_idx)