# Determine the per-column category lists (self.categories_) from X and
# return self.
# NOTE(review): the enclosing def is not visible and this excerpt has lost
# its indentation; the nesting described in the comments below is inferred
# from the if/elif/else keywords only -- confirm against the original file.
# X must support .shape and 2-D column slicing (X[:, i]).
# n_samples is unpacked but not used in the lines shown here.
n_samples, n_features = X.shape
# One LabelEncoder per column when categories is None or "auto".
if self.categories in [None, "auto"]:
self._label_encoders_ = [LabelEncoder() for _ in range(n_features)]
# This is the only place in the visible lines where categories_ is reset to
# an empty list.
# NOTE(review): the explicit-categories branch further down also appends to
# self.categories_; presumably it is initialised elsewhere -- verify.
if self.categories == "most_frequent":
self.categories_ = []
for i in range(n_features):
# Xi is the i-th column of X.
Xi = X[:, i]
if self.categories in [None, "auto"]:
le = self._label_encoders_[i]
# "most_frequent": delegate to self.get_most_frequent (defined outside this
# excerpt) and store whatever it returns for this column.
if self.categories == "most_frequent":
self.categories_.append(self.get_most_frequent(Xi))
# "auto": fit this column's encoder on the column values.
elif self.categories == "auto":
le.fit(Xi)
# Otherwise self.categories is indexed per column (self.categories[i]).
# NOTE(review): categories=None would also reach this else -- confirm that
# None is handled before this point.
else:
if self.handle_unknown == "error":
# Boolean mask: True where the column value is one of self.categories[i].
# NOTE(review): np.in1d is deprecated in recent NumPy releases in favour
# of np.isin -- confirm the supported NumPy range.
valid_mask = np.in1d(Xi, self.categories[i])
if not np.all(valid_mask):
# Report each distinct offending value once, with the column index.
diff = np.unique(Xi[~valid_mask])
msg = ("Found unknown categories {0} in column {1}"
" during fit".format(diff, i))
raise ValueError(msg)
# Store the supplied categories for this column as an object-dtype array.
# NOTE(review): presumably this append runs regardless of handle_unknown;
# the lost indentation makes that impossible to confirm from here.
self.categories_.append(np.array(self.categories[i],
dtype=object))
# For "auto", categories_ is the list of each fitted encoder's classes_.
if self.categories == "auto":
self.categories_ = [le.classes_ for le in self._label_encoders_]
return self
After Change
for i in range(n_features):
Xi = X[:, i]
if self.categories == "auto":
self.categories_.append(np.unique(Xi))
elif self.categories == "most_frequent":
self.categories_.append(self.get_most_frequent(Xi))
else:
if self.handle_unknown == "error":