332759f41664989156b7a2994e8c21ebc9e7f798,sklearn/impute/_iterative.py,IterativeImputer,fit_transform,#IterativeImputer#Any#Any#,521

Before Change


            )

        if self.add_indicator:
            self.indicator_ = MissingIndicator(
                missing_values=self.missing_values, error_on_new=False)
            X_trans_indicator = self.indicator_.fit_transform(X)
        else:
            self.indicator_ = None

        if self.estimator is None:
            from ..linear_model import BayesianRidge
            self._estimator = BayesianRidge()
        else:
            self._estimator = clone(self.estimator)

        self.imputation_sequence_ = []

        if hasattr(self._estimator, "random_state"):
            self._estimator.random_state = self.random_state_

        self._min_value = -np.inf if self.min_value is None else self.min_value
        self._max_value = np.inf if self.max_value is None else self.max_value

        self.initial_imputer_ = None
        X, Xt, mask_missing_values = self._initial_imputation(X)
        if self.max_iter == 0 or np.all(mask_missing_values):
            self.n_iter_ = 0
            return Xt

        # Edge case: a single feature. We return the initial ...
        if Xt.shape[1] == 1:
            self.n_iter_ = 0
            return Xt

        # order in which to impute
        # note this is probably too slow for large feature data (d > 100000)
        # and a better way would be good.
        # see: https://goo.gl/KyCNwj and subsequent comments
        ordered_idx = self._get_ordered_idx(mask_missing_values)
        self.n_features_with_missing_ = len(ordered_idx)

        abs_corr_mat = self._get_abs_corr_mat(Xt)

        n_samples, n_features = Xt.shape
        if self.verbose > 0:
            print("[IterativeImputer] Completing matrix with shape %s"
                  % (X.shape,))
        start_t = time()
        if not self.sample_posterior:
            Xt_previous = Xt.copy()
            normalized_tol = self.tol * np.max(np.abs(X[~mask_missing_values]))
        for self.n_iter_ in range(1, self.max_iter + 1):
            if self.imputation_order == "random":
                ordered_idx = self._get_ordered_idx(mask_missing_values)

            for feat_idx in ordered_idx:
                neighbor_feat_idx = self._get_neighbor_feat_idx(n_features,
                                                                feat_idx,
                                                                abs_corr_mat)
                Xt, estimator = self._impute_one_feature(
                    Xt, mask_missing_values, feat_idx, neighbor_feat_idx,
                    estimator=None, fit_mode=True)
                estimator_triplet = _ImputerTriplet(feat_idx,
                                                    neighbor_feat_idx,
                                                    estimator)
                self.imputation_sequence_.append(estimator_triplet)

            if self.verbose > 1:
                print("[IterativeImputer] Ending imputation round "
                      "%d/%d, elapsed time %0.2f"
                      % (self.n_iter_, self.max_iter, time() - start_t))

            if not self.sample_posterior:
                inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf,
                                          axis=None)
                if self.verbose > 0:
                    print("[IterativeImputer] "
                          "Change: {}, scaled tolerance: {} ".format(
                            inf_norm, normalized_tol))
                if inf_norm < normalized_tol:
                    if self.verbose > 0:
                        print("[IterativeImputer] Early stopping criterion "
                              "reached.")
                    break
                Xt_previous = Xt.copy()
        else:
            if not self.sample_posterior:
                warnings.warn("[IterativeImputer] Early stopping criterion not"
                              " reached.", ConvergenceWarning)
        Xt[~mask_missing_values] = X[~mask_missing_values]

        if self.add_indicator:
            Xt = np.hstack((Xt, X_trans_indicator))
        return Xt

    def transform(self, X):
        Imputes all missing values in X.

After Change


        # Edge case: a single feature. We return the initial ...
        if Xt.shape[1] == 1:
            self.n_iter_ = 0
            return super()._concatenate_indicator(Xt, X_indicator)

        # order in which to impute
        # note this is probably too slow for large feature data (d > 100000)
        # and a better way would be good.
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 5

Instances


Project Name: scikit-learn/scikit-learn
Commit Name: 332759f41664989156b7a2994e8c21ebc9e7f798
Time: 2019-10-28
Author: g.lemaitre58@gmail.com
File Name: sklearn/impute/_iterative.py
Class Name: IterativeImputer
Method Name: fit_transform


Project Name: pavlin-policar/openTSNE
Commit Name: 46379cefade313fee9fa99c8a6ebb62668918f82
Time: 2018-10-28
Author: pavlin.g.p@gmail.com
File Name: fastTSNE/tsne.py
Class Name: TSNE
Method Name: generate_initial_coordinates


Project Name: EducationalTestingService/skll
Commit Name: 46c488aeb460cebff7b536c2450fcb249cea02e1
Time: 2013-09-04
Author: mheilman@ets.org
File Name: skll/data.py
Class Name:
Method Name: _features_for_gen_func


Project Name: KrishnaswamyLab/PHATE
Commit Name: e5a98adc53c682047bd6873c127c9c94588b28ab
Time: 2019-12-01
Author: scottgigante@gmail.com
File Name: Python/phate/mds.py
Class Name:
Method Name: cmdscale_fast