a5ce019480c33dba9ee92b453800b30cb6c8066b,snorkel/learning/gen_learning.py,GenerativeModel,train,#GenerativeModel#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#,72

Before Change


        if cardinality is None:
            // If candidate_ranges is provided, use this to determine cardinality
            if candidate_ranges is not None:
                cardinality = max(map(max, candidate_ranges))
            else:
                // This is just an annoying hack for LIL sparse matrices...
                try:
                    lmax = L.max()
                except AttributeError:
                    lmax = L.tocoo().max()

                if lmax > 2:
                    cardinality = lmax
                elif lmax < 2:
                    cardinality = 2
                else:
                    raise ValueError(
                        "L.max() == %s, cannot infer cardinality." % lmax)
            print("Inferred cardinality: %s" % cardinality)
        self.cardinality = cardinality

        // Priors for LFs default to fixed prior value
        // NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        // runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_prior_weights is None:
            LF_acc_prior_weights = [LF_acc_prior_weight_default for _ in range(n)]
        else:
            LF_acc_prior_weights = list(copy(LF_acc_prior_weights))

        // LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        // If supervised labels are provided, add them as a fixed LF with prior
        // Note: For large L this column stack operation could be very
        // inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L, labels])
            is_fixed.append(True)
            LF_acc_prior_weights.append(label_prior_weight)
            n += 1

        // Reduce overhead of tracking indices by converting L to a CSR sparse matrix.
        L = sparse.csr_matrix(L).copy()

        // If candidate_ranges is provided, remap the values of L using
        // candidate_ranges. This "scoped categorical" approach allows learning
        // and inference to be efficient even with very large cardinality, as
        // we only sample relevant values for each candidate. Also set
        // per-candidate cardinalities according to candidate_ranges if not None,
        // else as constant value.
        self.cardinalities = self.cardinality * np.ones(m, dtype=np.int64)
        self.candidate_ranges = candidate_ranges
        if self.candidate_ranges is not None:
            L, self.cardinalities, _ = self._remap_scoped_categoricals(L, 
                self.candidate_ranges)

        // Shuffle the data points, cardinalities, and candidate_ranges
        idxs = range(m)
        self.rng.shuffle(idxs)
        L = L[idxs, :]
        if candidate_ranges is not None:
            self.cardinalities = self.cardinalities[idxs]
            c_ranges_reshuffled = []
            for i in idxs:
                c_ranges_reshuffled.append(self.candidate_ranges[i])
            self.candidate_ranges = c_ranges_reshuffled

        // Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, init_deps, init_class_prior, LF_acc_prior_weights, is_fixed, self.cardinalities)
        fg = NumbSkull(
            n_inference_epoch=0,
            n_learning_epoch=epochs, 
            stepsize=step_size,
            decay=decay,
            reg_param=reg_param,
            regularization=reg_type,
            truncation=truncation,
            quiet=(not verbose),
            verbose=verbose, 
            learn_non_evidence=True,
            burn_in=burn_in,
            nthreads=threads
        )
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_prior_weights, is_fixed)

        // Store info from factor graph
        if self.candidate_ranges is not None:
            self.cardinality_for_stats = int(max(self.cardinalities))
        else:
            self.cardinality_for_stats = self.cardinality
        self.learned_weights = fg.factorGraphs[0].weight_value
        weight, variable, factor, ftv, domain_mask, n_edges =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_prior_weights, is_fixed,
                [self.cardinality_for_stats])

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality

    def _remap_scoped_categoricals(self, L_in, candidate_ranges):

After Change


        if cardinality is None:
            // If candidate_ranges is provided, use this to determine cardinality
            if candidate_ranges is not None:
                cardinality = max(list(map(max, candidate_ranges)))
            else:
                // This is just an annoying hack for LIL sparse matrices...
                try:
                    lmax = L.max()
                except AttributeError:
                    lmax = L.tocoo().max()

                if lmax > 2:
                    cardinality = lmax
                elif lmax < 2:
                    cardinality = 2
                else:
                    raise ValueError(
                        "L.max() == %s, cannot infer cardinality." % lmax)
            print("Inferred cardinality: %s" % cardinality)
        self.cardinality = cardinality

        // Priors for LFs default to fixed prior value
        // NOTE: Setting default != 0.5 creates a (fixed) factor which increases
        // runtime (by ~0.5x that of a non-fixed factor)...
        if LF_acc_prior_weights is None:
            LF_acc_prior_weights = [LF_acc_prior_weight_default for _ in range(n)]
        else:
            LF_acc_prior_weights = list(copy(LF_acc_prior_weights))

        // LF weights are un-fixed
        is_fixed = [False for _ in range(n)]

        // If supervised labels are provided, add them as a fixed LF with prior
        // Note: For large L this column stack operation could be very
        // inefficient, can consider refactoring...
        if labels is not None:
            labels = labels.reshape(m, 1)
            L = sparse.hstack([L, labels])
            is_fixed.append(True)
            LF_acc_prior_weights.append(label_prior_weight)
            n += 1

        // Reduce overhead of tracking indices by converting L to a CSR sparse matrix.
        L = sparse.csr_matrix(L).copy()

        // If candidate_ranges is provided, remap the values of L using
        // candidate_ranges. This "scoped categorical" approach allows learning
        // and inference to be efficient even with very large cardinality, as
        // we only sample relevant values for each candidate. Also set
        // per-candidate cardinalities according to candidate_ranges if not None,
        // else as constant value.
        self.cardinalities = self.cardinality * np.ones(m, dtype=np.int64)
        self.candidate_ranges = candidate_ranges
        if self.candidate_ranges is not None:
            L, self.cardinalities, _ = self._remap_scoped_categoricals(L, 
                self.candidate_ranges)

        // Shuffle the data points, cardinalities, and candidate_ranges
        idxs = list(range(m))
        self.rng.shuffle(idxs)
        L = L[idxs, :]
        if candidate_ranges is not None:
            self.cardinalities = self.cardinalities[idxs]
            c_ranges_reshuffled = []
            for i in idxs:
                c_ranges_reshuffled.append(self.candidate_ranges[i])
            self.candidate_ranges = c_ranges_reshuffled

        // Compile factor graph
        self._process_dependency_graph(L, deps)
        weight, variable, factor, ftv, domain_mask, n_edges = self._compile(
            L, init_deps, init_class_prior, LF_acc_prior_weights, is_fixed, self.cardinalities)
        fg = NumbSkull(
            n_inference_epoch=0,
            n_learning_epoch=epochs, 
            stepsize=step_size,
            decay=decay,
            reg_param=reg_param,
            regularization=reg_type,
            truncation=truncation,
            quiet=(not verbose),
            verbose=verbose, 
            learn_non_evidence=True,
            burn_in=burn_in,
            nthreads=threads
        )
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        if timer is not None:
            timer.start()
        fg.learning(out=False)
        if timer is not None:
            timer.end()
        self._process_learned_weights(L, fg, LF_acc_prior_weights, is_fixed)

        // Store info from factor graph
        if self.candidate_ranges is not None:
            self.cardinality_for_stats = int(max(self.cardinalities))
        else:
            self.cardinality_for_stats = self.cardinality
        self.learned_weights = fg.factorGraphs[0].weight_value
        weight, variable, factor, ftv, domain_mask, n_edges =\
            self._compile(sparse.coo_matrix((1, n), L.dtype), init_deps,
                init_class_prior, LF_acc_prior_weights, is_fixed,
                [self.cardinality_for_stats])

        variable["isEvidence"] = False
        weight["isFixed"] = True
        weight["initialValue"] = fg.factorGraphs[0].weight_value

        fg.factorGraphs = []
        fg.loadFactorGraph(weight, variable, factor, ftv, domain_mask, n_edges)

        self.fg = fg
        self.nlf = n
        self.cardinality = cardinality

    def _remap_scoped_categoricals(self, L_in, candidate_ranges):

In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 7

Instances

Link

Project Name: snorkel-team/snorkel

Commit Name: a5ce019480c33dba9ee92b453800b30cb6c8066b

Time: 2017-12-14

Author: ajratner@gmail.com

File Name: snorkel/learning/gen_learning.py

Class Name: GenerativeModel

Method Name: train

Link

Project Name: astroML/astroML

Commit Name: 43b8de281630e844fcaab814b69285edb3e0d426

Time: 2014-10-18

Author: jakevdp@gmail.com

File Name: astroML/datasets/LINEAR_sample.py

Class Name: LINEARdata

Method Name: __init__

Link

Project Name: sony/nnabla

Commit Name: 271557a8fa949cbc9b833b81650c0e7e7546cfdc

Time: 2017-07-14

Author: Takuya.Narihira@jp.sony.com

File Name: examples/vision/imagenet/tiny_imagenet_data.py

Class Name: TinyImageNetDataSource

Method Name: __init__

Link

Project Name: sony/nnabla

Commit Name: 6c1c2c7348f7ec3b780bd9d78453ac747af00b52

Time: 2017-12-21

Author: Yukio.Oobuchi@sony.com

File Name: python/src/nnabla/utils/data_source_implements.py

Class Name: ConcatDataSource

Method Name: __init__