14538c6714f45c0dbd6c95ff351c272c9cf85701,ggplot/stats/stat_bin.py,stat_bin,_calculate,#stat_bin#Any#,24

Before Change


            if breaks is None and binwidth is None:
                _bin_count = 30
                if not self._warning_printed:
                    sys.stderr.write(
                        "stat_bin: binwidth defaulted to range/30." +
                        " Use "binwidth = x" to adjust this.")
                    self._warning_printed = True
            if binwidth:
                _bin_count = int(np.ceil(np.ptp(x))) / binwidth

After Change



        // y values are not needed
        try:
            del data["y"]
        except KeyError:
            pass
        else:
            self._print_warning(_MSG1, 1)

        // TODO: make sure it is passed from the geom
        // data.pop("weight")
        weight = 1
        x_weights = np.ones(len(x)) * weight

        categorical = is_categorical(x.values)
        if categorical:
            x_assignments = x
            x = sorted(set(x))
            width = make_iterable_ntimes(self.params["width"], len(x))
        elif cbook.is_numlike(x.iloc[0]):
            if breaks is None and binwidth is None:
                _bin_count = 30
                self._print_warning(_MSG1, 1)
            if binwidth:
                _bin_count = int(np.ceil(np.ptp(x))) / binwidth

            // Breaks take precedence over the bin count;
            // pandas accepts either the breaks or the number of bins
            _bins_info = breaks or _bin_count
            x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                           right=right, retbins=True)
            width = np.diff(breaks)
            x = [breaks[i] + width[i] / 2
                 for i in range(len(breaks)-1)]
        else:
            // TODO: Create test case
            raise Exception("Cannot recognise the type of x")

        // Create a dataframe with two columns:
        //   - the bins to which each x is assigned
        //   - the weights of each x value
        // Then create a weighted frequency table
        _df = pd.DataFrame({"assignments": x_assignments,
                            "weights": x_weights
                            })
        _wfreq_table = pd.pivot_table(_df, values="weights",
                                      rows=["assignments"], aggfunc=np.sum)

        // For numerical x values, empty bins have no entry
        // in the computed frequency table. We need to add the zeros and,
        // since the frequency table is a Series object, keep it ordered
        if len(_wfreq_table) < len(x):
            empty_bins = set(range(max(x_assignments))) - set(x_assignments)
            for _b in empty_bins:
                _wfreq_table[_b] = 0
            _wfreq_table = _wfreq_table.sort_index()

        y = list(_wfreq_table)
        new_data = pd.DataFrame({"x": x, "y": y, "width": width})

        // Copy the other aesthetics into the new dataframe
        n = len(x)
        for ae in data:
            new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
        return new_data

Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: has2k1/plotnine
Commit Name: 14538c6714f45c0dbd6c95ff351c272c9cf85701
Time: 2014-04-29
Author: has2k1@gmail.com
File Name: ggplot/stats/stat_bin.py
Class Name: stat_bin
Method Name: _calculate


Project Name: elbayadm/attn2d
Commit Name: 8bafae2ee7044529543768eec63d8460d894f5c6
Time: 2017-10-19
Author: myleott@fb.com
File Name: train.py
Class Name:
Method Name: validate