83c8b3a4f321c4a924d344df275ddc403d3ca835,scipy/stats/_binned_statistic.py,,binned_statistic_dd,#Any#Any#Any#Any#Any#Any#,338

Before Change



    try:
        // `values` is an ND-array.
        Vdim, Vlen = values.shape
    except (AttributeError, ValueError):
        // Sample is a sequence of 1D arrays.
        values = np.atleast_2d(values)

After Change


        sample = np.atleast_2d(sample).T
        Dlen, Ndim = sample.shape

    initVdim = np.ndim(values)
    values = np.atleast_2d(values)
    Vdim, Vlen = values.shape

    // Make sure `values` match `sample`
    if(statistic is not "count" and Vlen != Dlen):
        raise AttributeError("The number of `values` elements must match the "
                             "length of each `sample` dimension.")

    nbin = np.empty(Ndim, int)    // Number of bins in each dimension
    edges = Ndim * [None]         // Bin edges for each dim (will be 2D array)
    dedges = Ndim * [None]        // Spacing between edges (will be 2D array)

    try:
        M = len(bins)
        if M != Ndim:
            raise AttributeError("The dimension of bins must be equal "
                                 "to the dimension of the sample x.")
    except TypeError:
        bins = Ndim * [bins]

    // Select range for each dimension
    // Used only if number of bins is given.
    if range is None:
        smin = np.atleast_1d(np.array(sample.min(axis=0), float))
        smax = np.atleast_1d(np.array(sample.max(axis=0), float))
    else:
        smin = np.zeros(Ndim)
        smax = np.zeros(Ndim)
        for i in xrange(Ndim):
            smin[i], smax[i] = range[i]

    // Make sure the bins have a finite width.
    for i in xrange(len(smin)):
        if smin[i] == smax[i]:
            smin[i] = smin[i] - .5
            smax[i] = smax[i] + .5

    // Create edge arrays
    for i in xrange(Ndim):
        if np.isscalar(bins[i]):
            nbin[i] = bins[i] + 2  // +2 for outlier bins
            edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1)
        else:
            edges[i] = np.asarray(bins[i], float)
            nbin[i] = len(edges[i]) + 1  // +1 for outlier bins
        dedges[i] = np.diff(edges[i])

    nbin = np.asarray(nbin)

    // Compute the bin number each sample falls into, in each dimension
    sampBin = {}
    for i in xrange(Ndim):
        sampBin[i] = np.digitize(sample[:, i], edges[i])

    // Using `digitize`, values that fall on an edge are put in the right bin.
    // For the rightmost bin, we want values equal to the right
    // edge to be counted in the last bin, and not as an outlier.
    for i in xrange(Ndim):
        // Find the rounding precision
        decimal = int(-np.log10(dedges[i].min())) + 6
        // Find which points are on the rightmost edge.
        on_edge = np.where(np.around(sample[:, i], decimal) ==
                           np.around(edges[i][-1], decimal))[0]
        // Shift these points one bin to the left.
        sampBin[i][on_edge] -= 1

    // Compute the sample indices in the flattened statistic matrix.
    ni = nbin.argsort()
    // `binnumbers` is which bin (in linearized `Ndim` space) each sample goes
    binnumbers = np.zeros(Dlen, int)
    for i in xrange(0, Ndim - 1):
        binnumbers += sampBin[ni[i]] * nbin[ni[i + 1:]].prod()
    binnumbers += sampBin[ni[-1]]

    result = np.empty([Vdim, nbin.prod()], float)

    if statistic == "mean":
        result.fill(np.nan)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in xrange(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            result[vv, a] = flatsum[a] / flatcount[a]
    elif statistic == "std":
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = flatcount.nonzero()
        for vv in xrange(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            flatsum2 = np.bincount(binnumbers, values[vv] ** 2)
            result[vv, a] = np.sqrt(flatsum2[a] / flatcount[a] -
                                    (flatsum[a] / flatcount[a]) ** 2)
    elif statistic == "count":
        result.fill(0)
        flatcount = np.bincount(binnumbers, None)
        a = np.arange(len(flatcount))
        result[:, a] = flatcount[np.newaxis, :]
    elif statistic == "sum":
        result.fill(0)
        for vv in xrange(Vdim):
            flatsum = np.bincount(binnumbers, values[vv])
            a = np.arange(len(flatsum))
            result[vv, a] = flatsum
    elif statistic == "median":
        result.fill(np.nan)
        for i in np.unique(binnumbers):
            for vv in xrange(Vdim):
                result[vv, i] = np.median(values[vv, binnumbers == i])
    elif callable(statistic):
        with warnings.catch_warnings():
            // Numpy generates a warnings for mean/std/... with empty list
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            old = np.seterr(invalid="ignore")
            try:
                null = statistic([])
            except:
                null = np.nan
            np.seterr(**old)
        result.fill(null)
        for i in np.unique(binnumbers):
            for vv in xrange(Vdim):
                result[vv, i] = statistic(values[vv, binnumbers == i])

    // Shape into a proper matrix
    result = result.reshape(np.append(Vdim, np.sort(nbin)))

    for i in xrange(nbin.size):
        j = ni.argsort()[i]
        // Accomodate the extra `Vdim` dimension-zero with `+1`
        result = result.swapaxes(i+1, j+1)
        ni[i], ni[j] = ni[j], ni[i]

    // Remove outliers (indices 0 and -1 for each dimension).
    core = [slice(None)] + Ndim * [slice(1, -1)]
    result = result[core]

    // Unravel binnumbers into an ndarray, each row the bins for each dimension
    if(expand_binnumbers and Ndim > 1):
        binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))

    if np.any(result.shape[1:] != nbin - 2):
        raise RuntimeError("Internal Shape Error")

    // Remove excess dimension-zero if `values` was 1-D
    if(initVdim == 1):
        result = result.reshape(*(nbin-2))

    return BinnedStatisticddResult(result, edges, binnumbers)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: scipy/scipy
Commit Name: 83c8b3a4f321c4a924d344df275ddc403d3ca835
Time: 2015-11-24
Author: lkelley@cfa.harvard.edu
File Name: scipy/stats/_binned_statistic.py
Class Name:
Method Name: binned_statistic_dd


Project Name: dmlc/dgl
Commit Name: deb653f8dc9b396ba99d5c66e0658fb056f37a38
Time: 2018-11-22
Author: ly979@nyu.edu
File Name: python/dgl/function/message.py
Class Name: SrcMulEdgeMessageFunction
Method Name: __call__


Project Name: keras-team/keras
Commit Name: 8b3543fca9d811c638bb72d78601c8564f5465fd
Time: 2016-04-03
Author: EderSantana@users.noreply.github.com
File Name: keras/backend/theano_backend.py
Class Name:
Method Name: batch_dot