try:
// `values` is an ND-array.
Vdim, Vlen = values.shape
except (AttributeError, ValueError):
// Sample is a sequence of 1D arrays.
values = np.atleast_2d(values)
After Change
sample = np.atleast_2d(sample).T
Dlen, Ndim = sample.shape
initVdim = np.ndim(values)
values = np.atleast_2d(values)
Vdim, Vlen = values.shape
// Make sure `values` match `sample`
if(statistic is not "count" and Vlen != Dlen):
raise AttributeError("The number of `values` elements must match the "
"length of each `sample` dimension.")
nbin = np.empty(Ndim, int) // Number of bins in each dimension
edges = Ndim * [None] // Bin edges for each dim (will be 2D array)
dedges = Ndim * [None] // Spacing between edges (will be 2D array)
try:
M = len(bins)
if M != Ndim:
raise AttributeError("The dimension of bins must be equal "
"to the dimension of the sample x.")
except TypeError:
bins = Ndim * [bins]
// Select range for each dimension
// Used only if number of bins is given.
if range is None:
smin = np.atleast_1d(np.array(sample.min(axis=0), float))
smax = np.atleast_1d(np.array(sample.max(axis=0), float))
else:
smin = np.zeros(Ndim)
smax = np.zeros(Ndim)
for i in xrange(Ndim):
smin[i], smax[i] = range[i]
// Make sure the bins have a finite width.
for i in xrange(len(smin)):
if smin[i] == smax[i]:
smin[i] = smin[i] - .5
smax[i] = smax[i] + .5
// Create edge arrays
for i in xrange(Ndim):
if np.isscalar(bins[i]):
nbin[i] = bins[i] + 2 // +2 for outlier bins
edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1)
else:
edges[i] = np.asarray(bins[i], float)
nbin[i] = len(edges[i]) + 1 // +1 for outlier bins
dedges[i] = np.diff(edges[i])
nbin = np.asarray(nbin)
// Compute the bin number each sample falls into, in each dimension
sampBin = {}
for i in xrange(Ndim):
sampBin[i] = np.digitize(sample[:, i], edges[i])
// Using `digitize`, values that fall on an edge are put in the right bin.
// For the rightmost bin, we want values equal to the right
// edge to be counted in the last bin, and not as an outlier.
for i in xrange(Ndim):
// Find the rounding precision
decimal = int(-np.log10(dedges[i].min())) + 6
// Find which points are on the rightmost edge.
on_edge = np.where(np.around(sample[:, i], decimal) ==
np.around(edges[i][-1], decimal))[0]
// Shift these points one bin to the left.
sampBin[i][on_edge] -= 1
// Compute the sample indices in the flattened statistic matrix.
ni = nbin.argsort()
// `binnumbers` is which bin (in linearized `Ndim` space) each sample goes
binnumbers = np.zeros(Dlen, int)
for i in xrange(0, Ndim - 1):
binnumbers += sampBin[ni[i]] * nbin[ni[i + 1:]].prod()
binnumbers += sampBin[ni[-1]]
result = np.empty([Vdim, nbin.prod()], float)
if statistic == "mean":
result.fill(np.nan)
flatcount = np.bincount(binnumbers, None)
a = flatcount.nonzero()
for vv in xrange(Vdim):
flatsum = np.bincount(binnumbers, values[vv])
result[vv, a] = flatsum[a] / flatcount[a]
elif statistic == "std":
result.fill(0)
flatcount = np.bincount(binnumbers, None)
a = flatcount.nonzero()
for vv in xrange(Vdim):
flatsum = np.bincount(binnumbers, values[vv])
flatsum2 = np.bincount(binnumbers, values[vv] ** 2)
result[vv, a] = np.sqrt(flatsum2[a] / flatcount[a] -
(flatsum[a] / flatcount[a]) ** 2)
elif statistic == "count":
result.fill(0)
flatcount = np.bincount(binnumbers, None)
a = np.arange(len(flatcount))
result[:, a] = flatcount[np.newaxis, :]
elif statistic == "sum":
result.fill(0)
for vv in xrange(Vdim):
flatsum = np.bincount(binnumbers, values[vv])
a = np.arange(len(flatsum))
result[vv, a] = flatsum
elif statistic == "median":
result.fill(np.nan)
for i in np.unique(binnumbers):
for vv in xrange(Vdim):
result[vv, i] = np.median(values[vv, binnumbers == i])
elif callable(statistic):
with warnings.catch_warnings():
// Numpy generates a warnings for mean/std/... with empty list
warnings.filterwarnings("ignore", category=RuntimeWarning)
old = np.seterr(invalid="ignore")
try:
null = statistic([])
except:
null = np.nan
np.seterr(**old)
result.fill(null)
for i in np.unique(binnumbers):
for vv in xrange(Vdim):
result[vv, i] = statistic(values[vv, binnumbers == i])
// Shape into a proper matrix
result = result.reshape(np.append(Vdim, np.sort(nbin)))
for i in xrange(nbin.size):
j = ni.argsort()[i]
// Accomodate the extra `Vdim` dimension-zero with `+1`
result = result.swapaxes(i+1, j+1)
ni[i], ni[j] = ni[j], ni[i]
// Remove outliers (indices 0 and -1 for each dimension).
core = [slice(None)] + Ndim * [slice(1, -1)]
result = result[core]
// Unravel binnumbers into an ndarray, each row the bins for each dimension
if(expand_binnumbers and Ndim > 1):
binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))
if np.any(result.shape[1:] != nbin - 2):
raise RuntimeError("Internal Shape Error")
// Remove excess dimension-zero if `values` was 1-D
if(initVdim == 1):
result = result.reshape(*(nbin-2))
return BinnedStatisticddResult(result, edges, binnumbers)