// y values are not needed
try:
del data["y"]
except KeyError:
pass
else:
self._print_warning(_MSG1, 1)
// TODO: make sure it is passed from the geom
// data.pop("weight")
weight = 1
x_weights = np.ones(len(x)) * weight
categorical = is_categorical(x.values)
if categorical:
x_assignments = x
x = sorted(set(x))
width = make_iterable_ntimes(self.params["width"], len(x))
elif cbook.is_numlike(x.iloc[0]):
if breaks is None and binwidth is None:
_bin_count = 30
self._print_warning(_MSG1, 1)
if binwidth:
_bin_count = int(np.ceil(np.ptp(x))) / binwidth
// Breaks have a higher precedence and,
// pandas accepts either the breaks or the number of bins
_bins_info = breaks or _bin_count
x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
right=right, retbins=True)
width = np.diff(breaks)
x = [breaks[i] + width[i] / 2
for i in range(len(breaks)-1)]
else:
// TODO: Create test case
raise Exception("Cannot recognise the type of x")
// Create a dataframe with two columns:
// - the bins to which each x is assigned
// - the weights of each x value
// Then create a weighted frequency table
_df = pd.DataFrame({"assignments": x_assignments,
"weights": x_weights
})
_wfreq_table = pd.pivot_table(_df, values="weights",
rows=["assignments"], aggfunc=np.sum)
// For numerical x values, empty bins get have no value
// in the computed frequency table. We need to add the zeros and
// since frequency table is a Series object, we need to keep it ordered
if len(_wfreq_table) < len(x):
empty_bins = set(range(max(x_assignments))) - set(x_assignments)
for _b in empty_bins:
_wfreq_table[_b] = 0
_wfreq_table = _wfreq_table.sort_index()
y = list(_wfreq_table)
new_data = pd.DataFrame({"x": x, "y": y, "width": width})
// Copy the other aesthetics into the new dataframe
n = len(x)
for ae in data:
new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
return new_data