// Should we consider a sample to be the instance, or the batch?
diff = X - self.m
incr = (1-alpha) * diff
self.m += incr.mean(axis=0)
self.v += (diff * incr).mean(axis=0)
self.v *= alpha
Xhat = _forward(self.ops, X, mu, var)
After Change
def begin_update(self, X, drop=0.):
if drop is None:
return self.predict(X), None
assert X.dtype == "float32"
X, backprop_child = self.child.begin_update(X, drop=0.)
N, mu, var = _get_moments(self.ops, X)