# TODO: vectorize
# Accumulate the gradient contributions one sample at a time. The running
# totals dw, da and db are summed here, not averaged.
for v in vdata:
    # score -= self.marginal_free_energy(v)
    # Logistic sigmoid of the affine map of this sample; evaluated once
    # because both the dw and the db updates consume the same value.
    activation = be.expit(be.dot(v, w) + b)
    dw += be.outer(v, activation)
    da += v
    db += activation
grad = gu.Gradient(
After Change
# Logistic sigmoid of the affine map, evaluated for the whole batch in a
# single call.
# NOTE(review): assumes vdata is 2-D with one sample per row -- confirm.
intermediate = be.expit(be.dot(vdata, w) + b)
da = be.mean(vdata, axis=0)
db = be.mean(intermediate, axis=0)
batch_size = be.shape(vdata)[0]
# For 2-D inputs, transpose(vdata) . intermediate equals
# \sum_{i} vdata[i] \outer intermediate[i]; dividing by batch_size turns
# that sum into a per-sample mean, consistent with da and db above.
# TODO: is this efficient?
dw = be.dot(be.transpose(vdata), intermediate) / batch_size