# Accumulate grad_KL over every entry of `vis`: each entry contributes its
# gradient weighted by target_psi[i, 0] ** 2, minus the same gradient weighted
# by probability(nn_state, vis[i], Z).
# NOTE(review): the name suggests this is the gradient of a KL divergence
# between the target and model distributions -- confirm against the caller.
grad_KL = torch.zeros(nn_state.rbm_am.num_pars, dtype=torch.double)
for i, v_i in enumerate(vis):
    # Evaluate the gradient once per entry and reuse it for both terms.
    # Assumes nn_state.gradient returns the same value for the same input.
    grad_i = nn_state.gradient(v_i)
    # The two terms are applied separately (not pre-combined) so the order of
    # floating-point operations matches the original accumulation.
    grad_KL += (target_psi[i, 0] ** 2) * grad_i
    grad_KL -= probability(nn_state, v_i, Z) * grad_i
    # Earlier per-parameter variant, kept for reference. It indexes the result
    # of nn_state.gradient first by rbmType and then by pars:
    # for rbmType in nn_state.gradient(vis[i]):
    #     for pars in nn_state.gradient(vis[i])[rbmType]:
    #         grad_KL[rbmType][pars] += ((target_psi[i,0])**2)*nn_state.gradient(vis[i])[rbmType][pars]
    #         grad_KL[rbmType][pars] -= probability(nn_state,vis[i], Z)*nn_state.gradient(vis[i])[rbmType][pars]