c702968d83449f3a6aad3a5cec6cc0b2aafd78c5,mushroom/algorithms/dqn.py,RDQN,fit,#RDQN#Any#Any#,228
Before Change
q_tilde_next = self._target_approximator.predict_all(
next_state) * (1. - next_absorbing)
r_tilde_next = np.zeros(q_tilde_next.shape)
for i in xrange(r_tilde_next.shape[0]):
r_tilde_next[i, next_action] = next_reward[i]
q_next = r_tilde_next + self._gamma * q_tilde_next
max_q_next = np.max(q_next, axis=1)
self.approximator.train_on_batch(
After Change
q_tilde_next = self._target_approximator.predict_all(
next_state) * (1. - next_absorbing.reshape(-1, 1))
r_tilde_next = np.zeros(q_tilde_next.shape)
r_tilde_next[:, next_action.ravel().astype(np.int)] = next_reward
q_next = r_tilde_next + self._gamma * q_tilde_next
max_q_next = np.max(q_next, axis=1)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: AIRLab-POLIMI/mushroom
Commit Name: c702968d83449f3a6aad3a5cec6cc0b2aafd78c5
Time: 2017-09-27
Author: carloderamo@gmail.com
File Name: mushroom/algorithms/dqn.py
Class Name: RDQN
Method Name: fit
Project Name: AIRLab-POLIMI/mushroom
Commit Name: ca4c92fe1b53086373cfca478560df06b03701d8
Time: 2017-08-28
Author: carlo.deramo@gmail.com
File Name: examples/atari_dqn/convnet.py
Class Name: ConvNet
Method Name: train_on_batch
Project Name: brian-team/brian2
Commit Name: 0ac2bf1c2cafee4cc9555c9f09a17143b05b1a88
Time: 2014-03-06
Author: dan.goodman@ens.fr
File Name: brian2/synapses/spikequeue.py
Class Name: SpikeQueue
Method Name: prepare