3031109e464b9211201f6e9fcc838e8afafccdbb,Reinforcement_learning_TUT/5.1_Double_DQN/RL_brain.py,DoubleDQN,learn,#DoubleDQN#,123

Before Change


            self._replace_target_params()
            print("\ntarget_params_replaced\n")

        batch_memory = self.memory.sample(self.batch_size) \
            if self.memory_counter > self.memory_size \
            else self.memory.iloc[:self.memory_counter].sample(self.batch_size, replace=True)

        q_next, q_eval4next = self.sess.run(
            [self.q_next, self.q_eval],
            feed_dict={self.s_: batch_memory.iloc[:, -self.n_features:],    # next observation
                       self.s: batch_memory.iloc[:, -self.n_features:]})    # next observation
        q_eval = self.sess.run(self.q_eval, {self.s: batch_memory.iloc[:, :self.n_features]})

        q_target = q_eval.copy()

        batch_index = np.arange(self.batch_size, dtype=np.int32)
        eval_act_index = batch_memory.iloc[:, self.n_features].astype(int)
        reward = batch_memory.iloc[:, self.n_features + 1]

        if self.double_q:
            max_act4next = np.argmax(q_eval4next, axis=1)         # the action that brings the highest value is evaluated by q_eval
            selected_q_next = q_next[batch_index, max_act4next]   # Double DQN: select q_next according to the actions chosen above
        else:
            selected_q_next = np.max(q_next, axis=1)    # the natural DQN

        q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next

        _, self.cost = self.sess.run([self._train_op, self.loss],
                                     feed_dict={self.s: batch_memory.iloc[:, :self.n_features],
                                                self.q_target: q_target})
        self.cost_his.append(self.cost)
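
The double_q branch above is the core of Double DQN: the evaluation network picks the greedy action for the next state, and the target network supplies the Q-value for that action, which damps the overestimation that comes from taking a plain max over q_next. A minimal, self-contained NumPy sketch of that target selection (hypothetical Q-values; variable names mirror the method above):

import numpy as np

# Hypothetical Q-values for a batch of 2 transitions and 3 actions.
q_eval4next = np.array([[1.0, 2.5, 0.3],   # eval net on the next states
                        [0.7, 0.1, 0.9]])
q_next = np.array([[0.8, 2.0, 0.5],        # target net on the next states
                   [1.2, 0.4, 0.6]])

batch_index = np.arange(2)
max_act4next = np.argmax(q_eval4next, axis=1)         # actions the eval net prefers: [1, 2]
double_q_next = q_next[batch_index, max_act4next]     # Double DQN values: [2.0, 0.6]
natural_q_next = np.max(q_next, axis=1)               # natural DQN values: [2.0, 1.2]

For the second transition the Double DQN value (0.6) is lower than the natural DQN value (1.2): the action preferred by the eval net is scored by the target net instead of simply taking the target net's maximum, which is exactly the overestimation the method is meant to reduce.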

After Change


            self._replace_target_params()
            print("\ntarget_params_replaced\n")

        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
        batch_memory = self.memory[sample_index, :]

        q_next, q_eval4next = self.sess.run(
            [self.q_next, self.q_eval],
            feed_dict={self.s_: batch_memory[:, -self.n_features:],    # next observation
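
The visible part of the After Change is cut off here by page residue; the remainder of the method presumably mirrors the Before Change body, with each pandas batch_memory.iloc[:, a:b] access replaced by plain NumPy batch_memory[:, a:b] indexing into the preallocated self.memory array. The substantive change is the replay buffer moving from a pandas DataFrame sampled with .sample() to a NumPy array indexed via np.random.choice. A small self-contained sketch of that sampling scheme, assuming rows laid out as [s, a, r, s_] (sizes are hypothetical; names mirror the method):

import numpy as np

memory_size, batch_size, n_features = 500, 32, 4
memory = np.zeros((memory_size, n_features * 2 + 2))    # each row stores [s, a, r, s_]
memory_counter = 120                                    # transitions written so far

# Sample uniformly (with replacement) from the filled part of the buffer,
# matching the After Change logic above.
if memory_counter > memory_size:
    sample_index = np.random.choice(memory_size, size=batch_size)
else:
    sample_index = np.random.choice(memory_counter, size=batch_size)
batch_memory = memory[sample_index, :]                  # shape: (batch_size, n_features * 2 + 2)

states      = batch_memory[:, :n_features]              # s
actions     = batch_memory[:, n_features].astype(int)   # a
rewards     = batch_memory[:, n_features + 1]           # r
next_states = batch_memory[:, -n_features:]             # s_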
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 25

Instances


Project Name: MorvanZhou/tutorials
Commit Name: 3031109e464b9211201f6e9fcc838e8afafccdbb
Time: 2017-03-06
Author: morvanzhou@gmail.com
File Name: Reinforcement_learning_TUT/5.1_Double_DQN/RL_brain.py
Class Name: DoubleDQN
Method Name: learn


Project Name: MorvanZhou/tutorials
Commit Name: 3031109e464b9211201f6e9fcc838e8afafccdbb
Time: 2017-03-06
Author: morvanzhou@gmail.com
File Name: Reinforcement_learning_TUT/5_Deep_Q_Network/RL_brain.py
Class Name: DeepQNetwork
Method Name: learn


Project Name: MorvanZhou/tutorials
Commit Name: 3031109e464b9211201f6e9fcc838e8afafccdbb
Time: 2017-03-06
Author: morvanzhou@gmail.com
File Name: Reinforcement_learning_TUT/6_OpenAI_gym/RL_brain.py
Class Name: DeepQNetwork
Method Name: learn