d22bf94eddbff3cfc6ceb7bf3ac8cb4c5ac83c5d,contents/10_A3C/A3C_continuous_action.py,ACNet,__init__,#ACNet#Any#Any#,45

Before Change



        if scope == GLOBAL_NET_SCOPE:   # get global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], "S")
                self._build_net()
                self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + "/actor")
                self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + "/critic")
        else:   # local net, calculate losses
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], "S")
                self.a_his = tf.placeholder(tf.float32, [None, N_A], "A")
                self.v_target = tf.placeholder(tf.float32, [None, 1], "Vtarget")

                mu, sigma, self.v = self._build_net()

                td = tf.subtract(self.v_target, self.v, name="TD_error")
                with tf.name_scope("c_loss"):
                    self.c_loss = tf.reduce_mean(tf.square(td))

                with tf.name_scope("wrap_a_out"):
                    mu, sigma = mu * A_BOUND[1], sigma + 1e-4

                normal_dist = tf.contrib.distributions.Normal(mu, sigma)

                with tf.name_scope("a_loss"):
                    log_prob = normal_dist.log_prob(self.a_his)
                    exp_v = log_prob * td
                    entropy = tf.stop_gradient(normal_dist.entropy())  # encourage exploration
                    self.exp_v = ENTROPY_BETA * entropy + exp_v
                    self.a_loss = tf.reduce_mean(-self.exp_v)

                with tf.name_scope("choose_a"):  # use local params to choose action
                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), A_BOUND[0], A_BOUND[1])
                with tf.name_scope("local_grad"):
                    self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + "/actor")
                    self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + "/critic")
                    self.a_grads = tf.gradients(self.a_loss, self.a_params)
                    self.c_grads = tf.gradients(self.c_loss, self.c_params)

            with tf.name_scope("sync"):
                with tf.name_scope("pull"):
                    self.pull_a_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params)]
                    self.pull_c_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params)]
                with tf.name_scope("push"):
                    self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params))
                    self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params))

    def _build_net(self):
        w_init = tf.random_normal_initializer(0., .1)

After Change



        if scope == GLOBAL_NET_SCOPE:   # get global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], "S")
                self.a_params, self.c_params = self._build_net(scope)[-2:]
        else:   # local net, calculate losses
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], "S")
                self.a_his = tf.placeholder(tf.float32, [None, N_A], "A")
                self.v_target = tf.placeholder(tf.float32, [None, 1], "Vtarget")

                mu, sigma, self.v, self.a_params, self.c_params = self._build_net(scope)

                td = tf.subtract(self.v_target, self.v, name="TD_error")
                with tf.name_scope("c_loss"):
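The after-change snippet is cut off above, but the call sites show the shape of the refactor: _build_net now takes the scope and returns the actor/critic parameter lists itself, so __init__ no longer calls tf.get_collection directly. Below is a minimal sketch of what the refactored method could look like for the continuous-action case; the layer sizes, layer names and activations are illustrative assumptions, while the signature and the tf.get_collection calls are taken from the code shown above.

    def _build_net(self, scope):
        w_init = tf.random_normal_initializer(0., .1)
        with tf.variable_scope('actor'):
            l_a = tf.layers.dense(self.s, 200, tf.nn.relu6, kernel_initializer=w_init, name='la')       # hidden layer (size assumed)
            mu = tf.layers.dense(l_a, N_A, tf.nn.tanh, kernel_initializer=w_init, name='mu')            # action mean
            sigma = tf.layers.dense(l_a, N_A, tf.nn.softplus, kernel_initializer=w_init, name='sigma')  # action std
        with tf.variable_scope('critic'):
            l_c = tf.layers.dense(self.s, 100, tf.nn.relu6, kernel_initializer=w_init, name='lc')       # hidden layer (size assumed)
            v = tf.layers.dense(l_c, 1, kernel_initializer=w_init, name='v')                            # state value
        # collect trainable variables here instead of in __init__ (moved from the before-change code)
        a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
        c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
        return mu, sigma, v, a_params, c_params
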
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 41

Instances


Project Name: MorvanZhou/Reinforcement-learning-with-tensorflow
Commit Name: d22bf94eddbff3cfc6ceb7bf3ac8cb4c5ac83c5d
Time: 2017-08-13
Author: morvanzhou@gmail.com
File Name: contents/10_A3C/A3C_continuous_action.py
Class Name: ACNet
Method Name: __init__


Project Name: MorvanZhou/Reinforcement-learning-with-tensorflow
Commit Name: d22bf94eddbff3cfc6ceb7bf3ac8cb4c5ac83c5d
Time: 2017-08-13
Author: morvanzhou@gmail.com
File Name: contents/10_A3C/A3C_discrete_action.py
Class Name: ACNet
Method Name: __init__


Project Name: MorvanZhou/Reinforcement-learning-with-tensorflow
Commit Name: d22bf94eddbff3cfc6ceb7bf3ac8cb4c5ac83c5d
Time: 2017-08-13
Author: morvanzhou@gmail.com
File Name: contents/10_A3C/A3C_RNN.py
Class Name: ACNet
Method Name: __init__

