3058bfb87edb7f12bdece2e9e454048df146490a,tensorforce/models/naf_model.py,NAFModel,create_tf_operations,#NAFModel#Any#,46

Before Change


            training_output_vars = tf.contrib.framework.get_variables("training_outputs")

        with tf.variable_scope("target"):
            self.target_network = NeuralNetwork(config.network, inputs=self.state)
            self.internal_inputs.extend(self.target_network.internal_inputs)
            self.internal_outputs.extend(self.target_network.internal_outputs)
            self.internal_inits.extend(self.target_network.internal_inits)
            target_value = dict()

        with tf.variable_scope("target_outputs"):
            # State-value function
            target_value_output = layers["linear"](x=self.target_network.output, size=1)
            for action in self.action:
                # NAF directly outputs V(s)
                target_value[action] = target_value_output

            target_output_vars = tf.contrib.framework.get_variables("target_outputs")

        with tf.name_scope("update"):
            for action in self.action:
                q_target = self.reward[:-1] + (1.0 - tf.cast(self.terminal[:-1], tf.float32)) * config.discount * target_value[action][1:]
                delta = q_target - q_value[:-1]
                self.loss_per_instance = tf.square(delta)

                # We observe issues with numerical stability in some tests; gradient clipping can help
                if config.clip_gradients > 0.0:

After Change


            training_output_vars = tf.contrib.framework.get_variables("training_outputs")

        with tf.variable_scope("target"):
            network_builder = util.get_function(fct=config.network)
            self.target_network = NeuralNetwork(network_builder=network_builder, inputs=self.state)
            self.internal_inputs.extend(self.target_network.internal_inputs)
            self.internal_outputs.extend(self.target_network.internal_outputs)
            self.internal_inits.extend(self.target_network.internal_inits)
            target_value = dict()

        with tf.variable_scope("target_outputs"):
            # State-value function
            target_value_output = layers["linear"](x=self.target_network.output, size=1)
            for action in self.action:
                # NAF directly outputs V(s)
                target_value[action] = target_value_output

            target_output_vars = tf.contrib.framework.get_variables("target_outputs")

        with tf.name_scope("update"):
            for action in self.action:
                q_target = self.reward[:-1] + (1.0 - tf.cast(self.terminal[:-1], tf.float32)) * config.discount * target_value[action][1:]
                delta = q_target - q_value[:-1]
                self.loss_per_instance = tf.square(delta)

                # We observe issues with numerical stability in some tests; gradient clipping can help
                if config.clip_gradients > 0.0:
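
Across all three instances the refactor is the same: rather than passing the raw config.network value directly into NeuralNetwork, the model first resolves it into a builder callable with util.get_function(fct=config.network) and hands that in as network_builder. The sketch below illustrates that resolution step in isolation; get_function and build_network here are hypothetical stand-ins written for this example, not the actual tensorforce.util implementation.

    import importlib
    from typing import Any, Callable


    def get_function(fct: Any) -> Callable:
        # Hypothetical resolver mirroring the idea behind util.get_function:
        # pass callables through unchanged, otherwise treat the value as a
        # dotted "module.attribute" path and import the named attribute.
        if callable(fct):
            return fct
        module_name, _, attr = str(fct).rpartition('.')
        return getattr(importlib.import_module(module_name), attr)


    def build_network(inputs, layer_sizes=(64, 64)):
        # Hypothetical network builder standing in for a tensorforce network
        # definition; it only records which layers would be created.
        layers = [('input', inputs)]
        for size in layer_sizes:
            layers.append(('dense', size))
        return layers


    # Usage mirroring the "After Change" code: resolve the builder once,
    # then pass the callable (not the raw config value) to the constructor.
    network_builder = get_function(fct=build_network)
    target_network = network_builder(inputs=['state'])

Resolving the builder up front presumably lets the same NeuralNetwork constructor accept either a config value or an already-built function, which would explain why DQNModel and PolicyGradientModel receive the identical change in this commit.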
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 9

Instances


Project Name: reinforceio/tensorforce
Commit Name: 3058bfb87edb7f12bdece2e9e454048df146490a
Time: 2017-07-22
Author: aok25@cl.cam.ac.uk
File Name: tensorforce/models/naf_model.py
Class Name: NAFModel
Method Name: create_tf_operations


Project Name: reinforceio/tensorforce
Commit Name: 3058bfb87edb7f12bdece2e9e454048df146490a
Time: 2017-07-22
Author: aok25@cl.cam.ac.uk
File Name: tensorforce/models/dqn_model.py
Class Name: DQNModel
Method Name: create_tf_operations


Project Name: reinforceio/tensorforce
Commit Name: 3058bfb87edb7f12bdece2e9e454048df146490a
Time: 2017-07-22
Author: aok25@cl.cam.ac.uk
File Name: tensorforce/models/policy_gradient_model.py
Class Name: PolicyGradientModel
Method Name: create_tf_operations