78c163f37be165673b040cff49e1ee98eb89ca0f,tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py,AgentTest,testTrainPerArmAgentWithConstraint,#AgentTest#,435

Before Change



  def testTrainPerArmAgentWithConstraint(self):
    obs_spec = bandit_spec_utils.create_per_arm_observation_spec(2, 3, 4)
    reward_spec = tensor_spec.TensorSpec(
        shape=(2,), dtype=tf.float32, name="reward")
    time_step_spec = ts.time_step_spec(obs_spec, reward_spec)
    reward_net = (
        global_and_arm_feature_network.create_feed_forward_common_tower_network(
            obs_spec, (4, 3), (3, 4), (4, 2)))

After Change



  def testTrainPerArmAgentWithConstraint(self):
    obs_spec = bandit_spec_utils.create_per_arm_observation_spec(2, 3, 4)
    reward_spec = {
        "reward": tensor_spec.TensorSpec(
            shape=(), dtype=tf.float32, name="reward"),
        "constraint": tensor_spec.TensorSpec(
            shape=(), dtype=tf.float32, name="constraint")
    }
    time_step_spec = ts.time_step_spec(obs_spec, reward_spec)
    reward_net = (
        global_and_arm_feature_network.create_feed_forward_common_tower_network(
            obs_spec, (4, 3), (3, 4), (4, 2)))
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
    constraint_net = (
        global_and_arm_feature_network.create_feed_forward_common_tower_network(
            obs_spec, (4, 3), (3, 4), (4, 2)))
    neural_constraint = constraints.NeuralConstraint(
        time_step_spec,
        self._action_spec,
        constraint_network=constraint_net)

    agent = greedy_agent.GreedyRewardPredictionAgent(
        time_step_spec,
        self._action_spec,
        reward_network=reward_net,
        accepts_per_arm_features=True,
        optimizer=optimizer,
        constraints=[neural_constraint])
    observations = {
        bandit_spec_utils.GLOBAL_FEATURE_KEY:
            tf.constant([[1, 2], [3, 4]], dtype=tf.float32),
        bandit_spec_utils.PER_ARM_FEATURE_KEY:
            tf.cast(
                tf.reshape(tf.range(24), shape=[2, 4, 3]), dtype=tf.float32)
    }
    actions = np.array([0, 3], dtype=np.int32)
    rewards = {
        "reward": np.array([0.5, 3.0], dtype=np.float32),
        "constraint": np.array([6.0, 4.0], dtype=np.float32)
    }
    initial_step, final_step = _get_initial_and_final_steps_nested_rewards(
        observations, rewards)
    action_step = policy_step.PolicyStep(
        action=tf.convert_to_tensor(actions),
[Image: alt text "Italian Trulli"]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 10

Instances


Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainPerArmAgentWithConstraint


Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainPerArmAgentWithConstraint


Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainAgentWithConstraint


Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainAgentWithMaskAndConstraint