78c163f37be165673b040cff49e1ee98eb89ca0f,tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py,AgentTest,testTrainPerArmAgentWithConstraint,#AgentTest#,435
Before Change
def testTrainPerArmAgentWithConstraint(self):
obs_spec = bandit_spec_utils.create_per_arm_observation_spec(2, 3, 4)
reward_spec = tensor_spec.TensorSpec(
shape=(2,), dtype=tf.float32, name="reward")
time_step_spec = ts.time_step_spec(obs_spec, reward_spec)
reward_net = (
global_and_arm_feature_network.create_feed_forward_common_tower_network(
obs_spec, (4, 3), (3, 4), (4, 2)))
After Change
def testTrainPerArmAgentWithConstraint(self):
obs_spec = bandit_spec_utils.create_per_arm_observation_spec(2, 3, 4)
reward_spec = {
"reward": tensor_spec.TensorSpec(
shape=(), dtype=tf.float32, name="reward"),
"constraint": tensor_spec.TensorSpec(
shape=(), dtype=tf.float32, name="constraint")
}
time_step_spec = ts.time_step_spec(obs_spec, reward_spec)
reward_net = (
global_and_arm_feature_network.create_feed_forward_common_tower_network(
obs_spec, (4, 3), (3, 4), (4, 2)))
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
constraint_net = (
global_and_arm_feature_network.create_feed_forward_common_tower_network(
obs_spec, (4, 3), (3, 4), (4, 2)))
neural_constraint = constraints.NeuralConstraint(
time_step_spec,
self._action_spec,
constraint_network=constraint_net)
agent = greedy_agent.GreedyRewardPredictionAgent(
time_step_spec,
self._action_spec,
reward_network=reward_net,
accepts_per_arm_features=True,
optimizer=optimizer,
constraints=[neural_constraint])
observations = {
bandit_spec_utils.GLOBAL_FEATURE_KEY:
tf.constant([[1, 2], [3, 4]], dtype=tf.float32),
bandit_spec_utils.PER_ARM_FEATURE_KEY:
tf.cast(
tf.reshape(tf.range(24), shape=[2, 4, 3]), dtype=tf.float32)
}
actions = np.array([0, 3], dtype=np.int32)
rewards = {
"reward": np.array([0.5, 3.0], dtype=np.float32),
"constraint": np.array([6.0, 4.0], dtype=np.float32)
}
initial_step, final_step = _get_initial_and_final_steps_nested_rewards(
observations, rewards)
action_step = policy_step.PolicyStep(
action=tf.convert_to_tensor(actions),
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 10
Instances
Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainPerArmAgentWithConstraint
Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainPerArmAgentWithConstraint
Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainAgentWithConstraint
Project Name: tensorflow/agents
Commit Name: 78c163f37be165673b040cff49e1ee98eb89ca0f
Time: 2020-06-12
Author: kokiopou@google.com
File Name: tf_agents/bandits/agents/greedy_reward_prediction_agent_test.py
Class Name: AgentTest
Method Name: testTrainAgentWithMaskAndConstraint