# NOTE(review): the indentation of this fragment has been stripped and it
# appears to be cut off partway through the loop body -- confirm the nesting
# against the original file before running it.
# State passed to agent.get_action on the first iteration.
state = (1, 0)
# List of 100 zeros; it is not read or updated in the visible lines.
rewards = [0.0] * 100
# Up to 10000 iterations (xrange is the lazy range of Python 2).
for n in xrange(10000):
# Query the agent with the current state.
action = agent.get_action(state=state)
# Action 0: state is set to (1, 0), reward to 0.0 and terminal to False.
# Presumably all three assignments belong to this branch -- TODO confirm.
if action == 0:
state = (1, 0)
reward = 0.0
terminal = False
After Change
# Network spec: a dense layer with 16 outputs followed by a linear layer
# with 2 outputs.
network_builder = layered_network_builder([
    {"type": "dense", "num_outputs": 16},
    {"type": "linear", "num_outputs": 2},
])
agent = DQNAgent(config=config, network_builder=network_builder)
runner = Runner(agent=agent, environment=environment)

def episode_finished(r):
    """Callback handed to ``runner.run``.

    Returns True while fewer than 100 episodes have been recorded, or while
    any of the last 100 entries of ``r.episode_rewards`` is not >= 1.0.
    Returns False once all of the last 100 rewards are >= 1.0.

    NOTE(review): presumably a False return makes the runner stop early --
    confirm against the Runner.run implementation.
    """
    return r.episode < 100 or not all(x >= 1.0 for x in r.episode_rewards[-100:])

runner.run(episodes=10000, episode_finished=episode_finished)
# The run is expected to end before the 10000-episode budget is used up.
# assertLess reports the actual episode count if the check fails.
self.assertLess(runner.episode, 10000)