})
result = agent.train()
for i in range(10):
result = agent.train()
print("num_steps_sampled={}".format(
result["info"]["num_steps_sampled"]))
print("num_steps_trained={}".format(
result["info"]["num_steps_trained"]))
print("num_steps_sampled={}".format(
result["info"]["num_steps_sampled"]))
print("num_steps_trained={}".format(
result["info"]["num_steps_trained"]))
if i == 0:
self.assertGreater(
result["info"]["learner"]["default_policy"]["cur_lr"],
0.01)
if result["info"]["learner"]["default_policy"]["cur_lr"] < \
0.07:
break
self.assertLess(
# NOTE(review): literal diff marker "After Change" was embedded here by a faulty
# extraction — the lines above (truncated `self.assertLess(`) and below
# (`expected_lr` expression using `global_timesteps`) come from two different
# versions of this test. Reconstruct the correct assertion from version control.
0.1 - ((0.1 - 0.000001) / 100000) * global_timesteps
lr = policy.cur_lr
if fw == "tf":
lr = policy._sess.run(lr)
check(lr, expected_lr, rtol=0.05)
agent.stop()
def test_no_step_on_init(self):