if __name__ == "__main__":
    # Script entry point: build the shared optimizers, the global network and
    # the workers, then initialise every TensorFlow variable.
    # NOTE(review): indentation was reconstructed from a flattened source;
    # confirm the extent of the `with tf.device` scope against the original.
    sess = tf.Session()

    # ============================= TRAINING ===============================
    with tf.device("/cpu:0"):
        OPT_A = tf.train.RMSPropOptimizer(LR_A, name="RMSPropA")
        OPT_C = tf.train.RMSPropOptimizer(LR_C, name="RMSPropC")
        GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE)  # we only need its params

        # Create one worker per slot, named "Worker_<index>", each handed the
        # global network.
        workers = [
            Worker("Worker_%i" % i, GLOBAL_AC) for i in range(N_WORKERS)
        ]

    COORD = tf.train.Coordinator()
    sess.run(tf.global_variables_initializer())

    # start TF threading
    worker_threads = []
for worker in workers:
After Change
# Evaluation: call GLOBAL_AC.load_ckpt() (presumably restores saved network
# parameters -- TODO confirm), then run rendered episodes back to back,
# printing each episode's accumulated reward. The outer loop has no exit
# condition, so this runs until the process is interrupted.
GLOBAL_AC.load_ckpt()
while True:
    s = env.reset()
    rall = 0  # reward accumulated over the current episode
    while True:
        env.render()
        s = s.astype("float32")  # double to float
        a = GLOBAL_AC.choose_action(s)
        s, r, d, _ = env.step(a)
        rall += r
        if d:
            # Episode finished: report its total and start the next one.
            print("reward", rall)
            break