p = pickle.dumps(policy)
with tf.Session(graph=tf.Graph()):
policy_pickled = pickle.loads(p)
action2, prob2 = policy_pickled.get_action(obs)
assert env.action_space.contains(action1)
assert np.array_equal(action1, action2)
assert np.array_equal(prob1["mean"], prob2["mean"])
After Change
// assign it to all one
return_var.load(tf.ones_like(return_var).eval())
output1 = self.sess.run(
policy.model.outputs[:-1],
feed_dict={policy.model.input: [obs.flatten()]})
p = pickle.dumps(policy)