a95eb8bd878b43346c4bb2e8e29911dc1ab90638,A2C.py,A2C,learn,#A2C#,49
Before Change
returns = np.concatenate([discount_rewards(trajectory["reward"], config["gamma"]) for trajectory in trajectories])
qw_new = self.get_critic_value(all_state)
self.sess.run([self.critic_train], feed_dict={self.critic_state_in: all_state, self.critic_target: returns}) // Reshape or not?
self.sess.run([self.actor_train], feed_dict={self.actor_input: all_state, self.actions_taken: all_action, self.critic_feedback: qw_new, self.critic_rewards: returns})
episode_rewards = np.array([trajectory["reward"].sum() for trajectory in trajectories]) // episode total rewards
After Change
self.episode_lengths: np.mean(episode_lengths)
})
self.writer.add_summary(results[0], iteration)
self.writer.flush()
reporter.print_iteration_stats(iteration, episode_rewards, episode_lengths, total_n_trajectories)
class A2CDiscrete(A2C):
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: arnomoonens/yarll
Commit Name: a95eb8bd878b43346c4bb2e8e29911dc1ab90638
Time: 2017-02-07
Author: x-006@hotmail.com
File Name: A2C.py
Class Name: A2C
Method Name: learn
Project Name: galeone/dynamic-training-bench
Commit Name: 50914556832ada0004e55f004eed2b44f448cac6
Time: 2016-10-25
Author: nessuno@nerdz.eu
File Name: train_fixed_dropout.py
Class Name:
Method Name: train
Project Name: arnomoonens/yarll
Commit Name: a95eb8bd878b43346c4bb2e8e29911dc1ab90638
Time: 2017-02-07
Author: x-006@hotmail.com
File Name: A2C.py
Class Name: A2CContinuous
Method Name: learn