for _ in runner.step_epochs():
for cycle in range(self._steps_per_epoch):
runner.step_path = runner.obtain_trajectories(runner.step_itr)
last_return = self.train_once(runner.step_itr,
runner.step_path)
if (cycle == 0 and self.replay_buffer.n_transitions_stored >=
self._min_buffer_size):
runner.enable_logging = True
log_performance(runner.step_itr,
obtain_evaluation_samples(
self.policy, self._eval_env),
discount=self._discount)
runner.step_itr += 1
return last_return
def train_once(self, itr, trajectories):
    """Perform one iteration of training."""
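For reference, a minimal illustrative sketch of how a train_once body along these lines might proceed. The buffer methods add_path and sample_transitions, the _optimize_policy helper, and the attributes _n_train_steps and _buffer_batch_size are assumptions made for illustration only, not the library's actual API:

import numpy as np

def train_once(self, itr, trajectories):
    """Perform one iteration of training (illustrative sketch only)."""
    # Store the newly collected transitions (buffer method name is assumed).
    for path in trajectories:
        self.replay_buffer.add_path(path)

    if self.replay_buffer.n_transitions_stored >= self._min_buffer_size:
        for _ in range(self._n_train_steps):
            # Sample a mini-batch and take one optimization step
            # (_optimize_policy is a hypothetical helper).
            batch = self.replay_buffer.sample_transitions(self._buffer_batch_size)
            self._optimize_policy(itr, batch)

    # Report the average undiscounted return of the newly collected paths.
    returns = [np.sum(path['rewards']) for path in trajectories]
    return np.mean(returns) if returns else 0.0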
After Change
for _ in runner.step_epochs():
for cycle in range(self._steps_per_epoch):
runner.step_path = runner.obtain_trajectories(runner.step_itr)
self.train_once(runner.step_itr, runner.step_path)
if (cycle == 0 and self.replay_buffer.n_transitions_stored >=
self._min_buffer_size):
runner.enable_logging = True
eval_samples = obtain_evaluation_samples(