# Look up the "episode" entry in the monitor's data coordinates.
episode_coor = self.monitor.data_coor["episode"]
# TODO: generalize and make state include observables
# Reset the environment to obtain the starting state, then reset the agent.
state = self.env.reset()
logger.debug(f"reset state {state}")
self.agent.reset()
# RL steps for SARS (presumably state, action, reward, next state)
for t in range(self.env.max_timestep):
After Change
# Ask the agent space for its action(s) given the current state space.
action_space = self.agent_space.act(state_space)
logger.debug(f"action_space {action_space}")
# Step the environment space with those actions; the result unpacks as
# (reward, state, done), in that order.
reward_space, state_space, done_space = self.env_space.step(action_space)
logger.debug(
    f"reward_space: {reward_space}, state_space: {state_space}, done_space: {done_space}")
# Completes the cycle of full info for agent_space.
self.agent_space.update(reward_space, state_space, done_space)