Before Change

@lab_api
def act(self, state_space):
    data_names = ("action",)
    action_v, = self.aeb_space.init_data_v(data_names)
    for agent in self.agents:
        a = agent.a
        state_a = state_space.get(a=a)
        action_a = agent.space_act(state_a)
        action_v[a, 0:len(action_a)] = action_a  # pack each agent's action into the joint AEB data vector
    action_space = self.aeb_space.add("action", action_v)
    return action_space
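For context, here is how a space-mode control loop might drive act each timestep. This is a minimal sketch only: env_space, its reset/step signatures, and the termination check are assumptions for illustration, not SLM Lab's actual Session code.

# Hypothetical space-mode driver loop (illustrative sketch, not SLM Lab's run loop)
state_space = env_space.reset()                       # assumed: env_space mirrors the AEB data API
while True:
    action_space = agent_space.act(state_space)       # the method above: one action per agent
    state_space, reward_space, done_space, info = env_space.step(action_space)  # assumed signature
    if done_space.all():                              # assumed: stop when every env reports done
        break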
After Change
@lab_api
def update(self, state, action, reward, next_state, done):
    """Update per timestep after env transitions, e.g. memory, algorithm, update agent params, train net"""
    self.body.update(state, action, reward, next_state, done)
    if util.in_eval_lab_modes():  # eval does not update agent for training
        return
    self.body.memory.update(state, action, reward, next_state, done)
    loss = self.algorithm.train()
    if not np.isnan(loss):  # set for log_summary()
        self.body.loss = loss
    explore_var = self.algorithm.update()
    return loss, explore_var
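To see where this per-agent hook fires, a minimal single-environment training loop is sketched below. The env object, the gym-style step signature, and agent.act are assumptions for illustration, not SLM Lab's exact Session code.

# Hypothetical single-env training loop (illustrative sketch)
state = env.reset()
done = False
while not done:
    action = agent.act(state)                          # assumed per-agent act API
    next_state, reward, done, info = env.step(action)  # gym-style step, assumed signature
    loss, explore_var = agent.update(state, action, reward, next_state, done)  # the method above
    state = next_state

Note that in eval lab modes update returns early with None, so the loss/explore_var unpacking above only applies in training mode.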