def update(self, state, action, reward, next_state, done):
    """Update per timestep after env transitions, e.g. memory, algorithm, update agent params, train net.

    Records the transition on the body and (in training modes) in memory,
    runs one training step and one algorithm-parameter update.

    Returns:
        (loss, explore_var) tuple after a training update, or None when
        running in eval lab modes (eval does not update the agent).
    """
    self.body.update(state, action, reward, next_state, done)
    if util.in_eval_lab_modes():  # eval does not update agent for training
        return
    self.body.memory.update(state, action, reward, next_state, done)
    loss = self.algorithm.train()
    if not np.isnan(loss):  # set for log_summary(); NaN signals no train step occurred this tick
        self.body.loss = loss
    explore_var = self.algorithm.update()
    return loss, explore_var