# A reset flag without a terminal state is not handled separately here:
# any env flagged in batch_reset is treated as if it were done.
if any(batch_reset):
    warnings.warn(
        "A2C currently does not support resetting an env withtout "
        "reaching a terminal state during training. When receiving "
        "True in batch_reset, A2C considers it as True in batch_done "
        "instead."
    )  # NOQA
    # Update batch_done in place so the mask computed below sees the
    # forced resets as terminations.
    for i, reset in enumerate(batch_reset):
        if reset:
            batch_done[i] = True

statevar = self.batch_states(batch_obs, self.xp, self.phi)

# Mask entry for the previous step: 0.0 where the env is done, 1.0 otherwise.
self.masks[self.t - self.t_start - 1] = self.xp.array(
    [0.0 if done else 1.0 for done in batch_done]
)