def append(self, state, action, reward):
    """Add state, action and reward at time t to the parallel buffers.

    Args:
        state: Indexable stack of frames; only the last entry
            (``state[-1]``) is stored, after ``.mul(255).byte().cpu()``.
        action: Action taken at time t, stored as given.
        reward: Reward stored alongside the transition at index t.
    """
    self.timesteps.append(self.t)
    # Only store last frame and discretise to save memory
    self.states.append(state[-1].mul(255).byte().cpu())
    self.actions.append(action)
    # Technically from time t + 1, but kept at t for all buffers to be in sync
    self.rewards.append(reward)
    self.nonterminals.append(True)
    self.t += 1
    # Store new transition with maximum priority (or use initial priority 1)
    self.priorities.append(max(self.priorities.max, 1))

# Add empty state at end of episode
# After Change
def append(self, state, action, reward):
    """Add state, action and reward at time t as a single transition.

    Args:
        state: Indexable stack of frames; only the last entry
            (``state[-1]``) is stored, after ``.mul(255).byte().cpu()``.
        action: Action taken at time t, stored as given.
        reward: Reward stored in the same transition as ``state``.
    """
    # Only store last frame and discretise to save memory
    state = state[-1].mul(255).byte().cpu()
    # Store new transition with maximum priority (or use initial priority 1)
    self.transitions.append(
        Transition(self.t, state, action, reward, True),
        max(self.transitions.max, 1),
    )
    self.t += 1

# Add empty state at end of episode