def append(self, state, action, reward):
    """Add state, action and reward at time ``t`` to the parallel buffers.

    Args:
        state: Indexable stack of frames; only ``state[-1]`` is stored.
            Assumes a tensor-like object exposing ``mul``/``byte``/``cpu``
            (presumably values in [0, 1] -- confirm against the caller).
        action: Action to record for this timestep.
        reward: Reward to record for this timestep.
    """
    # Only store last frame and discretise to save memory. This is done
    # before touching any buffer so that a failing conversion cannot leave
    # the buffers with mismatched lengths.
    frame = state[-1].mul(255).byte().cpu()

    self.timesteps.append(self.t)
    self.states.append(frame)
    self.actions.append(action)
    # Technically from time t + 1, but kept at t for all buffers to be in sync
    self.rewards.append(reward)
    self.nonterminals.append(True)
    self.t += 1
    # Store new transition with maximum priority (or use initial priority 1)
    self.priorities.append(max(self.priorities.max, 1))
# Add empty state at end of episode
# After Change
def append(self, state, action, reward):
    """Store the transition observed at time ``t`` and advance ``t``.

    Args:
        state: Indexable stack of frames; only ``state[-1]`` is stored.
            Assumes a tensor-like object exposing ``mul``/``byte``/``cpu``
            (presumably values in [0, 1] -- confirm against the caller).
        action: Action to record for this timestep.
        reward: Reward to record for this timestep.
    """
    # Only store last frame and discretise to save memory
    frame = state[-1].mul(255).byte().cpu()
    # The trailing ``True`` is presumably the nonterminal flag of the
    # transition -- confirm against the ``Transition`` definition.
    transition = Transition(self.t, frame, action, reward, True)
    # Store new transition with maximum priority (or use initial priority 1)
    self.transitions.append(transition, max(self.transitions.max, 1))
    self.t += 1
// Add empty state at end of episode