self._observation_dim = env_spec.observation_space.flat_dim
self._action_dim = env_spec.action_space.flat_dim
self._max_buffer_size = max_replay_buffer_size
self._observations = np.zeros((max_replay_buffer_size,
self._observation_dim))
# It's a bit memory-inefficient to save the observations twice,
# but it makes the code *much* easier since you no longer have to
# worry about termination conditions.
self._next_obs = np.zeros((max_replay_buffer_size,