self._check_trajectory_dimensions(experience)
if self._n_step_update == 1:
  time_steps, actions, next_time_steps = self._experience_to_transitions(
      experience)
else:
  # To compute n-step returns, we need the first time steps, the first
  # actions, and the last time steps. Therefore we extract the first and
  # last transitions from our Trajectory.
After Change
# Check that `experience` includes two outer dimensions [B, T, ...]. This
# method requires a time dimension to compute the loss properly.
self._check_trajectory_dimensions(experience)
squeeze_time_dim = not self._q_network.state_spec
if self._n_step_update == 1:
  time_steps, policy_steps, next_time_steps = (
      trajectory.experience_to_transitions(experience, squeeze_time_dim))
  actions = policy_steps.action
else:
  # To compute n-step returns, we need the first time steps, the first
  # actions, and the last time steps. Therefore we extract the first and
  # last transitions from our Trajectory.
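
Both versions truncate the n-step branch at this comment. As a minimal sketch of the extraction it describes, assuming `experience` is a nest of `[B, T, ...]` tensors (the slicing pattern and toy shapes below are illustrative, not the verbatim continuation of this change):

import tensorflow as tf

# Toy stand-in for a Trajectory: a nest of tensors shaped [batch, time, ...].
experience = {
    'observation': tf.zeros([4, 5, 8]),         # B=4, T=5, obs_dim=8
    'action': tf.zeros([4, 5], dtype=tf.int32),
}

# Take the first and last pairs of consecutive steps along the time axis;
# n-step returns are then computed between these two transitions.
first_two_steps = tf.nest.map_structure(lambda t: t[:, :2], experience)
last_two_steps = tf.nest.map_structure(lambda t: t[:, -2:], experience)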
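
The new `squeeze_time_dim` flag keys off the Q-network's `state_spec`: an empty spec (a stateless, feed-forward network) is falsy, so the time dimension can be squeezed out of the extracted transitions, while a recurrent network keeps it. A small illustration of that predicate, using hypothetical stand-in classes:

# Hypothetical stand-ins; only the truthiness of state_spec matters here.
class FeedForwardQNet:
  state_spec = ()                  # stateless: empty spec is falsy

class RecurrentQNet:
  state_spec = {'lstm': 'spec'}    # placeholder for a real RNN state spec

assert not FeedForwardQNet().state_spec   # squeeze_time_dim would be True
assert RecurrentQNet().state_spec         # squeeze_time_dim would be False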