td_loss = tf.reduce_sum(input_tensor=td_loss, axis=1)
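# Apply optional per-batch-element weights; boundary transitions carry a
# weight of zero (see the note below).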
if weights is not None:
  td_loss *= weights
# Average across the elements of the batch.
# Note: We use an element-wise loss above to ensure each element is always
#   weighted by 1/N, where N is the batch size, even when some of the
#   weights are zero due to boundary transitions. Weighting by 1/K, where K
#   is the actual number of non-zero weights, would artificially increase
#   their contribution to the loss. Think about what would happen as
#   the number of boundary samples increases.
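#   As an illustrative example (not from the original code): with N=4
#   per-element losses [2.0, 4.0, 0.0, 0.0] after masking, reduce_mean gives
#   (2 + 4) / 4 = 1.5, whereas dividing by the K=2 non-zero entries would
#   give 3.0, overweighting the surviving transitions.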
loss = tf.reduce_mean(input_tensor=td_loss)
# Add network losses (such as regularization losses).
if self._q_network.losses:
  loss = loss + tf.reduce_mean(self._q_network.losses)
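# (Keras layers accumulate such losses automatically; e.g. a Dense layer
# built with kernel_regularizer=tf.keras.regularizers.l2(1e-4) contributes
# its penalty term to the network's `losses` list.)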
with tf.name_scope("Losses/"):
  tf.compat.v2.summary.scalar(
      name="loss", data=loss, step=self.train_step_counter)
if self._summarize_grads_and_vars:
  with tf.name_scope("Variables/"):
    for var in self._q_network.trainable_weights:
      tf.compat.v2.summary.histogram(