# Compute the L2 norm of `eta` over the axes in `reduc_ind`, then the
# scaling factor that would bring it within a ball of radius `eps`.
# NOTE(review): `eta`, `reduc_ind`, `avoid_zero_div` and `eps` are defined
# outside this fragment; presumably `eta` is a tensor -- confirm.

# The keep-dimensions argument of tf.reduce_sum is passed as `keep_dims`
# to TensorFlow releases before 1.8.0 and as `keepdims` otherwise, so
# pick the keyword name once instead of duplicating the whole call.
if LooseVersion(tf.__version__) < LooseVersion("1.8.0"):
    keepdims_kwarg = "keep_dims"
else:
    keepdims_kwarg = "keepdims"
eta_sum = tf.reduce_sum(tf.square(eta), reduc_ind,
                        **{keepdims_kwarg: True})

# avoid_zero_div must go inside sqrt to avoid a divide by zero
# in the gradient through this operation
norm = tf.sqrt(tf.maximum(avoid_zero_div, eta_sum))

# We must *clip* to within the norm ball, not *normalize* onto the
# surface of the ball: the factor is capped at 1, so it never exceeds 1
# even when the norm is already smaller than eps.
factor = tf.minimum(1., eps / norm)