# (1) get the scores and gradients
_, (scores, pred_scores), gradients = ep.value_aux_and_grad(loss_fun, x)
pred = scores.argmax(-1)
num_classes = scores.shape[-1]
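# note: ep.value_aux_and_grad returns (value, aux, grad); the aux output
# is assumed here to be (scores, pred_scores) from loss_fun, i.e. the
# softmax probabilities and the probability assigned to the original class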
# (2) calculate gradient norm
gradients_l2_norm = flatten(gradients.square()).sum(1)
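# note: as written this is the *squared* L2 norm, i.e. a per-sample sum
# of squares over the flattened gradient, without a sqrt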
# (3) calculate delta
a = self.stepsize * x_l2_norm * gradients_l2_norm
b = pred_scores - 1.0 / num_classes
delta = ep.minimum(a, b)
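# note: b is the gap between the original-class probability and chance
# level (1 / num_classes); taking the minimum caps the step so that,
# under a first-order approximation, the probability is not pushed
# below chance in a single update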
# (4) stop the attack if an adversarial example has been found
# this is not described in the paper, but without it, once the predicted
# probability drops below chance level, further steps would increase the
# likelihood instead of decreasing it
is_not_adversarial = (pred == classes).float32()
delta *= is_not_adversarial
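# note: multiplying by the 0/1 mask zeroes delta for samples that are
# already misclassified, so only the still-correct samples keep moving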
# (5) calculate & apply current perturbation
a = atleast_kd(delta / gradients_l2_norm.square(), gradients.ndim)
x -= a * gradients
x = ep.clip(x, min_, max_)
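# note: clipping keeps the perturbed input inside the valid input
# range [min_, max_] after every step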
return restore_type(x)
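# usage sketch (hypothetical names; assumes this is the update loop of a
# Foolbox-style NewtonFool attack class exposing a
# run(model, inputs, criterion) entry point):
#
#   attack = NewtonFoolAttack(steps=100, stepsize=0.01)
#   x_adv = attack.run(fmodel, images, Misclassification(labels))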