# Start the search from the supplied initial sample and initial step sizes.
x_adv = initial_sample
delta = initial_delta
epsilon = initial_epsilon
# Unpack the classifier's clip range into a lower and an upper bound.
# NOTE(review): this unpacking raises if clip_values is None or absent; confirm it is always set here.
clip_min, clip_max = self.classifier.clip_values
# Main loop to wander around the boundary
for _ in range(self.max_iter):
# Trust region method to adjust delta
# Up to max_iter passes; each pass starts with an empty candidate list.
for _ in range(self.max_iter):
potential_advs = []
# Build sample_size candidates around the current adversarial point.
for _ in range(self.sample_size):
# Offset x_adv by the perturbation returned from _orthogonal_perturb.
potential_adv = x_adv + self._orthogonal_perturb(delta, x_adv, original_sample)
# Keep the candidate inside [clip_min, clip_max].
potential_adv = np.clip(potential_adv, clip_min, clip_max)
potential_advs.append(potential_adv)
# Run the classifier on the stacked candidates and take the argmax along axis 1.
# Presumably predict() returns one row of per-class scores per candidate; TODO confirm.
preds = np.argmax(self.classifier.predict(np.array(potential_advs)), axis=1)
After Change
# Draw sample_size candidates by perturbing the current adversarial point.
potential_advs = []
for _ in range(self.sample_size):
    potential_adv = x_adv + self._orthogonal_perturb(delta, x_adv, original_sample)
    # Clip only when the classifier actually defines a value range. The attribute
    # can exist and still be None, in which case indexing it would raise a
    # TypeError, so the None check must be on clip_values, not on the classifier.
    if hasattr(self.classifier, "clip_values") and self.classifier.clip_values is not None:
        # Clip in place to avoid allocating a second array per candidate.
        np.clip(potential_adv, self.classifier.clip_values[0], self.classifier.clip_values[1],
                out=potential_adv)
    potential_advs.append(potential_adv)
# Run the classifier once on the stacked candidates and take the argmax along axis 1.
preds = np.argmax(self.classifier.predict(np.array(potential_advs)), axis=1)
# A candidate counts as successful when its prediction equals `target` (targeted mode)
# or differs from `target` (untargeted mode).
if self.targeted:
satisfied = (preds == target)
else:
satisfied = (preds != target)
# Fraction of candidates in this batch that were successful.
delta_ratio = np.mean(satisfied)
# Multiply delta by step_adapt when fewer than half succeeded, otherwise divide by it.
if delta_ratio < 0.5:
delta *= self.step_adapt
else:
delta /= self.step_adapt
# At least one success: move to the first successful candidate and leave the loop.
if delta_ratio > 0:
x_adv = potential_advs[np.where(satisfied)[0][0]]
break
# NOTE(review): indentation is lost in this view. This `else` presumably pairs with the
# enclosing `for` (it would then run only when no pass reached `break`); confirm.
else:
logging.warning("Adversarial example found but not optimal.")
return x_adv
# Trust region method to adjust epsilon
for _ in range(self.max_iter):
perturb = original_sample - x_adv
perturb *= epsilon
potential_adv = x_adv + perturb
if hasattr(self.classifier, "clip_values") and self.classifier.clip_values is not None:
np.clip(potential_adv, self.classifier.clip_values[0], self.classifier.clip_values[1],
out=potential_adv)
pred = np.argmax(self.classifier.predict(np.array([potential_adv])), axis=1)[0]