// perform backward pass from op to merged for obtaining the gradients
# "Before" form of the gradient computation: differentiate `op` with respect
# to `merged` (`th` is presumably the `torch` alias -- confirm against the
# file's imports).
gradient = th.autograd.grad(outputs=op, inputs=merged,
# Seed for the backward pass: an all-ones tensor with op's size, then moved
# with .cuda() to the device of `fake_samps`.
# NOTE(review): .cuda(...) presumably only works when that device is a CUDA
# device, so this form looks GPU-only -- confirm.
grad_outputs=th.ones(op.size()).cuda(fake_samps.device),
# create_graph=True presumably keeps the result differentiable so that the
# penalty computed from it can itself be backpropagated -- confirm with the
# code that follows. [0] takes the first entry of the returned sequence
# (a single `inputs` tensor is passed here).
create_graph=True, retain_graph=True, only_inputs=True)[0]
// calculate the penalty using these gradients
After Change
// perform backward pass from op to merged for obtaining the gradients
# "After" form of the gradient computation: differentiate `op` with respect
# to `merged` (`th` is presumably the `torch` alias -- confirm against the
# file's imports).
gradient = th.autograd.grad(outputs=op, inputs=merged,
# Seed for the backward pass: an all-ones tensor built directly from `op`
# with ones_like, so no explicit .cuda()/device argument is needed here
# (per the torch docs, ones_like follows the input's size, dtype and device).
# create_graph=True presumably keeps the result differentiable so that the
# penalty computed from it can itself be backpropagated -- confirm with the
# code that follows.
grad_outputs=th.ones_like(op), create_graph=True,
# [0] takes the first entry of the returned sequence (a single `inputs`
# tensor is passed here).
retain_graph=True, only_inputs=True)[0]
// calculate the penalty using these gradients