return F.log_softmax(x, dim=1)
# Build a Net instance and place it on `device` via `.to()`.
# NOTE(review): `Net` and `device` are defined outside this excerpt — confirm
# `device` is selected before this line runs.
model = Net().to(device)
######################################################################
# Training the model
# ------------------
#
# Now, let's use the SGD algorithm to train the model. The network
# learns the classification task in a supervised way. At the same time,
# the model learns the STN automatically in an end-to-end fashion.
optimizer = optim.SGD(model.parameters(), lr=0.01)  # SGD over all of model's parameters; lr fixed at 0.01, no other options passed
def train(epoch):