3128e13109c8766eafb413f2428bba976701e929,beginner_source/transformer_tutorial.py,,,#,295

Before Change


import copy

epochs = 3  # The number of epochs
best_model = None  # snapshot of the model with the lowest validation loss so far

# Train for a fixed number of epochs, validating after each one and keeping
# the model that achieved the lowest validation loss.
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train()
    val_loss = evaluate(model, val_data)
    print("-" * 89)
    print("| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | "
          "valid ppl {:8.2f}".format(epoch, (time.time() - epoch_start_time),
                                     val_loss, math.exp(val_loss)))
    print("-" * 89)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # Take an independent copy: a plain `best_model = model` only binds a
        # second name to the same object, so later epochs would keep mutating
        # the supposed "best" model.
        best_model = copy.deepcopy(model)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()


######################################################################
# Evaluate the model with the test dataset
# ----------------------------------------
#
# Apply the best model to check the result with the test dataset.

# Score the model retained from training on the test data, then report the
# loss together with its exponential (the perplexity).
test_loss = evaluate(best_model, test_data)
print("=" * 89)
print(
    "| End of training | test loss {:5.2f} | test ppl {:8.2f}".format(
        test_loss,
        math.exp(test_loss),
    )
)

After Change


# equal to the length of the vocab object.
#

# Hyperparameters for the Transformer language model, followed by the model
# itself, which is moved to the selected device.
ntokens = len(vocab.stoi)  # the size of vocabulary
emsize = 200  # embedding dimension
nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2  # the number of heads in the multi-head attention models
dropout = 0.2  # the dropout value
model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device)


######################################################################
# Run the model
# -------------
#


######################################################################
# `CrossEntropyLoss <https://pytorch.org/docs/master/nn.html?highlight=crossentropyloss#torch.nn.CrossEntropyLoss>`__
# is applied to track the loss and
# `SGD <https://pytorch.org/docs/master/optim.html?highlight=sgd#torch.optim.SGD>`__
# implements the stochastic gradient descent method as the optimizer. The initial
# learning rate is set to 5.0. `StepLR <https://pytorch.org/docs/master/optim.html?highlight=steplr#torch.optim.lr_scheduler.StepLR>`__ is
# applied to adjust the learning rate through epochs. During
# training, we use the
# `nn.utils.clip_grad_norm\_ <https://pytorch.org/docs/master/nn.html?highlight=nn%20utils%20clip_grad_norm#torch.nn.utils.clip_grad_norm_>`__
# function to scale all the gradients together to prevent them from exploding.
#

# Loss function, optimizer, and learning-rate schedule used for training.
criterion = nn.CrossEntropyLoss()
lr = 5.0  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler
# steps; `step_size` is an epoch count, so it is passed as an int by keyword.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)

import time
def train():
    model.train() // Turn on the train mode
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 12

Instances


Project Name: pytorch/tutorials
Commit Name: 3128e13109c8766eafb413f2428bba976701e929
Time: 2020-12-02
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: beginner_source/transformer_tutorial.py
Class Name:
Method Name:


Project Name: rusty1s/pytorch_geometric
Commit Name: 04ad358395c7bd074ffb9af235ccb76d2c0dc4cc
Time: 2020-07-01
Author: matthias.fey@tu-dortmund.de
File Name: examples/pna.py
Class Name:
Method Name:


Project Name: pytorch/tutorials
Commit Name: 3128e13109c8766eafb413f2428bba976701e929
Time: 2020-12-02
Author: 6156351+zhangguanheng66@users.noreply.github.com
File Name: beginner_source/transformer_tutorial.py
Class Name:
Method Name:


Project Name: rusty1s/pytorch_geometric
Commit Name: e2db3b3f1d3d23cd5bc1e295835e0f4b33e95447
Time: 2018-03-07
Author: matthias.fey@tu-dortmund.de
File Name: examples/cora_gcn.py
Class Name:
Method Name: