# a computational graph. We just use closures.
backprop(yh-y, optimizer)
epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
# Slightly useful trick: start with low batch size, accelerate.
trainer.batch_size = min(int(batch_size), max_batch_size)
batch_size *= 1.001
After Change
# * concatenate (|): Merge the outputs of two models into a single vector,
# i.e. (f|g)(x) -> hstack(f(x), g(x))
with Model.define_operators({">>": chain, "**": clone, "|": concatenate,
"+": add}):
# Important trick: text isn't like images, and the best way to use
# convolution is different. Don't use pooling-over-time. Instead,
# use the window to compute one vector per word, and do this N deep.