config["common"] = dict(conv=dict(use_bias=False))
# The learning rate starts at 0.1 (no warm-up) and is divided by 10 at epochs 30 and 60,
# with batch size = 256 on ImageNet.
init_lr = 1e-3 if is_best_practice() else .1
config["decay"] = ("const", dict(boundaries=[117188, 234375], values=[init_lr, init_lr/10, init_lr/100]))
config["optimizer"] = dict(name="Momentum", momentum=.9)
return config
After Change
# with batch size = 256 on ImageNet.
lr = .1
config["decay"] = ("const", dict(boundaries=[117188, 234375], values=[lr, lr/10, lr/100]))
config["optimizer"] = ("Momentum", dict(momentum=.9))
return config
def build_config(self, names=None):