optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
tower_grads = []
tower_summaries = None
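# Build one clone of the model per GPU; clones after the first reuse the
# variables created by the first clone, and each tower's gradients are
# collected for averaging below.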
for i in range(FLAGS.num_clones):
  with tf.device("/gpu:%d" % i):
    with tf.name_scope("clone_%d" % i) as scope:
      loss = _tower_loss(
          iterator=iterator,
          num_of_classes=num_of_classes,
          ignore_label=ignore_label,
          scope=scope,
          reuse_variable=(i != 0))
      grads = optimizer.compute_gradients(loss)
      tower_grads.append(grads)
      # Retain the summaries from the first tower.
      if i == 0:
        tower_summaries = tf.summary.merge_all(scope=scope)
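# After all clones are built, average the per-tower gradients on the CPU
# and apply any per-variable gradient adjustments.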
with tf.device("/cpu:0"):
  grads_and_vars = _average_gradients(tower_grads)
  if tower_summaries is not None:
    summaries.append(tower_summaries)
  # Modify the gradients for biases and last layer variables.
  last_layers = model.get_extra_layer_scopes(
      FLAGS.last_layers_contain_logits_only)
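# The _average_gradients helper called above is defined elsewhere in the
# script. A minimal sketch of such a helper, following the standard
# TensorFlow multi-tower pattern, is given below. It assumes every gradient
# is a dense tensor (never None or an IndexedSlices), which the actual
# implementation may handle differently.
def _average_gradients(tower_grads):
  """Averages gradients across towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples, one inner
      list per tower. All inner lists share the same variable order because
      the towers reuse the same variables.

  Returns:
    A list of (gradient, variable) tuples where each gradient has been
    averaged over all towers.
  """
  average_grads = []
  # zip(*tower_grads) groups the (grad, var) pairs that belong to the same
  # variable across all towers.
  for grads_and_vars in zip(*tower_grads):
    # Stack the per-tower gradients along a new leading axis and average
    # over that axis.
    grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
    grad = tf.reduce_mean(tf.concat(grads, 0), 0)
    # The variable is shared across towers, so take it from the first one.
    _, var = grads_and_vars[0]
    average_grads.append((grad, var))
  return average_grads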