return None, None
# Configure the learning rate using an exponential decay.
global_step = tf.compat.v1.train.get_or_create_global_step()
current_epoch = tf.cast(global_step, tf.float32) / batches_per_epoch
decay_steps = int(1.0 * batches_per_epoch * num_epochs_per_decay)
learning_rate = tf.compat.v1.train.exponential_decay(
    learning_rate=initial_learning_rate,
    global_step=global_step,
    decay_steps=decay_steps,
    decay_rate=learning_rate_decay_factor,
    staircase=True)
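# With staircase=True, exponential_decay yields a stepwise schedule:
#   learning_rate = initial_learning_rate *
#       learning_rate_decay_factor ** floor(global_step / decay_steps)
# i.e. the rate drops by learning_rate_decay_factor once every
# num_epochs_per_decay epochs.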
# Set a minimum boundary for the learning rate to be a fixed value of 1e-9.
# It's common to see these tf.maximum(...) operations when training
# inception, with a floor of 1e-4 * initial_learning_rate, but this makes it
# hard to explore learning rate schedules that decay quickly or by a lot at
# each step. Here we just use a very small constant 1e-9 as the minimum value.
learning_rate = tf.maximum(learning_rate, 1e-9, name="learning_rate")
optimizer = tf.compat.v1.train.RMSPropOptimizer(
    learning_rate,
    rmsprop_decay,
    momentum=rmsprop_momentum,
    epsilon=rmsprop_epsilon)
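# On TPU, wrap the optimizer so per-shard gradients are aggregated with a
# cross-replica sum before being applied.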
if self.use_tpu:
  optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)
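# Ops in the UPDATE_OPS collection (e.g. batch-norm moving mean/variance
# updates) must run with every training step, so make the train op depend
# on them.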
update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = optimizer.minimize(total_loss, global_step=global_step)
# NB. In the inception code this was "tf.trainable_variables()
# + tf.moving_average_variables()", but we've settled on just
# tf.model_variables() in the existing production DV2.
variables_to_average = tf.compat.v1.model_variables()
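# With num_updates=global_step, the effective decay early in training is
# min(decay, (1 + global_step) / (10 + global_step)), which lets the
# averages warm up faster at the start.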
variable_averages = tf.train.ExponentialMovingAverage(
    decay=moving_average_decay, num_updates=global_step)
with tf.control_dependencies([train_op]), tf.compat.v1.name_scope(
    "moving_average"):
  train_op = variable_averages.apply(variables_to_average)
tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.UPDATE_OPS, train_op)