0f2aa2756bdcabfc79ad8b0c0ff6640ca1cc96c2,scripts/tf_cnn_benchmarks/benchmark_cnn.py,BenchmarkCNN,add_forward_pass_and_gradients,#BenchmarkCNN#Any#Any#Any#Any#Any#Any#Any#,2541

Before Change


        results["logits"] = logits
        return results
      loss_func = self.model.loss_function or loss_function
      base_loss = loss_func(logits, labels, aux_logits=aux_logits)
      params = self.variable_mgr.trainable_variables_on_device(
          rel_device_num, abs_device_num)
      l2_loss = None
      total_loss = base_loss
      with tf.name_scope("l2_loss"):
        fp32_params = params
        if data_type == tf.float16 and self.params.fp16_vars:
          # fp16 reductions are very slow on GPUs, so cast to fp32 before
          # calling tf.nn.l2_loss and tf.add_n.
          # TODO(b/36217816): Once the bug is fixed, investigate if we should do
          # this reduction in fp16.
          fp32_params = (tf.cast(p, tf.float32) for p in params)
        if rel_device_num == len(self.devices) - 1:
          # We compute the L2 loss for only one device instead of all of them,
          # because the L2 loss for each device is the same. To adjust for this,
          # we multiply the L2 loss by the number of devices. We choose the
          # last device because for some reason, on a Volta DGX1, the first four
          # GPUs take slightly longer to complete a step than the last four.
          # TODO(reedwm): Shard the L2 loss computations across GPUs.
          if self.params.single_l2_loss_op:
            # TODO(reedwm): If faster, create a fused op that does the L2 loss
            # on multiple tensors, and use that instead of concatenating
            # tensors.
            reshaped_params = [tf.reshape(p, (-1,)) for p in fp32_params]
            l2_loss = tf.nn.l2_loss(tf.concat(reshaped_params, axis=0))
          else:
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in fp32_params])
      weight_decay = self.params.weight_decay
      if (weight_decay is not None and weight_decay != 0. and
          l2_loss is not None):
        total_loss += len(self.devices) * weight_decay * l2_loss

      aggmeth = tf.AggregationMethod.DEFAULT
      scaled_loss = (total_loss if self.loss_scale is None
                     else total_loss * self.loss_scale)
      grads = tf.gradients(scaled_loss, params, aggregation_method=aggmeth)
      if self.loss_scale is not None:
        # TODO(reedwm): If automatic loss scaling is not used, we could avoid
        # these multiplications by directly modifying the learning rate instead.
        # If this is done, care must be taken to ensure that this scaling method
        # is correct, as some optimizers square gradients and do other
        # operations which might not be compatible with modifying both the
        # gradients and the learning rate.

        grads = [
            grad * tf.cast(1. / self.loss_scale, grad.dtype) for grad in grads
        ]

      if self.params.variable_update == "horovod":
        import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
        if self.params.horovod_device:
          horovod_device = "/%s:0" % self.params.horovod_device
        else:
          horovod_device = ""
        # All-reduce gradients using Horovod.
        grads = [hvd.allreduce(grad, average=False, device_dense=horovod_device)
                 for grad in grads]

      if self.params.staged_vars:
        grad_dtypes = [grad.dtype for grad in grads]
        grad_shapes = [grad.shape for grad in grads]
        grad_stage = data_flow_ops.StagingArea(grad_dtypes, grad_shapes)
        grad_stage_op = grad_stage.put(grads)
        # In general, this decouples the computation of the gradients and
        # the updates of the weights.
        # During the pipeline warm up, this runs enough training to produce
        # the first set of gradients.
        gpu_grad_stage_ops.append(grad_stage_op)
        grads = grad_stage.get()

      param_refs = self.variable_mgr.trainable_variables_on_device(
          rel_device_num, abs_device_num, writable=True)
      gradvars = list(zip(grads, param_refs))
      if self.params.loss_type_to_report == "total_loss":
        results["loss"] = total_loss
      else:
        results["loss"] = base_loss
      results["gradvars"] = gradvars
      return results

  def get_image_preprocessor(self):
    Returns the image preprocessor to be used, based on the model.
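
A minimal sketch of the regularization and loss-scaling logic in the method above, assuming TF1-style graph mode via tf.compat.v1; the helper name compute_scaled_gradients and its argument list are hypothetical and not part of benchmark_cnn.py:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()  # tf.gradients requires graph mode


def compute_scaled_gradients(base_loss, params, weight_decay, num_devices,
                             loss_scale=None, single_l2_loss_op=True):
  # L2 term: either one fused reduction over a concatenated vector or one
  # tf.nn.l2_loss per variable summed with tf.add_n. Both branches compute
  # sum(p ** 2) / 2 over all parameters.
  if single_l2_loss_op:
    flat = tf.concat([tf.reshape(p, (-1,)) for p in params], axis=0)
    l2_loss = tf.nn.l2_loss(flat)
  else:
    l2_loss = tf.add_n([tf.nn.l2_loss(p) for p in params])
  # The L2 loss is computed on a single device only, so it is multiplied by
  # the number of devices to keep the effective weight decay unchanged.
  total_loss = base_loss + num_devices * weight_decay * l2_loss

  # Scale the loss up before differentiation so small fp16 gradients do not
  # underflow, then divide the scale back out of the gradients.
  scaled_loss = total_loss if loss_scale is None else total_loss * loss_scale
  grads = tf.gradients(scaled_loss, params,
                       aggregation_method=tf.AggregationMethod.DEFAULT)
  if loss_scale is not None:
    grads = [g * tf.cast(1. / loss_scale, g.dtype) for g in grads]
  return total_loss, grads

Up to floating-point rounding, the scale-then-unscale step leaves the gradients unchanged; it only shifts intermediate values into a range where fp16 does not underflow.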

After Change



      return results

    with tf.device(self.devices[rel_device_num]):
      outputs = forward_pass_and_gradients()
      logits, loss, grads = unpack_forward_pass_and_gradients_output(outputs)
      return make_results(logits, loss, grads)

  def get_image_preprocessor(self):
    Returns the image preprocessor to be used, based on the model.

    Returns:
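
For reference, the overall shape of the refactored method can be sketched with toy stand-ins for the real helpers; the tiny linear model below is invented for illustration and is not the code in benchmark_cnn.py:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


def add_forward_pass_and_gradients_sketch(device_name='/cpu:0'):
  def forward_pass_and_gradients():
    # Build the per-tower model, loss, and gradients, returned as a flat
    # list of tensors so the caller can unpack them uniformly.
    x = tf.random_normal([8, 4])
    w = tf.get_variable('w', [4, 2])
    logits = tf.matmul(x, w)
    loss = tf.reduce_mean(tf.square(logits))
    grads = tf.gradients(loss, [w])
    return [logits, loss] + grads

  def unpack_forward_pass_and_gradients_output(outputs):
    return outputs[0], outputs[1], outputs[2:]

  def make_results(logits, loss, grads):
    return {'logits': logits, 'loss': loss, 'gradvars': grads}

  # Constructing everything under one tf.device scope pins the whole tower,
  # forward and backward, to a single device.
  with tf.device(device_name):
    outputs = forward_pass_and_gradients()
    logits, loss, grads = unpack_forward_pass_and_gradients_output(outputs)
    return make_results(logits, loss, grads)

This mirrors the with tf.device(self.devices[rel_device_num]) block in the After Change snippet above.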
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: tensorflow/benchmarks
Commit Name: 0f2aa2756bdcabfc79ad8b0c0ff6640ca1cc96c2
Time: 2018-08-10
Author: ycao@google.com
File Name: scripts/tf_cnn_benchmarks/benchmark_cnn.py
Class Name: BenchmarkCNN
Method Name: add_forward_pass_and_gradients


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: 5d353701dd56a1fc8abc15e4082e33b7bed2a241
Time: 2017-08-09
Author: harhro@gmail.com
File Name: mimic3models/split_train_val.py
Class Name:
Method Name:


Project Name: YerevaNN/mimic3-benchmarks
Commit Name: 7567cc646d258e40dde9790a28a9b264ccd494fb
Time: 2017-08-27
Author: harhro@gmail.com
File Name: mimic3models/split_train_val.py
Class Name:
Method Name: