9a9f0b65c5344878582772337a8e4b94cff9c747,official/nlp/modeling/layers/rezero_transformer.py,ReZeroTransformer,build,#ReZeroTransformer#Any#,89

Before Change


          "heads (%d)" % (hidden_size, self._num_heads))
    self._attention_head_size = int(hidden_size // self._num_heads)

    self._attention_layer = attention.MultiHeadAttention(
        num_heads=self._num_heads,
        key_size=self._attention_head_size,
        dropout=self._attention_dropout_rate,
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="self_attention")
    self._attention_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    if self._use_layer_norm:
      # Use float32 in layernorm for numeric stability.
      # It is probably safe in mixed_float16, but we haven't validated this yet.
      self._attention_layer_norm = (
          tf.keras.layers.LayerNormalization(
              name="self_attention_layer_norm",
              axis=-1,
              epsilon=1e-12,
              dtype=tf.float32))
    self._intermediate_dense = dense_einsum.DenseEinsum(
        output_shape=self._intermediate_size,
        activation=None,
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="intermediate")
    policy = tf.keras.mixed_precision.experimental.global_policy()
    if policy.name == "mixed_bfloat16":
      # bfloat16 causes BERT with the LAMB optimizer to not converge
      # as well, so we use float32.
      # TODO(b/154538392): Investigate this.
      policy = tf.float32
    self._intermediate_activation_layer = tf.keras.layers.Activation(
        self._intermediate_activation, dtype=policy)
    self._output_dense = dense_einsum.DenseEinsum(
        output_shape=hidden_size,
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="output")
    self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    if self._use_layer_norm:
      # Use float32 in layernorm for numeric stability.
      self._output_layer_norm = tf.keras.layers.LayerNormalization(

After Change


      raise ValueError(
          "The input size (%d) is not a multiple of the number of attention "
          "heads (%d)" % (hidden_size, self._num_heads))
    self._attention_head_size = int(hidden_size // self._num_heads)
    common_kwargs = dict(
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)
    self._attention_layer = attention.MultiHeadAttention(
        num_heads=self._num_heads,
        key_size=self._attention_head_size,
        dropout=self._attention_dropout_rate,
        name="self_attention",
        **common_kwargs)
    self._attention_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    if self._use_layer_norm:
      # Use float32 in layernorm for numeric stability.
      # It is probably safe in mixed_float16, but we haven't validated this yet.
      self._attention_layer_norm = (
          tf.keras.layers.LayerNormalization(
              name="self_attention_layer_norm",
              axis=-1,
              epsilon=1e-12,
              dtype=tf.float32))
    self._intermediate_dense = tf.keras.layers.experimental.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, self._intermediate_size),
        bias_axes="d",
        name="intermediate",
        **common_kwargs)
    policy = tf.keras.mixed_precision.experimental.global_policy()
    if policy.name == "mixed_bfloat16":
      # bfloat16 causes BERT with the LAMB optimizer to not converge
      # as well, so we use float32.
      # TODO(b/154538392): Investigate this.
      policy = tf.float32
    self._intermediate_activation_layer = tf.keras.layers.Activation(
        self._intermediate_activation, dtype=policy)
    self._output_dense = tf.keras.layers.experimental.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, hidden_size),
        bias_axes="d",
        name="output",
        **common_kwargs)
    self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    if self._use_layer_norm:
      # Use float32 in layernorm for numeric stability.
      self._output_layer_norm = tf.keras.layers.LayerNormalization(
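The diff above boils down to two changes: the shared initializer/regularizer/constraint keyword arguments are collected once in a common_kwargs dict, and the custom dense_einsum.DenseEinsum wrapper is replaced by tf.keras.layers.experimental.EinsumDense with an explicit einsum equation. A minimal sketch of the new style follows, assuming a TF 2.x release that still exposes the experimental EinsumDense path; the sizes, initializer values, and the dummy input are illustrative placeholders, not taken from the commit.

import tensorflow as tf

hidden_size = 768          # illustrative size, not from the commit
intermediate_size = 3072   # illustrative size, not from the commit

# Shared keyword arguments factored out once, as in the "After Change" version.
# Concrete values are placeholders; the layer passes self._kernel_initializer etc.
common_kwargs = dict(
    kernel_initializer="glorot_uniform",
    bias_initializer="zeros",
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None)

# Old: dense_einsum.DenseEinsum(output_shape=intermediate_size, ...)
# New: EinsumDense with an explicit equation over (batch, seq, hidden) inputs.
intermediate_dense = tf.keras.layers.experimental.EinsumDense(
    "abc,cd->abd",
    output_shape=(None, intermediate_size),
    bias_axes="d",
    name="intermediate",
    **common_kwargs)

x = tf.zeros([2, 16, hidden_size])   # dummy (batch, seq_len, hidden) input
y = intermediate_dense(x)            # shape (2, 16, intermediate_size)

In the new code the same common_kwargs dict is reused by the attention layer, the intermediate projection, and the output projection, which removes the seven repeated keyword arguments from each constructor call.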
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 51

Instances


Project Name: tensorflow/models
Commit Name: 9a9f0b65c5344878582772337a8e4b94cff9c747
Time: 2020-07-08
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/rezero_transformer.py
Class Name: ReZeroTransformer
Method Name: build


Project Name: tensorflow/models
Commit Name: 43f5340f40697be2c27f632889f8bca919670d06
Time: 2020-07-08
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/transformer.py
Class Name: Transformer
Method Name: build


Project Name: tensorflow/models
Commit Name: 9a9f0b65c5344878582772337a8e4b94cff9c747
Time: 2020-07-08
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/transformer.py
Class Name: Transformer
Method Name: build


Project Name: tensorflow/models
Commit Name: 43f5340f40697be2c27f632889f8bca919670d06
Time: 2020-07-08
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/rezero_transformer.py
Class Name: ReZeroTransformer
Method Name: build