7fd53c15c6273327ef10c2458848fcaf2a85e406,finetune/base_models/gpt/featurizer.py,,gpt_featurizer,#Any#Any#Any#Any#Any#,133
Before Change
        features: The output of the featurizer's final state.
        sequence_features: The output of the featurizer at each timestep.
    initial_shape = [a or -1 for a in X.get_shape().as_list()]
    X = tf.reshape(X, shape=[-1] + initial_shape[-2:])
    with tf.variable_scope("model/featurizer", reuse=reuse):
        embed_weights = tf.get_variable(
            name="we",
            shape=[encoder.vocab_size + config.max_length, config.n_embed],
            initializer=tf.random_normal_initializer(stddev=config.weight_stddev)
        )
        if config.train_embeddings:
            embed_weights = dropout(embed_weights, config.embed_p_drop, train)
        else:
            embed_weights = tf.stop_gradient(embed_weights)
        X = tf.reshape(X, [-1, config.max_length, 2])
        h = embed(X, embed_weights)
        for layer in range(config.n_layer):
            if (config.n_layer - layer) == config.num_layers_trained and config.num_layers_trained != config.n_layer:
                h = tf.stop_gradient(h)
                train_layer = False
            else:
                train_layer = train
            with tf.variable_scope("h%d_" % layer):
                block_fn = functools.partial(block, n_head=config.n_heads, act_fn=config.act_fn,
                                             resid_pdrop=config.resid_p_drop, attn_pdrop=config.attn_p_drop,
                                             scope="h%d" % layer, train=train_layer, scale=True)
                if config.low_memory_mode and train_layer:
                    block_fn = recompute_grad(block_fn, use_entire_scope=True)
                h = block_fn(h)
        # Use hidden state at classifier token as input to final proj. + softmax
        clf_h = tf.reshape(h, [-1, config.n_embed])  # [batch * seq_len, embed]
        clf_token = encoder["_classify_"]
        pool_idx = tf.cast(tf.argmax(tf.cast(tf.equal(X[:, :, 0], clf_token), tf.float32), 1), tf.int32)
        clf_h = tf.gather(clf_h, tf.range(shape_list(X)[0], dtype=tf.int32) * config.max_length + pool_idx)
        clf_h = tf.reshape(clf_h, shape=initial_shape[:-2] + [config.n_embed])
        seq_feats = tf.reshape(h, shape=initial_shape[:-1] + [config.n_embed])
        return {
            "embed_weights": embed_weights,
After Change
        features: The output of the featurizer's final state.
        sequence_features: The output of the featurizer at each timestep.
    initial_shape = tf.shape(X)
    X = tf.reshape(X, shape=tf.concat(([-1], initial_shape[-2:]), 0))
    with tf.variable_scope("model/featurizer", reuse=reuse):
        embed_weights = tf.get_variable(
            name="we",
            shape=[encoder.vocab_size + config.max_length, config.n_embed],
            initializer=tf.random_normal_initializer(stddev=config.weight_stddev)
        )
        if config.train_embeddings:
            embed_weights = dropout(embed_weights, config.embed_p_drop, train)
        else:
            embed_weights = tf.stop_gradient(embed_weights)
        X = tf.reshape(X, [-1, config.max_length, 2])
        h = embed(X, embed_weights)
        for layer in range(config.n_layer):
            if (config.n_layer - layer) == config.num_layers_trained and config.num_layers_trained != config.n_layer:
                h = tf.stop_gradient(h)
                train_layer = False
            else:
                train_layer = train
            with tf.variable_scope("h%d_" % layer):
                block_fn = functools.partial(block, n_head=config.n_heads, act_fn=config.act_fn,
                                             resid_pdrop=config.resid_p_drop, attn_pdrop=config.attn_p_drop,
                                             scope="h%d" % layer, train=train_layer, scale=True)
                if config.low_memory_mode and train_layer:
                    block_fn = recompute_grad(block_fn, use_entire_scope=True)
                h = block_fn(h)
        # Use hidden state at classifier token as input to final proj. + softmax
        clf_h = tf.reshape(h, [-1, config.n_embed])  # [batch * seq_len, embed]
        clf_token = encoder["_classify_"]
        pool_idx = tf.cast(tf.argmax(tf.cast(tf.equal(X[:, :, 0], clf_token), tf.float32), 1), tf.int32)
        clf_h = tf.gather(clf_h, tf.range(shape_list(X)[0], dtype=tf.int32) * config.max_length + pool_idx)
        clf_h = tf.reshape(clf_h, shape=tf.concat((initial_shape[:-2], [config.n_embed]), 0))
        seq_feats = tf.reshape(h, shape=tf.concat((initial_shape[:-1], [config.n_embed]), 0))
        return {
            "embed_weights": embed_weights,
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 20
Instances
Project Name: IndicoDataSolutions/finetune
Commit Name: 7fd53c15c6273327ef10c2458848fcaf2a85e406
Time: 2019-03-21
Author: benlt@hotmail.co.uk
File Name: finetune/base_models/gpt/featurizer.py
Class Name:
Method Name: gpt_featurizer
Project Name: IndicoDataSolutions/finetune
Commit Name: 08e59a637de862e7c816d0e62f0f0d14a094c221
Time: 2019-03-21
Author: benlt@hotmail.co.uk
File Name: finetune/base_models/textcnn/featurizer.py
Class Name:
Method Name: textcnn_featurizer
Project Name: IndicoDataSolutions/finetune
Commit Name: 7fd53c15c6273327ef10c2458848fcaf2a85e406
Time: 2019-03-21
Author: benlt@hotmail.co.uk
File Name: finetune/base_models/gpt2/featurizer.py
Class Name:
Method Name: gpt2_featurizer
Project Name: IndicoDataSolutions/finetune
Commit Name: 7fd53c15c6273327ef10c2458848fcaf2a85e406
Time: 2019-03-21
Author: benlt@hotmail.co.uk
File Name: finetune/base_models/gpt/featurizer.py
Class Name:
Method Name: gpt_featurizer