a5f63384ac283ea71ca2668963da77db00cafa46,pycorrector/rnn_lm/infer.py,,ppl,#Any#,71

Before Change




def ppl(sentence_list):
    """Compute the average per-word perplexity of each sentence.

    Rebuilds the LSTM language-model graph, restores the latest checkpoint
    from ``conf.model_dir``, then scores every sentence in ``sentence_list``
    one transition at a time.

    :param sentence_list: iterable of sentences (sequences of characters/tokens)
    :return: list of average perplexities, one per input sentence
    """
    ppl_list = []
    # load data dict
    word_to_int = load_word_dict(conf.word_dict_path)
    # init params; we score one sentence at a time, hence batch_size = 1
    batch_size = 1
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    output_targets = tf.placeholder(tf.int32, [batch_size, None])
    # init model
    end_points = rnn_model(model="lstm",
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=conf.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        # init op
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))

        # infer each sentence
        for sentence in sentence_list:
            # renamed from `ppl` so the accumulator does not shadow this function
            sent_ppl = 0
            # map characters to ids, falling back to UNK for out-of-vocabulary ones
            x = [word_to_int[c] if c in word_to_int else word_to_int[UNK_TOKEN] for c in sentence]
            x = [word_to_int[START_TOKEN]] + x + [word_to_int[END_TOKEN]]
            print("x:", x)
            # reshape: targets are the inputs shifted left by one position
            y = np.array(x[1:]).reshape((-1, batch_size))
            x = np.array(x[:-1]).reshape((-1, batch_size))
            print(x.shape)
            print(y.shape)
            # get each word perplexity
            word_count = x.shape[0]
            for i in range(word_count):
                perplexity = sess.run(end_points["perplexity"],
                                      feed_dict={input_data: x[i:i + 1, :],
                                                 output_targets: y[i:i + 1, :]})
                print("{0} -> {1}, perplexity: {2}".format(x[i:i + 1, :], y[i:i + 1, :], perplexity))
                # Skip the START->first and last->END transitions, matching the
                # `word_count - 2` divisor below.
                # BUG FIX: the original tested `i == word_count`, which is never
                # true for i in range(word_count); the last transition was
                # silently included in the sum.
                if i == 0 or i == word_count - 1:
                    continue
                sent_ppl += perplexity
            # Average over the interior transitions; guard against sentences so
            # short that word_count - 2 <= 0 (empty/one-char input would
            # otherwise divide by zero or a negative count).
            sent_ppl /= max(word_count - 2, 1)
            print("perplexity:" + str(sent_ppl))
            ppl_list.append(sent_ppl)
    return ppl_list


def infer_generate():

After Change




def ppl(sentence_list):
    """Compute the average per-word perplexity of each sentence.

    Rebuilds the LSTM language-model graph, restores the latest checkpoint
    from ``config.model_dir``, then scores every sentence in
    ``sentence_list`` one transition at a time.

    :param sentence_list: iterable of sentences (sequences of characters/tokens)
    :return: dict mapping each sentence to its average perplexity
    """
    result = dict()
    # load data dict
    word_to_idx = load_word_dict(config.word_dict_path)
    # reverse map, used only to print human-readable transitions below
    idx_to_word = {v: k for k, v in word_to_idx.items()}
    # init params; we score one sentence at a time, hence batch_size = 1
    batch_size = 1
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    output_targets = tf.placeholder(tf.int32, [batch_size, None])
    # init model
    end_points = rnn_model(model="lstm",
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_idx),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=config.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        # init op
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(config.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))

        # infer each sentence
        for sentence in sentence_list:
            # renamed from `ppl` so the accumulator does not shadow this function
            sent_ppl = 0
            # map characters to ids, falling back to UNK for out-of-vocabulary ones
            x = [word_to_idx[c] if c in word_to_idx else word_to_idx[UNK_TOKEN] for c in sentence]
            x = [word_to_idx[START_TOKEN]] + x + [word_to_idx[END_TOKEN]]
            # reshape: targets are the inputs shifted left by one position
            y = np.array(x[1:]).reshape((-1, batch_size))
            x = np.array(x[:-1]).reshape((-1, batch_size))
            # get each word perplexity
            word_count = x.shape[0]
            for i in range(word_count):
                perplexity = sess.run(end_points["perplexity"],
                                      feed_dict={input_data: x[i:i + 1, :],
                                                 output_targets: y[i:i + 1, :]})
                print("{0} -> {1}, perplexity: {2}".format(idx_to_word[x[i:i + 1, :].tolist()[0][0]],
                                                           idx_to_word[y[i:i + 1, :].tolist()[0][0]],
                                                           perplexity))
                # Skip the START->first and last->END transitions, matching the
                # `word_count - 2` divisor below.
                # BUG FIX: the original tested `i == word_count`, which is never
                # true for i in range(word_count); the last transition was
                # silently included in the sum.
                if i == 0 or i == word_count - 1:
                    continue
                sent_ppl += perplexity
            # Average over the interior transitions; guard against sentences so
            # short that word_count - 2 <= 0 (empty/one-char input would
            # otherwise divide by zero or a negative count).
            sent_ppl /= max(word_count - 2, 1)
            result[sentence] = sent_ppl
    return result


def infer_generate():
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 7

Non-data size: 7

Instances


Project Name: shibing624/pycorrector
Commit Name: a5f63384ac283ea71ca2668963da77db00cafa46
Time: 2019-07-08
Author: 507153809@qq.com
File Name: pycorrector/rnn_lm/infer.py
Class Name:
Method Name: ppl


Project Name: dirty-cat/dirty_cat
Commit Name: f819a34e2fbea2dab4997b3b236b517fa12d115d
Time: 2018-06-08
Author: gael.varoquaux@normalesup.org
File Name: examples/02_predict_employee_salaries.py
Class Name:
Method Name:


Project Name: pantsbuild/pants
Commit Name: cb091ce8cd52691e9eb569dacd960cd326030c8b
Time: 2014-02-20
Author: jsirois@twitter.com
File Name: src/python/twitter/pants/tasks/builddictionary.py
Class Name: BuildBuildDictionary
Method Name: _gen_goals_reference


Project Name: openml/openml-python
Commit Name: 7dba90f659bd36cbc82290fe8a5d2f83f4e5169a
Time: 2016-09-05
Author: janvanrijn@gmail.com
File Name: openml/runs/functions.py
Class Name:
Method Name: _list_runs


Project Name: tensorflow/models
Commit Name: 36101ab4095065a4196ff4f6437e94f0d91df4e9
Time: 2020-07-21
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/transformer.py
Class Name: TransformerDecoderLayer
Method Name: call


Project Name: onnx/onnx-tensorflow
Commit Name: 58ace0a10f2859a7bfbb9b56238ba47e4175f5ac
Time: 2020-10-09
Author: wtsang@us.ibm.com
File Name: onnx_tf/backend.py
Class Name: TensorflowBackend
Method Name: _onnx_graph_to_tensorflow_rep


Project Name: tensorflow/models
Commit Name: 570d9a2b06fd6269c930d7fddf38bc60b212ebee
Time: 2020-07-21
Author: hongkuny@google.com
File Name: official/nlp/modeling/layers/transformer.py
Class Name: TransformerDecoderLayer
Method Name: call