151f9e7d141372ce45fc4367b81385eeccc95093,sequenceLabelling/preprocess.py,,to_vector_simple_with_elmo,#Any#Any#Any#Any#Any#,256

Before Change


    window = tokens[-maxlen:]
    
    # TBD: use better initializers (uniform, etc.)
    x = np.zeros((maxlen, embeddings.embed_size), )

    if lowercase:
        x_elmo = embeddings.get_sentence_wector_ELMo(lower(tokens))
    else:
        x_elmo = embeddings.get_sentence_wector_ELMo(tokens)

    # TBD: padding should be on the left; which vector should we use for padding?
    # Also, how should the padding be masked later for the RNN?
    for i, word in enumerate(window):
        if lowercase:
            word = _lower(word)
        if num_norm:
            word = _normalize_num(word)
        x[i,:] = np.concat(
                embeddings.get_word_vector(word).astype("float32"),
                x_elmo[i]
                )

    return x

def to_casing_single(tokens, maxlen=300):
    

After Change


    subtokens = []
    for i in range(0, len(tokens)):
        local_tokens = []
        for j in range(0, min(len(tokens[i]), maxlen)):
            if lowercase:
                local_tokens.append(_lower(tokens[i][j]))
            else:
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances


Project Name: kermitt2/delft
Commit Name: 151f9e7d141372ce45fc4367b81385eeccc95093
Time: 2018-06-20
Author: patrice.lopez@science-miner.com
File Name: sequenceLabelling/preprocess.py
Class Name:
Method Name: to_vector_simple_with_elmo


Project Name: IBM/adversarial-robustness-toolbox
Commit Name: 601d77a77a55323770bb2b214aa8844538b13d1f
Time: 2018-09-25
Author: Maria-Irina.Nicolae@ibm.com
File Name: art/visualization.py
Class Name:
Method Name: convert_to_rgb


Project Name: scikit-learn-contrib/imbalanced-learn
Commit Name: aa6af82f458acf3f853e5174d34b11d319eea1c0
Time: 2016-06-17
Author: victor.dvro@gmail.com
File Name: unbalanced_dataset/under_sampling/instance_hardness_threshold.py
Class Name: InstanceHardnessThreshold
Method Name: transform