151f9e7d141372ce45fc4367b81385eeccc95093,sequenceLabelling/preprocess.py,,to_vector_simple_with_elmo,#Any#Any#Any#Any#Any#,256
Before Change
window = tokens[-maxlen:]
# TBD: use better initializers (uniform, etc.)
x = np.zeros((maxlen, embeddings.embed_size), )
if lowercase:
x_elmo = embeddings.get_sentence_wector_ELMo(lower(tokens))
else:
x_elmo = embeddings.get_sentence_wector_ELMo(tokens)
# TBD: should padding be on the left, and which vector do we use for padding?
# And what about masking the padding later for the RNN?
for i, word in enumerate(window):
if lowercase:
word = _lower(word)
if num_norm:
word = _normalize_num(word)
x[i,:] = np.concat(
embeddings.get_word_vector(word).astype("float32"),
x_elmo[i]
)
return x
def to_casing_single(tokens, maxlen=300):
After Change
subtokens = []
for i in range(0, len(tokens)):
local_tokens = []
for j in range(0, min(len(tokens[i]) , maxlen)):
if lowercase:
local_tokens.append(_lower(tokens[i][j]))
else:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances Project Name: kermitt2/delft
Commit Name: 151f9e7d141372ce45fc4367b81385eeccc95093
Time: 2018-06-20
Author: patrice.lopez@science-miner.com
File Name: sequenceLabelling/preprocess.py
Class Name:
Method Name: to_vector_simple_with_elmo
Project Name: IBM/adversarial-robustness-toolbox
Commit Name: 601d77a77a55323770bb2b214aa8844538b13d1f
Time: 2018-09-25
Author: Maria-Irina.Nicolae@ibm.com
File Name: art/visualization.py
Class Name:
Method Name: convert_to_rgb
Project Name: scikit-learn-contrib/imbalanced-learn
Commit Name: aa6af82f458acf3f853e5174d34b11d319eea1c0
Time: 2016-06-17
Author: victor.dvro@gmail.com
File Name: unbalanced_dataset/under_sampling/instance_hardness_threshold.py
Class Name: InstanceHardnessThreshold
Method Name: transform