be5b7adef5a75a8db9af53c641cf6e2233fb2832,snips_nlu/slot_filler/data_augmentation.py,,get_noise_iterator,#Any#Any#Any#,70

Before Change


        size = random.choice(range(min_size, max_size + 1))
        tokens = tokenize(subtitle)
        while len(tokens) < size:
            tokens = tokenize(next(subtitles_it))
        start = random.randint(0, len(tokens) - size)
        yield " ".join(t.value.lower() for t in tokens[start:start + size])

After Change



def get_noise_iterator(language, min_size, max_size):
    subtitles = get_subtitles(language)
    tokenized_subtitles = [tokenize(s) for s in subtitles]
    tokenized_subtitles = [t for t in tokenized_subtitles if
                           len(t) >= max_size]
    if len(tokenized_subtitles) == 0:
        raise ValueError("Could not find long enought subtitles")
    subtitles_it = cycle(np.random.permutation(tokenized_subtitles))
    for tokens in subtitles_it:
[Image: Italian Trulli]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: snipsco/snips-nlu
Commit Name: be5b7adef5a75a8db9af53c641cf6e2233fb2832
Time: 2017-04-12
Author: clement.doumouro@snips.ai
File Name: snips_nlu/slot_filler/data_augmentation.py
Class Name:
Method Name: get_noise_iterator


Project Name: Rostlab/nalaf
Commit Name: f558b935a7d4dca8328c1201a26b762029695e8f
Time: 2015-06-17
Author: carsten.uhlig@gmail.com
File Name: demo.py
Class Name:
Method Name:


Project Name: MycroftAI/padatious
Commit Name: 001010c152897b4f476ca5e435f86c9b75b17078
Time: 2017-10-11
Author: matthew3311999@gmail.com
File Name: padatious/train_data.py
Class Name: TrainData
Method Name: add_lines