be5b7adef5a75a8db9af53c641cf6e2233fb2832,snips_nlu/slot_filler/data_augmentation.py,,get_noise_iterator,#Any#Any#Any#,70

Before Change


    subtitles_it = cycle(np.random.permutation(list(subtitles)))
    for subtitle in subtitles_it:
        size = random.choice(range(min_size, max_size + 1))
        tokens = tokenize(subtitle)
        while len(tokens) < size:
            tokens = tokenize(next(subtitles_it))
        start = random.randint(0, len(tokens) - size)
        yield " ".join(t.value.lower() for t in tokens[start:start + size])

After Change



def get_noise_iterator(language, min_size, max_size):
    """Set up random token-window sampling over the subtitles of `language`.

    Subtitles returned by ``get_subtitles(language)`` are tokenized once up
    front, and only those with at least ``max_size`` tokens are kept, so
    every window size drawn in the loop fits inside any remaining subtitle.

    NOTE(review): this excerpt ends right after the window start is chosen;
    the statement that consumes ``tokens[start:start + size]`` (a ``yield``
    in the "Before Change" version) is not visible here -- confirm against
    the full file.

    Args:
        language: passed unchanged to ``get_subtitles``.
        min_size: smallest window size, in tokens, that can be drawn.
        max_size: largest window size, in tokens, that can be drawn; also
            the minimum token count a subtitle needs in order to be kept.

    Raises:
        ValueError: if no subtitle has at least ``max_size`` tokens.
    """
    subtitles = get_subtitles(language)
    # Tokenize every subtitle once here rather than on each loop iteration.
    tokenized_subtitles = [tokenize(s) for s in subtitles]
    # Drop subtitles too short to hold the largest possible window.
    tokenized_subtitles = [t for t in tokenized_subtitles if
                           len(t) >= max_size]
    if len(tokenized_subtitles) == 0:
        raise ValueError("Could not find long enought subtitles")
    # Shuffle once, then loop over that shuffled order indefinitely.
    subtitles_it = cycle(np.random.permutation(tokenized_subtitles))
    for tokens in subtitles_it:
        # Window size is drawn from min_size..max_size, both inclusive.
        size = random.choice(range(min_size, max_size + 1))
        # random.randint includes both bounds; the upper bound is >= 0
        # because len(tokens) >= max_size >= size after the filter above.
        start = random.randint(0, len(tokens) - size)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: snipsco/snips-nlu
Commit Name: be5b7adef5a75a8db9af53c641cf6e2233fb2832
Time: 2017-04-12
Author: clement.doumouro@snips.ai
File Name: snips_nlu/slot_filler/data_augmentation.py
Class Name:
Method Name: get_noise_iterator


Project Name: daniel-kukiela/nmt-chatbot
Commit Name: 0e2a7f6d85a341959eba41d65019b2566084b406
Time: 2017-12-03
Author: daniel@kukiela.pl
File Name: inference.py
Class Name:
Method Name: inference


Project Name: gooofy/zamia-speech
Commit Name: 9f376975884e7a0d7a553dcdfa1ab54b66ddbb1f
Time: 2018-12-10
Author: guenter@zamia.org
File Name: speech_editor.py
Class Name:
Method Name: