f59d5873a23dfa3864d3ff5e3ed1070a3ca762ec,finetune/input_pipeline.py,BasePipeline,_dataset_with_targets,#BasePipeline#Any#Any#Any#,127

Before Change


        dataset_encoded = lambda: itertools.chain.from_iterable(
            map(lambda xy: self.text_to_tokens_mask(*xy), dataset()))
        shape_def = self.feed_shape_type_def()
        if not callable(Y) and self.config.chunk_long_sequences and train:
            dataset_encoded_list = list(dataset_encoded())  # come up with a more principled way to do this.
            self.config.dataset_size = len(dataset_encoded_list)
        return Dataset.from_generator(lambda: self.wrap_tqdm(dataset_encoded(), train), *shape_def)

    def _dataset_without_targets(self, Xs, train):
        if not callable(Xs):

After Change


        shape_def = self.feed_shape_type_def()
        if not callable(Y) and train:
            dataset_encoded_list = list(dataset_encoded())
            counter = Counter()
            for doc, target_arr in dataset_encoded_list:
                counter.update(target_arr[doc["mask"].astype(np.bool)])
            class_counts = {
                self.label_encoder.inverse_transform([target])[0]: count 
                for target, count in counter.items()
            }
            self.config.class_weights = compute_class_weights(self.config.class_weights, class_counts=class_counts)
            self.config.dataset_size = len(dataset_encoded_list)
        return Dataset.from_generator(lambda: self.wrap_tqdm(dataset_encoded(), train), *shape_def)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 5

Non-data size: 4

Instances


Project Name: IndicoDataSolutions/finetune
Commit Name: f59d5873a23dfa3864d3ff5e3ed1070a3ca762ec
Time: 2019-06-12
Author: madison@indico.io
File Name: finetune/input_pipeline.py
Class Name: BasePipeline
Method Name: _dataset_with_targets


Project Name: pytorch/tutorials
Commit Name: 4794be6f7e3827228b6e0dc9b1cfe432a3ecdeb3
Time: 2021-03-04
Author: brianjo@fb.com
File Name: beginner_source/text_sentiment_ngrams_tutorial.py
Class Name:
Method Name:


Project Name: RaRe-Technologies/gensim
Commit Name: 680de8d4f35325e7486c07c4e06422929e826b57
Time: 2019-01-10
Author: __Singleton__@hackerdom.ru
File Name: gensim/corpora/lowcorpus.py
Class Name: LowCorpus
Method Name: line2doc


Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: c7d92c9df54a69a21d32984e837485e2de245fd1
Time: 2018-01-02
Author: max.lapan@gmail.com
File Name: ch12/tools/sub_reader.py
Class Name:
Method Name:


Project Name: dirty-cat/dirty_cat
Commit Name: a1a430fa42fa58cb3ba9d93f44749319928b78c8
Time: 2018-03-14
Author: patricio.cerda@inria.fr
File Name: dirty_cat/string_distances.py
Class Name:
Method Name: ngram_similarity