f59d5873a23dfa3864d3ff5e3ed1070a3ca762ec,finetune/input_pipeline.py,BasePipeline,_dataset_with_targets,#BasePipeline#Any#Any#Any#,127
Before Change
dataset_encoded = lambda: itertools.chain.from_iterable(
map(lambda xy: self.text_to_tokens_mask(*xy), dataset()))
shape_def = self.feed_shape_type_def()
if not callable(Y) and self.config.chunk_long_sequences and train:
dataset_encoded_list = list(dataset_encoded())  # come up with a more principled way to do this.
self.config.dataset_size = len(dataset_encoded_list)
return Dataset.from_generator(lambda: self.wrap_tqdm(dataset_encoded(), train), *shape_def)
def _dataset_without_targets(self, Xs, train):
if not callable(Xs):
After Change
shape_def = self.feed_shape_type_def()
if not callable(Y) and train:
dataset_encoded_list = list(dataset_encoded())
counter = Counter()
for doc, target_arr in dataset_encoded_list:
counter.update(target_arr[doc["mask"].astype(np.bool)])
class_counts = {
self.label_encoder.inverse_transform([target])[0]: count
for target, count in counter.items()
}
self.config.class_weights = compute_class_weights(self.config.class_weights, class_counts=class_counts)
self.config.dataset_size = len(dataset_encoded_list)
return Dataset.from_generator(lambda: self.wrap_tqdm(dataset_encoded(), train), *shape_def)
In pattern: SUPERPATTERN
Frequency: 5
Non-data size: 4
Instances
Project Name: IndicoDataSolutions/finetune
Commit Name: f59d5873a23dfa3864d3ff5e3ed1070a3ca762ec
Time: 2019-06-12
Author: madison@indico.io
File Name: finetune/input_pipeline.py
Class Name: BasePipeline
Method Name: _dataset_with_targets
Project Name: pytorch/tutorials
Commit Name: 4794be6f7e3827228b6e0dc9b1cfe432a3ecdeb3
Time: 2021-03-04
Author: brianjo@fb.com
File Name: beginner_source/text_sentiment_ngrams_tutorial.py
Class Name:
Method Name:
Project Name: RaRe-Technologies/gensim
Commit Name: 680de8d4f35325e7486c07c4e06422929e826b57
Time: 2019-01-10
Author: __Singleton__@hackerdom.ru
File Name: gensim/corpora/lowcorpus.py
Class Name: LowCorpus
Method Name: line2doc
Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: c7d92c9df54a69a21d32984e837485e2de245fd1
Time: 2018-01-02
Author: max.lapan@gmail.com
File Name: ch12/tools/sub_reader.py
Class Name:
Method Name:
Project Name: dirty-cat/dirty_cat
Commit Name: a1a430fa42fa58cb3ba9d93f44749319928b78c8
Time: 2018-03-14
Author: patricio.cerda@inria.fr
File Name: dirty_cat/string_distances.py
Class Name:
Method Name: ngram_similarity