Before Change
# Read text
text = Parallel(n_jobs=-1)(delayed(read_text)(str(f)) for f in file_list)
text = Parallel(n_jobs=-1)(delayed(tokenizer.encode)(txt) for txt in text)
# Read file size and sort dataset by file size (Note: feature length may differ)
file_len = Parallel(n_jobs=-1)(delayed(getsize)(f) for f in file_list)
self.file_list, self.text = zip(*[(f_name,txt) \
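The change below caps the file-reading pool at a named READ_FILE_THREADS constant instead of n_jobs=-1 (one worker per core), and replaces the parallel tokenizer.encode pass with a plain list comprehension, presumably because encoding is cheap enough per call that pickling the tokenizer into every joblib worker costs more than the parallelism saves.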
After Change
# Read text
text = Parallel(n_jobs=READ_FILE_THREADS)(delayed(read_text)(str(f)) for f in file_list)
#text = Parallel(n_jobs=-1)(delayed(tokenizer.encode)(txt) for txt in text)
text = [tokenizer.encode(txt) for txt in text]
# Read file size and sort dataset by file size (Note: feature length may differ)
file_len = Parallel(n_jobs=READ_FILE_THREADS)(delayed(getsize)(f) for f in file_list)
self.file_list, self.text = zip(*[(f_name,txt) \
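For reference, here is a minimal self-contained sketch of the "after" version. READ_FILE_THREADS, read_text, the toy tokenizer, the placeholder files, and the completion of the truncated zip(*[...]) sort line are all assumptions for illustration, not the repository's actual code.

# Minimal runnable sketch; names marked "assumed"/"hypothetical" are not from the source.
from os.path import getsize
from joblib import Parallel, delayed

READ_FILE_THREADS = 4  # assumed cap on the reader pool (replaces n_jobs=-1)

def read_text(path):
    # Hypothetical helper: load the transcript for one file.
    with open(path, encoding='utf-8') as f:
        return f.read().strip()

class ToyTokenizer:
    # Stand-in tokenizer; only .encode() is needed here.
    def encode(self, txt):
        return [ord(c) for c in txt]

tokenizer = ToyTokenizer()

# Placeholder files so the sketch runs end to end.
file_list = ['a.txt', 'b.txt']
for name, content in [('a.txt', 'hello world'), ('b.txt', 'hi')]:
    with open(name, 'w', encoding='utf-8') as f:
        f.write(content)

# Read text with a bounded worker pool.
text = Parallel(n_jobs=READ_FILE_THREADS)(
    delayed(read_text)(str(f)) for f in file_list)
# Encode serially; avoids pickling the tokenizer into every worker.
text = [tokenizer.encode(txt) for txt in text]
# Read file size and sort dataset by file size (feature length may differ).
file_len = Parallel(n_jobs=READ_FILE_THREADS)(
    delayed(getsize)(f) for f in file_list)
# Assumed completion of the truncated zip(*[...]) line: order by file size.
file_list, text = zip(*[(f_name, txt) for _, f_name, txt
                        in sorted(zip(file_len, file_list, text))])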