preproc = pkl.load(open(preproc_file, "rb"))
save_preproc = 0
else:
preproc = {}
# Attach indexed splits to every task: reuse the entry cached in `preproc`
# when one exists, otherwise index the task from scratch and add it to the cache.
for task in tasks:
    if task.name in preproc:
        # Cache hit: entries are stored as (train, val, test) tuples keyed by task name.
        train, val, test = preproc[task.name]
        task.train_data = train
        task.val_data = val
        task.test_data = test
        log.info("\tLoaded indexed data for %s from %s", task.name, preproc_file)
    else:
        log.info("\tIndexing task %s from scratch", task.name)
        process_task(task, token_indexer, vocab)
        del_field_tokens(task)
        preproc[task.name] = (task.train_data, task.val_data, task.test_data)
        # The cache gained an entry, so it has to be written back below.
        save_preproc = 1
log.info("\tFinished indexing tasks")
if save_preproc:  # save preprocessing again because we processed something from scratch
    # Use a context manager so the handle is closed (and the pickle flushed)
    # even if dumping raises.
    with open(preproc_file, "wb") as preproc_fh:
        pkl.dump(preproc, preproc_fh)
    log.info("\tSaved data to %s", preproc_file)
# The cache is no longer needed once the tasks hold their data.
del preproc
# Keep only the tasks whose names were selected for training.
train_tasks = [task for task in tasks if task.name in train_task_names]
After Change
# Names of the entries found in preproc_dir; left empty when either
# args.reload_vocab or args.reload_indexing is set.
# NOTE(review): the task loop that follows tests membership in `preproc_files`,
# not in this list — confirm which name is intended.
preproc_file_names = []
if not (args.reload_vocab or args.reload_indexing):
    # extend() copies the listing directly and avoids a loop variable named
    # `file`, which shadows a builtin on Python 2.
    preproc_file_names.extend(os.listdir(preproc_dir))
for task in tasks:
if task.name in preproc_files:
train, val, test = get_task_generator(task.name)
task.train_data = train