# Each assignment below collects the results of `n_experiment` independent
# calls to `experiment` for one (algorithm, second-argument) configuration.
# NOTE(review): Parallel/delayed are presumably joblib's, in which case
# n_jobs=-1 requests all available CPUs -- confirm against the file's imports.

# QLearning with second argument 1.
rewardQ1 = Parallel(n_jobs=-1)(
    delayed(experiment)(QLearning, 1)
    for _ in xrange(n_experiment)
)

# QLearning with second argument .8.
rewardQ08 = Parallel(n_jobs=-1)(
    delayed(experiment)(QLearning, .8)
    for _ in xrange(n_experiment)
)

# DoubleQLearning with second argument 1.
rewardDQ1 = Parallel(n_jobs=-1)(
    delayed(experiment)(DoubleQLearning, 1)
    for _ in xrange(n_experiment)
)
rewardDQ08 = Parallel(n_jobs=-1)(
After Change
# For each algorithm class, run `n_experiment` repetitions of `experiment`,
# average the collected results over the repetitions (axis 0) and save the
# mean to "r<names[a]><names[e]>.npy".
# NOTE(review): `e` and `names` are not defined in the visible span; they are
# presumably provided by surrounding code (e.g. an enclosing loop) -- confirm.
for a in (QLearning, DoubleQLearning):
    jobs = (delayed(experiment)(a, e) for _ in xrange(n_experiment))
    r = Parallel(n_jobs=-1)(jobs)

    # Build the file name by plain concatenation, so the entries of `names`
    # must already be strings.
    out_file = "r" + names[a] + names[e] + ".npy"
    np.save(out_file, np.mean(r, axis=0))