delayed(experiment)(QLearning, 1) for _ in xrange(n_experiment))
# Repeat `experiment` with QLearning and a second argument of 0.8,
# `n_experiment` times, dispatching the calls through Parallel(n_jobs=-1).
# The per-run results are collected into `rewardQ08`.
# NOTE(review): the meaning of the 0.8 argument is set by `experiment`,
# which is not visible in this part of the file — confirm there.
rewardQ08 = Parallel(n_jobs=-1)(
    delayed(experiment)(QLearning, 0.8)
    for _run in xrange(n_experiment)
)
# Repeat `experiment` with DoubleQLearning and a second argument of 1,
# `n_experiment` times, dispatching the calls through Parallel(n_jobs=-1).
# The per-run results are collected into `rewardDQ1`.
# NOTE(review): the meaning of the 1 argument is set by `experiment`,
# which is not visible in this part of the file — confirm there.
rewardDQ1 = Parallel(n_jobs=-1)(
    delayed(experiment)(DoubleQLearning, 1)
    for _run in xrange(n_experiment)
)
# Repeat `experiment` with DoubleQLearning and a second argument of 0.8,
# `n_experiment` times, dispatching the calls through Parallel(n_jobs=-1).
# The per-run results are collected into `rewardDQ08`.
# NOTE(review): the meaning of the 0.8 argument is set by `experiment`,
# which is not visible in this part of the file — confirm there.
rewardDQ08 = Parallel(n_jobs=-1)(
    delayed(experiment)(DoubleQLearning, 0.8)
    for _run in xrange(n_experiment)
)
# Average the collected `rewardQ1` results along axis 0 (i.e. across the
# entries of the collection) and write the mean to "rQ1.npy".
np.save(
    file="rQ1.npy",
    arr=np.mean(rewardQ1, axis=0),
)