delayed(experiment)(QLearning, 1) for _ in xrange(n_experiment))
# Each sweep below schedules `n_experiment` independent calls of
# `experiment(<algorithm class>, <second argument>)` and collects the results.
# NOTE(review): assuming `Parallel`/`delayed` are joblib's, n_jobs=-1 requests
# every available CPU -- confirm against the file's imports.

# QLearning with second argument 0.8.
rewardQ08 = Parallel(n_jobs=-1)(
    delayed(experiment)(QLearning, 0.8)
    for _ in xrange(n_experiment)
)

# DoubleQLearning with second argument 1.
rewardDQ1 = Parallel(n_jobs=-1)(
    delayed(experiment)(DoubleQLearning, 1)
    for _ in xrange(n_experiment)
)

# DoubleQLearning with second argument 0.8.
rewardDQ08 = Parallel(n_jobs=-1)(
    delayed(experiment)(DoubleQLearning, 0.8)
    for _ in xrange(n_experiment)
)

# Save the mean of `rewardQ1` taken along axis 0 to "rQ1.npy".
# `rewardQ1` is expected to have been computed before this point.
np.save("rQ1.npy", np.mean(rewardQ1, axis=0))
After Change
# Instantiate the project's Logger with argument 3; the instance is not kept.
# NOTE(review): the meaning of `3` is not visible here -- confirm in `logger`.
logger.Logger(3)

# Short string tags for each numeric setting and each algorithm class.
# Presumably used to compose output names such as "rQ1.npy" -- TODO confirm.
names = {
    1: "1",
    0.8: "08",
    QLearning: "Q",
    DoubleQLearning: "DQ",
}
for e in [1, .8]:
for a in [QLearning, DoubleQLearning]:
r = Parallel(n_jobs=-1)(