3552d25e57159d4684f73ef62bbf9335cd8b4f36,chapter06/MaximizationBias.py,,chooseAction,#Any#Any#,51
Before Change
if np.random.binomial(1, EPSILON) == 1:
return np.random.choice(stateActions[state])
else:
return np.argmax(stateActionValues[state])
# take @action in @state, return the reward
def takeAction(state, action):
if state == STATE_A:
After Change
if np.random.binomial(1, EPSILON) == 1:
return np.random.choice(stateActions[state])
else:
values_ = stateActionValues[state]
return np.random.choice([action_ for action_, value_ in enumerate(values_) if value_ == np.max(values_)])
# take @action in @state, return the reward
def takeAction(state, action):
if state == STATE_A:
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/MaximizationBias.py
Class Name:
Method Name: chooseAction
Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/WindyGridWorld.py
Class Name:
Method Name: oneEpisode
Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/CliffWalking.py
Class Name:
Method Name: chooseAction