3552d25e57159d4684f73ef62bbf9335cd8b4f36,chapter06/MaximizationBias.py,,chooseAction,#Any#Any#,51

Before Change


    if np.random.binomial(1, EPSILON) == 1:
        return np.random.choice(stateActions[state])
    else:
        return np.argmax(stateActionValues[state])

# take @action in @state, return the reward
def takeAction(state, action):
    if state == STATE_A:

After Change


    if np.random.binomial(1, EPSILON) == 1:
        return np.random.choice(stateActions[state])
    else:
        values_ = stateActionValues[state]
        return np.random.choice([action_ for action_, value_ in enumerate(values_) if value_ == np.max(values_)])

# take @action in @state, return the reward
def takeAction(state, action):
    if state == STATE_A:
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 8

Instances


Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/MaximizationBias.py
Class Name:
Method Name: chooseAction


Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/WindyGridWorld.py
Class Name:
Method Name: oneEpisode


Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 3552d25e57159d4684f73ef62bbf9335cd8b4f36
Time: 2017-10-28
Author: zhangshangtong.cpp@icloud.com
File Name: chapter06/CliffWalking.py
Class Name:
Method Name: chooseAction