752f17f65ff851521b00f699e32f45e14945f069,tests/algorithms/test_td.py,,test_sarsa,#,102
Before Change
pi, mdp, _ = initialize()
alg = SARSA(pi, mdp.info, Parameter(.1))
alg.Q.table = np.arange(np.prod(mdp.info.size)).reshape(mdp.info.size)
alg._update(0, 1, 100, 1, 0)
alg._update(1, 0, 10, 3, 1)
alg._update(3, 1, 50, 3, 0)
alg._update(2, 2, -100, 3, 1)
test_q = np.array([[0, 11, 2, 3],
[4, 5, 6, 7],
[8, 9, -1, 11],
[12, 17, 14, 15]])
assert np.array_equal(alg.Q.table, test_q)
After Change
pi, mdp, _ = initialize()
agent = SARSA(pi, mdp.info, Parameter(.1))
core = Core(agent, mdp)
// Train
core.learn(n_steps=100, n_steps_per_fit=1, quiet=True)
test_q = np.array([[4.31368701e-2, 3.68037689e-1, 4.14040445e-2, 1.64007642e-1],
[6.45491436e-1, 4.68559000, 8.07603735e-2, 1.67297938e-1],
[4.21445838e-2, 3.71538042e-3, 0., 3.439],
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 26
Instances
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 752f17f65ff851521b00f699e32f45e14945f069
Time: 2019-11-12
Author: carlo.deramo@gmail.com
File Name: tests/algorithms/test_td.py
Class Name:
Method Name: test_sarsa
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 752f17f65ff851521b00f699e32f45e14945f069
Time: 2019-11-12
Author: carlo.deramo@gmail.com
File Name: tests/algorithms/test_td.py
Class Name:
Method Name: test_q_learning
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 752f17f65ff851521b00f699e32f45e14945f069
Time: 2019-11-12
Author: carlo.deramo@gmail.com
File Name: tests/algorithms/test_td.py
Class Name:
Method Name: test_sarsa
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 752f17f65ff851521b00f699e32f45e14945f069
Time: 2019-11-12
Author: carlo.deramo@gmail.com
File Name: tests/algorithms/test_td.py
Class Name:
Method Name: test_weighted_q_learning
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 752f17f65ff851521b00f699e32f45e14945f069
Time: 2019-11-12
Author: carlo.deramo@gmail.com
File Name: tests/algorithms/test_td.py
Class Name:
Method Name: test_expected_sarsa