00726c8b57ad409363ede0e754c0137d3fcc71cc,chapter04/car_rental.py,,figure_4_2,#Any#,112
Before Change
break
// policy improvement
new_policy = np.copy(policy)
for i in range(MAX_CARS + 1):
for j in range(MAX_CARS + 1):
action_returns = []
for action in actions:
if (action >= 0 and i >= action) or (action < 0 and j >= abs(action)):
action_returns.append(expected_return([i, j], action, value, constant_returned_cars))
else:
action_returns.append(-float("inf"))
new_policy[i, j] = actions[np.argmax(action_returns)]
policy_change = (new_policy != policy).sum()
print("policy changed in %d states" % (policy_change))
policy = new_policy
if policy_change == 0:
fig = sns.heatmap(np.flipud(value), cmap="YlGnBu", ax=axes[-1])
After Change
new_state_value = expected_return([i, j], policy[i, j], value, constant_returned_cars)
value[i, j] = new_state_value
max_value_change = abs(old_value - value).max()
print("max value change {}".format(max_value_change))
if max_value_change < 1e-4:
break
// policy improvement
In pattern: SUPERPATTERN
Frequency: 5
Non-data size: 3
Instances
Project Name: ShangtongZhang/reinforcement-learning-an-introduction
Commit Name: 00726c8b57ad409363ede0e754c0137d3fcc71cc
Time: 2019-06-12
Author: wlbksy@126.com
File Name: chapter04/car_rental.py
Class Name:
Method Name: figure_4_2
Project Name: autorope/donkeycar
Commit Name: d78d0732117d256aafb669b303e79a1da6fe0b26
Time: 2018-08-04
Author: tawnkramer@gmail.com
File Name: donkeycar/parts/lidar.py
Class Name: RPLidar
Method Name: update
Project Name: albermax/innvestigate
Commit Name: 911aaf10b91a5a09670679d2aeda64473377c1c3
Time: 2018-03-26
Author: philipp.seegerer@tu-berlin.de
File Name: examples/mnist_perturbation.py
Class Name:
Method Name:
Project Name: IndicoDataSolutions/finetune
Commit Name: 5f94a31e5e5c6b336f147d5bef669ef456b00c2c
Time: 2019-08-27
Author: madison@indico.io
File Name: tests/test_classifier.py
Class Name: TestClassifier
Method Name: test_reasonable_predictions
Project Name: dmlc/dgl
Commit Name: 6367318f455aa8c27b6341c9b98794351dfd168e
Time: 2020-08-17
Author: coin2028@hotmail.com
File Name: examples/pytorch/pinsage/data_utils.py
Class Name:
Method Name: train_test_split_by_time