("TotalReturnAvg", np.mean(total_returns)),
("TotalReturnMin", np.min(total_returns)),
("TotalReturnMax", np.max(total_returns)),
("TotalReturnStd", np.std(total_returns))
])
for key, value in statistics.items():
After Change
path["rewards"].sum() for path in paths
]
# One length per path: the number of entries in that path's "rewards".
episode_lengths = [len(trajectory["rewards"]) for trajectory in paths]
statistics = OrderedDict([
("Epoch", epoch),
("Alpha", self._alpha),