8896f02f94da16afe5b3ea5330e4f99245b75d6b,examples/lqr_pg.py,,experiment,#Any#Any#Any#Any#,24
Before Change
dataset_eval = core.evaluate(n_episodes=ep_per_run)
print("policy parameters: ", policy.get_weights())
J = compute_J(dataset_eval, gamma=mdp.info.gamma)
print("J at iteration " + str(i) + ": " + str(np.mean(J)))
if __name__ == "__main__":
After Change
def experiment(alg, n_epochs, n_iterations, ep_per_run):
    """Train an agent built from ``alg`` and log its return after every epoch.

    The environment comes from ``LQR.generate(dimensions=1)``. The policy is a
    ``StateStdGaussianPolicy`` built from two linear regressors; the one passed
    as ``sigma`` has all of its weights initialised to 2.

    Args:
        alg: Algorithm class. Its ``__name__`` names the logger, and it is
            called as ``alg(mdp.info, policy, optimizer=...)`` to build the
            agent.
        n_epochs: Number of learn/evaluate rounds performed after the initial
            evaluation.
        n_iterations: Multiplier on ``ep_per_run`` giving the number of
            learning episodes per epoch.
        ep_per_run: Number of episodes per fit and per evaluation.

    Returns:
        None. Results are reported through the logger only.
    """
    # Called without an argument, so the seed is not fixed by this function.
    np.random.seed()

    algorithm_name = alg.__name__
    logger = Logger(algorithm_name, results_dir=None)
    logger.strong_line()
    logger.info("Experiment Algorithm: " + algorithm_name)

    # MDP
    mdp = LQR.generate(dimensions=1)

    # Both regressors share the same input/output shapes.
    mean_regressor = Regressor(
        LinearApproximator,
        input_shape=mdp.info.observation_space.shape,
        output_shape=mdp.info.action_space.shape,
    )
    sigma = Regressor(
        LinearApproximator,
        input_shape=mdp.info.observation_space.shape,
        output_shape=mdp.info.action_space.shape,
    )
    sigma.set_weights(2 * np.ones(sigma.weights_size))

    policy = StateStdGaussianPolicy(mean_regressor, sigma)

    # Agent
    agent = alg(mdp.info, policy, **dict(optimizer=AdaptiveOptimizer(eps=.01)))

    # Train
    core = Core(agent, mdp)

    def _evaluate_and_log(epoch):
        # Evaluate the current policy and report the mean of compute_J's output.
        evaluation = core.evaluate(n_episodes=ep_per_run)
        returns = compute_J(evaluation, gamma=mdp.info.gamma)
        logger.epoch_info(
            epoch,
            J=np.mean(returns),
            policy_weights=policy.get_weights(),
        )

    # Epoch 0 is the evaluation taken before any learning.
    _evaluate_and_log(0)

    for epoch in trange(n_epochs, leave=False):
        core.learn(
            n_episodes=n_iterations * ep_per_run,
            n_episodes_per_fit=ep_per_run,
        )
        _evaluate_and_log(epoch + 1)
if __name__ == "__main__":
algs = [REINFORCE, GPOMDP, eNAC]
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 16
Instances
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 8896f02f94da16afe5b3ea5330e4f99245b75d6b
Time: 2021-01-08
Author: boris.ilpossente@hotmail.it
File Name: examples/lqr_pg.py
Class Name:
Method Name: experiment
Project Name: AIRLab-POLIMI/mushroom
Commit Name: 60ebba7d23a946b55de9aaa34d08637e42e75d3b
Time: 2021-01-08
Author: boris.ilpossente@hotmail.it
File Name: examples/lqr_bbo.py
Class Name:
Method Name: experiment
Project Name: AIRLab-POLIMI/mushroom
Commit Name: b8364d493b6145c31780573f3c4995a2967c2631
Time: 2021-01-08
Author: boris.ilpossente@hotmail.it
File Name: examples/ship_steering_bbo.py
Class Name:
Method Name: experiment