@lab_api
def init_algorithm_params(self):
    """Initialize other algorithm parameters"""
    if self.algorithm_spec["action_policy"] == "default":
        if self.body.is_discrete:
            self.algorithm_spec["action_policy"] = "softmax"
        else:
            self.algorithm_spec["action_policy"] = "gaussian"
    util.set_attr(self, self.algorithm_spec, [
        "action_policy",
        "gamma",  # the discount factor
        "add_entropy",
        "entropy_weight",
        "continuous_action_clip",
        "training_frequency",
        "training_iters_per_batch",
        "use_GAE",
        "lam",
        "num_step_returns",
        "policy_loss_weight",
        "val_loss_weight",
    ])
    self.action_policy = act_fns[self.action_policy]
    self.to_train = 0
    # To save on a forward pass keep the log probs from each action
    self.saved_log_probs = []
    self.entropy = []
    # Select appropriate function for calculating state-action-value estimate (target)
    if self.use_GAE:
        self.get_target = self.get_gae_target
    else:
After Change
logger.info(util.self_desc(self))
@lab_api
def init_algorithm_params(self):
    """Initialize other algorithm parameters"""
    # set default
    util.set_attr(self, dict(
        action_pdtype="default",
        action_policy="default",
        action_policy_update="no_update",
        explore_var_start=np.nan,
        explore_var_end=np.nan,
        explore_anneal_epi=np.nan,
    ))
    util.set_attr(self, self.algorithm_spec, [
        "action_policy",
        # theoretically, AC does not have a policy update, but this implementation provides the option
        "action_policy_update",
        "explore_var_start", "explore_var_end", "explore_anneal_epi",
        "gamma",  # the discount factor
        "add_entropy",
        "entropy_weight",
        "continuous_action_clip",
        "training_frequency",
        "training_iters_per_batch",
        "use_GAE",
        "lam",
        "num_step_returns",
        "policy_loss_weight",
        "val_loss_weight",
    ])
    self.to_train = 0
    self.action_policy = getattr(policy_util, self.action_policy)
    self.action_policy_update = getattr(policy_util, self.action_policy_update)
    for body in self.agent.nanflat_body_a:
        body.explore_var = self.explore_var_start
    # Select appropriate function for calculating state-action-value estimate (target)
    if self.use_GAE:
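Both versions rely on `util.set_attr` to copy hyperparameters from the spec onto the algorithm object. Assuming it behaves roughly as "set attributes from a dict, optionally restricted to a list of keys" (a sketch of the assumed behavior, not the library's actual implementation), an illustrative `algorithm_spec` covering the keys read by the updated version might look like this; all values are placeholders:

```python
# Hypothetical sketch of the assumed util.set_attr behavior: copy selected
# keys from a dict onto an object as attributes.
def set_attr(obj, attr_dict, keys=None):
    if keys is not None:
        attr_dict = {k: v for k, v in attr_dict.items() if k in keys}
    for attr, val in attr_dict.items():
        setattr(obj, attr, val)
    return obj

# Illustrative algorithm_spec with placeholder values for the keys read above
algorithm_spec = {
    "action_policy": "default",
    "action_policy_update": "no_update",
    "explore_var_start": 1.0,
    "explore_var_end": 0.1,
    "explore_anneal_epi": 100,
    "gamma": 0.99,
    "add_entropy": True,
    "entropy_weight": 0.01,
    "continuous_action_clip": 2.0,
    "training_frequency": 1,
    "training_iters_per_batch": 1,
    "use_GAE": True,
    "lam": 0.95,
    "num_step_returns": None,
    "policy_loss_weight": 1.0,
    "val_loss_weight": 1.0,
}
```

Under this assumed behavior, any key missing from the spec keeps the default applied earlier (e.g. `explore_var_start` would remain `np.nan`), since only keys present in the dict are copied.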