54f30e39c7aee65218cb40e219fe9f4f816f0eee,slm_lab/agent/algorithm/sac.py,SoftActorCritic,calc_policy_loss,#SoftActorCritic#Any#Any#,141

Before Change


            reparam_mus = action_pd.rsample()  # reparametrization for paper eq. 11
            reparam_actions = torch.tanh(reparam_mus)
            # paper Appendix C. Enforcing Action Bounds for continuous actions
            log_probs = action_pd.log_prob(reparam_mus) - torch.log(1 - reparam_actions.pow(2) + 1e-6).sum(1)

        q1_preds = self.calc_q(states, reparam_actions, self.q1_net)
        q2_preds = self.calc_q(states, reparam_actions, self.q2_net)
        q_preds = torch.min(q1_preds, q2_preds)

After Change



    def calc_policy_loss(self, batch, action_pd):
        """policy_loss = alpha * log pi(f(a)|s) - Q1(s, f(a)), where f(a) = reparametrized action"""
        log_probs, reparam_actions = self.sample_log_probs(action_pd, reparam=True)
        states = batch["states"]
        q1_preds = self.calc_q(states, reparam_actions, self.q1_net)
        q2_preds = self.calc_q(states, reparam_actions, self.q2_net)
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: kengz/SLM-Lab
Commit Name: 54f30e39c7aee65218cb40e219fe9f4f816f0eee
Time: 2019-08-04
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sac.py
Class Name: SoftActorCritic
Method Name: calc_policy_loss


Project Name: biolab/orange3
Commit Name: 40f2c96e139a038b93146fbd1f65fdea3ee39015
Time: 2013-02-13
Author: janez.demsar@fri.uni-lj.si
File Name: Orange/widgets/utils/plot/owaxis.py
Class Name: OWAxis
Method Name: update_ticks


Project Name: dpressel/mead-baseline
Commit Name: 78a7f7de24c34235d0784a5781f46de34d2336eb
Time: 2019-10-29
Author: dpressel@gmail.com
File Name: python/eight_mile/pytorch/layers.py
Class Name: LayerNorm
Method Name: forward