171e9e18a10f2daea090bc6f4815db41072d66b6,ch10/01_cartpole_pg.py,,,#,36

Before Change


        if len(batch_states) < BATCH_SIZE:
            continue

        states_v = Variable(torch.from_numpy(np.array(batch_states, dtype=np.float32)))
        batch_actions_t = torch.LongTensor(batch_actions)
        batch_scale_v = Variable(torch.FloatTensor(batch_scales))

        optimizer.zero_grad()
        logits_v = net(states_v)
        log_prob_v = F.log_softmax(logits_v, dim=1)
        log_prob_actions_v = batch_scale_v * log_prob_v[range(BATCH_SIZE), batch_actions_t]
        loss_policy_v = -log_prob_actions_v.mean()

        loss_policy_v.backward(retain_graph=True)
        grads = np.concatenate([p.grad.data.numpy().flatten()
                                for p in net.parameters()
                                if p.grad is not None])

        prob_v = F.softmax(logits_v, dim=1)
        entropy_v = -(prob_v * log_prob_v).sum(dim=1).mean()
        entropy_loss_v = -ENTROPY_BETA * entropy_v
        entropy_loss_v.backward()
        optimizer.step()

        loss_v = loss_policy_v + entropy_loss_v

        // calc KL-div
        new_logits_v = net(states_v)
        new_prob_v = F.softmax(new_logits_v, dim=1)
        kl_div_v = -((new_prob_v / prob_v).log() * prob_v).sum(dim=1).mean()
        writer.add_scalar("kl", kl_div_v.data.numpy()[0], step_idx)

        writer.add_scalar("baseline", baseline, step_idx)
        writer.add_scalar("entropy", entropy_v.data.numpy()[0], step_idx)
        writer.add_scalar("batch_scales", np.mean(batch_scales), step_idx)
        writer.add_scalar("loss_entropy", entropy_loss_v.data.numpy()[0], step_idx)
        writer.add_scalar("loss_policy", loss_policy_v.data.numpy()[0], step_idx)
        writer.add_scalar("loss_total", loss_v.data.numpy()[0], step_idx)

        writer.add_scalar("grad_l2", np.sqrt(np.mean(np.square(grads))), step_idx)
        writer.add_scalar("grad_max", np.max(np.abs(grads)), step_idx)

After Change


        if len(batch_states) < BATCH_SIZE:
            continue

        states_v = torch.FloatTensor(batch_states)
        batch_actions_t = torch.LongTensor(batch_actions)
        batch_scale_v = torch.FloatTensor(batch_scales)

        optimizer.zero_grad()
        logits_v = net(states_v)
        log_prob_v = F.log_softmax(logits_v, dim=1)
        log_prob_actions_v = batch_scale_v * log_prob_v[range(BATCH_SIZE), batch_actions_t]
        loss_policy_v = -log_prob_actions_v.mean()

        loss_policy_v.backward(retain_graph=True)
        grads = np.concatenate([p.grad.data.numpy().flatten()
                                for p in net.parameters()
                                if p.grad is not None])

        prob_v = F.softmax(logits_v, dim=1)
        entropy_v = -(prob_v * log_prob_v).sum(dim=1).mean()
        entropy_loss_v = -ENTROPY_BETA * entropy_v
        entropy_loss_v.backward()
        optimizer.step()

        loss_v = loss_policy_v + entropy_loss_v

        // calc KL-div
        new_logits_v = net(states_v)
        new_prob_v = F.softmax(new_logits_v, dim=1)
        kl_div_v = -((new_prob_v / prob_v).log() * prob_v).sum(dim=1).mean()
        writer.add_scalar("kl", kl_div_v.item(), step_idx)

        writer.add_scalar("baseline", baseline, step_idx)
        writer.add_scalar("entropy", entropy_v.item(), step_idx)
        writer.add_scalar("batch_scales", np.mean(batch_scales), step_idx)
        writer.add_scalar("loss_entropy", entropy_loss_v.item(), step_idx)
        writer.add_scalar("loss_policy", loss_policy_v.item(), step_idx)
        writer.add_scalar("loss_total", loss_v.item(), step_idx)

        writer.add_scalar("grad_l2", np.sqrt(np.mean(np.square(grads))), step_idx)
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 17

Instances


Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: 171e9e18a10f2daea090bc6f4815db41072d66b6
Time: 2018-04-27
Author: max.lapan@gmail.com
File Name: ch10/01_cartpole_pg.py
Class Name:
Method Name:


Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: 7327bc3aa7a0e66168a84380edbd1e74a5a16355
Time: 2018-04-25
Author: max.lapan@gmail.com
File Name: ch04/04_frozenlake_nonslippery.py
Class Name:
Method Name:


Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: 7327bc3aa7a0e66168a84380edbd1e74a5a16355
Time: 2018-04-25
Author: max.lapan@gmail.com
File Name: ch04/03_frozenlake_tweaked.py
Class Name:
Method Name: