a997c9720844894b9119be2d6ea8dd6fa057c143,samples/dqn_expreplay.py,,,#,16

Before Change



        if idx % 10 == 0:
            total_rewards = exp_source.pop_total_rewards()
            if total_rewards:
                mean_reward = np.mean(total_rewards)
                print("%d: Mean reward: %.2f, done: %d, epsilon: %.4f" % (
                    idx, mean_reward, len(total_rewards), action_selector.epsilon
                ))
                if mean_reward > run.getfloat("defaults", "stop_mean_reward", fallback=2*abs(mean_reward)):
                    print("We've reached mean reward bound, exit")
                    break
            else:
                print("%d: no reward info, epsilon: %.4f" % (idx, action_selector.epsilon))
    env.close()
    pass

After Change


            q_vals.append(train_q)
        return torch.from_numpy(np.array(states, dtype=np.float32)), torch.stack(q_vals)

    reward_sma = utils.SMAQueue(run.getint("stop", "mean_games", fallback=100))

    for idx in range(10000):
        exp_replay.populate(run.getint("exp_buffer", "populate"))

        for batch in exp_replay.batches(run.getint("learning", "batch_size")):
            optimizer.zero_grad()

            # populate buffer
            states, q_vals = batch_to_train(batch)
            # ready to train
            states, q_vals = Variable(states), Variable(q_vals)
            if cuda_enabled:
                states = states.cuda()
                q_vals = q_vals.cuda()
            l = loss_fn(model(states), q_vals)
            l.backward()
            optimizer.step()

        action_selector.epsilon *= run.getfloat("defaults", "epsilon_decay")

        if idx % 10 == 0:
            total_rewards = exp_source.pop_total_rewards()
            reward_sma += total_rewards
            mean_reward = reward_sma.mean()
            print("%d: Mean reward: %.2f, done: %d, epsilon: %.4f" % (
                idx, mean_reward, len(total_rewards), action_selector.epsilon
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: Shmuma/ptan
Commit Name: a997c9720844894b9119be2d6ea8dd6fa057c143
Time: 2017-05-03
Author: maxl@fornova.net
File Name: samples/dqn_expreplay.py
Class Name:
Method Name:


Project Name: Shmuma/ptan
Commit Name: 584d38348bfe5246ff0d128bb23f1355560788db
Time: 2017-05-21
Author: max.lapan@gmail.com
File Name: samples/dqn_tweaks_atari.py
Class Name:
Method Name:


Project Name: PyMVPA/PyMVPA
Commit Name: 378f02bf9cd59fa2609ce3339be5885599ae1fac
Time: 2008-06-23
Author: michael.hanke@gmail.com
File Name: mvpa/base/__init__.py
Class Name:
Method Name: