1a693073cd01fffa7cb3f018b7221459703119f9,ch09/01_cartpole_dqn.py,,,#,48
Before Change
batch_states, batch_actions, batch_targets = [], [], []
total_rewards = []
for step_idx, exp in enumerate(exp_source):
selector.epsilon = max(EPSILON_STOP, EPSILON_START - step_idx / EPSILON_STEPS)
batch_states.append(exp.state)
batch_actions.append(exp.action)
batch_targets.append(calc_target(tgt_net.target_model, exp.reward, exp.last_state))
if len(batch_states) == BATCH_SIZE:
optimizer.zero_grad()
states_v = Variable(torch.from_numpy(np.array(batch_states, dtype=np.float32)))
net_q_v = net(states_v)
target_q = net_q_v.data.numpy().copy()
target_q[range(BATCH_SIZE), batch_actions] = batch_targets
target_q_v = Variable(torch.from_numpy(target_q))
loss_v = mse_loss(net_q_v, target_q_v)
loss_v.backward()
optimizer.step()
// clear batch
batch_states.clear()
batch_actions.clear()
batch_targets.clear()
new_rewards = exp_source.pop_total_rewards()
if new_rewards:
reward = new_rewards[0]
total_rewards.append(reward)
mean_rewards = float(np.mean(total_rewards[-100:]))
print("%d: reward: %6.2f,\tmean_100: %6.2f,\tepsilon: %.2f" % (step_idx, reward, mean_rewards, selector.epsilon))
writer.add_scalar("reward", reward, step_idx)
writer.add_scalar("reward_100", mean_rewards, step_idx)
writer.add_scalar("epsilon", selector.epsilon, step_idx)
if mean_rewards > 195:
print("Solved in %d steps!" % step_idx)
break
if step_idx % TARGET_STEPS == 0:
tgt_net.sync()
writer.close()
pass
After Change
continue
// sample batch
batch = replay_buffer.sample(BATCH_SIZE)
batch_states = [exp.state for exp in batch]
batch_actions = [exp.action for exp in batch]
batch_targets = [calc_target(net, exp.reward, exp.last_state)
for exp in batch]
// train
optimizer.zero_grad()
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: 1a693073cd01fffa7cb3f018b7221459703119f9
Time: 2017-11-30
Author: max.lapan@gmail.com
File Name: ch09/01_cartpole_dqn.py
Class Name:
Method Name:
Project Name: PacktPublishing/Deep-Reinforcement-Learning-Hands-On
Commit Name: fdfbd05f6a3e9b5bb1d73302180b4ecf260ff3d2
Time: 2017-11-17
Author: max.lapan@gmail.com
File Name: ch08/run_model.py
Class Name:
Method Name:
Project Name: Shmuma/ptan
Commit Name: 5e63df5e0b53b583b6865cc099cecd5436563113
Time: 2017-06-22
Author: max.lapan@gmail.com
File Name: samples/reinforce.py
Class Name:
Method Name: