2381a50a70559340a0335288d648b4bb9a675588,slm_lab/agent/algorithm/dqn.py,VanillaDQN,train,#VanillaDQN#,124
Before Change
"""
total_t = util.s_get(self, "aeb_space.clock").get("total_t")
if (total_t > self.training_min_timestep and total_t % self.training_frequency == 0):
logger.debug3(f"Training at total_t: {total_t}")
total_loss = 0.0
for _b in range(self.training_epoch):
batch = self.sample()
batch_loss = 0.0
for _i in range(self.training_iters_per_batch):
with torch.no_grad():
q_targets = self.compute_q_target_values(batch)
y = q_targets
loss = self.net.training_step(batch["states"], y)
batch_loss += loss.item()
batch_loss /= self.training_iters_per_batch
total_loss += batch_loss
total_loss /= self.training_epoch
logger.debug(f"total_loss {total_loss}")
return total_loss
else:
logger.debug3("NOT training")
return np.nan
@lab_api
def body_act(self, body, state):
After Change
self.body.entropies = []
logger.debug(f"Loss: {loss}")
self.last_loss = loss.item()
return self.last_loss
@lab_api
def update(self):
"""Update the agent after training"""
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: kengz/SLM-Lab
Commit Name: 2381a50a70559340a0335288d648b4bb9a675588
Time: 2018-06-12
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/dqn.py
Class Name: VanillaDQN
Method Name: train
Project Name: kengz/SLM-Lab
Commit Name: 2381a50a70559340a0335288d648b4bb9a675588
Time: 2018-06-12
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/dqn.py
Class Name: HydraDQN
Method Name: train
Project Name: kengz/SLM-Lab
Commit Name: 2381a50a70559340a0335288d648b4bb9a675588
Time: 2018-06-12
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/dqn.py
Class Name: VanillaDQN
Method Name: train
Project Name: kengz/SLM-Lab
Commit Name: ec849adaf4ceb42ed52ca142c839f627c34b9434
Time: 2018-05-21
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sarsa.py
Class Name: SARSA
Method Name: train