9527891f31435622726d8bd1fa84defe6fa29dbe,torch_ac/algos/ppo.py,PPOAlgo,update_parameters,#PPOAlgo#,22
Before Change
b.old_log_prob = b.old_log_prob.detach()
b.advantage = b.advantage.detach()
b.old_value = b.old_value.detach()
b.returnn = b.returnn.detach()
# Compute loss
preprocessed_obs = self.preprocess_obss(b.obs, requires_grad=True, use_gpu=torch.cuda.is_available())
After Change
# Add old action log probs and old values to transitions
preprocessed_obs = self.preprocess_obss(ts.obs, use_gpu=torch.cuda.is_available())
with torch.no_grad():
ts.old_log_prob = self.acmodel.get_dist(preprocessed_obs).log_prob(ts.action)
ts.old_value = self.acmodel.get_value(preprocessed_obs)
if self.batch_size == 0:
self.batch_size = len(ts)
for _ in range(self.epochs):
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 4
Instances
Project Name: lcswillems/torch-rl
Commit Name: 9527891f31435622726d8bd1fa84defe6fa29dbe
Time: 2018-04-26
Author: lcswillems@gmail.com
File Name: torch_ac/algos/ppo.py
Class Name: PPOAlgo
Method Name: update_parameters
Project Name: kengz/SLM-Lab
Commit Name: 368128f2294b79b26e7d3c9b553395d6c5ef74e5
Time: 2019-05-01
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/dqn.py
Class Name: DQNBase
Method Name: calc_q_loss
Project Name: kengz/SLM-Lab
Commit Name: 5503ea33ceb7685fd1bf8c844cdfc4dba3ebbb3e
Time: 2019-10-19
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sac.py
Class Name: SoftActorCritic
Method Name: train_alpha
Project Name: kengz/SLM-Lab
Commit Name: 368128f2294b79b26e7d3c9b553395d6c5ef74e5
Time: 2019-05-01
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/dqn.py
Class Name: VanillaDQN
Method Name: calc_q_loss