24964db1b986850ea5a194bd7be2d30a1f256e50,slm_lab/agent/algorithm/actor_critic.py,ActorCritic,train_shared,#ActorCritic#,217
Before Change
policy_loss = self.calc_policy_loss(batch, advs) // from actor
val_loss = self.calc_val_loss(batch, v_targets) // from critic
loss = policy_loss + val_loss
self.net.training_step(loss=loss, lr_clock=self.body.env.clock)
// reset
self.to_train = 0
self.body.entropies = []
self.body.log_probs = []
After Change
Trains the network when the actor and critic share parameters
loss = self.policy_loss_coef * policy_loss + self.val_loss_coef * val_loss
"""
clock = self.body.env.clock
if self.to_train == 1:
batch = self.sample()
with torch.no_grad():
advs, v_targets = self.calc_advs_v_targets(batch)
policy_loss = self.calc_policy_loss(batch, advs) // from actor
val_loss = self.calc_val_loss(batch, v_targets) // from critic
loss = policy_loss + val_loss
self.net.training_step(loss=loss, lr_clock=clock)
// reset
self.to_train = 0
self.body.entropies = []
self.body.log_probs = []
In pattern: SUPERPATTERN
Frequency: 6
Non-data size: 5
Instances
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/actor_critic.py
Class Name: ActorCritic
Method Name: train_shared
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sarsa.py
Class Name: SARSA
Method Name: train
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sil.py
Class Name: SIL
Method Name: train_shared
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/sil.py
Class Name: SIL
Method Name: train_separate
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/ppo.py
Class Name: PPO
Method Name: train_shared
Project Name: kengz/SLM-Lab
Commit Name: 24964db1b986850ea5a194bd7be2d30a1f256e50
Time: 2018-11-30
Author: kengzwl@gmail.com
File Name: slm_lab/agent/algorithm/reinforce.py
Class Name: Reinforce
Method Name: train