# Calculate policy gradients only: backpropagate the policy loss by itself.
# retain_graph=True keeps the autograd graph so it can be backpropagated
# through again (presumably for loss_v below -- that call is outside this
# fragment; confirm against the full training loop).
loss_policy_v.backward(retain_graph=True)
# Snapshot the gradients currently stored on the network's parameters as a
# single flat NumPy vector; parameters without a gradient are skipped.
grads = np.concatenate([
    p.grad.data.cpu().numpy().flatten()
    for p in net.parameters()
    if p.grad is not None
])
# Apply entropy and value gradients: both losses are summed into one scalar.
loss_v = entropy_loss_v + loss_value_v
After Change
# Register command-line options on the parser created earlier in the file,
# then parse them. The --port-ofs help text uses an apostrophe inside a
# double-quoted string so the literal is not terminated early.
parser.add_argument("--port-ofs", type=int, default=0, help="Offset for container's ports, default=0")
parser.add_argument("--env", default=ENV_NAME, help="Environment name to solve, default=" + ENV_NAME)
parser.add_argument("--demo", help="Demo dir to load. Default=No demo")
parser.add_argument("--host", default="localhost", help="Host with docker containers")
args = parser.parse_args()
# Environment name selected on the command line (falls back to ENV_NAME).
env_name = args.env
if not env_name.startswith("wob.mini."):