print("Output files are saved in {}".format(args.outdir))
# In training, losing a life is treated as a terminal state.
env = ale.ALE(args.rom, use_sdl=args.use_sdl, seed=train_seed)
misc.env_modifiers.make_reward_clipped(env, -1, 1)
# In testing, an episode is terminated when all lives are lost.
eval_env = ale.ALE(args.rom, use_sdl=args.use_sdl,
                   treat_life_lost_as_terminal=False,
                   seed=test_seed)
n_actions = env.number_of_actions
activation = parse_activation(args.activation)
q_func = parse_arch(args.arch, n_actions, activation)
# Draw the computational graph and save it in the output directory.
After Change
choices=["DQN", "DoubleDQN", "PAL"])
parser.add_argument("--logging-level", type=int, default=20,
help="Logging level. 10:DEBUG, 20:INFO etc.")
args = parser.parse_args()
import logging
logging.basicConfig(level=args.logging_level)
# Set a random seed used in ChainerRL.
misc.set_random_seed(args.seed, gpus=(args.gpu,))
# Set different random seeds for train and test envs.
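# Assuming args.seed lies in [0, 2 ** 31), this maps it to a distinct seed in the same range.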
train_seed = args.seed
test_seed = 2 ** 31 - 1 - args.seed
args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
# In training, losing a life is treated as a terminal state.
env = gym.make(args.env)
env.seed(train_seed)
env = atari_wrappers.wrap_deepmind(env)
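# By default, wrap_deepmind keeps episode_life=True and clip_rewards=True, so the
# training env ends episodes on life loss and clips rewards, as in the old ALE setup.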
# In testing, an episode is terminated when all lives are lost.
eval_env = gym.make(args.env)
eval_env.seed(test_seed)
eval_env = atari_wrappers.wrap_deepmind(
    eval_env, episode_life=False, clip_rewards=False)
n_actions = env.action_space.n
activation = parse_activation(args.activation)
q_func = parse_arch(args.arch, n_actions, activation)
# Draw the computational graph and save it in the output directory.
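The drawing call itself is not part of this excerpt. A minimal sketch of how it could continue, assuming ChainerRL's chainerrl.misc.draw_computational_graph helper and a dummy observation of shape (4, 84, 84) (stacked 84x84 frames; this shape is an assumption, not taken from the excerpt):

import os
import numpy as np
import chainerrl

# Feed one dummy observation through q_func and dump the resulting graph
# under args.outdir. The observation shape is assumed, not read from the
# wrappers above.
dummy_obs = np.zeros((4, 84, 84), dtype=np.float32)[None]
chainerrl.misc.draw_computational_graph(
    [q_func(dummy_obs)], os.path.join(args.outdir, "model"))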