863b8dee69df21ff479b0f28422f2bf2b14f05bd,examples/quickstart.py,,,#,29
Before Change
// Create a Proximal Policy Optimization (PPO) agent
agent = PPOAgent(
Configuration(
log_level="info",
batch_size=4000,
// max_kl_divergence=0.1,
// cg_iterations=20,
// cg_damping=0.001,
// ls_max_backtracks=10,
// ls_accept_ratio=0.9,
// ls_override=False,
learning_rate=0.001,
entropy_penalty=0.01,
epochs=5,
optimizer_batch_size=512,
loss_clipping=0.2,
normalize_advantage=False,
baseline=dict(
type="mlp",
sizes=[32, 32],
epochs=1,
update_batch_size=512,
learning_rate=0.01
),
states=env.states,
actions=env.actions,
network=layered_network_builder([
dict(type="dense", size=32, activation="tanh"),
dict(type="dense", size=32, activation="tanh")
])
))
// Create the runner
runner = Runner(agent=agent, environment=env)
After Change
env = OpenAIGym("CartPole-v0")
config = Configuration(
batch_size=4096,
// Agent
preprocessing=None,
exploration=None,
reward_preprocessing=None,
// BatchAgent
keep_last_timestep=True,
// PPOAgent
step_optimizer=dict(
type="adam",
learning_rate=1e-3
),
optimization_steps=10,
// Model
scope="ppo",
discount=0.99,
// DistributionModel
distributions=None, // not documented!!!
entropy_regularization=0.01,
// PGModel
baseline_mode=None,
baseline=None,
baseline_optimizer=None,
gae_lambda=None,
normalize_rewards=False,
// PGLRModel
likelihood_ratio_clipping=0.2,
// Logging
log_level="info",
// TensorFlow Summaries
summary_logdir=None,
summary_labels=["total-loss"],
summary_frequency=1,
// Distributed
// TensorFlow distributed configuration
cluster_spec=None,
parameter_server=False,
task_index=0,
device=None,
local_model=False,
replica_model=False,
)
// Network as list of layers
network_spec = [
dict(type="dense", size=32, activation="tanh"),
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: reinforceio/tensorforce
Commit Name: 863b8dee69df21ff479b0f28422f2bf2b14f05bd
Time: 2017-10-15
Author: mi.schaarschmidt@gmail.com
File Name: examples/quickstart.py
Class Name:
Method Name:
Project Name: reinforceio/tensorforce
Commit Name: ad1a625cd2b2dd42701435e6174a98c323be5a3e
Time: 2017-10-16
Author: mi.schaarschmidt@gmail.com
File Name: tensorforce/tests/test_reward_estimation.py
Class Name: TestRewardEstimation
Method Name: test_gae
Project Name: geometalab/OSMDeepOD
Commit Name: 3a5ed88d739d77c79a569b58710822b1a9832d59
Time: 2017-03-31
Author: samuel.kurath@gmail.com
File Name: tests/conftest.py
Class Name:
Method Name: configuration_no_compare