863b8dee69df21ff479b0f28422f2bf2b14f05bd,examples/quickstart.py,,,#,29
Before Change
env = OpenAIGym("CartPole-v0")
// Create a Proximal Policy Optimization agent
agent = PPOAgent(
Configuration(
log_level="info",
batch_size=4000,
// max_kl_divergence=0.1,
// cg_iterations=20,
// cg_damping=0.001,
// ls_max_backtracks=10,
// ls_accept_ratio=0.9,
// ls_override=False,
learning_rate=0.001,
entropy_penalty=0.01,
epochs=5,
optimizer_batch_size=512,
loss_clipping=0.2,
normalize_advantage=False,
baseline=dict(
type="mlp",
sizes=[32, 32],
epochs=1,
update_batch_size=512,
learning_rate=0.01
),
states=env.states,
actions=env.actions,
network=layered_network_builder([
dict(type="dense", size=32, activation="tanh"),
dict(type="dense", size=32, activation="tanh")
])
))
// Create the runner
runner = Runner(agent=agent, environment=env)
After Change
dict(type="dense", size=32, activation="tanh")
]
agent = PPOAgent(
states_spec=env.states,
actions_spec=env.actions,
network_spec=network_spec,
config=config
)
// Create the runner
runner = Runner(agent=agent, environment=env)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: reinforceio/tensorforce
Commit Name: 863b8dee69df21ff479b0f28422f2bf2b14f05bd
Time: 2017-10-15
Author: mi.schaarschmidt@gmail.com
File Name: examples/quickstart.py
Class Name:
Method Name:
Project Name: reinforceio/tensorforce
Commit Name: d10e1736bef8954722d0d19631fe42f6b83e8ce6
Time: 2017-11-11
Author: mi.schaarschmidt@gmail.com
File Name: examples/quickstart.py
Class Name:
Method Name:
Project Name: reinforceio/tensorforce
Commit Name: ac04dcf9ced65fdd2cafc5c967400cabf32d3c6a
Time: 2017-10-15
Author: mi.schaarschmidt@gmail.com
File Name: tensorforce/tests/test_quickstart_example.py
Class Name: TestQuickstartExample
Method Name: test_example