# Assemble the agent specification first, then instantiate — same settings,
# just named for readability before the Agent.create call.
agent_spec = dict(
    agent="tensorforce",
    states=dict(type="float", shape=(10,)),        # 10-dimensional float observation
    actions=dict(type="int", num_values=5),        # 5 discrete actions (0..4)
    max_episode_timesteps=100,                     # episode length cap
    memory=10000,                                  # replay/experience memory size
    update=dict(unit="timesteps", batch_size=64),  # update every batch of 64 timesteps
    optimizer=dict(type="adam", learning_rate=3e-4),
    policy=dict(network="auto"),                   # let Tensorforce pick a network
    objective="policy_gradient",
    reward_estimation=dict(horizon=20),            # n-step reward horizon
)
agent = Agent.create(**agent_spec)
# One agent-environment interaction step.
# Fixes: original used C-style `//` comments (a SyntaxError in Python) and
# had three statements plus their comments fused onto a single line.

# Retrieve the latest (observable) environment state.
state = get_current_state()  # float array of shape [10]

# Query the agent for its action decision.
action = agent.act(states=state)  # scalar between 0 and 4

# Execute the decision and retrieve the current performance score.
reward = execute_decision(action)  # any scalar float

# Pass feedback about performance (and termination) to the agent.
agent.observe(reward=reward, terminal=False)