980fe014b6215730ac4fdfa451b067e6fb44e622,tensorforce/agents/dpg.py,DeterministicPolicyGradient,__init__,#DeterministicPolicyGradient#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#,129

Before Change


            entropy_regularization=entropy_regularization, **kwargs
        )

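        # DPG supports exactly one action, which must be a float with scalar shape () or (1,).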
        action_spec = next(iter(self.actions_spec.values()))
        if len(self.actions_spec) > 1 or action_spec.type != "float" or \
                (action_spec.shape != () and action_spec.shape != (1,)):
            raise TensorforceError.value(
                name="DeterministicPolicyGradient", argument="actions", value=actions,

After Change


        # Config, saver, summarizer, recorder
        config=None, saver=None, summarizer=None, recorder=None,
        # Deprecated
        estimate_terminal=None, critic_network=None, **kwargs
    ):
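        # DPG is temporarily disabled in this commit; everything below this raise is unreachable.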
        raise TensorforceError(message="Temporarily broken.")
        if estimate_terminal is not None:
            raise TensorforceError.deprecated(
                name="DPG", argument="estimate_terminal", replacement="predict_terminal_values"
            )
        if critic_network is not None:
            raise TensorforceError.deprecated(
                name="DPG", argument="critic_network", replacement="critic"
            )

        self.spec = OrderedDict(
            agent="dpg",
            states=states, actions=actions, memory=memory, batch_size=batch_size,
            max_episode_timesteps=max_episode_timesteps,
            network=network, use_beta_distribution=use_beta_distribution,
            update_frequency=update_frequency, start_updating=start_updating,
            learning_rate=learning_rate,
            horizon=horizon, discount=discount, predict_terminal_values=predict_terminal_values,
            critic=critic, critic_optimizer=critic_optimizer,
            preprocessing=preprocessing,
            exploration=exploration, variable_noise=variable_noise,
            l2_regularization=l2_regularization, entropy_regularization=entropy_regularization,
            parallel_interactions=parallel_interactions,
            config=config, saver=saver, summarizer=summarizer, recorder=recorder
        )

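        # Deterministic policy: the distribution network is used with temperature 0.0,
        # so actions are taken deterministically rather than sampled.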
        policy = dict(
            type="parametrized_distributions", network=network, temperature=0.0,
            use_beta_distribution=use_beta_distribution
        )

        memory = dict(type="replay", capacity=memory)

        update = dict(unit="timesteps", batch_size=batch_size)
        if update_frequency != "batch_size":
            update["frequency"] = update_frequency
        if start_updating is not None:
            update["start"] = start_updating

        optimizer = dict(type="adam", learning_rate=learning_rate)
        objective = "deterministic_policy_gradient"

        reward_estimation = dict(
            horizon=horizon, discount=discount, predict_horizon_values="late",
            estimate_advantage=False, predict_action_values=True,
            predict_terminal_values=predict_terminal_values
        )
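
For orientation, a minimal usage sketch (not part of this commit): it assumes the standard Tensorforce Agent.create and Environment.create entry points, the Gym environment "Pendulum-v0" (which has a single float action and therefore satisfies the action check above), and illustrative hyperparameter values. Note that at this revision __init__ immediately raises TensorforceError("Temporarily broken."), so the sketch shows the intended constructor interface rather than working behaviour at this exact commit.

from tensorforce import Agent, Environment

# Gym's Pendulum-v0 exposes a single float action of shape (1,), as DPG requires.
environment = Environment.create(
    environment="gym", level="Pendulum-v0", max_episode_timesteps=200
)

# Keyword arguments mirror the spec dict assembled in __init__ above
# (memory capacity, batch size, learning rate, reward horizon); values are illustrative.
agent = Agent.create(
    agent="dpg", environment=environment,
    memory=10000, batch_size=64, learning_rate=1e-3, horizon=1
)
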
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: reinforceio/tensorforce
Commit Name: 980fe014b6215730ac4fdfa451b067e6fb44e622
Time: 2020-08-13
Author: alexkuhnle@t-online.de
File Name: tensorforce/agents/dpg.py
Class Name: DeterministicPolicyGradient
Method Name: __init__


Project Name: pandas-dev/pandas
Commit Name: ebd9906e3b489387e500e3e31e53f159bae3cb9b
Time: 2020-10-06
Author: 45562402+rhshadrach@users.noreply.github.com
File Name: pandas/core/aggregation.py
Class Name:
Method Name: transform_dict_like


Project Name: GoogleCloudPlatform/PerfKitBenchmarker
Commit Name: 37bb2945cc38af48dfa5ad09392736c427008a80
Time: 2015-12-09
Author: connormccoy@google.com
File Name: perfkitbenchmarker/linux_benchmarks/redis_benchmark.py
Class Name:
Method Name: Run