ce96b03b07a4a4bdd851aa84493c616cd291aff2,rllib/examples/env/mbmpo_env.py,HopperWrapper,reward,#HopperWrapper#Any#Any#Any#,39
Before Change
HopperEnv.__init__(self, *args, **kwargs)
def reward(self, obs, action, obs_next):
    """Compute one reward per transition for a batch of transitions.

    The unclipped reward for each row is
    ``obs_next[row, 5] + 1.0 - 1e-3 * sum(action[row] ** 2)``; the result
    is then clipped to the closed interval ``[-1000.0, 1000.0]``.

    Args:
        obs: 2-D array of observations, one row per transition. Only its
            shape is inspected here.
        action: 2-D array of actions with the same number of rows as
            ``obs``.
        obs_next: Array of next observations with the same shape as
            ``obs``; it must have at least 6 columns.

    Returns:
        1-D array holding one clipped reward per row.

    Raises:
        AssertionError: If ``obs`` or ``action`` is not 2-D, or if the
            shapes of the three inputs are inconsistent.
    """
    # NOTE: these checks are `assert` statements, so they are skipped when
    # Python runs with -O; they are kept as asserts so the exception type
    # seen by existing callers does not change.
    assert obs.ndim == 2 and action.ndim == 2, (
        "obs and action must both be 2-D (batched) arrays")
    assert obs.shape == obs_next.shape and action.shape[0] == obs.shape[0], (
        "obs/obs_next shapes must match and action must have one row per "
        "observation")

    alive_bonus = 1.0
    # Column 5 of the next observation is used as the velocity term
    # (presumably the forward velocity -- confirm against the env's
    # observation layout).
    vel = obs_next[:, 5]
    # Quadratic penalty on the action magnitude, summed per row.
    ctrl_cost = 1e-3 * np.sum(np.square(action), axis=1)
    reward = vel + alive_bonus - ctrl_cost
    # Same bounds as the former np.minimum(np.maximum(-1000.0, r), 1000.0).
    return np.clip(reward, -1000.0, 1000.0)
if __name__ == "__main__":
After Change
# obs = [cos(theta), sin(theta), dtheta/dt]
# To get the angle back from obs: atan2(sin(theta), cos(theta)).
theta = np.arctan2(
np.clip(obs[:, 1], -1.0, 1.0), np.clip(obs[:, 0], -1.0, 1.0) )
# Do everything in (B,) space (single theta-, action- and
# reward values).
a = np.clip(action, -self.max_torque, self.max_torque)[0]
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances Project Name: ray-project/ray
Commit Name: ce96b03b07a4a4bdd851aa84493c616cd291aff2
Time: 2020-10-06
Author: sven@anyscale.io
File Name: rllib/examples/env/mbmpo_env.py
Class Name: HopperWrapper
Method Name: reward
Project Name: keras-team/keras
Commit Name: 08a6dd04f9143a04ee77af1128eaedb3218c6147
Time: 2018-11-03
Author: gabrieldemarmiesse@gmail.com
File Name: keras/backend/numpy_backend.py
Class Name:
Method Name: hard_sigmoid
Project Name: nipy/dipy
Commit Name: a731dbfb00e17f59b55d0b36a14f69c65e9a849a
Time: 2012-04-05
Author: Bago.Amirbekian@ucsf.edu
File Name: dipy/reconst/shm.py
Class Name:
Method Name: normalize_data