ce96b03b07a4a4bdd851aa84493c616cd291aff2,rllib/examples/env/mbmpo_env.py,HopperWrapper,reward,#HopperWrapper#Any#Any#Any#,39
Before Change
assert obs.ndim == 2 and action.ndim == 2
assert obs.shape == obs_next.shape and action.shape[0] == obs.shape[0]
vel = obs_next[:, 5]
ctrl_cost = 1e-3 * np.sum(np.square(action), axis=1)
reward = vel + alive_bonus - ctrl_cost
return np.minimum(np.maximum(-1000.0, reward), 1000.0)
if __name__ == "__main__":
After Change
// obs = [cos(theta), sin(theta), dtheta/dt]
// To get the angle back from obs: atan2(sin(theta), cos(theta)).
theta = np.arctan2(
np.clip(obs[:, 1], -1.0, 1.0), np.clip(obs[:, 0], -1.0, 1.0))
// Do everything in (B,) space (single theta-, action- and
// reward values).
a = np.clip(action, -self.max_torque, self.max_torque)[0]
costs = self.angle_normalize(theta) ** 2 + \
0.1 * obs[:, 2] ** 2 + 0.001 * (a ** 2)
return -costs
@staticmethod
def angle_normalize(x):
In pattern: SUPERPATTERN
Frequency: 5
Non-data size: 6
Instances
Project Name: ray-project/ray
Commit Name: ce96b03b07a4a4bdd851aa84493c616cd291aff2
Time: 2020-10-06
Author: sven@anyscale.io
File Name: rllib/examples/env/mbmpo_env.py
Class Name: HopperWrapper
Method Name: reward
Project Name: ultralytics/yolov3
Commit Name: 636c1cff7a91c0b54c996ef48b36274b08e4a8b8
Time: 2019-08-11
Author: glenn.jocher@ultralytics.com
File Name: models.py
Class Name: YOLOLayer
Method Name: forward
Project Name: merenlab/anvio
Commit Name: 25f293aaf43c420572ca1cec94ac0ea6a16f70e7
Time: 2019-08-13
Author: quentin.clayssen@gmail.com
File Name: anvio/taxoestimation.py
Class Name: SCGsTaxomy
Method Name: make_list_taxonomy
Project Name: KrishnaswamyLab/PHATE
Commit Name: 32514a805320fa562efdddb2757431b27c5ef076
Time: 2018-03-20
Author: davidvandijk@gmail.com
File Name: Python/phate/phate.py
Class Name:
Method Name: embed_phate
Project Name: BindsNET/bindsnet
Commit Name: dcc5890dc2773090ab46a5e8bdd981a1104d9273
Time: 2018-06-29
Author: djsaunde@umass.edu
File Name: bindsnet/evaluation/__init__.py
Class Name:
Method Name: ngram