5dafbde8a22a81827514d41513f32ea9be63a9b2,ml/rl/test/gym/open_ai_gym_environment.py,OpenAIGymEnvironment,policy,#OpenAIGymEnvironment#Any#Any#Any#,108
Before Change
action_str = predictor.discrete_action_policy(next_state_dict)[1]
action_idx = self.actions.index(action_str.decode("utf-8"))
else:
action_score_dict = predictor.predict(next_state_dict)[0]
best_action = None
best_score = None
for action_name, action_score in action_score_dict.items():
After Change
action_str = predictor.discrete_action_policy(next_state_dict)[1]
action_idx = self.actions.index(action_str.decode("utf-8"))
elif isinstance(predictor, ContinuousActionDQNPredictor):
normed_action_keys = sorted(self.normalization_action.keys())
best_action = None
best_score = None
for action_key in normed_action_keys:
action_score = predictor.predict(
next_state_dict, [{action_key: 1}])[0]["Q"]
if best_action is None or best_score < action_score:
best_action = action_key
best_score = action_score
action_idx = normed_action_keys.index(best_action)
action[action_idx] = 1.0
return action
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: facebookresearch/Horizon
Commit Name: 5dafbde8a22a81827514d41513f32ea9be63a9b2
Time: 2018-03-20
Author: edoardoc@fb.com
File Name: ml/rl/test/gym/open_ai_gym_environment.py
Class Name: OpenAIGymEnvironment
Method Name: policy
Project Name: tensorflow/models
Commit Name: 11e9c7adfbf7d50dd9ef4442cf7806cdb2ee2368
Time: 2017-11-17
Author: markdaoust@google.com
File Name: samples/core/get_started/premade_estimator.py
Class Name:
Method Name: main
Project Name: tensorflow/models
Commit Name: 11e9c7adfbf7d50dd9ef4442cf7806cdb2ee2368
Time: 2017-11-17
Author: markdaoust@google.com
File Name: samples/core/get_started/custom_estimator.py
Class Name:
Method Name: main