See parent class
// upper bound used to ensure the reward is always positive
max_cost = np.array([self.env_params["target_velocity"]]*self.scenario.num_vehicles)max_cost = np.linalg.norm(max_cost)
// cost associated with being away from target velocity
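// if the vehicle's velocity is more than twice the target velocity, the cost does not become worse
// NOTE(review): no clipping of the cost is visible in the statement below - confirm the cap is actually applied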
cost = velocity - self.env_params["target_velocity"]cost = np.linalg.norm(cost)
////////////////////////////////////////////////////////////////////////////////////////////
if any(velocity < 0):
print("------------------------------")
print(velocity)
print(np.array(self.rl_ids)[np.array(velocity) < 0])
print("------------------------------")
////////////////////////////////////////////////////////////////////////////////////////////
return max_cost - cost
// return np.linalg.norm(np.array([0]*len(velocity)) - np.array([50]*len(velocity))) - \
// np.linalg.norm(velocity - self.env_params["target_velocity"])