// action selectionif np.random.rand() < self.epsilon:
// choose best actionstate_action = self.q_table.loc[observation, :]
// some actions may have the same value, so randomly choose one of these actions
action = np.random.choice(state_action[state_action == np.max(state_action)].index)else:
// choose random action
action = np.random.choice(self.actions)
return action