// Training hyperparameters and shared, module-level training state.
ENTROPY_BETA = 0.005 // weight of the entropy term used to boost exploration
LR_A = 0.00005 // learning rate for the actor
LR_C = 0.0001 // learning rate for the critic
// Starts as an empty list; presumably accumulates a running reward per finished episode -- TODO confirm against the code that appends to it.
GLOBAL_RUNNING_R = []
GLOBAL_EP = 0 // global episode counter: incremented during training; training stops once it is >= MAX_GLOBAL_EP (not defined in this section)
////////////////////////////////////// Asynchronous Advantage Actor Critic (A3C) ////////////////////////////////////////////////////////////////////////