# Training schedule: total improvement steps, evaluation cadence, and
# evaluation length, all expressed in raw environment steps.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentSteps(6250000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(62500)
schedule_params.evaluation_steps = EnvironmentSteps(6250)
# There is no heatup for DFP. heatup length is determined according to batch size. See below.
# DFP agent configuration.
# Goal vector weights the measurement(s) the agent optimizes for;
# here a single measurement: health.
agent_params.algorithm.goal_vector = [1]  # health
# this works better than the default which is set to 8 (while running with 8 workers)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
# scale observation and measurements to be -0.5 <-> 0.5
agent_params.network_wrappers["main"].input_embedders_parameters["measurements"].input_rescaling["vector"] = 100.
agent_params.network_wrappers["main"].input_embedders_parameters["measurements"].input_offset["vector"] = 0.5