582921ffe3b04ff502e1c3a05088ba2902e0f5bd,rl_coach/agents/value_optimization_agent.py,ValueOptimizationAgent,run_off_policy_evaluation,#ValueOptimizationAgent#,104
Before Change
assert self.ope_manager
dataset_as_episodes = self.call_memory("get_all_complete_episodes_from_to",
(self.call_memory("get_last_training_set_episode_id") + 1,
self.call_memory("num_complete_episodes")))
if len(dataset_as_episodes) == 0:
raise ValueError("train_to_eval_ratio is too high causing the evaluation set to be empty. "
After Change
assert self.ope_manager
if not isinstance(self.pre_network_filter, NoInputFilter) and len(self.pre_network_filter.reward_filters) != 0:
raise ValueError("Defining a pre-network reward filter when OPEs are calculated will result in a mismatch "
"between q values (which are scaled), and actual rewards, which are not. It is advisable "
"to use an input_filter, if possible, instead, which will filter the transitions directly "
"in the replay buffer, affecting both the q_values and the rewards themselves. ")
ips, dm, dr, seq_dr, wis = self.ope_manager.evaluate(
evaluation_dataset_as_episodes=self.memory.evaluation_dataset_as_episodes,
evaluation_dataset_as_transitions=self.memory.evaluation_dataset_as_transitions,
batch_size=self.ap.network_wrappers["main"].batch_size,
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 2
Instances Project Name: NervanaSystems/coach
Commit Name: 582921ffe3b04ff502e1c3a05088ba2902e0f5bd
Time: 2019-05-02
Author: gal.leibovich@intel.com
File Name: rl_coach/agents/value_optimization_agent.py
Class Name: ValueOptimizationAgent
Method Name: run_off_policy_evaluation
Project Name: NervanaSystems/coach
Commit Name: a7f5442015df3693dc4fe86755ea84ef697c76f4
Time: 2018-10-23
Author: ajay.deshpande@intel.com
File Name: rl_coach/agents/agent.py
Class Name: Agent
Method Name: _should_train
Project Name: NervanaSystems/coach
Commit Name: 72a1d9d426004269997f8b40bdd64f8ee582d91e
Time: 2018-09-04
Author: 30383381+itaicaspi-intel@users.noreply.github.com
File Name: rl_coach/agents/policy_optimization_agent.py
Class Name: PolicyOptimizationAgent
Method Name: train