num_rollout_goals:last_env_goal_idx] = \
env_goals[goal_key]
if num_future_goals > 0:
future_obs_idxs = []
for i in indices[-num_future_goals:]:
possible_future_obs_idxs = self._idx_to_future_obs_idx[i]
# This is generally faster than random.choice. Makes you wonder what
# random.choice is doing (see the timing sketch after this listing).
num_options = len(possible_future_obs_idxs)
next_obs_i = int(np.random.randint(0, num_options))
future_obs_idxs.append(possible_future_obs_idxs[next_obs_i])
future_obs_idxs = np.array(future_obs_idxs)
resampled_goals[-num_future_goals:] = self._next_obs[
self.achieved_goal_key
][future_obs_idxs]
for goal_key in self.goal_keys:
new_obs_dict[goal_key][-num_future_goals:] = \
self._next_obs[goal_key][future_obs_idxs]
new_next_obs_dict[goal_key][-num_future_goals:] = \
self._next_obs[goal_key][future_obs_idxs]
new_obs_dict[self.desired_goal_key] = resampled_goals
new_next_obs_dict[self.desired_goal_key] = resampled_goals
new_obs_dict = postprocess_obs_dict(new_obs_dict)
new_next_obs_dict = postprocess_obs_dict(new_next_obs_dict)
# resampled_goals must be postprocessed as well
resampled_goals = new_next_obs_dict[self.desired_goal_key]
new_actions = self._actions[indices]
# For example, the environments in this repo have batch-wise
# implementations of computing rewards:
# https://github.com/vitchyr/multiworld
if hasattr(self.env, "compute_rewards"):
new_rewards = self.env.compute_rewards(
new_actions,
new_next_obs_dict,
)
else:  # Assuming it's a (possibly wrapped) gym GoalEnv
new_rewards = np.ones((batch_size, 1))
for i in range(batch_size):
new_rewards[i] = self.env.compute_reward(
new_next_obs_dict[self.achieved_goal_key][i],
new_next_obs_dict[self.desired_goal_key][i],
None
)
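The reward branch just above is identical in both versions of the code: it prefers a batched env.compute_rewards when the environment provides one (as the multiworld environments linked above do) and otherwise falls back to the per-transition gym GoalEnv-style compute_reward. A hedged sketch of the two hooks the buffer dispatches on; ToyGoalEnv, the goal keys, and the 0.05 threshold are invented for illustration:

import numpy as np

class ToyGoalEnv:
    """Illustrative only: the two reward interfaces the replay buffer looks for."""

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Per-transition hook (gym GoalEnv signature); used by the else-branch.
        return -float(np.linalg.norm(achieved_goal - desired_goal) > 0.05)

    def compute_rewards(self, actions, obs):
        # Batched hook; used when hasattr(env, "compute_rewards") is true.
        # obs is a dict of (batch, goal_dim) arrays; these key names are placeholders.
        dists = np.linalg.norm(obs["achieved_goal"] - obs["desired_goal"], axis=1)
        return -(dists > 0.05).astype(np.float64)

Either way, every sampled transition gets a reward recomputed against its relabeled goal rather than the goal it was collected with.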
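The inline comment about random.choice in the sampling loop above is a performance claim. As a rough, standalone illustration (not part of the buffer code; the array size and iteration count are arbitrary, and it assumes the comparison is against np.random.choice on a NumPy array), drawing one element via np.random.randint plus indexing is usually noticeably faster, since np.random.choice repeats its argument validation and probability setup on every call:

import timeit
import numpy as np

possible_idxs = np.arange(500)  # stand-in for one _idx_to_future_obs_idx entry

# Single draw with np.random.choice
t_choice = timeit.timeit(lambda: np.random.choice(possible_idxs), number=100_000)

# Single draw via np.random.randint plus indexing, as in the loop above
t_randint = timeit.timeit(
    lambda: possible_idxs[int(np.random.randint(0, len(possible_idxs)))],
    number=100_000,
)

print(f"np.random.choice: {t_choice:.2f}s  randint+index: {t_randint:.2f}s")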
After Change
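The change below replaces the per-index Python loop with a single vectorized draw: one uniform sample per transition, scaled by that transition's number of candidate future observations and truncated to an integer index. As a minimal standalone sketch of the same trick on made-up data (the names mirror the buffer's, but the mapping and indices are invented and nothing here touches the real class):

import numpy as np

# Stand-in for self._idx_to_future_obs_idx: each sampled transition maps to the
# indices of observations that occur later in the same episode.
idx_to_future_obs_idx = {
    0: np.array([1, 2, 3]),
    1: np.array([4, 5]),
    2: np.array([6, 7, 8, 9]),
}
future_indices = np.array([0, 1, 2, 2])  # transitions picked for relabeling

# One uniform draw per transition, scaled by that transition's candidate count.
possible_future_obs_lens = np.array(
    [len(idx_to_future_obs_idx[i]) for i in future_indices]
)
next_obs_idxs = (
    np.random.random(len(future_indices)) * possible_future_obs_lens
).astype(int)

future_obs_idxs = np.array([
    idx_to_future_obs_idx[ids][next_obs_idxs[i]]
    for i, ids in enumerate(future_indices)
])
print(future_obs_idxs)  # one valid future-observation index per sampled transition

Scaling uniform samples by the per-row lengths is what lets a single call cover every transition: each row has a different upper bound, so one shared np.random.randint high value would not work.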
if num_future_goals > 0:
# better future obs sample algorithm
future_indices = indices[-num_future_goals:]
possible_future_obs_lens = np.array([len(self._idx_to_future_obs_idx[i]) for i in future_indices])
next_obs_idxs = (np.random.random(num_future_goals) * possible_future_obs_lens).astype(np.int)
future_obs_idxs = np.array([self._idx_to_future_obs_idx[ids][next_obs_idxs[i]] for i, ids in enumerate(future_indices)])
resampled_goals[-num_future_goals:] = self._next_obs[
self.achieved_goal_key
][future_obs_idxs]
for goal_key in self.goal_keys:
new_obs_dict[goal_key][-num_future_goals:] = \
self._next_obs[goal_key][future_obs_idxs]
new_next_obs_dict[goal_key][-num_future_goals:] = \
self._next_obs[goal_key][future_obs_idxs]
new_obs_dict[self.desired_goal_key] = resampled_goals
new_next_obs_dict[self.desired_goal_key] = resampled_goals
new_obs_dict = postprocess_obs_dict(new_obs_dict)
new_next_obs_dict = postprocess_obs_dict(new_next_obs_dict)
# resampled_goals must be postprocessed as well
resampled_goals = new_next_obs_dict[self.desired_goal_key]
new_actions = self._actions[indices]
# For example, the environments in this repo have batch-wise
# implementations of computing rewards:
# https://github.com/vitchyr/multiworld
if hasattr(self.env, "compute_rewards"):
new_rewards = self.env.compute_rewards(
new_actions,
new_next_obs_dict,
)
else:  # Assuming it's a (possibly wrapped) gym GoalEnv
new_rewards = np.ones((batch_size, 1))
for i in range(batch_size):
new_rewards[i] = self.env.compute_reward(
new_next_obs_dict[self.achieved_goal_key][i],
new_next_obs_dict[self.desired_goal_key][i],
None