75dff38e70ec291fbd60382174277fcadddfd285,reagent/ope/estimators/contextual_bandits_estimators.py,IPSEstimator,evaluate,#IPSEstimator#Any#,109

Before Change



    def evaluate(self, input: BanditsEstimatorInput, **kwargs) -> EstimatorResults:
        """Run inverse-propensity-scored evaluation over every log in ``input``.

        The estimator is reset first. For each log, three running averages are
        built over its samples and handed to ``self._append_estimate``:

        * the logged reward,
        * the logged reward scaled by the clamped importance ratio
          (target propensity / logged propensity of the logged action),
        * the reward that ``input.ground_truth_model`` reports for the
          sample's target action.

        Args:
            input: Evaluation input providing ``logs`` and
                ``ground_truth_model``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``self.results`` after one estimate has been appended per log.
        """
        self.reset()
        for log in input.logs:
            logged_avg = RunningAverage()
            weighted_avg = RunningAverage()
            truth_avg = RunningAverage()
            for sample in log.samples:
                logged_avg.add(sample.logged_reward)
                # Importance ratio of the action that was actually logged.
                target_prob = sample.target_propensities[sample.logged_action]
                logged_prob = sample.logged_propensities[sample.logged_action]
                ratio = self._weight_clamper(target_prob / logged_prob)
                weighted_avg.add(sample.logged_reward * ratio)
                # Reward the ground-truth model assigns to the target action.
                model_rewards = input.ground_truth_model(sample.context)
                truth_avg.add(model_rewards[sample.target_action])
            self._append_estimate(
                logged_avg.average, weighted_avg.average, truth_avg.average
            )
        return self.results


class DoublyRobustEstimator(IPSEstimator):

After Change


    def evaluate(
        self, input: BanditsEstimatorInput, **kwargs
    ) -> Optional[EstimatorResult]:
        """Estimate the target policy's reward by inverse propensity scoring.

        Every sample contributes its logged reward, its clamped importance
        weight (target probability / logging probability of the logged
        action), the weighted logged reward, and its ground-truth reward to
        separate running averages.

        Args:
            input: Evaluation input providing ``samples``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An ``EstimatorResult`` built from the accumulated averages. When
            ``self._weighted`` is set, the estimate is the weighted reward
            total normalised by the total importance weight; otherwise it is
            the plain average of the weighted rewards. Returns ``None`` in
            weighted mode when the accumulated importance weight is zero,
            because the normalised estimate is undefined in that case.
        """
        log_avg = RunningAverage()
        tgt_avg = RunningAverage()
        acc_weight = RunningAverage()
        gt_avg = RunningAverage()
        for sample in input.samples:
            log_avg.add(sample.log_reward)
            weight = (
                sample.tgt_action_probabilities[sample.log_action]
                / sample.log_action_probabilities[sample.log_action]
            )
            weight = self._weight_clamper(weight)
            tgt_avg.add(sample.log_reward * weight)
            acc_weight.add(weight)
            gt_avg.add(sample.ground_truth_reward)
        if self._weighted:
            weight_total = acc_weight.total
            # If every clamped weight is zero (e.g. the target policy never
            # picks a logged action) the normaliser is zero; report "no
            # estimate" instead of raising ZeroDivisionError.
            if weight_total == 0:
                return None
            return EstimatorResult(
                log_avg.average,
                tgt_avg.total / weight_total,
                gt_avg.average,
                acc_weight.average,
            )
        # NOTE(review): the fourth argument is the average weight in the
        # weighted branch but a sample count here -- confirm this is intended.
        return EstimatorResult(
            log_avg.average, tgt_avg.average, gt_avg.average, tgt_avg.count
        )

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 21

Instances

Link

Project Name: facebookresearch/Horizon

Commit Name: 75dff38e70ec291fbd60382174277fcadddfd285

Time: 2020-06-10

Author: jialiu@fb.com

File Name: reagent/ope/estimators/contextual_bandits_estimators.py

Class Name: IPSEstimator

Method Name: evaluate

Link

Project Name: facebookresearch/Horizon

Commit Name: 75dff38e70ec291fbd60382174277fcadddfd285

Time: 2020-06-10

Author: jialiu@fb.com

File Name: reagent/ope/estimators/contextual_bandits_estimators.py

Class Name: DoublyRobustEstimator

Method Name: evaluate

Link

Project Name: facebookresearch/Horizon

Commit Name: 75dff38e70ec291fbd60382174277fcadddfd285

Time: 2020-06-10

Author: jialiu@fb.com

File Name: reagent/ope/estimators/contextual_bandits_estimators.py

Class Name: DMEstimator

Method Name: evaluate