29b8a4deb58ca9798b61690a31de1ea57de92122,fairseq/trainer.py,Trainer,_check_grad_norms,#Trainer#Any#,840
Before Change
self._grad_norm_buf.zero_()
self._grad_norm_buf[self.data_parallel_rank] = grad_norm
distributed_utils.all_reduce(self._grad_norm_buf, group=self.data_parallel_process_group)
if not (self._grad_norm_buf == self._grad_norm_buf[0]).all():
raise RuntimeError(
"Fatal error: gradients are inconsistent between workers. "
"Try --ddp-backend=no_c10d."
)
def _reduce_and_log_stats(self, logging_outputs, sample_size, grad_norm=None):
if grad_norm is not None:
metrics.log_speed("ups", 1., priority=100, round=2)
metrics.log_scalar("gnorm", grad_norm, priority=400, round=3)
After Change
group=self.data_parallel_process_group
)
if not self._is_grad_norms_consistent(self._grad_norm_buf):
pretty_detail = "\n".join(
"rank {:3d} = {:.8f}".format(r, n)
for r, n in enumerate(self._grad_norm_buf.tolist())
)
error_detail = "grad_norm across the workers:\n{}\n".format(pretty_detail)
raise RuntimeError(
"Fatal error: gradients are inconsistent between workers. "
"Try --ddp-backend=no_c10d. "
"Or are you mixing up different generation of GPUs in training?"
+ "\n"
+ "-" * 80
+ "\n{}\n".format(error_detail)
+ "-" * 80
)
def _reduce_and_log_stats(self, logging_outputs, sample_size, grad_norm=None):
if grad_norm is not None:
metrics.log_speed("ups", 1., priority=100, round=2)
metrics.log_scalar("gnorm", grad_norm, priority=400, round=3)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 8
Instances
Project Name: pytorch/fairseq
Commit Name: 29b8a4deb58ca9798b61690a31de1ea57de92122
Time: 2020-05-29
Author: yqw@fb.com
File Name: fairseq/trainer.py
Class Name: Trainer
Method Name: _check_grad_norms
Project Name: VOLTTRON/volttron
Commit Name: 13cb16b9edbb868505c41401dc951a8c8accb396
Time: 2020-06-22
Author: james.larson@pnnl.gov
File Name: services/core/MasterDriverAgent/master_driver/interfaces/ecobee.py
Class Name: Interface
Method Name: _set_point
Project Name: VOLTTRON/volttron
Commit Name: 694383ee05139068146f021d19f80143f98011f6
Time: 2020-06-01
Author: james.larson@pnnl.gov
File Name: services/core/MasterDriverAgent/master_driver/interfaces/ecobee.py
Class Name: Interface
Method Name: _set_point