backend=args.distributed_backend, init_method=args.distributed_init_method,
world_size=args.distributed_world_size)
# Record the rank assigned by the process group just initialized above.
args.distributed_rank = torch.distributed.get_rank()
# Silence stdout/logging on non-master ranks so only rank 0 prints.
# NOTE(review): `is_master` / `suppress_output` are defined elsewhere in the
# file — presumably master means distributed_rank == 0; confirm against callers.
if not is_master(args):
suppress_output()
return args.distributed_rank
# --- After change: updated implementation of the same initialization follows ---
# Distributed setup is meaningless with a single process; fail fast so the
# caller falls back to the non-distributed code path.
if args.distributed_world_size == 1:
raise ValueError("Cannot initialize distributed with distributed_world_size=1")
# `_use_c10d` looks like a module-level one-element list used as a mutable
# latch: decide once (from --no-c10d) whether to use the c10d backend, then
# reuse that decision on subsequent calls — TODO confirm at its definition.
if _use_c10d[0] is None:
_use_c10d[0] = not args.no_c10d
# Fall back when this torch build does not ship the (private, pre-1.0)
# c10d DistributedDataParallel implementation.
if _use_c10d[0] and not hasattr(torch.nn.parallel, "_DistributedDataParallelC10d"):