From 4a5775428b9cbeedc8786f293730032b1dc2ffb6 Mon Sep 17 00:00:00 2001
From: smisra
Date: Wed, 25 Sep 2024 03:10:11 -0700
Subject: [PATCH] fixed a few issues with train_hypergrid

Remove the unused `global my_rank` / `global my_size` declarations from
initalize_distributed_compute, move set_seed below the distributed setup
and seed with `seed + my_rank` so each rank gets a distinct seed, and fix
the final timing loop, which used the Python 2 `dict.iteritems()` and a
malformed format string.
---
 tutorials/examples/train_hypergrid.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py
index 67ce0e86..49c50ebd 100644
--- a/tutorials/examples/train_hypergrid.py
+++ b/tutorials/examples/train_hypergrid.py
@@ -50,9 +50,6 @@ def average_gradients(model):
 def initalize_distributed_compute(dist_backend: str = "ccl"):
     """Initalizes distributed compute using either ccl or mpi backends."""
 
-    global my_rank  # TODO: remove globals?
-    global my_size  # TODO: remove globals?
-
     pmi_size = int(os.environ.get("PMI_SIZE", "0"))  # 0 or 1 default value?
     print("+ Initalizing distributed compute, PMI_SIZE={}".format(pmi_size))
 
@@ -95,7 +92,6 @@ def initalize_distributed_compute(dist_backend: str = "ccl"):
 
 def main(args):  # noqa: C901
     seed = args.seed if args.seed != 0 else DEFAULT_SEED
-    set_seed(seed)
     device_str = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
 
     use_wandb = args.wandb_project != ""
@@ -111,12 +107,13 @@ def main(args):  # noqa: C901
 
     if args.distributed:
         initalize_distributed_compute()
-        rank = dist.get_rank()
+        my_rank = dist.get_rank()
         world_size = torch.distributed.get_world_size()
-        print(f"Running with DDP on rank {rank}/{world_size}.")
+        print(f"Running with DDP on rank {my_rank}/{world_size}.")
     else:
         world_size = 1  # Single machine.
         my_rank = 0  # Single machine.
+    set_seed(seed + my_rank)
 
     # 1. Create the environment
     env = HyperGrid(
@@ -468,8 +465,8 @@ def main(args):  # noqa: C901
         }
 
         print("+ Final timing.")
-        for k, v in to_log.iteritems():
-            print("    {k}: {.:6f}".format(k, v))
+        for k, v in to_log.items():
+            print("    {}: {:.6f}".format(k, v))
 
     if args.profile:
         prof.stop()