From 7a7862df46f40cdd719df3c165e5b76dca44f913 Mon Sep 17 00:00:00 2001 From: venkatram-dev Date: Sat, 31 Aug 2024 15:13:49 -0700 Subject: [PATCH 1/5] make_distribute_tutorial_work_in_google_colab --- intermediate_source/dist_tuto.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index 35f6341395..37de383604 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -47,6 +47,7 @@ the following template. """run.py:""" #!/usr/bin/env python import os + import sys import torch import torch.distributed as dist import torch.multiprocessing as mp @@ -66,7 +67,11 @@ the following template. if __name__ == "__main__": size = 2 processes = [] - mp.set_start_method("spawn") + if "google.colab" in sys.modules: + print("Running in Google Colab") + mp.get_context("spawn") + else: + mp.set_start_method("spawn") for rank in range(size): p = mp.Process(target=init_process, args=(rank, size, run)) p.start() @@ -156,7 +161,8 @@ we should not modify the sent tensor nor access the received tensor before ``req In other words, - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour. -- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour. +- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, +until ``req.wait()`` has been executed. However, after ``req.wait()`` has been executed we are guaranteed that the communication took place, From 513a37813119b0cd75522b812754e7b9ab827bb8 Mon Sep 17 00:00:00 2001 From: venkatram-dev Date: Sat, 31 Aug 2024 16:53:44 -0700 Subject: [PATCH 2/5] format comment --- intermediate_source/dist_tuto.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index 37de383604..937ed5e7df 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -161,8 +161,8 @@ we should not modify the sent tensor nor access the received tensor before ``req In other words, - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour. -- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, -until ``req.wait()`` has been executed. +- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, until ``req.wait()`` +has been executed. However, after ``req.wait()`` has been executed we are guaranteed that the communication took place, From 2bc88a5d7aa658c4264e935ca05b9b18b10f0b03 Mon Sep 17 00:00:00 2001 From: venkatram-dev Date: Sat, 31 Aug 2024 16:56:30 -0700 Subject: [PATCH 3/5] format comments --- intermediate_source/dist_tuto.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index 937ed5e7df..ca1687f494 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -161,8 +161,7 @@ we should not modify the sent tensor nor access the received tensor before ``req In other words, - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour. -- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, until ``req.wait()`` -has been executed. +- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, until ``req.wait()`` has been executed. However, after ``req.wait()`` has been executed we are guaranteed that the communication took place, From 268c638ec808ce2d9470edde7e980885258d91c1 Mon Sep 17 00:00:00 2001 From: venkatram-dev Date: Sat, 31 Aug 2024 16:59:35 -0700 Subject: [PATCH 4/5] format comment space --- intermediate_source/dist_tuto.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index ca1687f494..3370cc44a1 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -161,7 +161,8 @@ we should not modify the sent tensor nor access the received tensor before ``req In other words, - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour. -- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour, until ``req.wait()`` has been executed. +- reading from ``tensor`` after ``dist.irecv()`` will result in undefined + behaviour, until ``req.wait()`` has been executed. However, after ``req.wait()`` has been executed we are guaranteed that the communication took place, From c7f5a9d9fbe2d4bea64067ca488921c20514ba1b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 13 Jan 2025 08:33:12 -0800 Subject: [PATCH 5/5] Update intermediate_source/dist_tuto.rst --- intermediate_source/dist_tuto.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index b0645df77f..9a004aa67b 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -73,7 +73,7 @@ the following template. else: mp.set_start_method("spawn") for rank in range(size): - p = mp.Process(target=init_process, args=(rank, size, run)) + p = mp.Process(target=init_process, args=(rank, world_size, run)) p.start() processes.append(p)