diff --git a/intermediate_source/dist_tuto.rst b/intermediate_source/dist_tuto.rst index 35f6341395..3370cc44a1 100644 --- a/intermediate_source/dist_tuto.rst +++ b/intermediate_source/dist_tuto.rst @@ -47,6 +47,7 @@ the following template. """run.py:""" #!/usr/bin/env python import os + import sys import torch import torch.distributed as dist import torch.multiprocessing as mp @@ -66,7 +67,11 @@ the following template. if __name__ == "__main__": size = 2 processes = [] - mp.set_start_method("spawn") + if "google.colab" in sys.modules: + print("Running in Google Colab") + mp.get_context("spawn") + else: + mp.set_start_method("spawn") for rank in range(size): p = mp.Process(target=init_process, args=(rank, size, run)) p.start() @@ -156,7 +161,8 @@ we should not modify the sent tensor nor access the received tensor before ``req In other words, - writing to ``tensor`` after ``dist.isend()`` will result in undefined behaviour. -- reading from ``tensor`` after ``dist.irecv()`` will result in undefined behaviour. +- reading from ``tensor`` after ``dist.irecv()`` will result in undefined + behaviour until ``req.wait()`` has been executed. However, after ``req.wait()`` has been executed we are guaranteed that the communication took place,