Support low cpu mem usage in SPMD-FSDP (#11)
* feat: support low cpu mem usage in spmd-fsdp

* fix linting
lausannel authored Sep 3, 2024
1 parent 63e20fb commit 5070f86
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions torch_xla/experimental/spmd_fully_sharded_data_parallel.py
@@ -11,6 +11,7 @@
 import torch_xla.core.xla_model as xm
 import torch_xla.distributed.spmd as spmd
 from torch_xla.distributed.fsdp.wrap import recursive_wrap
+from torch_xla.distributed.fsdp._init_utils import _materialize_module


 def _prepare_spmd_partition_spec(param):
@@ -95,6 +96,12 @@ def __init__(
     )
     self._auto_wrap(auto_wrap_kwargs, fsdp_kwargs)

+    _materialize_module(
+        module,
+        None, [],
+        deferred_init_check_fn=lambda k: not isinstance(
+            k, SpmdFullyShardedDataParallel))
+
     # Let's move the module to xla device in case it's not moved
     # by the caller already.
     self._orig_module = module.to(xm.xla_device())
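
Note on the change: the added _materialize_module call (reused from the FSDP init utilities) materializes parameters that were created with deferred initialization before the wrapper moves the module to the XLA device, so the full weights never have to be allocated in host memory up front. The sketch below is illustrative only and not part of this commit; it assumes the optional torchdistx package for deferred_init, and MyLargeModel is a hypothetical placeholder module.

# Illustrative sketch only (not part of this commit). Assumes the optional
# torchdistx package is installed; MyLargeModel is a hypothetical placeholder.
import numpy as np
import torch.nn as nn
import torch_xla.distributed.spmd as spmd
import torch_xla.runtime as xr
from torchdistx.deferred_init import deferred_init
from torch_xla.experimental.spmd_fully_sharded_data_parallel import (
    SpmdFullyShardedDataParallel as FSDPv2)


class MyLargeModel(nn.Module):
  """Placeholder; replace with the real network."""

  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(4096, 4096)

  def forward(self, x):
    return self.linear(x)


xr.use_spmd()  # FSDPv2 expects the SPMD execution mode to be enabled.

# Build the module with deferred (fake) parameters so no real weight
# storage is allocated in host memory.
model = deferred_init(MyLargeModel)

# Wrapping materializes the deferred parameters (via the _materialize_module
# call added in this commit) and then shards them on the XLA device.
num_devices = xr.global_runtime_device_count()
mesh = spmd.Mesh(np.array(range(num_devices)), (num_devices, 1), ("fsdp", "model"))
model = FSDPv2(model, mesh=mesh)

The sketch sticks to the torchdistx deferred-init path, which is what the new deferred_init_check_fn argument points at; whether modules built directly on the meta device are also handled depends on the _materialize_module helper imported here.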
