Commit

Merge pull request #92 from BlackSamorez/falcon
Falcon lm_head split hotfix
Andrei Panferov authored Jun 23, 2023
2 parents f8475bc + db8da5a commit 9e8ced1
Showing 2 changed files with 3 additions and 1 deletion.
setup.cfg (1 addition, 1 deletion)
@@ -1,6 +1,6 @@
 [metadata]
 name = tensor_parallel
-version = 1.2.7
+version = 1.2.8
 author = Andrei Panferov and Yaroslav Lisnyak
 author_email = [email protected]
 description = Automatically shard your large model between multiple GPUs, works without torch.distributed
src/tensor_parallel/slicing_configs.py (2 additions, 0 deletions)
@@ -418,6 +418,8 @@ def get_refined_web_config(model_config: PretrainedConfig, devices: Sequence[tor
         r".*mlp\.dense_4h_to_h\.weight$": Split(world_size=world_size, dim=1),
         # RWModel
         r".*word_embeddings\.weight$": Split(world_size=world_size, dim=1),
+        # RWForCausalLM
+        r".*lm_head\.weight$": Split(world_size=world_size, dim=1),
     },
     input_rules={
         r".*self_attention$": {"layer_past": select_kv_for_rank},
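The added rule shards Falcon's RWForCausalLM output head the same way the word embeddings are already sharded: `lm_head.weight`, a `[vocab_size, hidden_size]` matrix, is split along dim=1 (the hidden dimension), so each device holds only a slice of it. Below is a minimal plain-PyTorch sketch of what such a dim=1 split implies; the toy shapes are made up for illustration, and this is not the library's internal implementation, which handles the sharding and the cross-device reduction itself.

```python
import torch

# Toy shapes, chosen only for illustration (not Falcon's real dimensions).
vocab_size, hidden_size, world_size = 32, 16, 2
lm_head_weight = torch.randn(vocab_size, hidden_size)
hidden_states = torch.randn(1, hidden_size)

# Split(world_size=world_size, dim=1): slice the weight along the hidden
# dimension, one shard per device; the matching slice of the hidden states
# goes with each shard.
weight_shards = torch.tensor_split(lm_head_weight, world_size, dim=1)
input_shards = torch.tensor_split(hidden_states, world_size, dim=1)

# Each device computes partial logits from its slice; summing the partials
# (an all-reduce in a real multi-GPU setup) recovers the full logits.
partial_logits = [x @ w.T for x, w in zip(input_shards, weight_shards)]
logits = sum(partial_logits)

assert torch.allclose(logits, hidden_states @ lm_head_weight.T, atol=1e-5)
```

Before this change the RefinedWeb config had no pattern matching `lm_head.weight`, which presumably left the head unsharded when splitting a Falcon checkpoint; the hotfix adds the missing rule alongside the existing `word_embeddings.weight` one.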
