baichuan_npu

intel-analytics · Jul 10, 2024 · 56283d8 · 56283d8
1 parent 76a5802
commit 56283d8
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 2 deletions.
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
@@ -1,7 +1,7 @@
 # Run Large Language Model on Intel NPU
-In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on [Intel NPUs](../../../README.md). For illustration purposes, we utilize the [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) as reference Llama2 models. In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on Intel NPUs. See the table blow for verified models.
+In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on [Intel NPUs](../../../README.md). See the table below for verified models.
 
-## Verification Models
+## Verified Models
 
 | Model      | Model Link                                                    |
 |------------|----------------------------------------------------------------|

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/baichuan.py b/python/llm/src/ipex_llm/transformers/npu_models/baichuan.py
@@ -0,0 +1,53 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Some parts of this file are adapted from
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
+# which is licensed under Apache License 2.0:
+#
+# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from ipex_llm.transformers.npu_models.common import merge_linear
+
+
+def merge_mlp(module: torch.nn.Module):
+    if type(module).__name__ == "MLP":
+        gate_up_proj = merge_linear([
+            module.gate_proj,
+            module.up_proj,
+        ])
+        module.gate_up_proj = gate_up_proj
+        del module.gate_proj, module.up_proj
+
+
+def baichuan_mlp_forward(self, x):
+    gate_up_proj = self.gate_up_proj(x)
+    gate_proj, up_proj = gate_up_proj.chunk(2, dim=-1)
+    down_proj = self.down_proj(self.act_fn(gate_proj) * up_proj)
+    return down_proj
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -169,3 +169,11 @@ def optimize_llm(model: torch.nn.Module):
         convert_forward(model, StableLmModel, stablelm_model_forward)
         convert_forward(model, StableLmAttention, stablelm_attention_forward)
         convert_forward(model, StableLmMLP, stablelm_mlp_forward)
+
+    elif model.config.model_type == "baichuan":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.npu_models.baichuan import baichuan_mlp_forward, merge_mlp
+        model.apply(merge_mlp)
+
+        convert_forward(model, module.MLP, baichuan_mlp_forward)