From 56283d8b29c0561ee901d5df6f445f8d4d11aefd Mon Sep 17 00:00:00 2001
From: leonardozcm
Date: Wed, 10 Jul 2024 09:46:31 +0800
Subject: [PATCH] baichuan_npu

---
 .../HF-Transformers-AutoModels/LLM/README.md       |  4 +-
 .../transformers/npu_models/baichuan.py            | 53 +++++++++++++++++++
 .../transformers/npu_models/convert.py             |  8 +++
 3 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 python/llm/src/ipex_llm/transformers/npu_models/baichuan.py

diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
index 65a672637b3..57b5a1f33d5 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md
@@ -1,7 +1,7 @@
 # Run Large Language Model on Intel NPU
 
-In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on [Intel NPUs](../../../README.md). For illustration purposes, we utilize the [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) as reference Llama2 models. In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on Intel NPUs. See the table blow for verified models.
+In this directory, you will find examples on how you could apply IPEX-LLM INT4 or INT8 optimizations on LLM models on [Intel NPUs](../../../README.md). See the table below for verified models.
 
-## Verification Models
+## Verified Models
 
 | Model      | Model Link                                                       |
 |------------|----------------------------------------------------------------|
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/baichuan.py b/python/llm/src/ipex_llm/transformers/npu_models/baichuan.py
new file mode 100644
index 00000000000..091336c067c
--- /dev/null
+++ b/python/llm/src/ipex_llm/transformers/npu_models/baichuan.py
@@ -0,0 +1,53 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Some parts of this file are adapted from
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
+# which is licensed under Apache License 2.0:
+#
+# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import torch
+from ipex_llm.transformers.npu_models.common import merge_linear
+
+
+def merge_mlp(module: torch.nn.Module):
+    if type(module).__name__ == "MLP":
+        gate_up_proj = merge_linear([
+            module.gate_proj,
+            module.up_proj,
+        ])
+        module.gate_up_proj = gate_up_proj
+        del module.gate_proj, module.up_proj
+
+
+def baichuan_mlp_forward(self, x):
+    gate_up_proj = self.gate_up_proj(x)
+    gate_proj, up_proj = gate_up_proj.chunk(2, dim=-1)
+    down_proj = self.down_proj(self.act_fn(gate_proj) * up_proj)
+    return down_proj
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
index b750c75087a..03a1f18d7be 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -169,3 +169,11 @@ def optimize_llm(model: torch.nn.Module):
         convert_forward(model, StableLmModel, stablelm_model_forward)
         convert_forward(model, StableLmAttention, stablelm_attention_forward)
         convert_forward(model, StableLmMLP, stablelm_mlp_forward)
+
+    elif model.config.model_type == "baichuan":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.npu_models.baichuan import baichuan_mlp_forward, merge_mlp
+        model.apply(merge_mlp)
+
+        convert_forward(model, module.MLP, baichuan_mlp_forward)
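
Note: the change above speeds up Baichuan's MLP by fusing gate_proj and up_proj into a single gate_up_proj linear, so baichuan_mlp_forward issues one matmul and splits the result with chunk(2, dim=-1) instead of running two separate projections. Below is a minimal, self-contained sketch of that fusion, assuming a hypothetical merge_linear_sketch helper; the real merge_linear imported from ipex_llm.transformers.npu_models.common is not shown in this patch and may handle biases, dtypes, or quantized weights differently.

import torch

def merge_linear_sketch(linears):
    # Stack the per-projection weights along the output dimension, giving one
    # linear whose output is the concatenation [gate_out ; up_out].
    weight = torch.cat([lin.weight for lin in linears], dim=0)
    merged = torch.nn.Linear(weight.size(1), weight.size(0), bias=False)
    merged.weight = torch.nn.Parameter(weight)
    return merged

# Tiny check: chunking the fused output reproduces the two original projections.
hidden_size, intermediate_size = 8, 16
gate_proj = torch.nn.Linear(hidden_size, intermediate_size, bias=False)
up_proj = torch.nn.Linear(hidden_size, intermediate_size, bias=False)
gate_up_proj = merge_linear_sketch([gate_proj, up_proj])

x = torch.randn(2, hidden_size)
gate_out, up_out = gate_up_proj(x).chunk(2, dim=-1)
assert torch.allclose(gate_out, gate_proj(x), atol=1e-6)
assert torch.allclose(up_out, up_proj(x), atol=1e-6)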