From 1604b4ead884ac6a52a8b45e528efafb285c8158 Mon Sep 17 00:00:00 2001
From: Yishuo Wang
Date: Thu, 26 Dec 2024 11:35:12 +0800
Subject: [PATCH] small fix (#12616)

---
 python/llm/src/ipex_llm/transformers/convert.py      | 3 ---
 python/llm/src/ipex_llm/transformers/models/llama.py | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index f321fa44acc..c5a583d2c88 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -1784,9 +1784,6 @@ def _optimize_post(model, lightweight_bmm=False):
         convert_forward(model,
                         module.CohereAttention,
                         cohere_attention_forward)
-        convert_forward(model,
-                        module.CohereLayerNorm,
-                        rms_norm_forward)
         convert_forward(model,
                         module.CohereMLP,
                         mlp_silu_forward)
diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py
index d4000caa03f..59257c050ae 100644
--- a/python/llm/src/ipex_llm/transformers/models/llama.py
+++ b/python/llm/src/ipex_llm/transformers/models/llama.py
@@ -144,12 +144,12 @@ def llama_attention_forward(
 
     if query_states.device.type == "xpu":
         import xe_addons
-        if position_embeddings is None:
-            # transformers < 4.43
+        if hasattr(self, "rotary_emb"):
+            # transformers < 4.46
             xe_addons.rotary_half_inplaced(self.rotary_emb.inv_freq, position_ids,
                                            query_states, key_states)
         else:
-            # transformers >= 4.43
+            # transformers >= 4.46
             cos, sin = position_embeddings
             make_cache_contiguous_inplaced(cos, sin)
             xe_addons.rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
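
Note (not part of the patch): below is a minimal, self-contained sketch of the feature-detection idea the llama.py change relies on. The classes and the apply_rope helper are hypothetical stand-ins, not IPEX-LLM or transformers code; the 4.46 version boundary is taken from the comments in the patch, which indicate that newer transformers compute the rotary cos/sin at the model level and pass them in as position_embeddings, so the attention layer no longer owns a rotary_emb attribute.

# Minimal sketch (hypothetical, not IPEX-LLM code) of the feature-detection pattern
# used by the patched check in llama_attention_forward.

class OldStyleAttention:
    """Stand-in for an attention layer that still owns a rotary_emb module
    (transformers < 4.46, per the comments in the patch)."""
    def __init__(self):
        self.rotary_emb = object()  # placeholder for the layer-level rotary module


class NewStyleAttention:
    """Stand-in for a newer attention layer: rotary cos/sin are computed by the
    model and passed in as position_embeddings, so there is no rotary_emb here."""
    pass


def apply_rope(attn, position_embeddings=None):
    # Mirror of the patched check: detect the layout by looking for the
    # rotary_emb attribute instead of testing position_embeddings for None.
    if hasattr(attn, "rotary_emb"):
        return "layer-level rotary_emb path"
    cos, sin = position_embeddings  # provided by the model in newer transformers
    return f"model-level path with cos={cos!r}, sin={sin!r}"


print(apply_rope(OldStyleAttention()))
print(apply_rope(NewStyleAttention(), position_embeddings=("cos", "sin")))

Checking the attribute rather than the argument avoids silently taking the wrong branch when a caller supplies position_embeddings on a version where the layer still owns rotary_emb.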