Fix Llama 3.2 & 3.1 on LNL (#12196)
Oscilloscope98 authored Oct 14, 2024
1 parent: 516b578 · commit: f8d1adc
Showing 2 changed files with 4 additions and 1 deletion.
python/llm/src/ipex_llm/transformers/convert.py: 2 additions & 1 deletion

@@ -1268,7 +1268,7 @@ def _optimize_post(model, lightweight_bmm=False):
         from ipex_llm.transformers.models.llama import llama_mlp_forward
 
     if model.config.model_type == "llama" and model.config.rope_scaling is not None:
-        # llama 3.2
+        # llama 3.2 & llama 3.1
         modeling_module_name = model.__class__.__module__
         module = importlib.import_module(modeling_module_name)
         from ipex_llm.transformers.models.common import rms_norm_forward
@@ -1279,6 +1279,7 @@ def _optimize_post(model, lightweight_bmm=False):
         convert_forward(model, module.LlamaMLP, mlp_silu_forward)
         convert_forward(model, module.LlamaModel, llama_model_forward)
         convert_forward(model, module.LlamaAttention, llama_attention_forward)
+        convert_forward(model, module.LlamaSdpaAttention, llama_attention_forward)
     elif model.config.model_type == "mllama":
         # llama 3.2 vision
         modeling_module_name = model.__class__.__module__
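
Why the extra convert_forward call matters: transformers versions that define LlamaSdpaAttention dispatch to it whenever attn_implementation is "sdpa", which is typically the default, so a freshly loaded model instantiates LlamaSdpaAttention rather than LlamaAttention. Registering llama_attention_forward for both classes ensures the optimized forward is applied no matter which backend transformers selected. The sketch below illustrates the generic forward-replacement pattern such a helper relies on; the name patch_forward and its body are illustrative assumptions, not the actual convert_forward implementation in ipex_llm.

# Illustrative sketch only: rebind the forward method of every submodule that
# matches a target class, the same monkey-patching idea convert_forward relies on.
import types
import torch.nn as nn

def patch_forward(model: nn.Module, target_cls: type, new_forward) -> None:
    for module in model.modules():
        if isinstance(module, target_cls):
            # Bind new_forward to this instance so calling module(x) runs it.
            module.forward = types.MethodType(new_forward, module)

# Hypothetical usage mirroring the diff above: the same optimized forward is
# registered for both the eager and the SDPA attention classes.
# patch_forward(model, module.LlamaAttention, llama_attention_forward)
# patch_forward(model, module.LlamaSdpaAttention, llama_attention_forward)
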
python/llm/src/ipex_llm/transformers/models/llama32.py: 2 additions & 0 deletions

@@ -204,6 +204,8 @@ def llama_attention_forward(
     kv_seq_len = key_states.size(2)
     if attention_mask is not None:  # no matter the length, we just slice it
         causal_mask = attention_mask[:, :, :, :kv_seq_len]
+    else:
+        causal_mask = None
 
     attn_weights = None
     if use_sdp(q_len, kv_seq_len, self.head_dim, query_states):
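
Why the else branch is needed: inside llama_attention_forward, causal_mask was previously assigned only when attention_mask was not None. When the caller passes no mask (for example, plain single-sequence generation without padding), the later reference to causal_mask in the attention computation raises UnboundLocalError. Defaulting it to None keeps the scaled-dot-product path valid, since a None mask simply means no masking. The snippet below is a self-contained sketch of that pattern, not the actual ipex_llm forward; the attend helper and the tensor shapes are assumptions for illustration.

# Self-contained sketch of the masking pattern fixed above (illustrative only).
import torch
import torch.nn.functional as F

def attend(query, key, value, attention_mask):
    kv_seq_len = key.size(2)
    if attention_mask is not None:  # no matter the length, we just slice it
        causal_mask = attention_mask[:, :, :, :kv_seq_len]
    else:
        # Without this branch, attention_mask=None would leave causal_mask
        # undefined and the call below would raise UnboundLocalError.
        causal_mask = None
    # F.scaled_dot_product_attention accepts attn_mask=None (no masking applied).
    return F.scaled_dot_product_attention(query, key, value, attn_mask=causal_mask)

q = k = v = torch.randn(1, 8, 16, 32)   # (batch, num_heads, seq_len, head_dim)
out = attend(q, k, v, attention_mask=None)  # returns output instead of erroring
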
