From 0b96ad2cabc2f54fe33b6156943903574b352535 Mon Sep 17 00:00:00 2001
From: qiuxin2012
Date: Thu, 13 Jun 2024 18:45:54 +0800
Subject: [PATCH] fix chatglm4 Nan

---
 python/llm/src/ipex_llm/transformers/models/chatglm2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/models/chatglm2.py b/python/llm/src/ipex_llm/transformers/models/chatglm2.py
index 983a6533e89..fc77cb89e70 100644
--- a/python/llm/src/ipex_llm/transformers/models/chatglm2.py
+++ b/python/llm/src/ipex_llm/transformers/models/chatglm2.py
@@ -92,8 +92,8 @@ def glm_sdpa(query, key, value, attention_mask=None, is_causal=False):
         context_layer = attn_output.view(query.shape)
     else:
         head_dim = query.size(-1)
-        attn = torch.matmul(query.to(key.dtype),
-                            key.transpose(2, 3)) / math.sqrt(head_dim)
+        attn = torch.matmul(query.to(key.dtype) / math.sqrt(head_dim),
+                            key.transpose(2, 3))
         if attn_bias is not None:
             attn += attn_bias
         attn = F.softmax(attn, dim=-1,
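
Note (not part of the patch): the two expressions are mathematically equivalent, but
they differ numerically in half precision. float16 tops out at ~65504, so computing
Q @ K^T first can overflow to inf, and the subsequent softmax over an all-inf row
yields NaN; dividing the query by sqrt(head_dim) before the matmul keeps the logits
in range. A minimal standalone sketch of the effect, using hypothetical constant
fp16 activations (values chosen only to force the overflow):

import math
import torch
import torch.nn.functional as F

head_dim = 128
# Hypothetical fp16 activations with moderately large magnitudes.
# (fp16 matmul may require a GPU/XPU, or a recent PyTorch build on CPU.)
query = torch.full((1, 1, 4, head_dim), 30.0, dtype=torch.float16)
key = torch.full((1, 1, 4, head_dim), 30.0, dtype=torch.float16)

# Old order: each logit is 30 * 30 * 128 = 115200 > 65504, so the product
# overflows to inf before the division by sqrt(head_dim) can rescale it.
attn_old = torch.matmul(query, key.transpose(2, 3)) / math.sqrt(head_dim)

# New order: (30 / sqrt(128)) * 30 * 128 ~= 10182, well within fp16 range.
attn_new = torch.matmul(query / math.sqrt(head_dim), key.transpose(2, 3))

print(torch.isinf(attn_old).any())                # tensor(True)
print(F.softmax(attn_old, dim=-1).isnan().any())  # tensor(True): inf - inf -> NaN
print(torch.isinf(attn_new).any())                # tensor(False)
print(F.softmax(attn_new, dim=-1).isnan().any())  # tensor(False)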