
Commit

remove useless code
qiuxin2012 committed Jun 13, 2024
1 parent 09d0205 commit 40e3ab8
Showing 1 changed file with 0 additions and 22 deletions.
python/llm/src/ipex_llm/transformers/models/chatglm4.py: 0 additions, 22 deletions
@@ -161,27 +161,6 @@ def chatglm4_model_forward_internal(
     )
 
 
-@torch.jit.script
-def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> torch.Tensor:
-    # x: [b, np, sq, hn]
-    b, np, sq, hn = x.size(0), x.size(1), x.size(2), x.size(3)
-    rot_dim = rope_cache.shape[-2] * 2
-    x, x_pass = x[..., :rot_dim], x[..., rot_dim:]
-    # truncate to support variable sizes
-    rope_cache = rope_cache[:, :sq]
-    xshaped = x.reshape(b, np, sq, rot_dim // 2, 2)
-    rope_cache = rope_cache.view(-1, 1, sq, xshaped.size(3), 2)
-    x_out2 = torch.stack(
-        [
-            xshaped[..., 0] * rope_cache[..., 0] - xshaped[..., 1] * rope_cache[..., 1],
-            xshaped[..., 1] * rope_cache[..., 0] + xshaped[..., 0] * rope_cache[..., 1],
-        ],
-        -1,
-    )
-    x_out2 = x_out2.flatten(3)
-    return torch.cat((x_out2, x_pass), dim=-1)
-
-
 def chatglm4_attention_forward(
     self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True
 ):
@@ -235,7 +214,6 @@ def chatglm4_attention_forward(
         kv_seq_len, use_quantize_kv, hidden_states.device
     )
 
-    # past_key_value: [bsz, n_kv_head, seq_len, head_dim] -> [seq_len, bsz, n_kv_head, head_dim]
     if use_cache:
         if past_key_value is None:
             past_key_value = torch.cat((key_states.unsqueeze(0).unsqueeze(0),
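For context, the deleted apply_rotary_pos_emb is the standard interleaved rotary-embedding helper; the upstream ChatGLM modeling code ships the same function, and nothing in this file calls this copy, which is presumably why it is "useless". Below is a minimal, self-contained sketch of the same rotation for anyone who wants to run it in isolation. The build_rope_cache helper, its (cos, sin) cache layout, and the example shapes are illustrative assumptions, not part of this commit or of the ipex_llm API.

import torch

# Hypothetical helper (not from the commit): build a rope cache shaped
# [batch, seq_len, rot_dim // 2, 2] holding (cos, sin) pairs, matching
# what the deleted function indexed as rope_cache[:, :sq].
def build_rope_cache(batch: int, seq_len: int, rot_dim: int,
                     base: float = 10000.0) -> torch.Tensor:
    inv_freq = 1.0 / (base ** (torch.arange(0, rot_dim, 2).float() / rot_dim))
    angles = torch.outer(torch.arange(seq_len).float(), inv_freq)  # [sq, rot_dim // 2]
    cache = torch.stack((angles.cos(), angles.sin()), dim=-1)      # [sq, rot_dim // 2, 2]
    return cache.unsqueeze(0).repeat(batch, 1, 1, 1)

# Same math as the deleted helper: rotate the first rot_dim channels of
# x ([b, np, sq, hn]) pairwise by (cos, sin); pass the rest through.
def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> torch.Tensor:
    b, np_, sq, hn = x.size(0), x.size(1), x.size(2), x.size(3)
    rot_dim = rope_cache.shape[-2] * 2
    x, x_pass = x[..., :rot_dim], x[..., rot_dim:]
    rope_cache = rope_cache[:, :sq]                    # truncate to current seq len
    xshaped = x.reshape(b, np_, sq, rot_dim // 2, 2)
    rope_cache = rope_cache.view(-1, 1, sq, xshaped.size(3), 2)
    x_out2 = torch.stack(
        [
            xshaped[..., 0] * rope_cache[..., 0] - xshaped[..., 1] * rope_cache[..., 1],
            xshaped[..., 1] * rope_cache[..., 0] + xshaped[..., 0] * rope_cache[..., 1],
        ],
        -1,
    )
    return torch.cat((x_out2.flatten(3), x_pass), dim=-1)

# Usage: rotate the first 64 of 128 head dims of a [1, 32, 16, 128] query.
q = torch.randn(1, 32, 16, 128)
cache = build_rope_cache(batch=1, seq_len=16, rot_dim=64)
assert apply_rotary_pos_emb(q, cache).shape == q.shape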
