diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py index 9e17a00c6ddd..6f5441327762 100644 --- a/python/llm/src/ipex_llm/transformers/models/llama.py +++ b/python/llm/src/ipex_llm/transformers/models/llama.py @@ -1076,7 +1076,7 @@ def llama_attention_forward_4_41_original( kv_seq_len += 1 # update past_key_value's seem_tokens and kv caches. if self.layer_idx == 0: - past_key_value.seen_tokens = kv_seq_len + past_key_value._seen_tokens = kv_seq_len past_key_value.key_cache[self.layer_idx] = key_states past_key_value.value_cache[self.layer_idx] = value_states