From 2ec88272881a49d40d91ae0cd858b19d22996c70 Mon Sep 17 00:00:00 2001 From: Sky Lee <46676799+skylee-01@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:40:10 +0800 Subject: [PATCH] [Bugfix] Qwen-vl output is inconsistent in speculative decoding (#10350) --- vllm/spec_decode/batch_expansion.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/spec_decode/batch_expansion.py b/vllm/spec_decode/batch_expansion.py index 6a7929d9d8f9c..25ef27b8378f0 100644 --- a/vllm/spec_decode/batch_expansion.py +++ b/vllm/spec_decode/batch_expansion.py @@ -353,6 +353,7 @@ def _create_single_target_seq_group_metadata( seq_data = seq_group_metadata.seq_data[seq_id] prompt_token_ids = seq_data.prompt_token_ids_array new_output_token_ids = [*seq_data.get_output_token_ids(), *token_ids] + mrope_position_delta = seq_data.mrope_position_delta new_seq_data_dict = { target_seq_id: @@ -368,6 +369,7 @@ def _create_single_target_seq_group_metadata( # the kv cache is filled by a previous batch in the batch expansion. for data in new_seq_data_dict.values(): data.update_num_computed_tokens(data.get_len() - 1) + data.mrope_position_delta = mrope_position_delta return SequenceGroupMetadata( request_id=seq_group_metadata.request_id,