From 43b5c48c39a6f04e670b4f6025decc332bd3de81 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Thu, 16 Jan 2025 22:26:02 +0100 Subject: [PATCH 1/2] Use correct model_source for loading GPTQ models with FMS Signed-off-by: Thomas Parnell --- vllm/model_executor/model_loader/spyre.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/spyre.py b/vllm/model_executor/model_loader/spyre.py index d2c4634e4..5b4f45d1f 100644 --- a/vllm/model_executor/model_loader/spyre.py +++ b/vllm/model_executor/model_loader/spyre.py @@ -143,7 +143,7 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int, "desc_act": quant_cfg['desc_act'], } data_type = None - model_source = "llama_gptq_hf_unfused_aiu" + model_source = "hf_gptq_aiu" else: linear_config = {"linear_type": "torch_linear"} data_type = self.dtype From d19ff3d3ceb095866f92bd7984f6c1cd179ee6c2 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 17 Jan 2025 19:39:59 +0000 Subject: [PATCH 2/2] Fix error with deploy test Signed-off-by: Thomas Parnell --- vllm/core/scheduler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index e78d187b5..0834706c7 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -919,8 +919,9 @@ def _schedule_prefills( ignored_seq_groups: List[SequenceGroup] = [] seq_groups: List[ScheduledSequenceGroup] = [] - applicable_spyre_warmup_shapes = list( - self.scheduler_config.spyre_warmup_shapes) + if self.scheduler_config.spyre_scheduling_enabled: + applicable_spyre_warmup_shapes = list( + self.scheduler_config.spyre_warmup_shapes) waiting_queue = self.waiting