From 43b5c48c39a6f04e670b4f6025decc332bd3de81 Mon Sep 17 00:00:00 2001
From: Thomas Parnell <tpa@zurich.ibm.com>
Date: Thu, 16 Jan 2025 22:26:02 +0100
Subject: [PATCH 1/2] Use correct model_source for loading GPTQ models with FMS

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
---
 vllm/model_executor/model_loader/spyre.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/model_loader/spyre.py b/vllm/model_executor/model_loader/spyre.py
index d2c4634e4..5b4f45d1f 100644
--- a/vllm/model_executor/model_loader/spyre.py
+++ b/vllm/model_executor/model_loader/spyre.py
@@ -143,7 +143,7 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int,
                 "desc_act": quant_cfg['desc_act'],
             }
             data_type = None
-            model_source = "llama_gptq_hf_unfused_aiu"
+            model_source = "hf_gptq_aiu"
         else:
             linear_config = {"linear_type": "torch_linear"}
             data_type = self.dtype

From d19ff3d3ceb095866f92bd7984f6c1cd179ee6c2 Mon Sep 17 00:00:00 2001
From: Thomas Parnell <tpa@zurich.ibm.com>
Date: Fri, 17 Jan 2025 19:39:59 +0000
Subject: [PATCH 2/2] Fix error with deploy test: only read spyre_warmup_shapes when Spyre scheduling is enabled

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
---
 vllm/core/scheduler.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index e78d187b5..0834706c7 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -919,8 +919,9 @@ def _schedule_prefills(
         ignored_seq_groups: List[SequenceGroup] = []
         seq_groups: List[ScheduledSequenceGroup] = []
 
-        applicable_spyre_warmup_shapes = list(
-            self.scheduler_config.spyre_warmup_shapes)
+        if self.scheduler_config.spyre_scheduling_enabled:
+            applicable_spyre_warmup_shapes = list(
+                self.scheduler_config.spyre_warmup_shapes)
 
         waiting_queue = self.waiting