Commit

Use correct model_source for loading GPTQ models with FMS (#61)
tdoublep authored Jan 17, 2025
2 parents 590042e + d19ff3d commit 7e068b5
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 3 additions & 2 deletions vllm/core/scheduler.py

@@ -919,8 +919,9 @@ def _schedule_prefills(
         ignored_seq_groups: List[SequenceGroup] = []
         seq_groups: List[ScheduledSequenceGroup] = []

-        applicable_spyre_warmup_shapes = list(
-            self.scheduler_config.spyre_warmup_shapes)
+        if self.scheduler_config.spyre_scheduling_enabled:
+            applicable_spyre_warmup_shapes = list(
+                self.scheduler_config.spyre_warmup_shapes)

         waiting_queue = self.waiting
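
The scheduler hunk makes the warmup-shape lookup conditional, presumably because spyre_warmup_shapes is only populated when Spyre scheduling is enabled, so building the list unconditionally is unsafe on other backends. A minimal sketch of the guarded pattern, using a hypothetical stand-in config object rather than vLLM's real SchedulerConfig:

from typing import List, Optional, Tuple

class DummySchedulerConfig:
    """Hypothetical stand-in for the scheduler config referenced in this diff."""

    def __init__(self, spyre_scheduling_enabled: bool,
                 spyre_warmup_shapes: Tuple[dict, ...] = ()):
        self.spyre_scheduling_enabled = spyre_scheduling_enabled
        self.spyre_warmup_shapes = spyre_warmup_shapes

def applicable_warmup_shapes(cfg: DummySchedulerConfig) -> Optional[List[dict]]:
    # Only read spyre_warmup_shapes when Spyre scheduling is on,
    # mirroring the guard added in _schedule_prefills above.
    if cfg.spyre_scheduling_enabled:
        return list(cfg.spyre_warmup_shapes)
    return None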
2 changes: 1 addition & 1 deletion vllm/model_executor/model_loader/spyre.py

@@ -143,7 +143,7 @@ def load_weights(self, model_config: ModelConfig, max_prompt_length: int,
                 "desc_act": quant_cfg['desc_act'],
             }
             data_type = None
-            model_source = "llama_gptq_hf_unfused_aiu"
+            model_source = "hf_gptq_aiu"
         else:
             linear_config = {"linear_type": "torch_linear"}
             data_type = self.dtype
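
The second hunk is the fix named in the commit title: for GPTQ-quantized checkpoints, the loader now passes "hf_gptq_aiu" instead of "llama_gptq_hf_unfused_aiu" as the model_source handed to IBM's foundation-model-stack (FMS). A minimal sketch of how such a source string typically reaches FMS; the get_model call and its arguments below are illustrative assumptions, not code from this repository:

from fms import models  # IBM foundation-model-stack

def load_fms_model(model_path: str, is_gptq: bool):
    # The source name tells FMS which checkpoint adapter to use when
    # reading the weights; GPTQ checkpoints need the GPTQ/AIU source.
    model_source = "hf_gptq_aiu" if is_gptq else "hf"
    return models.get_model(
        "llama",              # architecture (illustrative)
        "7b",                 # variant (illustrative)
        model_path=model_path,
        source=model_source,
    )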
