diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index 0f8c6ac72539a..d63931b901319 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -2002,8 +2002,9 @@ def warmup_graphs(self, seq_len, is_prompt, kv_caches, - temperature=1.0 if batch_size - not in warmed_random_sampler_bs else 0) + temperature=0) + # temperature=1.0 if batch_size + # not in warmed_random_sampler_bs else 0) warmed_random_sampler_bs.add(batch_size) used_mem = align_workers(mem_prof.consumed_device_memory, torch.distributed.ReduceOp.MAX)