diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 721f7481380..31c3ecee8b2 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -1803,6 +1803,8 @@ def run_pipeline_parallel_gpu(repo_id, if 'exclude' in conf: excludes = conf['exclude'] streaming = False + use_fp16_torch_dtype = False + n_gpu = 2 if 'streaming' in conf: streaming = conf['streaming'] if 'use_fp16_torch_dtype' in conf: