From 9b23022a2cfd9a38f22dbdf3de5e668c3991f529 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Fri, 29 Mar 2024 17:25:35 +0800 Subject: [PATCH] Convert model to half precision before moving it to XPU to reduce memory usage --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 24a52a845a..73151594cb 100644 --- a/modules/models.py +++ b/modules/models.py @@ -352,7 +352,7 @@ def bigdl_llm_loader(model_name): if shared.args.device == "GPU": import intel_extension_for_pytorch - model = model.to("xpu") + model = model.half().to("xpu") tokenizer = AutoTokenizer.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)