From 9b23022a2cfd9a38f22dbdf3de5e668c3991f529 Mon Sep 17 00:00:00 2001
From: sgwhat <ge.song@intel.com>
Date: Fri, 29 Mar 2024 17:25:35 +0800
Subject: [PATCH] Cast model to fp16 on XPU to reduce memory usage

---
 modules/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index 24a52a845a..73151594cb 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -352,7 +352,7 @@ def bigdl_llm_loader(model_name):
 
     if shared.args.device == "GPU":
         import intel_extension_for_pytorch
-        model = model.to("xpu")
+        model = model.half().to("xpu")
 
     tokenizer = AutoTokenizer.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)