bugfix for qlora finetuning on GPU (intel-analytics#12298)
* bugfix for qlora 100 step error

* indent fix

* annotation fix
JinheTang authored Oct 30, 2024
1 parent 70037ad commit 46d8300
Showing 2 changed files with 8 additions and 2 deletions.
@@ -23,6 +23,8 @@ pip install bitsandbytes scipy
 source /opt/intel/oneapi/setvars.sh # necessary to run before installing deepspeed
 pip install git+https://github.com/microsoft/DeepSpeed.git@78c518e
 pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+# (optional) install mpirun to run multi-card finetuning
+sudo apt install openmpi-bin
 ```

 ### 2. Configures OneAPI environment variables
python/llm/src/ipex_llm/transformers/low_bit_linear.py — 8 changes: 6 additions & 2 deletions
@@ -794,8 +794,12 @@ def forward(self, x: torch.Tensor):
                                                self.weight.qtype, input_seq_size)
                 result = result.to(x.dtype)
             else:
-                result = xe_linear.forward_new(x_2d, self.weight.data,
-                                               self.weight.qtype, input_seq_size)
+                if self.weight.qtype == NF4:
+                    result = xe_linear.forward_new(x_2d, self.weight.data.view(torch.uint8),
+                                                   self.weight.qtype, input_seq_size)
+                else:
+                    result = xe_linear.forward_new(x_2d, self.weight.data,
+                                                   self.weight.qtype, input_seq_size)
 
             if do_empty_cache:
                 torch.xpu.empty_cache()
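The core of the patch is the `self.weight.data.view(torch.uint8)` call: for NF4, the quantized weights are packed bytes, and `Tensor.view(dtype)` reinterprets that same storage as raw `uint8` without copying before it is handed to the XPU kernel. A minimal sketch of this mechanism using only stock PyTorch (the `packed` tensor here is a hypothetical stand-in for the real NF4 weight storage, not the actual ipex-llm layout):

```python
import torch

# Stand-in for packed quantized weight storage (hypothetical example data).
packed = torch.tensor([1.0, -2.0], dtype=torch.bfloat16)

# Reinterpret the same underlying bytes as uint8 -- no data copy is made.
as_bytes = packed.view(torch.uint8)

# bfloat16 is 2 bytes per element, so the byte view has twice as many elements
# while pointing at the exact same memory.
print(as_bytes.numel())                         # 4
print(as_bytes.dtype)                           # torch.uint8
print(as_bytes.data_ptr() == packed.data_ptr()) # True
```

Because the view shares storage with the original tensor, passing it to a kernel that expects raw bytes (as the fixed `forward_new` call does for NF4) costs nothing at runtime.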
