Fix bug that torch.ops.torch_ipex.matmul_bias_out cannot work on Linux MTL for short input (#11292)
Oscilloscope98 authored Jun 12, 2024
1 parent b61f6e3 commit 8edcdeb
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -813,14 +813,17 @@ def forward(self, x: torch.Tensor):
             self.weight.data = self.weight.data.to(x.dtype)
 
         if not self.use_esimd_kernel(x):
-            if get_ipex_version() < "2.1.10+xpu":
+            if get_ipex_version() < "2.1.10+xpu" \
+                    or get_xpu_device_type(x) not in ["arc", "flex", "pvc"]:
                 if self.weight_type == 2:
-                    self.weight = self.weight.transpose(0, 1).contiguous()
+                    self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
+                                                     requires_grad=False)
                     self.weight_type = 1
                 result = F.linear(x, self.weight, self.bias)
             else:
                 if self.weight_type == 1:
-                    self.weight = self.weight.transpose(0, 1).contiguous()
+                    self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
+                                                     requires_grad=False)
                     self.weight_type = 2
                 result = torch.ops.torch_ipex.matmul_bias_out(x, self.weight, self.bias)
             if self.mp_group is not None:
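For context, below is a minimal standalone sketch of the dispatch this commit introduces in forward. The helper name linear_with_fallback and the ipex_version/device_type parameters are hypothetical stand-ins for the module's get_ipex_version() and get_xpu_device_type(x); only the condition and the two code paths mirror the diff.

import torch
import torch.nn.functional as F

def linear_with_fallback(x, weight, bias, ipex_version, device_type):
    # Hypothetical stand-in for the dispatch in forward above;
    # ipex_version / device_type replace get_ipex_version() and
    # get_xpu_device_type(x) from ipex_llm.
    if ipex_version < "2.1.10+xpu" or device_type not in ["arc", "flex", "pvc"]:
        # Older IPEX, or a device such as Linux MTL (Meteor Lake) where the
        # fused kernel fails for short input: fall back to stock F.linear,
        # which expects weight of shape (out_features, in_features)
        # (weight_type == 1 in the diff above).
        return F.linear(x, weight, bias)
    # Arc / Flex / PVC on IPEX >= 2.1.10+xpu: use the fused XPU kernel,
    # which takes the weight in the transposed layout
    # (weight_type == 2 in the diff above).
    return torch.ops.torch_ipex.matmul_bias_out(x, weight, bias)

The diff's other change, wrapping the transposed weight in torch.nn.Parameter(..., requires_grad=False) rather than assigning a bare tensor, is needed because nn.Module refuses to assign a plain Tensor to an attribute registered as a Parameter; re-wrapping keeps self.weight a valid Parameter after each layout swap.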