diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py index 6f78b9a8a0f..0e4676068c3 100644 --- a/python/llm/src/ipex_llm/transformers/convert.py +++ b/python/llm/src/ipex_llm/transformers/convert.py @@ -847,18 +847,9 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None, mp_group=mp_group, ) device = module.weight.data.device - from ipex_llm.transformers.utils import get_ipex_version - if get_ipex_version() < "2.1.10+xpu": - new_linear._parameters['weight'] = nn.Parameter(module.weight) - else: - # only from 2.1, ipex provides matmul_bias_out - # so we need to transpose weight - new_weight = module.weight.transpose(0, 1).contiguous() - new_linear._parameters['weight'] = nn.Parameter(new_weight) - new_linear.weight_type = 2 + new_linear._parameters['weight'] = nn.Parameter(module.weight) if module.bias is not None: - new_linear._parameters['bias'] = nn.Parameter(module.bias.data)\ - .to(device) + new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device) elif qtype == ggml_tensor_qtype["bf16"]: module.to(torch.bfloat16) new_linear = BF16Linear( diff --git a/python/llm/src/ipex_llm/transformers/low_bit_linear.py b/python/llm/src/ipex_llm/transformers/low_bit_linear.py index 848ffe179fe..a9dad42ed33 100644 --- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py +++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py @@ -51,8 +51,7 @@ from operator import mul from functools import reduce from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd -from ipex_llm.transformers.utils import get_autocast_dtype, get_xpu_device_name, \ - get_ipex_version +from ipex_llm.transformers.utils import get_autocast_dtype, get_xpu_device_name from ipex_llm.transformers.convert import is_deepspeed_available, get_use_vllm T = TypeVar("T", bound="torch.nn.Module") diff --git a/python/llm/src/ipex_llm/transformers/models/utils.py b/python/llm/src/ipex_llm/transformers/models/utils.py index 2a4d2f518d4..93836afb611 100644 --- a/python/llm/src/ipex_llm/transformers/models/utils.py +++ b/python/llm/src/ipex_llm/transformers/models/utils.py @@ -19,7 +19,7 @@ import warnings from ipex_llm.utils.common import invalidInputError from ipex_llm.ggml.quantize import ggml_tensor_qtype -from ipex_llm.transformers.utils import get_ipex_version, get_xpu_device_name +from ipex_llm.transformers.utils import get_xpu_device_name from ipex_llm.transformers.low_bit_linear import SYM_INT4, SYM_INT8, FP8E5, IQ2_XXS, FP4, FP8E4,\ FP6, ASYM_INT4 diff --git a/python/llm/src/ipex_llm/transformers/utils.py b/python/llm/src/ipex_llm/transformers/utils.py index 056e2455be8..329a3a4b79f 100644 --- a/python/llm/src/ipex_llm/transformers/utils.py +++ b/python/llm/src/ipex_llm/transformers/utils.py @@ -154,20 +154,6 @@ def get_autocast_dtype(x): f"Device {x.device} is not supported.") -_ipex_version = None - - -def get_ipex_version(): - - global _ipex_version - if _ipex_version is not None: - return _ipex_version - - import intel_extension_for_pytorch as ipex - _ipex_version = ipex.__version__ - return _ipex_version - - def get_xpu_device_name(device: torch.device): if device.type != "xpu": return device.type