Skip to content

Commit

Permalink
trial
Browse files Browse the repository at this point in the history
  • Loading branch information
plusbang committed Dec 19, 2024
1 parent caf15cc commit 3bcc542
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"

+ if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
+ # For ARL support
+ os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
+
if model.config.model_type == "baichuan":
# process NormHead module in Baichuan2 7B
if hasattr(model, 'lm_head') and model.lm_head is not None:
Expand Down Expand Up @@ -144,7 +148,9 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
# do not split mlp down_proj for Qwen2-7B & sym_int8
n_splits_down_proj = 1
else:
- n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+ n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+ os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+ os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
else:
invalidInputError(
model.config.hidden_size % quantization_group_size == 0 and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ def convert_llm(model: torch.nn.Module,
# do not split mlp down_proj for Qwen2-7B & sym_int8
n_splits_down_proj = 1
else:
- n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+ n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+ os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+ os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
else:
n_splits_linear = model.config.hidden_size // group_size
n_splits_down_proj = model.config.intermediate_size // group_size
Expand Down

0 comments on commit 3bcc542

Please sign in to comment.