Commit

fix
rnwang04 committed Dec 26, 2024
1 parent 327bcf6 commit 5b07f16
Showing 2 changed files with 8 additions and 9 deletions.
File 1 of 2 (path not shown in this view):
@@ -29,7 +29,6 @@ def update_names_of_IR_and_export_blob(model, model_name, dir, compile_blob=True
     xml_path = os.path.join(dir, model_name + ".xml")
     bin_path = os.path.join(dir, model_name + ".bin")
     model.serialize(xml_path, bin_path)
-    # model.save(xml_path)
     new_ir_path = os.path.join(dir, model_name + "_new.xml")
     new_bin_path = os.path.join(dir, model_name + "_new.bin")
     blob_path = os.path.join(dir, model_name + ".blob")
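
The only change in this hunk is deleting the commented-out model.save(xml_path) fallback, leaving serialize as the single write path. For context, a minimal sketch of the file-naming scheme the surrounding code relies on; the helper name ir_paths is hypothetical, and the model wrapper's own API is not shown in this diff:

import os

def ir_paths(dir, model_name):
    # Mirrors the layout above: the serialized IR (.xml/.bin), the renamed
    # copy written by the rest of the function (_new.xml/_new.bin),
    # and the compiled blob (.blob).
    return (os.path.join(dir, model_name + ".xml"),
            os.path.join(dir, model_name + ".bin"),
            os.path.join(dir, model_name + "_new.xml"),
            os.path.join(dir, model_name + "_new.bin"),
            os.path.join(dir, model_name + ".blob"))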
@@ -178,9 +177,9 @@ def obtain_weight_from_single_layer(attn_layer, mlp_layer):
     weights = []
     if hasattr(attn_layer, "q_proj_dq_list"):
         for layer_list in [attn_layer.q_proj_dq_list, attn_layer.k_proj_dq_list,
-                           attn_layer.v_proj_dq_list, attn_layer.o_proj_dq_list,
-                           mlp_layer.gate_proj_dq_list, mlp_layer.up_proj_dq_list,
-                           mlp_layer.down_proj_dq_list]:
+                           attn_layer.v_proj_dq_list, attn_layer.o_proj_dq_list,
+                           mlp_layer.gate_proj_dq_list, mlp_layer.up_proj_dq_list,
+                           mlp_layer.down_proj_dq_list]:
             l_weights = []
             scales = []
             zeros = []
@@ -197,9 +196,9 @@ def obtain_weight_from_single_layer(attn_layer, mlp_layer):
                             torch.stack(scales, axis=0)))
     else:
         for layer in [attn_layer.q_proj, attn_layer.k_proj,
-                      attn_layer.v_proj, attn_layer.o_proj,
-                      mlp_layer.gate_proj, mlp_layer.up_proj,
-                      mlp_layer.down_proj]:
+                      attn_layer.v_proj, attn_layer.o_proj,
+                      mlp_layer.gate_proj, mlp_layer.up_proj,
+                      mlp_layer.down_proj]:
             if layer.zero is not None:
                 weights.append((layer.weight, layer.scale, layer.zero))
             else:

(The paired -/+ lines in the two hunks above render identically because this view drops leading whitespace; the change appears to be indentation-only.)
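
For readability, here is a condensed sketch of the two branches of obtain_weight_from_single_layer as they read after this commit. Attribute names are taken from the diff; the enclosing classes are assumed, and the dq-branch's handling of zeros is elided in this view, so it is omitted here:

import torch

def gather_weights(attn_layer, mlp_layer):
    # Condensed sketch, not the library function itself.
    weights = []
    if hasattr(attn_layer, "q_proj_dq_list"):
        # Split ("dq_list") layers: stack per-split weights and scales.
        for layer_list in [attn_layer.q_proj_dq_list, attn_layer.k_proj_dq_list,
                           attn_layer.v_proj_dq_list, attn_layer.o_proj_dq_list,
                           mlp_layer.gate_proj_dq_list, mlp_layer.up_proj_dq_list,
                           mlp_layer.down_proj_dq_list]:
            l_weights = [l.weight for l in layer_list]
            scales = [l.scale for l in layer_list]
            weights.append((torch.stack(l_weights, axis=0),
                            torch.stack(scales, axis=0)))
    else:
        # Unsplit layers: one (weight, scale[, zero]) tuple per projection.
        for layer in [attn_layer.q_proj, attn_layer.k_proj,
                      attn_layer.v_proj, attn_layer.o_proj,
                      mlp_layer.gate_proj, mlp_layer.up_proj,
                      mlp_layer.down_proj]:
            if layer.zero is not None:
                weights.append((layer.weight, layer.scale, layer.zero))
            else:
                weights.append((layer.weight, layer.scale))
    return weights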
File 2 of 2 (path not shown in this view):
@@ -134,7 +134,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     attn_layer = curr_layer.self_attn
     mlp_layer = curr_layer.mlp
     weights = obtain_weight_from_single_layer(attn_layer, mlp_layer)
-    q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer
+    q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer(attn_layer)
     cached_cos = curr_layer.self_attn.rotary_emb.cos_cached.to(torch.float16)
     cached_sin = curr_layer.self_attn.rotary_emb.sin_cached.to(torch.float16)
     layer_norm_0 = curr_layer.input_layernorm.weight.to(torch.float16)
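
This is the substantive fix of the commit: the helper was previously assigned rather than called, so the three-way unpacking would fail as soon as the line executed. A minimal reproduction of the failure mode follows; the helper body here is a hypothetical stand-in for the real one, which lives in the same module and receives the same fix in convert_fused_qwen_layer in the next hunk:

def obtain_qkv_bias_from_single_layer(attn_layer):
    # Hypothetical stand-in: return the q/k/v projection biases.
    return (attn_layer.q_proj.bias, attn_layer.k_proj.bias,
            attn_layer.v_proj.bias)

# Before the fix, the function object itself was unpacked, raising:
#   TypeError: cannot unpack non-iterable function object
# q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer

# After the fix, the helper is called and returns the three biases:
# q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer(attn_layer)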
@@ -256,7 +256,7 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down
         layer_weights.extend(weights)
         input_layer_norm_weights.append(layer_norm_0)
         post_attn_layernorm_weights.append(layer_norm_1)
-        q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer
+        q_bias, k_bias, v_bias = obtain_qkv_bias_from_single_layer(attn_layer)
         q_biases.append(q_bias)
         k_biases.append(k_bias)
         v_biases.append(v_bias)