Commit

rm gc collect in packing (#438)
wenhuach21 authored Feb 13, 2025
1 parent edc2b03 commit 082f01e
Showing 3 changed files with 2 additions and 8 deletions.
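The helper being removed, clear_memory, is imported from auto_round/export/export_to_awq/utils.py (see the last file in this diff). Its body is not part of the commit, but here is a minimal sketch of what such a helper presumably does, assuming it wraps gc.collect() and the CUDA cache as the commit title implies:

```python
import gc

import torch


def clear_memory(tensor=None):
    # Assumed shape of the removed helper, not the repo's verbatim code.
    if tensor is not None:
        del tensor
    # gc.collect() is a full-heap scan: it visits every tracked Python object.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```

Because each call scans the whole heap, invoking it once per packed layer makes packing cost grow with layer count; the commit drops the per-layer calls and leaves reclamation to the allocator.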
auto_round/export/export_to_autogptq/export.py (2 changes: 0 additions, 2 deletions)

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-
# MIT License
#
# Copyright (c) 2023 潘其威(William)
@@ -117,7 +116,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
qlayer.to(device)
pbar.update(1)

-
def save_quantized_as_autogptq(output_dir, inplace=True, backend="auto_gptq:exllamav2",
**kwargs):
"""Export the model to autogptq format to easily leverage cuda kernel."""
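For orientation, this exporter is normally reached through auto-round's save_quantized entry point rather than called directly. A hedged usage sketch (checkpoint and output path illustrative only):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from auto_round import AutoRound

model_name = "facebook/opt-125m"  # illustrative checkpoint
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

autoround = AutoRound(model, tokenizer, bits=4, group_size=128)
autoround.quantize()
# format="auto_gptq" routes through the registered exporter, i.e. the
# save_quantized_as_autogptq function shown above.
autoround.save_quantized("./tmp_autoround", format="auto_gptq", inplace=True)
```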
auto_round/export/export_to_autoround/export.py (5 changes: 1 addition, 4 deletions)

@@ -21,15 +21,13 @@
import torch.nn as nn
import transformers

from auto_round.export.register import register_format
from auto_round.utils import get_layer_names_in_block, get_module, logger, set_module
import threadpoolctl as tctl
import inspect
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from auto_round.utils import get_autogptq_packing_qlinear


def check_neq_config(config, data_type, bits, group_size, sym):
"""
Checks if the provided configuration parameters are not equal to the values in the config dictionary.
@@ -133,7 +131,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
qlayer.pack(layer, scale, zero, None)
qlayer.to(device)
else:
-from ..export_to_awq.utils import clear_memory
scale, zp = layer_config[name]["scale"].to(torch.float32), layer_config[name]["zp"].to(torch.float32)
scale = scale.t().contiguous()
zp = zp.t().contiguous()
@@ -149,7 +146,7 @@
)
qlayer.to(device)
set_module(model, name, qlayer)
-clear_memory()
+
pbar.update(1)


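The hunk above deletes the per-layer clear_memory() call that ran after set_module. A simplified sketch of the driver loop this changes (assuming pack_layer as defined in this file; the loop itself is not shown in the diff):

```python
import gc
from concurrent.futures import ThreadPoolExecutor

from tqdm import tqdm

from auto_round.export.export_to_autoround.export import pack_layer


def pack_all(model, layer_config, backend, names):
    with tqdm(total=len(names)) as pbar:
        with ThreadPoolExecutor(max_workers=2) as executor:
            for name in names:
                executor.submit(pack_layer, name, model, layer_config, backend, pbar)
    # At most one trailing collection: one full-heap scan total, instead of
    # the one-scan-per-layer cost of the removed clear_memory() calls.
    gc.collect()
```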
auto_round/export/export_to_awq/export.py (3 changes: 1 addition, 2 deletions)

@@ -31,12 +31,11 @@
extract_block_names_to_str)
import copy
import json
-from .utils import WQLinear_GEMM, clear_memory
+from .utils import WQLinear_GEMM
from concurrent.futures import ThreadPoolExecutor
import threadpoolctl as tctl
from tqdm import tqdm

-
def pack_layer(name, model, layer_config, backend, pbar):
with tctl.threadpool_limits(limits=1):
pbar.set_description(f"packing {name}")
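All three pack_layer variants wrap their work in tctl.threadpool_limits(limits=1), as visible above. A standalone sketch of that pattern (shapes illustrative):

```python
import threadpoolctl as tctl
import torch


def pack_one(weight: torch.Tensor) -> torch.Tensor:
    # Pin native BLAS/OpenMP thread pools to one thread in this region so
    # that concurrent pack_layer workers submitted to a ThreadPoolExecutor
    # do not oversubscribe the CPU with nested parallelism.
    with tctl.threadpool_limits(limits=1):
        return weight.t().contiguous()


packed = pack_one(torch.randn(256, 256))
```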
