Commit

rm gc collect in packing (#438)
wenhuach21 authored Feb 13, 2025
1 parent edc2b03 commit 082f01e
Showing 3 changed files with 2 additions and 8 deletions.
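The helper being removed, clear_memory, is imported from auto_round/export/export_to_awq/utils.py (see the last file in this diff). Its body is not part of the commit, but here is a minimal sketch of what such a helper presumably does, assuming it wraps gc.collect() and the CUDA cache as the commit title implies:

```python
import gc

import torch


def clear_memory(tensor=None):
    # Assumed shape of the removed helper, not the repo's verbatim code.
    if tensor is not None:
        del tensor
    # gc.collect() is a full-heap scan: it visits every tracked Python object.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```

Because each call scans the whole heap, invoking it once per packed layer makes packing cost grow with layer count; the commit drops the per-layer calls and leaves reclamation to the allocator.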
auto_round/export/export_to_autogptq/export.py (2 changes: 0 additions, 2 deletions)

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-
# MIT License
#
# Copyright (c) 2023 潘其威(William)
@@ -117,7 +116,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
qlayer.to(device)
pbar.update(1)

-
def save_quantized_as_autogptq(output_dir, inplace=True, backend="auto_gptq:exllamav2",
**kwargs):
"""Export the model to autogptq format to easily leverage cuda kernel."""
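For orientation, this exporter is normally reached through auto-round's save_quantized entry point rather than called directly. A hedged usage sketch (checkpoint and output path illustrative only):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from auto_round import AutoRound

model_name = "facebook/opt-125m"  # illustrative checkpoint
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

autoround = AutoRound(model, tokenizer, bits=4, group_size=128)
autoround.quantize()
# format="auto_gptq" routes through the registered exporter, i.e. the
# save_quantized_as_autogptq function shown above.
autoround.save_quantized("./tmp_autoround", format="auto_gptq", inplace=True)
```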
auto_round/export/export_to_autoround/export.py (5 changes: 1 addition, 4 deletions)

@@ -21,15 +21,13 @@
import torch.nn as nn
import transformers

from auto_round.export.register import register_format
from auto_round.utils import get_layer_names_in_block, get_module, logger, set_module
import threadpoolctl as tctl
import inspect
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from auto_round.utils import get_autogptq_packing_qlinear


def check_neq_config(config, data_type, bits, group_size, sym):
"""
Checks if the provided configuration parameters are not equal to the values in the config dictionary.
@@ -133,7 +131,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
qlayer.pack(layer, scale, zero, None)
qlayer.to(device)
else:
-from ..export_to_awq.utils import clear_memory
scale, zp = layer_config[name]["scale"].to(torch.float32), layer_config[name]["zp"].to(torch.float32)
scale = scale.t().contiguous()
zp = zp.t().contiguous()
@@ -149,7 +146,7 @@
)
qlayer.to(device)
set_module(model, name, qlayer)
-clear_memory()
+
pbar.update(1)


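The hunk above deletes the per-layer clear_memory() call that ran after set_module. A simplified sketch of the driver loop this changes (assuming pack_layer as defined in this file; the loop itself is not shown in the diff):

```python
import gc
from concurrent.futures import ThreadPoolExecutor

from tqdm import tqdm

from auto_round.export.export_to_autoround.export import pack_layer


def pack_all(model, layer_config, backend, names):
    with tqdm(total=len(names)) as pbar:
        with ThreadPoolExecutor(max_workers=2) as executor:
            for name in names:
                executor.submit(pack_layer, name, model, layer_config, backend, pbar)
    # At most one trailing collection: one full-heap scan total, instead of
    # the one-scan-per-layer cost of the removed clear_memory() calls.
    gc.collect()
```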
auto_round/export/export_to_awq/export.py (3 changes: 1 addition, 2 deletions)

@@ -31,12 +31,11 @@
extract_block_names_to_str)
import copy
import json
-from .utils import WQLinear_GEMM, clear_memory
+from .utils import WQLinear_GEMM
from concurrent.futures import ThreadPoolExecutor
import threadpoolctl as tctl
from tqdm import tqdm

-
def pack_layer(name, model, layer_config, backend, pbar):
with tctl.threadpool_limits(limits=1):
pbar.set_description(f"packing {name}")
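All three pack_layer variants wrap their work in tctl.threadpool_limits(limits=1), as visible above. A standalone sketch of that pattern (shapes illustrative):

```python
import threadpoolctl as tctl
import torch


def pack_one(weight: torch.Tensor) -> torch.Tensor:
    # Pin native BLAS/OpenMP thread pools to one thread in this region so
    # that concurrent pack_layer workers submitted to a ThreadPoolExecutor
    # do not oversubscribe the CPU with nested parallelism.
    with tctl.threadpool_limits(limits=1):
        return weight.t().contiguous()


packed = pack_one(torch.randn(256, 256))
```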
