From 85148435c1c3b85b8d3d1d4beaee0a125a80ff7f Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 14:22:24 +0200 Subject: [PATCH 01/14] debug --- deltazip/modeling/_base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index 494c303..f898729 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -682,7 +682,10 @@ def skip(*args, **kwargs): model_init_kwargs["low_cpu_mem_usage"] = True del model else: - model_init_kwargs["device_map"] = "auto" + logger.info(f"Using [{model_init_kwargs['device_map']}] to load model.") + if "device_map" not in model_init_kwargs: + model_init_kwargs["device_map"] = model_init_kwargs + # model_init_kwargs["low_cpu_mem_usage"] = True torch.cuda.empty_cache() From e9d79ecbcd94b4d74fba1f3dc1dbc5b8bb486ed3 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 14:25:38 +0200 Subject: [PATCH 02/14] . --- deltazip/modeling/_base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index f898729..f35a1a8 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -682,10 +682,9 @@ def skip(*args, **kwargs): model_init_kwargs["low_cpu_mem_usage"] = True del model else: - logger.info(f"Using [{model_init_kwargs['device_map']}] to load model.") if "device_map" not in model_init_kwargs: - model_init_kwargs["device_map"] = model_init_kwargs - + model_init_kwargs["device_map"] = None + logger.info(f"Using [{model_init_kwargs['device_map']}] to load model.") # model_init_kwargs["low_cpu_mem_usage"] = True torch.cuda.empty_cache() From 365f09690515c38a8178bd1ed77d2ec74195d1d4 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 14:30:23 +0200 Subject: [PATCH 03/14] . --- deltazip/modeling/_base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index f35a1a8..d4dd625 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -634,6 +634,7 @@ def from_pretrained( pretrained_model_name_or_path: str, compress_config: BaseCompressionConfig, max_memory: Optional[dict] = None, + device_map: Optional[str] = None, **model_init_kwargs, ): """load un-quantized pretrained model to cpu""" @@ -682,8 +683,11 @@ def skip(*args, **kwargs): model_init_kwargs["low_cpu_mem_usage"] = True del model else: - if "device_map" not in model_init_kwargs: + if device_map is None: model_init_kwargs["device_map"] = None + else: + model_init_kwargs["device_map"] = device_map + logger.info(f"Using [{model_init_kwargs['device_map']}] to load model.") # model_init_kwargs["low_cpu_mem_usage"] = True From 971ec47f2ea14f03966bde2f0800fc3a69349257 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 14:31:41 +0200 Subject: [PATCH 04/14] . --- cli/compress.py | 9 +++++++-- deltazip/modeling/auto.py | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cli/compress.py b/cli/compress.py index 9514cd8..5664626 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -23,13 +23,18 @@ def main(args): ) print("[info] compress config:", compress_config) target_model = AutoDeltaZipModelForCausalLM.from_pretrained( - args.target_model, compress_config=compress_config, torch_dtype=torch.float16 + args.target_model, + compress_config=compress_config, + torch_dtype=torch.float16, + device_map="auto" ) target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": print("[info] base model is defined, delta mode enabled") base_model = AutoDeltaZipModelForCausalLM.from_pretrained( - args.base_model, compress_config=compress_config, torch_dtype=torch.float16 + args.base_model, + compress_config=compress_config, + torch_dtype=torch.float16, ) base_model.requires_grad_(False) torch.cuda.empty_cache() diff --git a/deltazip/modeling/auto.py b/deltazip/modeling/auto.py index 4792eda..a7f7505 100644 --- a/deltazip/modeling/auto.py +++ b/deltazip/modeling/auto.py @@ -36,6 +36,7 @@ def from_pretrained( pretrained_model_name_or_path: str, compress_config: BaseCompressionConfig, max_memory: Optional[dict] = None, + device_map: Optional[str] = None, **model_init_kwargs ) -> BaseDeltaZipModelForCausalLM: model_type = check_and_get_model_type(pretrained_model_name_or_path) @@ -43,6 +44,7 @@ def from_pretrained( pretrained_model_name_or_path=pretrained_model_name_or_path, compress_config=compress_config, max_memory=max_memory, + device_map=device_map, **model_init_kwargs ) From bc72e119932a620f63c2a2693ac878d87cdd390a Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 14:48:39 +0200 Subject: [PATCH 05/14] . --- cli/compress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/compress.py b/cli/compress.py index 5664626..9c038ad 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -26,7 +26,7 @@ def main(args): args.target_model, compress_config=compress_config, torch_dtype=torch.float16, - device_map="auto" + max_memory = {0: "2GIB", 1: "48GIB", 2: "48GIB", 3:"48GIB"} ) target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": From 73f934877caa55dff14cc7eb30c1d94b8e559952 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 21:09:34 +0200 Subject: [PATCH 06/14] update --- deltazip/modeling/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index d4dd625..c8be9a0 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -529,9 +529,9 @@ def tmp(_, inp, out): base_weight = base_model.model.state_dict()[ f"{self.layers_block_name}.{i}.{name}.weight" ] - assert torch.equal( - finetuned_weight, base_weight + delta_only - ) + # assert torch.equal( + # finetuned_weight, base_weight + delta_only + # ) subset[name].weight.data = compressed_ws[ f"{self.layers_block_name}.{i}.{name}" ] From 71f0eaec044d4b53834358458471155fbeb4d645 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Thu, 4 Apr 2024 21:10:41 +0200 Subject: [PATCH 07/14] minor --- cli/compress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/compress.py b/cli/compress.py index 9c038ad..0497fa9 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -26,7 +26,7 @@ def main(args): args.target_model, compress_config=compress_config, torch_dtype=torch.float16, - max_memory = {0: "2GIB", 1: "48GIB", 2: "48GIB", 3:"48GIB"} + max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3:"10GIB", "cpu": "140GIB"} ) target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": From 89d7bbf9fb31a08b53c9bb8411b7a0eef2eff217 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Sat, 6 Apr 2024 17:52:20 +0200 Subject: [PATCH 08/14] debugging compress large models with offloading --- cli/compress.py | 39 +- deltazip/modeling/_base.py | 22 +- deltazip/modeling/_utils.py | 4 +- packing_experiments.ipynb | 1738 ++++++++++++++++++++++++++++ scripts/compresses/compress_70b.sh | 4 +- scripts/manual/compress_manual.sh | 3 - 6 files changed, 1791 insertions(+), 19 deletions(-) create mode 100644 packing_experiments.ipynb diff --git a/cli/compress.py b/cli/compress.py index 0497fa9..43d168b 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -2,6 +2,7 @@ import json import torch import argparse +import safetensors as st from transformers import AutoTokenizer from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig @@ -26,7 +27,10 @@ def main(args): args.target_model, compress_config=compress_config, torch_dtype=torch.float16, - max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3:"10GIB", "cpu": "140GIB"} + # max_memory = {0: "1GIB", 1: "1GIB", "cpu": "140GIB"} + # simulate large model + max_memory = {0: "400MIB", 1: "400MIB", "cpu": "140GIB"} + ) target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": @@ -64,6 +68,27 @@ def main(args): # write to folder os.makedirs(args.outdir, exist_ok=True) # for weights that are not compressed, we calculate delta afterward compression + if args.large_model: + # for large models - save a temporary results to avoid re-run + tensors = {} + for name, param in target_model.named_parameters(): + if not param.is_meta: + tensors[name] = param.data.cpu().clone().detach() + st.torch.save_file(tensors, os.path.join(args.outdir, "temp.safetensors")) + + target_model_ref = AutoDeltaZipModelForCausalLM.from_pretrained( + args.target_model, + compress_config=compress_config, + torch_dtype=torch.float16, + ) + missing_state_dict = target_model_ref.state_dict() + missing_state_dict = { + k: v for k, v in missing_state_dict.items() if k not in tensors + } + target_model.load_state_dict(missing_state_dict, strict = False, assign=True) + for name, param in target_model.named_parameters(): + if param.is_meta: + print(f"[info] {name} is on meta") if args.base_model != "" and args.delta != "": compressed_modules = [] for x in base_model.inside_layer_modules: @@ -72,9 +97,12 @@ def main(args): if "bias" in name or all( [modules not in name for modules in compressed_modules] ): - target_model.state_dict()[name].copy_( - param - base_model.state_dict()[name] - ) + base_weight = base_model.state_dict()[name] + if base_weight.device != param.device: + base_weight = base_weight.to(param.device) + target_model.state_dict()[name] = param - base_weight + del base_model + # run a forward pass to make sure the model is working target_model.save_compressed(args.outdir) @@ -102,7 +130,8 @@ def main(args): parser.add_argument( "--lossless", type=str, default="gdeflate", choices=["gdeflate"] ) - parser.add_argument("--delta", type=str, choices=["subtract", "xor"], default="") + parser.add_argument("--delta", type=str, choices=["subtract", "xor"], default="subtract") + parser.add_argument("--large-model", action="store_true") parser.add_argument("--perc-damp", type=float, default=0.01) parser.add_argument("--outdir", type=str, default=".cache/compressed_models") parser.add_argument("--fast-tokenizer", action="store_true") diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index c8be9a0..9560232 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -244,7 +244,7 @@ def lossless_compress( use_triton: bool = False, use_cuda_fp16: bool = True, autotune_warmup_after_quantized: bool = False, - cache_examples_on_gpu: bool = True, + cache_examples_on_gpu: bool = False, ): self._compressed = True @@ -256,7 +256,7 @@ def lossy_compress( use_triton: bool = False, use_cuda_fp16: bool = True, autotune_warmup_after_quantized: bool = False, - cache_examples_on_gpu: bool = True, + cache_examples_on_gpu: bool = False, base_model=None, ): assert self.compressed == False, "Model is already compressed." @@ -526,16 +526,24 @@ def tmp(_, inp, out): delta_only = compressed_ws[ f"{self.layers_block_name}.{i}.{name}" ] - base_weight = base_model.model.state_dict()[ - f"{self.layers_block_name}.{i}.{name}.weight" - ] + # base_weight = base_model.model.state_dict()[ + # f"{self.layers_block_name}.{i}.{name}.weight" + # ] # assert torch.equal( # finetuned_weight, base_weight + delta_only # ) - subset[name].weight.data = compressed_ws[ + key_weight = compressed_ws[ f"{self.layers_block_name}.{i}.{name}" ] - + if subset[name].weight.is_meta: + subset[name].weight = torch.nn.Parameter(key_weight.clone().detach(), requires_grad=False).to(CPU) + else: + subset[name].weight.copy_(compressed_ws[ + f"{self.layers_block_name}.{i}.{name}" + ]) + + for name, param in self.model.named_parameters(): + print(f"{name}: {param.device}") self.model.config.use_cache = forward_pass_use_cache self._compressed = True torch.cuda.empty_cache() diff --git a/deltazip/modeling/_utils.py b/deltazip/modeling/_utils.py index 7455064..34d50ec 100644 --- a/deltazip/modeling/_utils.py +++ b/deltazip/modeling/_utils.py @@ -152,7 +152,7 @@ def pack_model( force_layer_back_to_cpu: bool = False, ): if force_layer_back_to_cpu: - model.to(CPU) + model = model.to(CPU) layers = find_layers(model) layers = {n: layers[n] for n in quantizers} make_quant( @@ -180,8 +180,6 @@ def pack_model( qlayers[name].pack(layers[name], scale, zero, g_idx) qlayers[name].to(layer_device) logger.info("Model packed.") - # after packing, we further apply lossless compression - def check_and_get_model_type(model_dir): config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) diff --git a/packing_experiments.ipynb b/packing_experiments.ipynb new file mode 100644 index 0000000..9fc9803 --- /dev/null +++ b/packing_experiments.ipynb @@ -0,0 +1,1738 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import torch\n", + "import argparse\n", + "from transformers import AutoTokenizer\n", + "from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[info] compress config: BaseCompressionConfig(bits=4, sparsity=0.5, prunen=0, prunem=0, group_size=-1, group_rows=-1, block_size=128, damp_percent=0.01, desc_act=True, sym=False, true_sequential=True, lossless='gdeflate', dtype='fp16')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[info] base model is defined, delta mode enabled\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-06 16:22:38.875 | INFO | deltazip.modeling._base:from_pretrained:699 - Using [None] to load model.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "target model on cuda:0, base model on cpu\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-06 16:22:43.451 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:43.455 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:44.024 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:44.025 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.53s\n", + "2024-04-06 16:22:44.026 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5076503753662109\n", + "2024-04-06 16:22:44.028 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 2.1982583348290063e-05\n", + "2024-04-06 16:22:44.034 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:44.040 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:44.528 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:44.531 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:22:44.532 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5074329376220703\n", + "2024-04-06 16:22:44.533 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 2.5729643311933614e-05\n", + "2024-04-06 16:22:44.536 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:44.543 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:45.029 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:45.031 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:22:45.032 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5031425952911377\n", + "2024-04-06 16:22:45.033 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00010592874605208635\n", + "2024-04-06 16:22:46.113 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:46.118 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:46.604 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:46.605 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:22:46.606 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5063765048980713\n", + "2024-04-06 16:22:46.608 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 9.472193482906732e-07\n", + "2024-04-06 16:22:46.608 | WARNING | deltazip.core.sparsegpt:fasterprune:186 - high sparsity change detected: 0.006271839141845703 -> 0.5063765048980713\n", + "2024-04-06 16:22:47.801 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:47.813 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:48.316 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:48.317 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:22:48.318 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006497990001332\n", + "2024-04-06 16:22:48.319 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0010901934001594782\n", + "2024-04-06 16:22:48.332 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:48.348 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:48.844 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:48.846 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:22:48.846 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006132992831144\n", + "2024-04-06 16:22:48.848 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0010906874667853117\n", + "2024-04-06 16:22:50.609 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 1/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:50.617 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:51.996 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:51.998 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.38s\n", + "2024-04-06 16:22:51.999 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5017923008311879\n", + "2024-04-06 16:22:52.000 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 4.012502358818892e-06\n", + "2024-04-06 16:22:54.540 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:54.546 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:55.106 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:55.107 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.53s\n", + "2024-04-06 16:22:55.108 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5072593688964844\n", + "2024-04-06 16:22:55.110 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 7.576734060421586e-05\n", + "2024-04-06 16:22:55.117 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:55.126 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:55.639 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:55.642 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:22:55.643 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5053768157958984\n", + "2024-04-06 16:22:55.643 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 5.5007407354423776e-05\n", + "2024-04-06 16:22:55.646 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:55.654 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:56.165 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:56.167 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:22:56.168 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5049338340759277\n", + "2024-04-06 16:22:56.169 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.000469283084385097\n", + "2024-04-06 16:22:57.152 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:57.159 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:57.670 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:57.671 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:22:57.672 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5042991638183594\n", + "2024-04-06 16:22:57.674 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 7.153379556257278e-06\n", + "2024-04-06 16:22:58.867 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:58.879 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:59.399 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:59.401 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.52s\n", + "2024-04-06 16:22:59.402 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000615553422407\n", + "2024-04-06 16:22:59.403 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0029966800939291716\n", + "2024-04-06 16:22:59.417 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:22:59.433 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:22:59.951 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:22:59.953 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.52s\n", + "2024-04-06 16:22:59.954 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000563534823331\n", + "2024-04-06 16:22:59.955 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002944690641015768\n", + "2024-04-06 16:23:01.661 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 2/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:01.678 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:03.155 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:03.157 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.48s\n", + "2024-04-06 16:23:03.157 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5002418864857067\n", + "2024-04-06 16:23:03.159 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 1.0214722351520322e-05\n", + "2024-04-06 16:23:05.549 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:05.554 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:06.065 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:06.066 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:06.067 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5068206787109375\n", + "2024-04-06 16:23:06.069 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00015555755817331374\n", + "2024-04-06 16:23:06.073 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:06.077 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:06.588 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:06.590 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:06.591 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5059642791748047\n", + "2024-04-06 16:23:06.592 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00010672895587049425\n", + "2024-04-06 16:23:06.595 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:06.602 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:07.120 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:07.121 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.52s\n", + "2024-04-06 16:23:07.122 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5049021244049072\n", + "2024-04-06 16:23:07.123 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0009432487422600389\n", + "2024-04-06 16:23:08.122 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:08.129 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:08.642 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:08.643 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:08.644 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5026555061340332\n", + "2024-04-06 16:23:08.645 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 1.561260432936251e-05\n", + "2024-04-06 16:23:09.827 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:09.840 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:10.370 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:10.372 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.53s\n", + "2024-04-06 16:23:10.373 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000283501364968\n", + "2024-04-06 16:23:10.374 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00503050209954381\n", + "2024-04-06 16:23:10.388 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:10.405 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:10.920 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:10.922 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.52s\n", + "2024-04-06 16:23:10.923 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000424818559126\n", + "2024-04-06 16:23:10.924 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.004969435278326273\n", + "2024-04-06 16:23:12.619 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 3/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:12.631 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:14.084 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:14.086 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.45s\n", + "2024-04-06 16:23:14.087 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5002686760642312\n", + "2024-04-06 16:23:14.088 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0003014366375282407\n", + "2024-04-06 16:23:16.444 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:16.447 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:16.960 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:16.961 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:16.962 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5020866394042969\n", + "2024-04-06 16:23:16.963 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0006573480786755681\n", + "2024-04-06 16:23:16.968 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:16.973 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:17.485 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:17.487 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:17.488 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5021152496337891\n", + "2024-04-06 16:23:17.489 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0006055326666682959\n", + "2024-04-06 16:23:17.491 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:17.499 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:18.011 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:18.013 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:18.014 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5013325214385986\n", + "2024-04-06 16:23:18.015 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0048964861780405045\n", + "2024-04-06 16:23:19.052 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:19.059 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:19.580 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:19.584 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.52s\n", + "2024-04-06 16:23:19.585 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5016167163848877\n", + "2024-04-06 16:23:19.586 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 1.9268416508566588e-05\n", + "2024-04-06 16:23:20.777 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:20.789 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:21.323 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:21.325 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.53s\n", + "2024-04-06 16:23:21.326 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000227147882635\n", + "2024-04-06 16:23:21.327 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0070329029113054276\n", + "2024-04-06 16:23:21.341 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:21.358 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:21.887 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:21.889 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.53s\n", + "2024-04-06 16:23:21.890 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000091032548384\n", + "2024-04-06 16:23:21.891 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00704065291211009\n", + "2024-04-06 16:23:23.634 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 4/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:23.646 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:25.126 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:25.127 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.48s\n", + "2024-04-06 16:23:25.128 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500228535045277\n", + "2024-04-06 16:23:25.129 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 3.76606076315511e-05\n", + "2024-04-06 16:23:27.676 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:27.682 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:28.184 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:28.185 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:23:28.186 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5022754669189453\n", + "2024-04-06 16:23:28.187 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0015082701575011015\n", + "2024-04-06 16:23:28.193 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:28.196 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:28.698 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:28.700 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:23:28.700 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5019741058349609\n", + "2024-04-06 16:23:28.702 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0012655367609113455\n", + "2024-04-06 16:23:28.704 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:28.711 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:29.215 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:29.217 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:23:29.218 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5016508102416992\n", + "2024-04-06 16:23:29.219 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.011699706315994263\n", + "2024-04-06 16:23:30.211 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:30.217 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:30.721 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:30.722 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.50s\n", + "2024-04-06 16:23:30.723 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5023784637451172\n", + "2024-04-06 16:23:30.724 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 6.390993075910956e-05\n", + "2024-04-06 16:23:31.944 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:31.955 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:32.467 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:32.469 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:32.470 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000099702314897\n", + "2024-04-06 16:23:32.471 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.009830418042838573\n", + "2024-04-06 16:23:32.485 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:32.502 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:33.013 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:33.015 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.51s\n", + "2024-04-06 16:23:33.016 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500006068836559\n", + "2024-04-06 16:23:33.017 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.009835522621870041\n", + "2024-04-06 16:23:34.747 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 5/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:34.759 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:36.191 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:36.192 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.43s\n", + "2024-04-06 16:23:36.193 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5001527612859552\n", + "2024-04-06 16:23:36.194 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 6.474729161709547e-05\n", + "2024-04-06 16:23:38.581 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:38.584 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:39.057 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:39.058 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:23:39.059 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5016460418701172\n", + "2024-04-06 16:23:39.061 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00136222830042243\n", + "2024-04-06 16:23:39.066 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:39.070 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:39.542 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:39.543 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:23:39.544 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5017166137695312\n", + "2024-04-06 16:23:39.545 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00111478753387928\n", + "2024-04-06 16:23:39.548 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:39.555 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:40.035 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:40.037 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:23:40.038 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011279582977295\n", + "2024-04-06 16:23:40.039 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.010472704656422138\n", + "2024-04-06 16:23:41.097 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:41.103 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:41.582 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:41.584 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:23:41.584 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011661052703857\n", + "2024-04-06 16:23:41.586 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 6.352206401061267e-05\n", + "2024-04-06 16:23:42.768 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:42.785 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:43.274 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:43.275 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:23:43.276 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.50002366846258\n", + "2024-04-06 16:23:43.277 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.012194154784083366\n", + "2024-04-06 16:23:43.291 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:43.308 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:43.790 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:43.792 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:23:43.793 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500019680369984\n", + "2024-04-06 16:23:43.794 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.012199473567306995\n", + "2024-04-06 16:23:45.474 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 6/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:45.490 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:46.838 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:46.840 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:23:46.840 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5001662861217152\n", + "2024-04-06 16:23:46.842 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 9.253668395103887e-05\n", + "2024-04-06 16:23:49.320 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:49.323 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:49.797 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:49.798 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:23:49.799 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5035476684570312\n", + "2024-04-06 16:23:49.800 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0011800797656178474\n", + "2024-04-06 16:23:49.805 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:49.809 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:50.282 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:50.283 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:23:50.284 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5044460296630859\n", + "2024-04-06 16:23:50.285 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.001060723327100277\n", + "2024-04-06 16:23:50.288 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:50.295 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:50.773 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:50.775 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:23:50.776 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.502988338470459\n", + "2024-04-06 16:23:50.777 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00928463228046894\n", + "2024-04-06 16:23:51.805 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:51.811 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:52.287 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:52.289 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:23:52.290 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5015163421630859\n", + "2024-04-06 16:23:52.291 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 7.255504897329956e-05\n", + "2024-04-06 16:23:53.478 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:53.494 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:53.980 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:53.982 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:23:53.983 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000402277166193\n", + "2024-04-06 16:23:53.984 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.013560131192207336\n", + "2024-04-06 16:23:53.996 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:54.013 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:54.497 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:54.500 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:23:54.501 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000317313454368\n", + "2024-04-06 16:23:54.502 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.013523709960281849\n", + "2024-04-06 16:23:56.202 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 7/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:56.218 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:23:57.566 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:23:57.568 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:23:57.569 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500144438310103\n", + "2024-04-06 16:23:57.570 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00012878183042630553\n", + "2024-04-06 16:23:59.959 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:23:59.962 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:00.436 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:00.438 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:00.438 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5019321441650391\n", + "2024-04-06 16:24:00.440 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.001484193024225533\n", + "2024-04-06 16:24:00.446 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:00.450 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:00.923 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:00.924 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:00.925 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5022869110107422\n", + "2024-04-06 16:24:00.926 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0012478764401748776\n", + "2024-04-06 16:24:00.928 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:00.936 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:01.415 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:01.416 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:01.417 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5013854503631592\n", + "2024-04-06 16:24:01.418 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.013156186789274216\n", + "2024-04-06 16:24:02.460 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:02.466 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:02.940 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:02.942 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:02.943 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5009360313415527\n", + "2024-04-06 16:24:02.944 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00011269289097981527\n", + "2024-04-06 16:24:04.149 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:04.164 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:04.652 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:04.654 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:24:04.655 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000595612959429\n", + "2024-04-06 16:24:04.656 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.01642456464469433\n", + "2024-04-06 16:24:04.668 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:04.685 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:05.168 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:05.169 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:05.170 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000600814819336\n", + "2024-04-06 16:24:05.171 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.016113294288516045\n", + "2024-04-06 16:24:06.909 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 8/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:06.921 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:08.275 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:08.276 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:24:08.277 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5007985721934926\n", + "2024-04-06 16:24:08.278 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0004247925244271755\n", + "2024-04-06 16:24:10.720 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:10.726 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:11.200 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:11.201 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:11.202 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5050716400146484\n", + "2024-04-06 16:24:11.203 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0018115532584488392\n", + "2024-04-06 16:24:11.209 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:11.213 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:11.686 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:11.687 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:11.688 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5054264068603516\n", + "2024-04-06 16:24:11.689 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0014785039238631725\n", + "2024-04-06 16:24:11.691 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:11.699 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:12.176 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:12.178 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:12.179 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5038654804229736\n", + "2024-04-06 16:24:12.180 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.01343337632715702\n", + "2024-04-06 16:24:13.185 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:13.192 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:13.665 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:13.667 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:13.668 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5028519630432129\n", + "2024-04-06 16:24:13.668 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0001210411501233466\n", + "2024-04-06 16:24:14.847 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:14.863 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:15.351 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:15.353 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:24:15.354 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000303441827948\n", + "2024-04-06 16:24:15.355 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.018729006871581078\n", + "2024-04-06 16:24:15.367 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:15.384 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:15.866 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:15.868 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:15.869 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000260959972035\n", + "2024-04-06 16:24:15.870 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.019087491557002068\n", + "2024-04-06 16:24:17.592 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 9/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:17.607 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:18.957 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:18.959 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:24:18.960 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5009544545953925\n", + "2024-04-06 16:24:18.961 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00021135879796929657\n", + "2024-04-06 16:24:21.387 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:21.390 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:21.864 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:21.866 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:21.867 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5021705627441406\n", + "2024-04-06 16:24:21.868 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.001680182060226798\n", + "2024-04-06 16:24:21.873 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:21.876 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:22.348 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:22.350 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:22.351 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5024509429931641\n", + "2024-04-06 16:24:22.352 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0013616220094263554\n", + "2024-04-06 16:24:22.354 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:22.361 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:22.840 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:22.841 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:22.842 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5017237663269043\n", + "2024-04-06 16:24:22.843 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.012833856046199799\n", + "2024-04-06 16:24:23.851 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:23.858 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:24.334 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:24.336 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:24.337 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5012185573577881\n", + "2024-04-06 16:24:24.338 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.000249100528890267\n", + "2024-04-06 16:24:25.528 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:25.540 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:26.025 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:26.026 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:26.027 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000765540383079\n", + "2024-04-06 16:24:26.029 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.020836159586906433\n", + "2024-04-06 16:24:26.040 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:26.058 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:26.541 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:26.543 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:26.544 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000566135753285\n", + "2024-04-06 16:24:26.545 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.021911511197686195\n", + "2024-04-06 16:24:28.258 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 10/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:28.273 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:29.619 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:29.621 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:24:29.621 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5004780509255149\n", + "2024-04-06 16:24:29.623 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0003382523718755692\n", + "2024-04-06 16:24:32.019 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:32.022 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:32.493 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:32.495 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:32.496 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5018558502197266\n", + "2024-04-06 16:24:32.497 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.001964160008355975\n", + "2024-04-06 16:24:32.502 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:32.506 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:32.978 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:32.979 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:32.980 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5021896362304688\n", + "2024-04-06 16:24:32.981 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.001581624848768115\n", + "2024-04-06 16:24:32.983 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:32.991 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:33.471 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:33.472 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:33.473 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.501248836517334\n", + "2024-04-06 16:24:33.475 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.015915337949991226\n", + "2024-04-06 16:24:34.482 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:34.488 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:34.963 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:34.964 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:34.965 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011732578277588\n", + "2024-04-06 16:24:34.966 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.000285788846667856\n", + "2024-04-06 16:24:36.147 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:36.163 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:36.650 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:36.651 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:24:36.652 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000655434348367\n", + "2024-04-06 16:24:36.654 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.022175664082169533\n", + "2024-04-06 16:24:36.666 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:36.683 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:37.165 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:37.166 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:37.167 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000515851107511\n", + "2024-04-06 16:24:37.169 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.023308206349611282\n", + "2024-04-06 16:24:38.882 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 11/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:38.897 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:40.244 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:40.246 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:24:40.247 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5005718577991832\n", + "2024-04-06 16:24:40.248 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0003610536514315754\n", + "2024-04-06 16:24:42.628 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:42.632 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:43.107 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:43.108 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:43.109 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5020961761474609\n", + "2024-04-06 16:24:43.110 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002461210824549198\n", + "2024-04-06 16:24:43.115 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:43.118 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:43.591 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:43.592 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:43.593 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5027484893798828\n", + "2024-04-06 16:24:43.595 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002057537669315934\n", + "2024-04-06 16:24:43.597 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:43.603 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:44.082 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:44.084 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:44.085 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5014543533325195\n", + "2024-04-06 16:24:44.086 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.021472211927175522\n", + "2024-04-06 16:24:45.122 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:45.128 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:45.602 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:45.604 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:45.604 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5018658638000488\n", + "2024-04-06 16:24:45.606 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0004631059127859771\n", + "2024-04-06 16:24:46.804 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:46.816 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:47.304 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:47.306 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:24:47.307 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000824494795366\n", + "2024-04-06 16:24:47.308 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.029433075338602066\n", + "2024-04-06 16:24:47.320 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:47.337 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:47.821 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:47.823 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:47.823 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000599080866034\n", + "2024-04-06 16:24:47.825 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.030373087152838707\n", + "2024-04-06 16:24:49.526 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 12/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:49.541 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:50.888 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:50.889 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:24:50.890 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500440077348189\n", + "2024-04-06 16:24:50.892 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0005395205225795507\n", + "2024-04-06 16:24:53.352 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:53.356 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:53.829 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:53.830 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:53.831 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5017890930175781\n", + "2024-04-06 16:24:53.832 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002193980850279331\n", + "2024-04-06 16:24:53.836 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:53.840 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:54.312 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:54.313 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:24:54.314 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5025844573974609\n", + "2024-04-06 16:24:54.315 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0017616496188566089\n", + "2024-04-06 16:24:54.317 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:54.326 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:54.803 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:54.804 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:54.805 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5014488697052002\n", + "2024-04-06 16:24:54.807 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.01832873746752739\n", + "2024-04-06 16:24:55.816 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:55.822 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:56.297 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:56.299 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:56.300 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5005347728729248\n", + "2024-04-06 16:24:56.301 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0005491751944646239\n", + "2024-04-06 16:24:57.499 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:57.512 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:58.000 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:58.001 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:24:58.002 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5001244978471235\n", + "2024-04-06 16:24:58.003 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.03536701202392578\n", + "2024-04-06 16:24:58.015 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:24:58.032 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:24:58.514 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:24:58.516 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:24:58.517 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000939802689985\n", + "2024-04-06 16:24:58.518 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.03793979808688164\n", + "2024-04-06 16:25:00.227 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 13/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:00.240 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:01.588 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:01.590 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:01.591 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5002488222989169\n", + "2024-04-06 16:25:01.592 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0008146624313667417\n", + "2024-04-06 16:25:04.013 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:04.017 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:04.489 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:04.491 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:04.492 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5016460418701172\n", + "2024-04-06 16:25:04.493 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002949060872197151\n", + "2024-04-06 16:25:04.497 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:04.501 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:04.973 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:04.974 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:04.975 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5022354125976562\n", + "2024-04-06 16:25:04.977 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0022565608378499746\n", + "2024-04-06 16:25:04.979 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:04.986 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:05.466 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:05.469 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:05.470 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5013332366943359\n", + "2024-04-06 16:25:05.471 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.02494869753718376\n", + "2024-04-06 16:25:06.474 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:06.480 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:06.954 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:06.955 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:06.956 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5009686946868896\n", + "2024-04-06 16:25:06.958 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0007251641945913434\n", + "2024-04-06 16:25:08.151 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:08.164 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:08.651 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:08.653 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:25:08.654 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000849637118253\n", + "2024-04-06 16:25:08.656 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.043785590678453445\n", + "2024-04-06 16:25:08.667 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:08.684 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:09.168 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:09.171 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:09.172 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000794150612571\n", + "2024-04-06 16:25:09.173 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.04604436084628105\n", + "2024-04-06 16:25:10.871 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 14/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:10.887 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:12.234 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:12.236 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:12.237 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5005337975241921\n", + "2024-04-06 16:25:12.238 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0011046254076063633\n", + "2024-04-06 16:25:14.649 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:14.653 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:15.127 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:15.128 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:15.129 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011196136474609\n", + "2024-04-06 16:25:15.130 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002981733065098524\n", + "2024-04-06 16:25:15.134 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:15.138 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:15.610 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:15.612 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:15.613 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5018310546875\n", + "2024-04-06 16:25:15.614 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0022357390262186527\n", + "2024-04-06 16:25:15.616 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:15.623 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:16.103 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:16.105 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:16.105 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5009865760803223\n", + "2024-04-06 16:25:16.107 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.024414964020252228\n", + "2024-04-06 16:25:17.130 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:17.137 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:17.610 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:17.612 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:17.613 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011510848999023\n", + "2024-04-06 16:25:17.614 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0012784330174326897\n", + "2024-04-06 16:25:18.813 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:18.825 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:19.314 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:19.315 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:25:19.316 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000607750632546\n", + "2024-04-06 16:25:19.318 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.05294129624962807\n", + "2024-04-06 16:25:19.329 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:19.345 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:19.829 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:19.831 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:19.832 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000459497625177\n", + "2024-04-06 16:25:19.833 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.05615214630961418\n", + "2024-04-06 16:25:21.559 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 15/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:21.571 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:22.920 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:22.921 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:22.922 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006797096946023\n", + "2024-04-06 16:25:22.924 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0012955213896930218\n", + "2024-04-06 16:25:25.329 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:25.333 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:25.806 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:25.807 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:25.808 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5014247894287109\n", + "2024-04-06 16:25:25.809 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0038415922317653894\n", + "2024-04-06 16:25:25.814 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:25.816 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:26.291 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:26.292 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:26.293 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5024127960205078\n", + "2024-04-06 16:25:26.294 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0027475005481392145\n", + "2024-04-06 16:25:26.296 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:26.302 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:26.781 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:26.782 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:26.783 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5010077953338623\n", + "2024-04-06 16:25:26.784 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.03421148657798767\n", + "2024-04-06 16:25:27.792 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:27.798 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:28.273 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:28.274 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:28.275 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5010995864868164\n", + "2024-04-06 16:25:28.276 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0012806300073862076\n", + "2024-04-06 16:25:29.465 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:29.479 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:29.966 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:29.968 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:25:29.969 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000422217629172\n", + "2024-04-06 16:25:29.970 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.06549375504255295\n", + "2024-04-06 16:25:29.981 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:29.997 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:30.480 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:30.482 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:30.483 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000415281815962\n", + "2024-04-06 16:25:30.484 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.06718239188194275\n", + "2024-04-06 16:25:32.196 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 16/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:32.210 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:33.559 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:33.560 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:33.561 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5008833625099876\n", + "2024-04-06 16:25:33.563 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0022927632089704275\n", + "2024-04-06 16:25:35.957 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:35.961 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:36.435 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:36.436 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:36.437 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5015907287597656\n", + "2024-04-06 16:25:36.438 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0037893822882324457\n", + "2024-04-06 16:25:36.443 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:36.447 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:36.919 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:36.920 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:36.920 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.50238037109375\n", + "2024-04-06 16:25:36.922 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0026723630726337433\n", + "2024-04-06 16:25:36.924 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:36.930 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:37.409 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:37.411 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:37.412 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5013575553894043\n", + "2024-04-06 16:25:37.413 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.03465843200683594\n", + "2024-04-06 16:25:38.421 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:38.427 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:38.902 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:38.903 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:38.904 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500582218170166\n", + "2024-04-06 16:25:38.905 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0013825519708916545\n", + "2024-04-06 16:25:40.105 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:40.118 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:40.606 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:40.608 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:25:40.609 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000259226018732\n", + "2024-04-06 16:25:40.610 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.08530180156230927\n", + "2024-04-06 16:25:40.621 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:40.637 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:41.120 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:41.122 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:41.122 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000261826948686\n", + "2024-04-06 16:25:41.124 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.08672088384628296\n", + "2024-04-06 16:25:42.826 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 17/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:42.840 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:44.189 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:44.190 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:44.191 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.50042724609375\n", + "2024-04-06 16:25:44.192 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.003855520859360695\n", + "2024-04-06 16:25:46.632 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:46.636 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:47.109 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:47.110 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:47.111 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5016193389892578\n", + "2024-04-06 16:25:47.112 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0052176592871546745\n", + "2024-04-06 16:25:47.117 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:47.121 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:47.593 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:47.594 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:47.595 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.50201416015625\n", + "2024-04-06 16:25:47.597 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.004013842437416315\n", + "2024-04-06 16:25:47.599 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:47.605 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:48.084 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:48.086 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:48.087 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011568069458008\n", + "2024-04-06 16:25:48.088 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.04289015382528305\n", + "2024-04-06 16:25:49.102 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:49.108 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:49.582 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:49.584 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:49.585 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5005784034729004\n", + "2024-04-06 16:25:49.586 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0019721752032637596\n", + "2024-04-06 16:25:50.790 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:50.804 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:51.291 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:51.293 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:25:51.294 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000153454867277\n", + "2024-04-06 16:25:51.295 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.10207733511924744\n", + "2024-04-06 16:25:51.306 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:51.322 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:51.806 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:51.808 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:51.809 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.500025749206543\n", + "2024-04-06 16:25:51.810 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.1032959520816803\n", + "2024-04-06 16:25:53.544 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 18/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:53.558 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:54.910 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:54.911 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:25:54.912 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5002855821089311\n", + "2024-04-06 16:25:54.913 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.004597197286784649\n", + "2024-04-06 16:25:57.370 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:57.374 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:57.848 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:57.849 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:57.850 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5012321472167969\n", + "2024-04-06 16:25:57.851 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.006456299219280481\n", + "2024-04-06 16:25:57.856 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:57.860 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:58.331 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:58.333 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:25:58.334 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5020523071289062\n", + "2024-04-06 16:25:58.335 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0044739749282598495\n", + "2024-04-06 16:25:58.337 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:58.344 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:25:58.822 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:25:58.824 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:25:58.825 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011134147644043\n", + "2024-04-06 16:25:58.826 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.052539028227329254\n", + "2024-04-06 16:25:59.838 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:25:59.844 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:00.320 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:00.322 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:00.323 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006330013275146\n", + "2024-04-06 16:26:00.324 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002799475332722068\n", + "2024-04-06 16:26:01.516 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:01.526 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:02.014 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:02.015 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:26:02.016 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000250556252219\n", + "2024-04-06 16:26:02.018 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.1313001960515976\n", + "2024-04-06 16:26:02.028 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:02.044 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:02.527 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:02.529 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:02.530 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000277432528409\n", + "2024-04-06 16:26:02.531 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.13446550071239471\n", + "2024-04-06 16:26:04.235 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 19/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:04.248 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:05.597 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:05.599 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:26:05.600 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006138194691051\n", + "2024-04-06 16:26:05.602 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.007972543127834797\n", + "2024-04-06 16:26:07.993 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:07.996 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:08.468 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:08.469 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:08.470 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5012111663818359\n", + "2024-04-06 16:26:08.471 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.006476877257227898\n", + "2024-04-06 16:26:08.476 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:08.480 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:08.951 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:08.952 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:08.953 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5019054412841797\n", + "2024-04-06 16:26:08.954 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.00455900002270937\n", + "2024-04-06 16:26:08.956 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:08.963 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:09.442 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:09.443 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:09.444 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5011072158813477\n", + "2024-04-06 16:26:09.446 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.05025868117809296\n", + "2024-04-06 16:26:10.453 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:10.458 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:10.934 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:10.937 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:10.938 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006864070892334\n", + "2024-04-06 16:26:10.939 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.002847852883860469\n", + "2024-04-06 16:26:12.133 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:12.141 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:12.628 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:12.629 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:26:12.630 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000218478116122\n", + "2024-04-06 16:26:12.632 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.16629165410995483\n", + "2024-04-06 16:26:12.643 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:12.659 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:13.142 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:13.144 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:13.145 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000241886485707\n", + "2024-04-06 16:26:13.146 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.17055238783359528\n", + "2024-04-06 16:26:14.854 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 20/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:14.865 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:16.213 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:16.215 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:26:16.216 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006816170432351\n", + "2024-04-06 16:26:16.217 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.01261807605624199\n", + "2024-04-06 16:26:18.640 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:18.644 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:19.117 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:19.118 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:19.119 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5009040832519531\n", + "2024-04-06 16:26:19.120 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.006057701539248228\n", + "2024-04-06 16:26:19.125 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:19.129 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:19.602 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:19.604 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:19.604 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5013103485107422\n", + "2024-04-06 16:26:19.605 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.005127266515046358\n", + "2024-04-06 16:26:19.608 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:19.614 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:20.093 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:20.095 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:20.095 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5008242130279541\n", + "2024-04-06 16:26:20.097 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.04915214702486992\n", + "2024-04-06 16:26:21.123 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:21.128 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:21.604 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:21.605 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:21.606 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5004251003265381\n", + "2024-04-06 16:26:21.608 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0038547725416719913\n", + "2024-04-06 16:26:22.807 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:22.815 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:23.303 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:23.304 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:26:23.305 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000276565551758\n", + "2024-04-06 16:26:23.307 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.2006303071975708\n", + "2024-04-06 16:26:23.317 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:23.333 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:23.817 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:23.820 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.49s\n", + "2024-04-06 16:26:23.821 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000185533003374\n", + "2024-04-06 16:26:23.822 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.2124703824520111\n", + "2024-04-06 16:26:25.551 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 21/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:25.566 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:26.911 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:26.912 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.34s\n", + "2024-04-06 16:26:26.913 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006301186301492\n", + "2024-04-06 16:26:26.915 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.0179909560829401\n", + "2024-04-06 16:26:29.317 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.k_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:29.321 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:29.794 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:29.795 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:29.796 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5008163452148438\n", + "2024-04-06 16:26:29.797 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.004926344845443964\n", + "2024-04-06 16:26:29.802 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.v_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:29.804 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:30.277 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:30.279 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:30.279 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5006866455078125\n", + "2024-04-06 16:26:30.281 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.005220985971391201\n", + "2024-04-06 16:26:30.283 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.q_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:30.289 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:30.769 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:30.770 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:30.771 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5005145072937012\n", + "2024-04-06 16:26:30.772 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.04660412669181824\n", + "2024-04-06 16:26:31.793 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression self_attn.o_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:31.798 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:32.273 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:32.274 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.47s\n", + "2024-04-06 16:26:32.275 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5010039806365967\n", + "2024-04-06 16:26:32.276 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.005558461416512728\n", + "2024-04-06 16:26:33.468 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.up_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:33.479 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:33.963 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:33.964 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:33.965 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000232349742543\n", + "2024-04-06 16:26:33.967 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.19974088668823242\n", + "2024-04-06 16:26:33.978 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.gate_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:33.994 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:34.478 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:34.480 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 0.48s\n", + "2024-04-06 16:26:34.481 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5000215877186168\n", + "2024-04-06 16:26:34.482 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.21952283382415771\n", + "2024-04-06 16:26:36.197 | DEBUG | deltazip.modeling._base:lossy_compress:425 - Compression mlp.down_proj in layer 22/22 - sparsity: 0.5, bits: 4\n", + "2024-04-06 16:26:36.211 | INFO | deltazip.core.sparsegpt:fasterprune:59 - compression operates on delta...\n", + "2024-04-06 16:26:37.560 | DEBUG | deltazip.core.sparsegpt:fasterprune:165 - adding base weight for correct forward...\n", + "2024-04-06 16:26:37.561 | INFO | deltazip.core.sparsegpt:fasterprune:175 - duration: 1.35s\n", + "2024-04-06 16:26:37.562 | INFO | deltazip.core.sparsegpt:fasterprune:176 - sparsity: 0.5026656931096857\n", + "2024-04-06 16:26:37.564 | INFO | deltazip.core.sparsegpt:fasterprune:182 - avg loss: 0.02478119730949402\n", + "WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu.\n", + "2024-04-06 16:26:38.673 | INFO | deltazip.modeling._base:lossy_compress:516 - Compress finished... moving compressed delta back\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model.embed_tokens.weight: cuda:0\n", + "model.layers.0.self_attn.q_proj.weight: cuda:0\n", + "model.layers.0.self_attn.k_proj.weight: cuda:0\n", + "model.layers.0.self_attn.v_proj.weight: cuda:0\n", + "model.layers.0.self_attn.o_proj.weight: cuda:0\n", + "model.layers.0.mlp.gate_proj.weight: cuda:0\n", + "model.layers.0.mlp.up_proj.weight: cuda:0\n", + "model.layers.0.mlp.down_proj.weight: cuda:0\n", + "model.layers.0.input_layernorm.weight: cuda:0\n", + "model.layers.0.post_attention_layernorm.weight: cuda:0\n", + "model.layers.1.self_attn.q_proj.weight: cuda:1\n", + "model.layers.1.self_attn.k_proj.weight: cuda:1\n", + "model.layers.1.self_attn.v_proj.weight: cuda:1\n", + "model.layers.1.self_attn.o_proj.weight: cuda:1\n", + "model.layers.1.mlp.gate_proj.weight: cuda:1\n", + "model.layers.1.mlp.up_proj.weight: cuda:1\n", + "model.layers.1.mlp.down_proj.weight: cuda:1\n", + "model.layers.1.input_layernorm.weight: cuda:1\n", + "model.layers.1.post_attention_layernorm.weight: cuda:1\n", + "model.layers.2.self_attn.q_proj.weight: cuda:1\n", + "model.layers.2.self_attn.k_proj.weight: cuda:1\n", + "model.layers.2.self_attn.v_proj.weight: cuda:1\n", + "model.layers.2.self_attn.o_proj.weight: cuda:1\n", + "model.layers.2.mlp.gate_proj.weight: cuda:1\n", + "model.layers.2.mlp.up_proj.weight: cuda:1\n", + "model.layers.2.mlp.down_proj.weight: cuda:1\n", + "model.layers.2.input_layernorm.weight: cuda:1\n", + "model.layers.2.post_attention_layernorm.weight: cuda:1\n", + "model.layers.3.self_attn.q_proj.weight: cuda:1\n", + "model.layers.3.self_attn.k_proj.weight: cuda:1\n", + "model.layers.3.self_attn.v_proj.weight: cuda:1\n", + "model.layers.3.self_attn.o_proj.weight: cuda:1\n", + "model.layers.3.mlp.gate_proj.weight: cuda:1\n", + "model.layers.3.mlp.up_proj.weight: cuda:1\n", + "model.layers.3.mlp.down_proj.weight: cuda:1\n", + "model.layers.3.input_layernorm.weight: cuda:1\n", + "model.layers.3.post_attention_layernorm.weight: cuda:1\n", + "model.layers.4.self_attn.q_proj.weight: cuda:1\n", + "model.layers.4.self_attn.k_proj.weight: cuda:1\n", + "model.layers.4.self_attn.v_proj.weight: cuda:1\n", + "model.layers.4.self_attn.o_proj.weight: cuda:1\n", + "model.layers.4.mlp.gate_proj.weight: cuda:1\n", + "model.layers.4.mlp.up_proj.weight: cuda:1\n", + "model.layers.4.mlp.down_proj.weight: cuda:1\n", + "model.layers.4.input_layernorm.weight: cuda:1\n", + "model.layers.4.post_attention_layernorm.weight: cuda:1\n", + "model.layers.5.self_attn.q_proj.weight: cpu\n", + "model.layers.5.self_attn.k_proj.weight: cpu\n", + "model.layers.5.self_attn.v_proj.weight: cpu\n", + "model.layers.5.self_attn.o_proj.weight: cpu\n", + "model.layers.5.mlp.gate_proj.weight: cpu\n", + "model.layers.5.mlp.up_proj.weight: cpu\n", + "model.layers.5.mlp.down_proj.weight: cpu\n", + "model.layers.5.input_layernorm.weight: meta\n", + "model.layers.5.post_attention_layernorm.weight: meta\n", + "model.layers.6.self_attn.q_proj.weight: cpu\n", + "model.layers.6.self_attn.k_proj.weight: cpu\n", + "model.layers.6.self_attn.v_proj.weight: cpu\n", + "model.layers.6.self_attn.o_proj.weight: cpu\n", + "model.layers.6.mlp.gate_proj.weight: cpu\n", + "model.layers.6.mlp.up_proj.weight: cpu\n", + "model.layers.6.mlp.down_proj.weight: cpu\n", + "model.layers.6.input_layernorm.weight: meta\n", + "model.layers.6.post_attention_layernorm.weight: meta\n", + "model.layers.7.self_attn.q_proj.weight: cpu\n", + "model.layers.7.self_attn.k_proj.weight: cpu\n", + "model.layers.7.self_attn.v_proj.weight: cpu\n", + "model.layers.7.self_attn.o_proj.weight: cpu\n", + "model.layers.7.mlp.gate_proj.weight: cpu\n", + "model.layers.7.mlp.up_proj.weight: cpu\n", + "model.layers.7.mlp.down_proj.weight: cpu\n", + "model.layers.7.input_layernorm.weight: meta\n", + "model.layers.7.post_attention_layernorm.weight: meta\n", + "model.layers.8.self_attn.q_proj.weight: cpu\n", + "model.layers.8.self_attn.k_proj.weight: cpu\n", + "model.layers.8.self_attn.v_proj.weight: cpu\n", + "model.layers.8.self_attn.o_proj.weight: cpu\n", + "model.layers.8.mlp.gate_proj.weight: cpu\n", + "model.layers.8.mlp.up_proj.weight: cpu\n", + "model.layers.8.mlp.down_proj.weight: cpu\n", + "model.layers.8.input_layernorm.weight: meta\n", + "model.layers.8.post_attention_layernorm.weight: meta\n", + "model.layers.9.self_attn.q_proj.weight: cpu\n", + "model.layers.9.self_attn.k_proj.weight: cpu\n", + "model.layers.9.self_attn.v_proj.weight: cpu\n", + "model.layers.9.self_attn.o_proj.weight: cpu\n", + "model.layers.9.mlp.gate_proj.weight: cpu\n", + "model.layers.9.mlp.up_proj.weight: cpu\n", + "model.layers.9.mlp.down_proj.weight: cpu\n", + "model.layers.9.input_layernorm.weight: meta\n", + "model.layers.9.post_attention_layernorm.weight: meta\n", + "model.layers.10.self_attn.q_proj.weight: cpu\n", + "model.layers.10.self_attn.k_proj.weight: cpu\n", + "model.layers.10.self_attn.v_proj.weight: cpu\n", + "model.layers.10.self_attn.o_proj.weight: cpu\n", + "model.layers.10.mlp.gate_proj.weight: cpu\n", + "model.layers.10.mlp.up_proj.weight: cpu\n", + "model.layers.10.mlp.down_proj.weight: cpu\n", + "model.layers.10.input_layernorm.weight: meta\n", + "model.layers.10.post_attention_layernorm.weight: meta\n", + "model.layers.11.self_attn.q_proj.weight: cpu\n", + "model.layers.11.self_attn.k_proj.weight: cpu\n", + "model.layers.11.self_attn.v_proj.weight: cpu\n", + "model.layers.11.self_attn.o_proj.weight: cpu\n", + "model.layers.11.mlp.gate_proj.weight: cpu\n", + "model.layers.11.mlp.up_proj.weight: cpu\n", + "model.layers.11.mlp.down_proj.weight: cpu\n", + "model.layers.11.input_layernorm.weight: meta\n", + "model.layers.11.post_attention_layernorm.weight: meta\n", + "model.layers.12.self_attn.q_proj.weight: cpu\n", + "model.layers.12.self_attn.k_proj.weight: cpu\n", + "model.layers.12.self_attn.v_proj.weight: cpu\n", + "model.layers.12.self_attn.o_proj.weight: cpu\n", + "model.layers.12.mlp.gate_proj.weight: cpu\n", + "model.layers.12.mlp.up_proj.weight: cpu\n", + "model.layers.12.mlp.down_proj.weight: cpu\n", + "model.layers.12.input_layernorm.weight: meta\n", + "model.layers.12.post_attention_layernorm.weight: meta\n", + "model.layers.13.self_attn.q_proj.weight: cpu\n", + "model.layers.13.self_attn.k_proj.weight: cpu\n", + "model.layers.13.self_attn.v_proj.weight: cpu\n", + "model.layers.13.self_attn.o_proj.weight: cpu\n", + "model.layers.13.mlp.gate_proj.weight: cpu\n", + "model.layers.13.mlp.up_proj.weight: cpu\n", + "model.layers.13.mlp.down_proj.weight: cpu\n", + "model.layers.13.input_layernorm.weight: meta\n", + "model.layers.13.post_attention_layernorm.weight: meta\n", + "model.layers.14.self_attn.q_proj.weight: cpu\n", + "model.layers.14.self_attn.k_proj.weight: cpu\n", + "model.layers.14.self_attn.v_proj.weight: cpu\n", + "model.layers.14.self_attn.o_proj.weight: cpu\n", + "model.layers.14.mlp.gate_proj.weight: cpu\n", + "model.layers.14.mlp.up_proj.weight: cpu\n", + "model.layers.14.mlp.down_proj.weight: cpu\n", + "model.layers.14.input_layernorm.weight: meta\n", + "model.layers.14.post_attention_layernorm.weight: meta\n", + "model.layers.15.self_attn.q_proj.weight: cpu\n", + "model.layers.15.self_attn.k_proj.weight: cpu\n", + "model.layers.15.self_attn.v_proj.weight: cpu\n", + "model.layers.15.self_attn.o_proj.weight: cpu\n", + "model.layers.15.mlp.gate_proj.weight: cpu\n", + "model.layers.15.mlp.up_proj.weight: cpu\n", + "model.layers.15.mlp.down_proj.weight: cpu\n", + "model.layers.15.input_layernorm.weight: meta\n", + "model.layers.15.post_attention_layernorm.weight: meta\n", + "model.layers.16.self_attn.q_proj.weight: cpu\n", + "model.layers.16.self_attn.k_proj.weight: cpu\n", + "model.layers.16.self_attn.v_proj.weight: cpu\n", + "model.layers.16.self_attn.o_proj.weight: cpu\n", + "model.layers.16.mlp.gate_proj.weight: cpu\n", + "model.layers.16.mlp.up_proj.weight: cpu\n", + "model.layers.16.mlp.down_proj.weight: cpu\n", + "model.layers.16.input_layernorm.weight: meta\n", + "model.layers.16.post_attention_layernorm.weight: meta\n", + "model.layers.17.self_attn.q_proj.weight: cpu\n", + "model.layers.17.self_attn.k_proj.weight: cpu\n", + "model.layers.17.self_attn.v_proj.weight: cpu\n", + "model.layers.17.self_attn.o_proj.weight: cpu\n", + "model.layers.17.mlp.gate_proj.weight: cpu\n", + "model.layers.17.mlp.up_proj.weight: cpu\n", + "model.layers.17.mlp.down_proj.weight: cpu\n", + "model.layers.17.input_layernorm.weight: meta\n", + "model.layers.17.post_attention_layernorm.weight: meta\n", + "model.layers.18.self_attn.q_proj.weight: cpu\n", + "model.layers.18.self_attn.k_proj.weight: cpu\n", + "model.layers.18.self_attn.v_proj.weight: cpu\n", + "model.layers.18.self_attn.o_proj.weight: cpu\n", + "model.layers.18.mlp.gate_proj.weight: cpu\n", + "model.layers.18.mlp.up_proj.weight: cpu\n", + "model.layers.18.mlp.down_proj.weight: cpu\n", + "model.layers.18.input_layernorm.weight: meta\n", + "model.layers.18.post_attention_layernorm.weight: meta\n", + "model.layers.19.self_attn.q_proj.weight: cpu\n", + "model.layers.19.self_attn.k_proj.weight: cpu\n", + "model.layers.19.self_attn.v_proj.weight: cpu\n", + "model.layers.19.self_attn.o_proj.weight: cpu\n", + "model.layers.19.mlp.gate_proj.weight: cpu\n", + "model.layers.19.mlp.up_proj.weight: cpu\n", + "model.layers.19.mlp.down_proj.weight: cpu\n", + "model.layers.19.input_layernorm.weight: meta\n", + "model.layers.19.post_attention_layernorm.weight: meta\n", + "model.layers.20.self_attn.q_proj.weight: cpu\n", + "model.layers.20.self_attn.k_proj.weight: cpu\n", + "model.layers.20.self_attn.v_proj.weight: cpu\n", + "model.layers.20.self_attn.o_proj.weight: cpu\n", + "model.layers.20.mlp.gate_proj.weight: cpu\n", + "model.layers.20.mlp.up_proj.weight: cpu\n", + "model.layers.20.mlp.down_proj.weight: cpu\n", + "model.layers.20.input_layernorm.weight: meta\n", + "model.layers.20.post_attention_layernorm.weight: meta\n", + "model.layers.21.self_attn.q_proj.weight: cpu\n", + "model.layers.21.self_attn.k_proj.weight: cpu\n", + "model.layers.21.self_attn.v_proj.weight: cpu\n", + "model.layers.21.self_attn.o_proj.weight: cpu\n", + "model.layers.21.mlp.gate_proj.weight: cpu\n", + "model.layers.21.mlp.up_proj.weight: cpu\n", + "model.layers.21.mlp.down_proj.weight: cpu\n", + "model.layers.21.input_layernorm.weight: meta\n", + "model.layers.21.post_attention_layernorm.weight: meta\n", + "model.norm.weight: meta\n", + "lm_head.weight: meta\n" + ] + } + ], + "source": [ + "base_model = \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\"\n", + "target_model = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\n", + " base_model, use_fast=True\n", + ")\n", + "compress_config = BaseCompressionConfig(\n", + " bits=4,\n", + " sparsity=0.5,\n", + " prunen=0,\n", + " block_size=128,\n", + " prunem=0,\n", + " lossless='gdeflate',\n", + " damp_percent=0.01,\n", + " sym=False,\n", + ")\n", + "shuffle_dataset = True\n", + "dataset = \".local/datasets/lmsys.jsonl\"\n", + "n_samples = 128\n", + "outdir = \".cache/compressed_models/TinyLlama.TinyLlama-1.1B-Chat-v1.0.4b0s128g\"\n", + "\n", + "print(\"[info] compress config:\", compress_config)\n", + "target_model = AutoDeltaZipModelForCausalLM.from_pretrained(\n", + " target_model, \n", + " compress_config=compress_config,\n", + " torch_dtype=torch.float16,\n", + " # max_memory = {0: \"1GIB\", 1: \"1GIB\", \"cpu\": \"140GIB\"}\n", + " max_memory = {0: \"400MIB\", 1: \"400MIB\", \"cpu\": \"140GIB\"}\n", + ")\n", + "delta = \"subtract\"\n", + "target_model.requires_grad_(False)\n", + "if base_model != \"\" and delta != \"\":\n", + " print(\"[info] base model is defined, delta mode enabled\")\n", + " base_model = AutoDeltaZipModelForCausalLM.from_pretrained(\n", + " base_model,\n", + " compress_config=compress_config,\n", + " torch_dtype=torch.float16,\n", + " )\n", + " base_model.requires_grad_(False)\n", + "torch.cuda.empty_cache()\n", + "print(f\"target model on {target_model.device}, base model on {base_model.device}\")\n", + "# now time to prepare inspect dataset\n", + "with open(dataset, \"r\") as fp:\n", + " examples = [json.loads(line)[\"text\"] for line in fp.readlines()]\n", + "if n_samples <= 0:\n", + " examples = examples\n", + "else:\n", + " if shuffle_dataset:\n", + " import random\n", + " random.seed(42)\n", + " random.shuffle(examples)\n", + " examples = examples[: n_samples]\n", + "examples = [tokenizer(x) for x in examples]\n", + "if base_model != \"\" and delta != \"\":\n", + " target_model.lossy_compress(\n", + " examples,\n", + " batch_size=1,\n", + " base_model=base_model,\n", + " )\n", + "else:\n", + " target_model.lossy_compress(\n", + " examples,\n", + " batch_size=1,\n", + " )\n", + "# write to folder\n", + "os.makedirs(outdir, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# safe a safe tensor copy just in case\n", + "import safetensors as st\n", + "tensors = {}\n", + "for name, param in target_model.named_parameters():\n", + " if not param.is_meta:\n", + " tensors[name] = param.data.cpu().clone().detach()\n", + "st.torch.save_file(tensors, os.path.join(outdir, \"temp.safetensors\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model.model.layers.5.input_layernorm.weight\n", + "model.model.layers.5.post_attention_layernorm.weight\n", + "model.model.layers.6.input_layernorm.weight\n", + "model.model.layers.6.post_attention_layernorm.weight\n", + "model.model.layers.7.input_layernorm.weight\n", + "model.model.layers.7.post_attention_layernorm.weight\n", + "model.model.layers.8.input_layernorm.weight\n", + "model.model.layers.8.post_attention_layernorm.weight\n", + "model.model.layers.9.input_layernorm.weight\n", + "model.model.layers.9.post_attention_layernorm.weight\n", + "model.model.layers.10.input_layernorm.weight\n", + "model.model.layers.10.post_attention_layernorm.weight\n", + "model.model.layers.11.input_layernorm.weight\n", + "model.model.layers.11.post_attention_layernorm.weight\n", + "model.model.layers.12.input_layernorm.weight\n", + "model.model.layers.12.post_attention_layernorm.weight\n", + "model.model.layers.13.input_layernorm.weight\n", + "model.model.layers.13.post_attention_layernorm.weight\n", + "model.model.layers.14.input_layernorm.weight\n", + "model.model.layers.14.post_attention_layernorm.weight\n", + "model.model.layers.15.input_layernorm.weight\n", + "model.model.layers.15.post_attention_layernorm.weight\n", + "model.model.layers.16.input_layernorm.weight\n", + "model.model.layers.16.post_attention_layernorm.weight\n", + "model.model.layers.17.input_layernorm.weight\n", + "model.model.layers.17.post_attention_layernorm.weight\n", + "model.model.layers.18.input_layernorm.weight\n", + "model.model.layers.18.post_attention_layernorm.weight\n", + "model.model.layers.19.input_layernorm.weight\n", + "model.model.layers.19.post_attention_layernorm.weight\n", + "model.model.layers.20.input_layernorm.weight\n", + "model.model.layers.20.post_attention_layernorm.weight\n", + "model.model.layers.21.input_layernorm.weight\n", + "model.model.layers.21.post_attention_layernorm.weight\n", + "model.model.norm.weight\n", + "model.lm_head.weight\n" + ] + } + ], + "source": [ + "for name, param in target_model.named_parameters():\n", + " if param.is_meta:\n", + " print(name)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cuda:0\n" + ] + } + ], + "source": [ + "# for weights that are not compressed, we calculate delta afterward compression\n", + "if base_model != \"\" and delta != \"\":\n", + " compressed_modules = []\n", + " for x in base_model.inside_layer_modules:\n", + " compressed_modules.extend(x)\n", + " for name, param in target_model.named_parameters():\n", + " if \"bias\" in name or all(\n", + " [modules not in name for modules in compressed_modules]\n", + " ):\n", + " base_weight = base_model.state_dict()[name]\n", + " if base_weight.device != param.device:\n", + " base_weight = base_weight.to(param.device)\n", + " target_model.state_dict()[name] = param - base_weight\n", + "del base_model\n", + "print(target_model.device)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-06 17:08:22.646 | INFO | deltazip.modeling._base:from_pretrained:699 - Using [None] to load model.\n", + "WARNING:accelerate.big_modeling:You shouldn't move a model when it is dispatched on multiple devices.\n", + "WARNING:accelerate.big_modeling:You shouldn't move a model when it is dispatched on multiple devices.\n", + "/mnt/scratch/xiayao/mamba/envs/deltazip/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n", + " warnings.warn(\n", + "The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class\n", + "The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class\n", + "2024-04-06 17:08:23.350 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.self_attn.k_proj\n", + "2024-04-06 17:08:23.351 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:23.406 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.self_attn.o_proj\n", + "2024-04-06 17:08:23.407 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:23.539 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.self_attn.q_proj\n", + "2024-04-06 17:08:23.540 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:23.658 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.self_attn.v_proj\n", + "2024-04-06 17:08:23.660 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:23.713 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.mlp.down_proj\n", + "2024-04-06 17:08:23.714 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:23.989 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.mlp.gate_proj\n", + "2024-04-06 17:08:23.990 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.314 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.0.mlp.up_proj\n", + "2024-04-06 17:08:24.315 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.632 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.self_attn.k_proj\n", + "2024-04-06 17:08:24.633 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.684 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.self_attn.o_proj\n", + "2024-04-06 17:08:24.686 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.805 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.self_attn.q_proj\n", + "2024-04-06 17:08:24.806 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.929 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.self_attn.v_proj\n", + "2024-04-06 17:08:24.930 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:24.982 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.mlp.down_proj\n", + "2024-04-06 17:08:24.983 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:25.249 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.mlp.gate_proj\n", + "2024-04-06 17:08:25.251 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:25.576 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.1.mlp.up_proj\n", + "2024-04-06 17:08:25.577 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:25.893 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.self_attn.k_proj\n", + "2024-04-06 17:08:25.895 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:25.946 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.self_attn.o_proj\n", + "2024-04-06 17:08:25.947 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:26.064 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.self_attn.q_proj\n", + "2024-04-06 17:08:26.066 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:26.186 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.self_attn.v_proj\n", + "2024-04-06 17:08:26.188 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:26.240 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.mlp.down_proj\n", + "2024-04-06 17:08:26.241 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:26.506 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.mlp.gate_proj\n", + "2024-04-06 17:08:26.507 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:26.819 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.2.mlp.up_proj\n", + "2024-04-06 17:08:26.821 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:27.140 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.self_attn.k_proj\n", + "2024-04-06 17:08:27.142 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:27.192 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.self_attn.o_proj\n", + "2024-04-06 17:08:27.193 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:27.313 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.self_attn.q_proj\n", + "2024-04-06 17:08:27.314 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:27.434 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.self_attn.v_proj\n", + "2024-04-06 17:08:27.436 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:27.487 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.mlp.down_proj\n", + "2024-04-06 17:08:27.489 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:27.755 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.mlp.gate_proj\n", + "2024-04-06 17:08:27.757 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.130 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.3.mlp.up_proj\n", + "2024-04-06 17:08:28.131 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.453 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.self_attn.k_proj\n", + "2024-04-06 17:08:28.454 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.504 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.self_attn.o_proj\n", + "2024-04-06 17:08:28.505 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.625 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.self_attn.q_proj\n", + "2024-04-06 17:08:28.627 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.750 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.self_attn.v_proj\n", + "2024-04-06 17:08:28.751 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:28.803 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.mlp.down_proj\n", + "2024-04-06 17:08:28.804 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:29.069 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.mlp.gate_proj\n", + "2024-04-06 17:08:29.071 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:29.382 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.4.mlp.up_proj\n", + "2024-04-06 17:08:29.383 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:29.705 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.self_attn.k_proj\n", + "2024-04-06 17:08:29.706 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:29.757 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.self_attn.o_proj\n", + "2024-04-06 17:08:29.758 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:29.880 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.self_attn.q_proj\n", + "2024-04-06 17:08:29.881 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:30.001 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.self_attn.v_proj\n", + "2024-04-06 17:08:30.002 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:30.054 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.mlp.down_proj\n", + "2024-04-06 17:08:30.055 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:30.322 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.mlp.gate_proj\n", + "2024-04-06 17:08:30.323 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:30.639 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.5.mlp.up_proj\n", + "2024-04-06 17:08:30.639 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:30.961 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.self_attn.k_proj\n", + "2024-04-06 17:08:30.962 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:31.013 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.self_attn.o_proj\n", + "2024-04-06 17:08:31.014 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:31.134 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.self_attn.q_proj\n", + "2024-04-06 17:08:31.136 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:31.257 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.self_attn.v_proj\n", + "2024-04-06 17:08:31.258 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:31.310 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.mlp.down_proj\n", + "2024-04-06 17:08:31.311 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:31.595 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.mlp.gate_proj\n", + "2024-04-06 17:08:31.596 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:31.911 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.6.mlp.up_proj\n", + "2024-04-06 17:08:31.912 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:32.270 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.self_attn.k_proj\n", + "2024-04-06 17:08:32.270 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:32.324 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.self_attn.o_proj\n", + "2024-04-06 17:08:32.325 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:32.446 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.self_attn.q_proj\n", + "2024-04-06 17:08:32.447 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:32.569 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.self_attn.v_proj\n", + "2024-04-06 17:08:32.570 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:32.622 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.mlp.down_proj\n", + "2024-04-06 17:08:32.623 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:32.891 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.mlp.gate_proj\n", + "2024-04-06 17:08:32.892 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.218 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.7.mlp.up_proj\n", + "2024-04-06 17:08:33.219 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.559 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.self_attn.k_proj\n", + "2024-04-06 17:08:33.560 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.612 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.self_attn.o_proj\n", + "2024-04-06 17:08:33.612 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.733 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.self_attn.q_proj\n", + "2024-04-06 17:08:33.734 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.853 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.self_attn.v_proj\n", + "2024-04-06 17:08:33.855 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:33.908 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.mlp.down_proj\n", + "2024-04-06 17:08:33.909 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:34.178 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.mlp.gate_proj\n", + "2024-04-06 17:08:34.179 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:34.516 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.8.mlp.up_proj\n", + "2024-04-06 17:08:34.517 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:34.836 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.self_attn.k_proj\n", + "2024-04-06 17:08:34.837 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:34.889 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.self_attn.o_proj\n", + "2024-04-06 17:08:34.889 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:35.008 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.self_attn.q_proj\n", + "2024-04-06 17:08:35.009 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:35.129 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.self_attn.v_proj\n", + "2024-04-06 17:08:35.130 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:35.182 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.mlp.down_proj\n", + "2024-04-06 17:08:35.183 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:35.455 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.mlp.gate_proj\n", + "2024-04-06 17:08:35.456 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:35.822 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.9.mlp.up_proj\n", + "2024-04-06 17:08:35.825 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:36.171 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.self_attn.k_proj\n", + "2024-04-06 17:08:36.173 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:36.224 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.self_attn.o_proj\n", + "2024-04-06 17:08:36.225 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:36.351 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.self_attn.q_proj\n", + "2024-04-06 17:08:36.352 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:36.473 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.self_attn.v_proj\n", + "2024-04-06 17:08:36.474 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:36.526 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.mlp.down_proj\n", + "2024-04-06 17:08:36.527 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:36.798 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.mlp.gate_proj\n", + "2024-04-06 17:08:36.799 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.120 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.10.mlp.up_proj\n", + "2024-04-06 17:08:37.121 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.421 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.self_attn.k_proj\n", + "2024-04-06 17:08:37.422 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.474 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.self_attn.o_proj\n", + "2024-04-06 17:08:37.474 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.595 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.self_attn.q_proj\n", + "2024-04-06 17:08:37.596 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.722 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.self_attn.v_proj\n", + "2024-04-06 17:08:37.723 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:37.774 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.mlp.down_proj\n", + "2024-04-06 17:08:37.775 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:38.039 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.mlp.gate_proj\n", + "2024-04-06 17:08:38.040 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:38.355 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.11.mlp.up_proj\n", + "2024-04-06 17:08:38.356 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:38.649 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.self_attn.k_proj\n", + "2024-04-06 17:08:38.650 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:38.701 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.self_attn.o_proj\n", + "2024-04-06 17:08:38.702 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:38.826 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.self_attn.q_proj\n", + "2024-04-06 17:08:38.827 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:38.946 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.self_attn.v_proj\n", + "2024-04-06 17:08:38.948 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:39.000 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.mlp.down_proj\n", + "2024-04-06 17:08:39.001 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:39.276 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.mlp.gate_proj\n", + "2024-04-06 17:08:39.277 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:39.590 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.12.mlp.up_proj\n", + "2024-04-06 17:08:39.591 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:39.905 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.self_attn.k_proj\n", + "2024-04-06 17:08:39.906 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:39.956 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.self_attn.o_proj\n", + "2024-04-06 17:08:39.957 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:40.080 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.self_attn.q_proj\n", + "2024-04-06 17:08:40.081 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:40.200 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.self_attn.v_proj\n", + "2024-04-06 17:08:40.201 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:40.253 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.mlp.down_proj\n", + "2024-04-06 17:08:40.254 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:40.518 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.mlp.gate_proj\n", + "2024-04-06 17:08:40.519 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:40.811 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.13.mlp.up_proj\n", + "2024-04-06 17:08:40.812 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:41.129 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.self_attn.k_proj\n", + "2024-04-06 17:08:41.129 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:41.180 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.self_attn.o_proj\n", + "2024-04-06 17:08:41.181 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:41.301 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.self_attn.q_proj\n", + "2024-04-06 17:08:41.302 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:41.421 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.self_attn.v_proj\n", + "2024-04-06 17:08:41.422 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:41.473 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.mlp.down_proj\n", + "2024-04-06 17:08:41.474 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:41.735 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.mlp.gate_proj\n", + "2024-04-06 17:08:41.736 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.048 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.14.mlp.up_proj\n", + "2024-04-06 17:08:42.049 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.345 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.self_attn.k_proj\n", + "2024-04-06 17:08:42.346 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.397 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.self_attn.o_proj\n", + "2024-04-06 17:08:42.397 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.520 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.self_attn.q_proj\n", + "2024-04-06 17:08:42.521 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.639 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.self_attn.v_proj\n", + "2024-04-06 17:08:42.640 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:42.692 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.mlp.down_proj\n", + "2024-04-06 17:08:42.693 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:42.965 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.mlp.gate_proj\n", + "2024-04-06 17:08:42.966 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.261 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.15.mlp.up_proj\n", + "2024-04-06 17:08:43.261 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.559 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.self_attn.k_proj\n", + "2024-04-06 17:08:43.560 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.611 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.self_attn.o_proj\n", + "2024-04-06 17:08:43.611 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.733 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.self_attn.q_proj\n", + "2024-04-06 17:08:43.734 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.852 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.self_attn.v_proj\n", + "2024-04-06 17:08:43.853 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:43.904 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.mlp.down_proj\n", + "2024-04-06 17:08:43.905 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:44.328 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.mlp.gate_proj\n", + "2024-04-06 17:08:44.329 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:44.632 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.16.mlp.up_proj\n", + "2024-04-06 17:08:44.633 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:44.932 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.self_attn.k_proj\n", + "2024-04-06 17:08:44.933 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:44.985 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.self_attn.o_proj\n", + "2024-04-06 17:08:44.985 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:45.108 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.self_attn.q_proj\n", + "2024-04-06 17:08:45.109 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:45.230 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.self_attn.v_proj\n", + "2024-04-06 17:08:45.231 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:45.283 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.mlp.down_proj\n", + "2024-04-06 17:08:45.284 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:45.557 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.mlp.gate_proj\n", + "2024-04-06 17:08:45.558 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:45.871 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.17.mlp.up_proj\n", + "2024-04-06 17:08:45.872 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:46.178 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.self_attn.k_proj\n", + "2024-04-06 17:08:46.179 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:46.236 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.self_attn.o_proj\n", + "2024-04-06 17:08:46.237 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:46.357 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.self_attn.q_proj\n", + "2024-04-06 17:08:46.358 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:46.477 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.self_attn.v_proj\n", + "2024-04-06 17:08:46.478 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:46.530 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.mlp.down_proj\n", + "2024-04-06 17:08:46.531 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:46.795 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.mlp.gate_proj\n", + "2024-04-06 17:08:46.796 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.094 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.18.mlp.up_proj\n", + "2024-04-06 17:08:47.095 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.405 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.self_attn.k_proj\n", + "2024-04-06 17:08:47.406 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.458 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.self_attn.o_proj\n", + "2024-04-06 17:08:47.459 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.578 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.self_attn.q_proj\n", + "2024-04-06 17:08:47.579 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.695 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.self_attn.v_proj\n", + "2024-04-06 17:08:47.696 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:47.749 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.mlp.down_proj\n", + "2024-04-06 17:08:47.750 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:48.033 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.mlp.gate_proj\n", + "2024-04-06 17:08:48.034 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:48.355 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.19.mlp.up_proj\n", + "2024-04-06 17:08:48.356 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:48.680 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.self_attn.k_proj\n", + "2024-04-06 17:08:48.681 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:48.733 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.self_attn.o_proj\n", + "2024-04-06 17:08:48.734 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:48.854 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.self_attn.q_proj\n", + "2024-04-06 17:08:48.855 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:48.971 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.self_attn.v_proj\n", + "2024-04-06 17:08:48.972 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:49.025 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.mlp.down_proj\n", + "2024-04-06 17:08:49.026 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:49.340 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.mlp.gate_proj\n", + "2024-04-06 17:08:49.341 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:49.659 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.20.mlp.up_proj\n", + "2024-04-06 17:08:49.660 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:49.962 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.self_attn.k_proj\n", + "2024-04-06 17:08:49.962 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:50.015 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.self_attn.o_proj\n", + "2024-04-06 17:08:50.015 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:50.132 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.self_attn.q_proj\n", + "2024-04-06 17:08:50.133 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:50.256 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.self_attn.v_proj\n", + "2024-04-06 17:08:50.257 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:50.309 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.mlp.down_proj\n", + "2024-04-06 17:08:50.310 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([5632])\n", + "2024-04-06 17:08:50.599 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.mlp.gate_proj\n", + "2024-04-06 17:08:50.600 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:50.916 | INFO | deltazip.modeling._utils:pack_model:168 - model.layers.21.mlp.up_proj\n", + "2024-04-06 17:08:50.917 | INFO | deltazip.modeling._utils:pack_model:179 - g_idx: torch.Size([2048])\n", + "2024-04-06 17:08:51.225 | INFO | deltazip.modeling._utils:pack_model:182 - Model packed.\n", + "WARNING:accelerate.big_modeling:You shouldn't move a model when it is dispatched on multiple devices.\n", + "WARNING:accelerate.big_modeling:You shouldn't move a model when it is dispatched on multiple devices.\n" + ] + } + ], + "source": [ + "# from accelerate import dispatch_model\n", + "# from accelerate import infer_auto_device_map\n", + "\n", + "target_model_name = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n", + "target_model_ref = AutoDeltaZipModelForCausalLM.from_pretrained(\n", + " target_model_name, \n", + " compress_config=compress_config,\n", + " torch_dtype=torch.float16,\n", + ")\n", + "missing_state_dict = target_model_ref.state_dict()\n", + "missing_state_dict = {k: v for k, v in missing_state_dict.items() if k not in tensors}\n", + "target_model.load_state_dict(missing_state_dict, strict = False, assign=True)\n", + "for name, param in target_model.named_parameters():\n", + " if param.is_meta:\n", + " print(name)\n", + "target_model.save_compressed(outdir)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/compresses/compress_70b.sh b/scripts/compresses/compress_70b.sh index dfc8062..3ce285f 100644 --- a/scripts/compresses/compress_70b.sh +++ b/scripts/compresses/compress_70b.sh @@ -1,3 +1,5 @@ python cli/compress.py --target-model xzyao/openllama-3b-chat --outdir .cache/compressed_models/3b-parameters/2bits-openllama-0 --dataset .cache/datasets/dialogs.jsonl --n-samples 256 --bits 2 --sparsity 0 --lossless gdeflate --base-model openlm-research/open_llama_3b_v2 --perc-damp 0.01 --block-size 128 --shuffle-dataset --fast-tokenizer --delta subtract -CUDA_VISIBLE_DEVICES=0 python cli/compress.py --target-model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --outdir .cache/compressed_models/TinyLlama.TinyLlama-1.1B-Chat-v1.0.8b0s128g --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 8 --sparsity 0 --lossless gdeflate --base-model TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T --perc-damp 0.01 --block-size 128 --shuffle-dataset --fast-tokenizer --delta subtract \ No newline at end of file +CUDA_VISIBLE_DEVICES=0 python cli/compress.py --target-model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --outdir .cache/compressed_models/TinyLlama.TinyLlama-1.1B-Chat-v1.0.8b0s128g --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 8 --sparsity 0 --lossless gdeflate --base-model TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T --perc-damp 0.01 --block-size 128 --shuffle-dataset --fast-tokenizer --delta subtract + +python cli/compress.py --target-model JackFram/llama-160m-cbt-2 --outdir .cache/compressed_models/JackFram.llama-160m-cbt-2.4b0s --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 4 --sparsity 0 --lossless gdeflate --base-model JackFram/llama-160m --perc-damp 0.01 --block-size 128 --shuffle-dataset --fast-tokenizer --delta subtract \ No newline at end of file diff --git a/scripts/manual/compress_manual.sh b/scripts/manual/compress_manual.sh index 2ff4269..e69de29 100644 --- a/scripts/manual/compress_manual.sh +++ b/scripts/manual/compress_manual.sh @@ -1,3 +0,0 @@ -python cli/compress.py --target-model /mnt/scratch/xiayao/cache/experiments/fmzip/finetuned_raw/llama-3b/task065_timetravel_consistent_sentence_classification/global_step102 --outdir .cache/test --dataset /mnt/scratch/xiayao/cache/datasets/qi/ar/task065_timetravel_consistent_sentence_classification.train.jsonl --n-samples 512 --bits 4 --group-size 128 --sparsity 0 --lossless gdeflate --delta subtract --base-model openlm-research/open_llama_3b_v2 --block-size 128 --perc-damp 0.001 - -python cli/compress.py --target-model /mnt/scratch/xiayao/cache/experiments/fmzip/finetuned_raw/llama-3b/task380_boolq_yes_no_question/global_step270 --outdir .cache/test --dataset /mnt/scratch/xiayao/cache/datasets/qi/ar/task380_boolq_yes_no_question.train.jsonl --n-samples 512 --bits 4 --group-size 128 --sparsity 0 --lossless gdeflate --delta subtract --base-model openlm-research/open_llama_3b_v2 --block-size 128 From dcc47eb6dafd02a5591e4301cd279feb3ae4903c Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Sat, 6 Apr 2024 20:08:37 +0200 Subject: [PATCH 09/14] . --- cli/compress.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cli/compress.py b/cli/compress.py index 43d168b..882ec85 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -27,9 +27,9 @@ def main(args): args.target_model, compress_config=compress_config, torch_dtype=torch.float16, - # max_memory = {0: "1GIB", 1: "1GIB", "cpu": "140GIB"} + max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3: "10GIB", "cpu": "140GIB"} # simulate large model - max_memory = {0: "400MIB", 1: "400MIB", "cpu": "140GIB"} + # max_memory = {0: "400MIB", 1: "400MIB", "cpu": "140GIB"} ) target_model.requires_grad_(False) @@ -105,7 +105,6 @@ def main(args): # run a forward pass to make sure the model is working target_model.save_compressed(args.outdir) - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--base-model", type=str, default="") From cdad7fe5132b9ac33d53c37b3d95081c242f1be5 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Mon, 8 Apr 2024 00:53:01 +0200 Subject: [PATCH 10/14] debugging offloaded params --- cli/chat.py | 6 +- cli/compress.py | 12 +- deltazip/modeling/_utils.py | 1 - .../packing_experiments.ipynb | 0 playground.ipynb | 18127 ++++++++++++++++ scripts/compresses/compress.sh | 4 +- 6 files changed, 18140 insertions(+), 10 deletions(-) rename packing_experiments.ipynb => notebooks/packing_experiments.ipynb (100%) create mode 100644 playground.ipynb diff --git a/cli/chat.py b/cli/chat.py index ff5526d..0ece6b6 100644 --- a/cli/chat.py +++ b/cli/chat.py @@ -27,13 +27,9 @@ def chat(base_model:str, model_path: str): base_model = base_model.half() print("[deltazip] Loading target model...") delta_model = AutoDeltaZipModelForCausalLM.from_compressed( - args.model_path, strict=True, device="cpu", unpack=True + model_path, strict=True, device="cpu", unpack=True ) delta_model = delta_model.half() - - compressed_modules = [] - for x in base_model.inside_layer_modules: - compressed_modules.extend(x) for name, param in base_model.model.named_parameters(): delta_model.model.state_dict()[name].copy_( param + delta_model.model.state_dict()[name] diff --git a/cli/compress.py b/cli/compress.py index fe6fc04..379671f 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -28,9 +28,9 @@ def main(args): compress_config=compress_config, torch_dtype=torch.float16, # max_memory = {0: "2GIB", 1: "48GIB", 2: "48GIB", 3:"48GIB"} - max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3: "10GIB", "cpu": "140GIB"} + # max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3: "10GIB", "cpu": "140GIB"} # simulate large model - # max_memory = {0: "400MIB", 1: "400MIB", "cpu": "140GIB"} + max_memory = {0: "2GIB", 1: "2GIB", "cpu": "140GIB"} ) target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": @@ -85,10 +85,15 @@ def main(args): missing_state_dict = { k: v for k, v in missing_state_dict.items() if k not in tensors } - target_model.load_state_dict(missing_state_dict, strict = False, assign=True) + print(f"[info] loaded keys: {missing_state_dict.keys()}") + missing_key, unexpected_key = target_model.load_state_dict(missing_state_dict, strict = False, assign=True) + print(f"[info] missing keys: {missing_key}") + print(f"[info] unexpected keys: {unexpected_key}") for name, param in target_model.named_parameters(): if param.is_meta: print(f"[info] {name} is on meta") + del target_model_ref + if args.base_model != "" and args.delta != "": compressed_modules = [] for x in base_model.inside_layer_modules: @@ -97,6 +102,7 @@ def main(args): if "bias" in name or all( [modules not in name for modules in compressed_modules] ): + print(f"[info] taking delta for {name}") base_weight = base_model.state_dict()[name] if base_weight.device != param.device: base_weight = base_weight.to(param.device) diff --git a/deltazip/modeling/_utils.py b/deltazip/modeling/_utils.py index 34d50ec..91e7e0c 100644 --- a/deltazip/modeling/_utils.py +++ b/deltazip/modeling/_utils.py @@ -126,7 +126,6 @@ def deltazip_post_init( for _, submodule in model.named_modules(): if hasattr(submodule, "QUANT_TYPE"): device = submodule.qweight.device - print(f"model.device_tensors[device] {model.device_tensors[device]}") submodule.post_init(temp_dq=model.device_tensors[device]) torch.cuda.empty_cache() diff --git a/packing_experiments.ipynb b/notebooks/packing_experiments.ipynb similarity index 100% rename from packing_experiments.ipynb rename to notebooks/packing_experiments.ipynb diff --git a/playground.ipynb b/playground.ipynb new file mode 100644 index 0000000..5c041cf --- /dev/null +++ b/playground.ipynb @@ -0,0 +1,18127 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import safetensors as st\n", + "from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "tensors = {}\n", + "with st.safe_open(\".local/compressed_models/lmsys.vicuna-7b-v1.5.2b75s128g/deltazip-compressed.safetensors\", framework=\"torch\", device=\"cpu\") as f:\n", + " for key in f.keys():\n", + " tensors[key] = f.get_tensor(key)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-07 17:00:57.727 | INFO | deltazip.modeling._base:from_pretrained:699 - Using [None] to load model.\n", + "Downloading shards: 100%|██████████| 2/2 [01:19<00:00, 39.96s/it]\n", + "Loading checkpoint shards: 0%| | 0/2 [00:00 Date: Mon, 8 Apr 2024 02:15:58 +0200 Subject: [PATCH 11/14] fix bug in layernorm --- playground.ipynb | 18464 +-------------------------------------------- 1 file changed, 405 insertions(+), 18059 deletions(-) diff --git a/playground.ipynb b/playground.ipynb index 5c041cf..2471f17 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -2,18104 +2,450 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/xiayao/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import torch\n", - "import safetensors as st\n", - "from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig" + "import transformers\n", + "from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig\n", + "compress_config = BaseCompressionConfig(\n", + " bits=4,\n", + " group_size=128,\n", + " sparsity=1,\n", + " prunen=0,\n", + " prunem=0,\n", + " lossless=\"gdeflate\",\n", + " damp_percent=0.02,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-08 02:03:42.434 | INFO | deltazip.modeling._base:from_pretrained:699 - Using [None] to load model.\n", + "Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00, 1.65it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[deltazip] Loading target model...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ - "tensors = {}\n", - "with st.safe_open(\".local/compressed_models/lmsys.vicuna-7b-v1.5.2b75s128g/deltazip-compressed.safetensors\", framework=\"torch\", device=\"cpu\") as f:\n", - " for key in f.keys():\n", - " tensors[key] = f.get_tensor(key)" + "base_model = \"/mnt/scratch/xiayao/cache/HF/hub/models--meta-llama--Llama-2-7b-hf/snapshots/6fdf2e60f86ff2481f2241aaee459f85b5b0bbb9\"\n", + "model_path = \".local/compressed_models_test/lmsys.vicuna-7b-v1.5.4b50s128g\"\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(base_model)\n", + "base_model = AutoDeltaZipModelForCausalLM.from_pretrained(\n", + " base_model, compress_config=compress_config\n", + ")\n", + "base_model = base_model.half()\n", + "print(\"[deltazip] Loading target model...\")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2024-04-07 17:00:57.727 | INFO | deltazip.modeling._base:from_pretrained:699 - Using [None] to load model.\n", - "Downloading shards: 100%|██████████| 2/2 [01:19<00:00, 39.96s/it]\n", - "Loading checkpoint shards: 0%| | 0/2 [00:00 Date: Thu, 11 Apr 2024 21:39:51 +0200 Subject: [PATCH 12/14] minor --- cli/chat.py | 9 +++++---- cli/compress.py | 4 +++- playground.ipynb | 11 +++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/cli/chat.py b/cli/chat.py index 0ece6b6..9579c99 100644 --- a/cli/chat.py +++ b/cli/chat.py @@ -16,7 +16,7 @@ def to_chatml(prompt): return f": {prompt}<|endoftext|>:" def to_lmsys(prompt): - return f"User: {prompt} Assistant:" + return f"USER: {prompt}\nASSISTANT:" def chat(base_model:str, model_path: str): print("[deltazip] Loading base model...") @@ -31,9 +31,10 @@ def chat(base_model:str, model_path: str): ) delta_model = delta_model.half() for name, param in base_model.model.named_parameters(): - delta_model.model.state_dict()[name].copy_( - param + delta_model.model.state_dict()[name] - ) + if "layernorm" not in name: + delta_model.model.state_dict()[name].copy_( + param + delta_model.model.state_dict()[name] + ) delta_model = delta_model.to(torch.device("cuda")) print("[deltazip] models loaded") pipe = transformers.TextGenerationPipeline( diff --git a/cli/compress.py b/cli/compress.py index 379671f..b07bd82 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -74,7 +74,9 @@ def main(args): for name, param in target_model.named_parameters(): if not param.is_meta: tensors[name] = param.data.cpu().clone().detach() - st.torch.save_file(tensors, os.path.join(args.outdir, "temp.safetensors")) + st.torch.save_file( + tensors, os.path.join(args.outdir, "temp.safetensors") + ) target_model_ref = AutoDeltaZipModelForCausalLM.from_pretrained( args.target_model, diff --git a/playground.ipynb b/playground.ipynb index 2471f17..39f81a2 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -431,6 +431,17 @@ "USER: Who is Alan Turing?\n", "ASSISTANT: ​Alan Turing is a British mathematician, computer scientist, and cryptanalyst who is widely regarded as the father of computer science and artificial intelligence. He made significant contributions to the development of computer science, including the design of the first computer, the Turing machine, and the development of the first computer program. He also made important contributions to the field of cryptography, including the breaking of the En\n" ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] } ], "source": [ From f5d5e09ee3f9b471d18059fff0457a967651f2b3 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Fri, 24 May 2024 14:05:13 +0200 Subject: [PATCH 13/14] update --- .../utils/compression/compress_llamas.sh | 2 +- cli/chat.py | 11 +- cli/compress.py | 71 +++-- cli/merge.py | 25 +- deltazip/core/sparsegpt.py | 5 + deltazip/modeling/_base.py | 261 +++++++++++------- scripts/compresses/compress_13b.sh | 4 +- scripts/compresses/compress_baseline.sh | 11 + scripts/utils/merge.sh | 9 +- 9 files changed, 275 insertions(+), 124 deletions(-) create mode 100644 scripts/compresses/compress_baseline.sh diff --git a/artifact/benchmarks/utils/compression/compress_llamas.sh b/artifact/benchmarks/utils/compression/compress_llamas.sh index 52274fc..2b418cc 100644 --- a/artifact/benchmarks/utils/compression/compress_llamas.sh +++ b/artifact/benchmarks/utils/compression/compress_llamas.sh @@ -1,6 +1,6 @@ python cli/compress.py --target-model FlagAlpha/Llama2-Chinese-7b-Chat --outdir .cache/compressed_models/7b-parameters/llama2-chinese-7b-chat-2bits --dataset .cache/datasets/meta.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --base-model meta-llama/Llama-2-7b-hf --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 -python cli/compress.py --target-model lmsys/vicuna-7b-v1.5 --outdir .cache/compressed_models/7b-parameters/vicuna-7b-v1.5-2bits --dataset .cache/datasets/lmsys.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --base-model meta-llama/Llama-2-7b-hf --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 +python cli/compress.py --target-model lmsys/vicuna-7b-v1.5 --outdir .cache/compressed_models/vicuna-7b.2b50s --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --base-model meta-llama/Llama-2-7b-hf --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 python cli/compress.py --target-model meta-llama/Llama-2-7b-chat-hf --outdir .cache/compressed_models/7b-parameters/llama-2-7b-chat --dataset .cache/datasets/meta.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --base-model meta-llama/Llama-2-7b-hf --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 diff --git a/cli/chat.py b/cli/chat.py index 9579c99..57564dc 100644 --- a/cli/chat.py +++ b/cli/chat.py @@ -2,6 +2,12 @@ import transformers from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig +ignore_keywords = [ + 'norm', + 'embed', + 'lm_head' +] + compress_config = BaseCompressionConfig( bits=4, group_size=128, @@ -31,7 +37,10 @@ def chat(base_model:str, model_path: str): ) delta_model = delta_model.half() for name, param in base_model.model.named_parameters(): - if "layernorm" not in name: + if any([kw in name for kw in ignore_keywords]): + #delta_model.model.state_dict()[name].copy_(param) + pass + else: delta_model.model.state_dict()[name].copy_( param + delta_model.model.state_dict()[name] ) diff --git a/cli/compress.py b/cli/compress.py index b07bd82..4b2bf34 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -5,7 +5,16 @@ import safetensors as st from transformers import AutoTokenizer from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig +import os +import math +max_threads = str(min(8, os.cpu_count())) +os.environ['OMP_NUM_THREADS'] = max_threads +os.environ['OPENBLAS_NUM_THREADS'] = max_threads +os.environ['MKL_NUM_THREADS'] = max_threads +os.environ['VECLIB_MAXIMUM_THREADS'] = max_threads +os.environ['NUMEXPR_NUM_THREADS'] = max_threads +os.environ['NUMEXPR_MAX_THREADS'] = max_threads def main(args): print(args) @@ -15,23 +24,38 @@ def main(args): compress_config = BaseCompressionConfig( bits=args.bits, sparsity=args.sparsity, - prunen=args.prunen, block_size=args.block_size, + prunen=args.prunen, prunem=args.prunem, lossless=args.lossless, damp_percent=args.perc_damp, - sym=False, + sym=args.sym, ) print("[info] compress config:", compress_config) target_model = AutoDeltaZipModelForCausalLM.from_pretrained( args.target_model, compress_config=compress_config, torch_dtype=torch.float16, - # max_memory = {0: "2GIB", 1: "48GIB", 2: "48GIB", 3:"48GIB"} - # max_memory = {0: "10GIB", 1: "10GIB", 2: "10GIB", 3: "10GIB", "cpu": "140GIB"} - # simulate large model - max_memory = {0: "2GIB", 1: "2GIB", "cpu": "140GIB"} + # max_memory = { + # 0: "60GIB", + # 1: "60GIB", + # 2: "60GIB", + # 3: "60GIB", + # 4: "60GIB", + # 5: "60GIB", + # 6: "60GIB", + # 7: "60GIB", + # "cpu": "140GIB" + # } ) + ignore_keywords = [ + 'norm', + 'embed', + 'lm_head' + ] + not_save_keywords = [ + 'norm', + ] target_model.requires_grad_(False) if args.base_model != "" and args.delta != "": print("[info] base model is defined, delta mode enabled") @@ -77,7 +101,6 @@ def main(args): st.torch.save_file( tensors, os.path.join(args.outdir, "temp.safetensors") ) - target_model_ref = AutoDeltaZipModelForCausalLM.from_pretrained( args.target_model, compress_config=compress_config, @@ -101,17 +124,30 @@ def main(args): for x in base_model.inside_layer_modules: compressed_modules.extend(x) for name, param in target_model.named_parameters(): - if "bias" in name or all( - [modules not in name for modules in compressed_modules] - ): - print(f"[info] taking delta for {name}") - base_weight = base_model.state_dict()[name] - if base_weight.device != param.device: - base_weight = base_weight.to(param.device) - target_model.state_dict()[name] = param - base_weight - del base_model + # if all([module not in name for module in compressed_modules]): + # print(f"[info] {name} is compressed, saving in full...") + + # target_model.state_dict()[name] = param + # else: + # print(f"[info] {name} is not compressed, saving in full...") + # target_model.state_dict()[name] = param + if any([keyword in name for keyword in not_save_keywords]): + print(f"[info] {name} is not saved") + del target_model.state_dict()[name] + # if "bias" in name or all( + # [modules not in name for modules in compressed_modules] + # ): + # base_weight = base_model.state_dict()[name] + # if base_weight.device != param.device: + # base_weight = base_weight.to(param.device) + # target_model.state_dict()[name] = param - base_weight + + if args.base_model != "" and args.delta != "": + del base_model # run a forward pass to make sure the model is working target_model.save_compressed(args.outdir) + with open(os.path.join(args.outdir, "compressed_modules.json"), "w") as fp: + json.dump(compressed_modules, fp) if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -137,7 +173,8 @@ def main(args): parser.add_argument( "--lossless", type=str, default="gdeflate", choices=["gdeflate"] ) - parser.add_argument("--delta", type=str, choices=["subtract", "xor"], default="subtract") + parser.add_argument("--delta", type=str, choices=["subtract", "xor"], default="") + parser.add_argument("--sym", action="store_true") parser.add_argument("--large-model", action="store_true") parser.add_argument("--perc-damp", type=float, default=0.01) parser.add_argument("--outdir", type=str, default=".cache/compressed_models") diff --git a/cli/merge.py b/cli/merge.py index 36863ce..a6cb133 100644 --- a/cli/merge.py +++ b/cli/merge.py @@ -4,6 +4,7 @@ from loguru import logger from deltazip import AutoDeltaZipModelForCausalLM, BaseCompressionConfig from transformers import AutoTokenizer + compress_config = BaseCompressionConfig( bits=4, group_size=128, @@ -13,7 +14,11 @@ lossless="gdeflate", damp_percent=0.02, ) - +ignore_keywords = [ + 'norm', + 'embed', + 'lm_head' +] def merge(args): print(args) with torch.inference_mode(): @@ -29,14 +34,23 @@ def merge(args): compressed_modules = [] for x in base_model.inside_layer_modules: compressed_modules.extend(x) + for name, param in base_model.model.named_parameters(): - delta_model.model.state_dict()[name].copy_( - param + delta_model.model.state_dict()[name] - ) - # save model to output directory + if args.delta == "subtract": + if any([kw in name for kw in ignore_keywords]): + #delta_model.model.state_dict()[name].copy_(param) + pass + else: + delta_model.model.state_dict()[name].copy_( + param + delta_model.model.state_dict()[name] + ) + else: + logger.warning("Skipping due to unknown delta mode") + for name, param in delta_model.model.state_dict().items(): param = param.contiguous() delta_model.model.save_pretrained(args.output_dir, safe_serialization=False, max_shard_size="10GB") + os.makedirs(args.output_dir, exist_ok=True) tokenizer = AutoTokenizer.from_pretrained(args.base_model, use_fast=True) tokenizer.save_pretrained(args.output_dir) @@ -46,5 +60,6 @@ def merge(args): parser.add_argument("--base-model", type=str, default="gpt2") parser.add_argument("--target-model", type=str, default="gpt2") parser.add_argument("--output-dir", type=str, default="output") + parser.add_argument("--delta", type=str, default="") args = parser.parse_args() merge(args) diff --git a/deltazip/core/sparsegpt.py b/deltazip/core/sparsegpt.py index b219e7a..a346378 100644 --- a/deltazip/core/sparsegpt.py +++ b/deltazip/core/sparsegpt.py @@ -120,6 +120,11 @@ def fasterprune( for i in range(count): w = W1[:, i] d = Hinv1[i, i] + + if prunen != 0 and i % prunem == 0: + tmp = W1[:, i:(i + prunem)] ** 2 / (torch.diag(Hinv1)[i:(i + prunem)].reshape((1, -1))) ** 2 + mask1.scatter_(1, i + torch.topk(tmp, prunen, dim=1, largest=False)[1], True) + q = w.clone() q[mask1[:, i]] = 0 if hasattr(self, "quantizer"): diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index 9560232..5bb4829 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -49,7 +49,7 @@ class AutoCompressionConfig(PushToHubMixin): prunem: int = field(default=0) block_size: int = field(default=128) damp_percent: float = field(default=0.01) - desc_act: bool = field(default=True) + desc_act: bool = field(default=False) sym: bool = field(default=True) true_sequential: bool = field(default=True) lossless: str = field(default="none") @@ -63,7 +63,8 @@ def __post_init__(self): raise ValueError(f"bit must be one of [2,3,4,8]. Got {bit}") for sparsity in self.sparsity: if not (0 <= sparsity <= 1): - raise ValueError(f"sparsity must between 0 and 1. Got {sparsity}") + raise ValueError( + f"sparsity must between 0 and 1. Got {sparsity}") if not (0 < self.damp_percent < 1): raise ValueError("damp_percent must between 0 and 1.") @@ -105,10 +106,11 @@ class BaseCompressionConfig(PushToHubMixin): prunen: int = field(default=0) prunem: int = field(default=0) group_size: int = field(default=-1) - group_rows: int = field(default=-1) # deprecated, for backward compatibility + # deprecated, for backward compatibility + group_rows: int = field(default=-1) block_size: int = field(default=128) damp_percent: float = field(default=0.01) - desc_act: bool = field(default=True) + desc_act: bool = field(default=False) sym: bool = field(default=True) true_sequential: bool = field(default=True) lossless: str = field(default="none") @@ -123,7 +125,8 @@ def __post_init__(self): f"only support quantize to {fields_info[0].metadata['choices']} bits." ) if self.group_size != -1 and self.group_size <= 0: - raise ValueError("unless equal to -1, group_size must greater then 0.") + raise ValueError( + "unless equal to -1, group_size must greater then 0.") if not (0 < self.damp_percent < 1): raise ValueError("damp_percent must between 0 and 1.") @@ -228,7 +231,7 @@ def _convert_tensor_to_list(tensor): if not pad_token_id: pad_token_id = self.config.eos_token_id new_examples = [ - collate_data(new_examples[start : start + batch_size], pad_token_id) + collate_data(new_examples[start: start + batch_size], pad_token_id) for start in range(0, len(new_examples), batch_size) ] for new_example in new_examples: @@ -293,9 +296,11 @@ def forward(self, inp=None, **kwargs): inp = kwargs[kwarg_name] break layer_inputs.append(move_to_device(inp, self.data_device)) - attention_masks.append(kwargs["attention_mask"].to(self.data_device)) + attention_masks.append( + kwargs["attention_mask"].to(self.data_device)) if (pos_ids := kwargs.get("position_ids", None)) is not None: - position_ids.append(move_to_device(pos_ids, self.data_device)) + position_ids.append(move_to_device( + pos_ids, self.data_device)) one_kwargs = dict() for ( k, @@ -346,11 +351,13 @@ def forward(self, inp=None, **kwargs): pass layers[0] = layers[0].module - move_to_device(layers[0], CPU if force_layer_back_to_cpu else cur_layer_device) + move_to_device( + layers[0], CPU if force_layer_back_to_cpu else cur_layer_device) for module_name in self.outside_layer_modules: module = get_module_by_name(self.model, module_name) if module is not None: - move_to_device(module, ori_outside_layer_module_devices[module_name]) + move_to_device( + module, ori_outside_layer_module_devices[module_name]) torch.cuda.empty_cache() @@ -395,14 +402,17 @@ def tmp(_, inp, out): handles = [] for name in subset: - handles.append(subset[name].register_forward_hook(add_batch(name))) + handles.append( + subset[name].register_forward_hook(add_batch(name))) for j in range(num_batches): - layer_input = move_to_device(layer_inputs[j], cur_layer_device) + layer_input = move_to_device( + layer_inputs[j], cur_layer_device) layer_attention_mask = move_to_device( attention_masks[j], cur_layer_device ) - additional_layer_inputs = {"attention_mask": layer_attention_mask} + additional_layer_inputs = { + "attention_mask": layer_attention_mask} if ( layer_position_ids := None if not position_ids @@ -429,7 +439,8 @@ def tmp(_, inp, out): base_weight = base_model.model.state_dict()[ f"{self.layers_block_name}.{i}.{name}.weight" ] - base_weight = move_to_device(base_weight, cur_layer_device) + base_weight = move_to_device( + base_weight, cur_layer_device) scale, zero, g_idx, avg_loss, compressed_w = sparsegpt[ name ].fasterprune( @@ -476,7 +487,8 @@ def tmp(_, inp, out): layer_attention_mask = move_to_device( attention_masks[j], cur_layer_device ) - additional_layer_inputs = {"attention_mask": layer_attention_mask} + additional_layer_inputs = { + "attention_mask": layer_attention_mask} if ( layer_position_ids := None if not position_ids @@ -485,7 +497,8 @@ def tmp(_, inp, out): additional_layer_inputs["position_ids"] = layer_position_ids for k, v in layer_input_kwargs[j].items(): if isinstance(v, torch.Tensor): - additional_layer_inputs[k] = move_to_device(v, cur_layer_device) + additional_layer_inputs[k] = move_to_device( + v, cur_layer_device) else: additional_layer_inputs[k] = v layer_output = move_to_device( @@ -536,12 +549,13 @@ def tmp(_, inp, out): f"{self.layers_block_name}.{i}.{name}" ] if subset[name].weight.is_meta: - subset[name].weight = torch.nn.Parameter(key_weight.clone().detach(), requires_grad=False).to(CPU) + subset[name].weight = torch.nn.Parameter( + key_weight.clone().detach(), requires_grad=False).to(CPU) else: subset[name].weight.copy_(compressed_ws[ f"{self.layers_block_name}.{i}.{name}" ]) - + for name, param in self.model.named_parameters(): print(f"{name}: {param.device}") self.model.config.use_cache = forward_pass_use_cache @@ -592,7 +606,8 @@ def save_compressed(self, save_dir: str): state_dict = self.model.state_dict() state_dict = {k: v.clone().contiguous() for k, v in state_dict.items()} if self.compress_config.lossless != "none": - lossless_compressor = LosslessCompressor(self.compress_config.lossless) + lossless_compressor = LosslessCompressor( + self.compress_config.lossless) ( state_dict, tensor_shapes, @@ -672,7 +687,8 @@ def skip(*args, **kwargs): if "disk" in max_memory: raise NotImplementedError("disk offload not support yet.") with accelerate.init_empty_weights(): - model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config( + config, trust_remote_code=True) model.tie_weights() max_memory = accelerate.utils.get_balanced_memory( @@ -695,8 +711,9 @@ def skip(*args, **kwargs): model_init_kwargs["device_map"] = None else: model_init_kwargs["device_map"] = device_map - - logger.info(f"Using [{model_init_kwargs['device_map']}] to load model.") + + logger.info( + f"Using [{model_init_kwargs['device_map']}] to load model.") # model_init_kwargs["low_cpu_mem_usage"] = True torch.cuda.empty_cache() @@ -748,9 +765,11 @@ def from_compressed( if compress_config is None: # check if "auto_compression_config.json" exists if isfile(join(save_dir, "auto_compress_config.json")): - compress_config = AutoCompressionConfig.from_pretrained(save_dir) + compress_config = AutoCompressionConfig.from_pretrained( + save_dir) else: - compress_config = BaseCompressionConfig.from_pretrained(save_dir) + compress_config = BaseCompressionConfig.from_pretrained( + save_dir) logger.info(f"compress config: {compress_config}") if model_basename is None: @@ -778,76 +797,114 @@ def skip(*args, **kwargs): init_contexts = [no_init_weights()] # if low_cpu_mem_usage: # init_contexts.append(accelerate.init_empty_weights(include_buffers=False)) - - if isinstance( - compress_config, AutoCompressionConfig - ) or compress_config.bits in [2, 3, 4, 8]: - with ContextManagers(init_contexts): + if compress_config.lossless != "none": + if isinstance( + compress_config, AutoCompressionConfig + ) or compress_config.bits in [2, 3, 4, 8]: + with ContextManagers(init_contexts): + model = AutoModelForCausalLM.from_config( + config, + trust_remote_code=trust_remote_code, + torch_dtype=torch.float16, + ) + layers = find_layers(model) + ignore_layers = [cls.lm_head_name] + \ + cls.outside_layer_modules + for name in list(layers.keys()): + if any( + [ + name.startswith(ignore_layer) + for ignore_layer in ignore_layers + ] + ): + logger.info( + f"{name} not been quantized, will be ignored when make_quant." + ) + del layers[name] + make_quant( + model, + layers, + bits=compress_config.bits + if isinstance(compress_config, BaseCompressionConfig) + else compress_config.final_bit, + use_triton=use_triton, + use_cuda_fp16=use_cuda_fp16, + desc_act=compress_config.desc_act, + use_exllama=use_exllama, + ) + model.tie_weights() + if device is None and not device_map and not max_memory: + device_map = "auto" + if device is not None: + device = torch.device(device) + else: model = AutoModelForCausalLM.from_config( config, trust_remote_code=trust_remote_code, torch_dtype=torch.float16, ) - layers = find_layers(model) - ignore_layers = [cls.lm_head_name] + cls.outside_layer_modules - for name in list(layers.keys()): - if any( - [ - name.startswith(ignore_layer) - for ignore_layer in ignore_layers - ] - ): - logger.info( - f"{name} not been quantized, will be ignored when make_quant." - ) - del layers[name] - make_quant( - model, - layers, - bits=compress_config.bits - if isinstance(compress_config, BaseCompressionConfig) - else compress_config.final_bit, - use_triton=use_triton, - use_cuda_fp16=use_cuda_fp16, - desc_act=compress_config.desc_act, - use_exllama=use_exllama, - ) - model.tie_weights() - if device is None and not device_map and not max_memory: - device_map = "auto" - if device is not None: - device = torch.device(device) + # now load compressed data + losslesscompressor = LosslessCompressor( + compress_config.lossless, device_id=0) + metadata = None + tensors = {} + + with safe_open(model_save_name, framework="numpy") as f: + metadata = f.metadata() + keys = f.keys() + for key in keys: + tensors[key] = f.get_tensor(key) + tensor_dtypes = json.loads(metadata["dtype"]) + tensor_shapes = json.loads(metadata["shape"]) + # (todo: xiaozhe), (todo: minor) + # seems like we cannot use arbitrary device to decompress + # for now use device=0 to decompress and then move to target device + with cp.cuda.Device(0): + for key in tensors.keys(): + tensors[key] = cp.array(tensors[key], copy=False) + tensors = losslesscompressor.decompress_state_dict( + tensors, + tensor_shapes, + tensor_dtypes, + use_bfloat16=use_bfloat16, + target_device=device, + ) else: model = AutoModelForCausalLM.from_config( config, trust_remote_code=trust_remote_code, torch_dtype=torch.float16, ) - # now load compressed data - losslesscompressor = LosslessCompressor(compress_config.lossless, device_id=0) - metadata = None - tensors = {} - - with safe_open(model_save_name, framework="numpy") as f: - metadata = f.metadata() - keys = f.keys() - for key in keys: - tensors[key] = f.get_tensor(key) - tensor_dtypes = json.loads(metadata["dtype"]) - tensor_shapes = json.loads(metadata["shape"]) - # (todo: xiaozhe), (todo: minor) - # seems like we cannot use arbitrary device to decompress - # for now use device=0 to decompress and then move to target device - with cp.cuda.Device(0): - for key in tensors.keys(): - tensors[key] = cp.array(tensors[key], copy=False) - tensors = losslesscompressor.decompress_state_dict( - tensors, - tensor_shapes, - tensor_dtypes, - use_bfloat16=use_bfloat16, - target_device=device, - ) + layers = find_layers(model) + ignore_layers = [cls.lm_head_name] + cls.outside_layer_modules + for name in list(layers.keys()): + if any( + [ + name.startswith(ignore_layer) + for ignore_layer in ignore_layers + ] + ): + logger.info( + f"{name} not been quantized, will be ignored when make_quant." + ) + del layers[name] + make_quant( + model, + layers, + bits=compress_config.bits + if isinstance(compress_config, BaseCompressionConfig) + else compress_config.final_bit, + use_triton=use_triton, + use_cuda_fp16=use_cuda_fp16, + desc_act=compress_config.desc_act, + use_exllama=use_exllama, + ) + tensors = {} + with safe_open(model_save_name, framework="pt") as f: + metadata = f.metadata() + keys = f.keys() + for key in keys: + tensors[key] = f.get_tensor(key) # move tensors to target device # print model keys missing_keys, unexpected_keys = model.load_state_dict( @@ -858,20 +915,29 @@ def skip(*args, **kwargs): if unexpected_keys: logger.debug(f"unexpected keys: {unexpected_keys}") model = model.to(device) - model = deltazip_post_init(model, use_act_order=compress_config.desc_act) + model = deltazip_post_init( + model, use_act_order=compress_config.desc_act) model.eval() - if isinstance( - compress_config, AutoCompressionConfig - ) or compress_config.bits in [2, 3, 4, 8]: - del tensor_dtypes - del tensor_shapes - del tensors - del layers - if unpack and ( - isinstance(compress_config, AutoCompressionConfig) - or compress_config.bits in [2, 3, 4, 8] - ): - unpack_model(model) + if compress_config.lossless != "none": + if isinstance( + compress_config, AutoCompressionConfig + ) or compress_config.bits in [2, 3, 4, 8]: + del tensor_dtypes + del tensor_shapes + del tensors + del layers + if unpack and ( + isinstance(compress_config, AutoCompressionConfig) + or compress_config.bits in [2, 3, 4, 8] + ): + unpack_model(model) + del losslesscompressor + else: + if unpack and ( + isinstance(compress_config, AutoCompressionConfig) + or compress_config.bits in [2, 3, 4, 8] + ): + unpack_model(model) # print keys in the model # set seqlen model_config = model.config.to_dict() @@ -886,12 +952,11 @@ def skip(*args, **kwargs): "can't get model's sequence length from model config, will set to 2048." ) model.seqlen = 2048 + global triton_has_warmup if not triton_has_warmup and use_triton: QuantLinear.warmup(model, seqlen=model.seqlen) triton_has_warmup = True - - del losslesscompressor torch.cuda.empty_cache() return cls(model, True, compress_config) diff --git a/scripts/compresses/compress_13b.sh b/scripts/compresses/compress_13b.sh index ffd4c09..50235ed 100644 --- a/scripts/compresses/compress_13b.sh +++ b/scripts/compresses/compress_13b.sh @@ -1 +1,3 @@ -python cli/compress.py --base-model meta-llama/Llama-2-70b-hf --target-model meta-llama/Llama-2-70b-chat-hf --outdir .local/compressed_models/meta-llama.Llama-2-70b-chat-hf.2b50s128g --dataset .local/datasets/meta.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 \ No newline at end of file +python cli/compress.py --base-model meta-llama/Llama-2-70b-hf --target-model meta-llama/Llama-2-70b-chat-hf --outdir .local/compressed_models/meta-llama.Llama-2-70b-chat-hf.2b50s128g --dataset .local/datasets/meta.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 + +python cli/compress.py --base-model meta-llama/Llama-2-70b-hf --target-model meta-llama/Llama-2-70b-chat-hf --outdir .local/compressed_models/meta-llama.Llama-2-70b-chat-hf.2b50s128g --dataset .local/datasets/meta.jsonl --n-samples 256 --bits 2 --sparsity 0.5 --lossless gdeflate --delta subtract --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 --large-model \ No newline at end of file diff --git a/scripts/compresses/compress_baseline.sh b/scripts/compresses/compress_baseline.sh new file mode 100644 index 0000000..3e528fa --- /dev/null +++ b/scripts/compresses/compress_baseline.sh @@ -0,0 +1,11 @@ +export TS_VISIBLE_DEVICES=2,3 +ts -S 2 +ts -G 1 python cli/compress.py --base-model meta-llama/Llama-2-7b-hf --target-model lmsys/vicuna-7b-v1.5 --outdir .local/compressed_models/sparsegpt.lmsys.vicuna-7b-v1.5.4b75s128g --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 4 --sparsity 0.75 --lossless gdeflate --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 + +python cli/compress.py --base-model meta-llama/Llama-2-13b-hf --target-model lmsys/vicuna-13b-v1.5 --outdir .local/compressed_models/sparsegpt.lmsys.vicuna-13b-v1.5.4b75s128g --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 4 --sparsity 0.75 --lossless gdeflate --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 + +python cli/compress.py --base-model meta-llama/Llama-2-7b-hf --target-model lmsys/vicuna-7b-v1.5 --outdir .local/compressed_models/lmsys.vicuna-7b-v1.5.4b75s128g.sym --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 2 --sparsity 0.75 --lossless gdeflate --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 --delta subtract --sym + +python cli/compress.py --base-model meta-llama/Llama-2-7b-hf --target-model lmsys/vicuna-7b-v1.5 --outdir .local/compressed_models/lmsys.vicuna-7b-v1.5.4b75s128gn2m4.sym. --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 4 --sparsity 0.75 --lossless gdeflate --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 --delta subtract --sym --prunen 2 --prunem 4 + +python cli/compress.py --base-model meta-llama/Llama-2-13b-hf --target-model lmsys/vicuna-13b-v1.5 --outdir .local/compressed_models/lmsys.vicuna-13b-v1.5.4bn2m4.sym. --dataset .local/datasets/lmsys.jsonl --n-samples 256 --bits 4 --sparsity 0.75 --lossless gdeflate --shuffle-dataset --fast-tokenizer --perc-damp 0.01 --block-size 128 --delta subtract --sym --prunen 2 --prunem 4 \ No newline at end of file diff --git a/scripts/utils/merge.sh b/scripts/utils/merge.sh index fdf4205..54d31ce 100644 --- a/scripts/utils/merge.sh +++ b/scripts/utils/merge.sh @@ -18,4 +18,11 @@ python cli/merge.py --base-model meta-llama/Llama-2-7b-hf --target-model .local/ python cli/merge.py --base-model meta-llama/Llama-2-7b-hf --target-model .local/compressed_models/meta-llama.Llama-2-7b-chat-hf.4b75s128g --output-dir .local/merged_models/meta-llama.Llama-2-7b-chat-hf.4b75s128g -python cli/merge.py --base-model meta-llama/Llama-2-7b-hf --target-model .local/compressed_models/meta-llama.Llama-2-7b-chat-hf.4b90s128g --output-dir .local/merged_models/meta-llama.Llama-2-7b-chat-hf.4b90s128g \ No newline at end of file +python cli/merge.py --base-model meta-llama/Llama-2-7b-hf --target-model .local/compressed_models/meta-llama.Llama-2-7b-chat-hf.4b90s128g --output-dir .local/merged_models/meta-llama.Llama-2-7b-chat-hf.4b90s128g + + +### baselines + +python cli/merge.py --base-model meta-llama/Llama-2-7b-hf --target-model .local/compressed_models/sparsegpt.lmsys.vicuna-7b-v1.5.4b75s128g --output-dir .local/merged_models/sparsegpt.lmsys.vicuna-7b-v1.5.4b75s128g + +python cli/merge.py --base-model meta-llama/Llama-2-13b-hf --target-model .local/compressed_models/sparsegpt.lmsys.vicuna-13b-v1.5.4b75s128g --output-dir .local/merged_models/sparsegpt.lmsys.vicuna-13b-v1.5.4b75s128g \ No newline at end of file From 9fd712a449f920f26427cfe153417958ec2fe189 Mon Sep 17 00:00:00 2001 From: Xiaozhe Yao Date: Mon, 3 Jun 2024 13:27:53 +0200 Subject: [PATCH 14/14] minor --- cli/compress.py | 2 ++ cli/merge.py | 2 +- deltazip/core/sparsegpt.py | 2 +- deltazip/modeling/_base.py | 2 ++ deltazip/modeling/_utils.py | 3 ++- 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cli/compress.py b/cli/compress.py index 4b2bf34..ba668c3 100644 --- a/cli/compress.py +++ b/cli/compress.py @@ -29,6 +29,7 @@ def main(args): prunem=args.prunem, lossless=args.lossless, damp_percent=args.perc_damp, + desc_act=args.desc_act, sym=args.sym, ) print("[info] compress config:", compress_config) @@ -175,6 +176,7 @@ def main(args): ) parser.add_argument("--delta", type=str, choices=["subtract", "xor"], default="") parser.add_argument("--sym", action="store_true") + parser.add_argument("--desc-act", action="store_true") parser.add_argument("--large-model", action="store_true") parser.add_argument("--perc-damp", type=float, default=0.01) parser.add_argument("--outdir", type=str, default=".cache/compressed_models") diff --git a/cli/merge.py b/cli/merge.py index a6cb133..a9895cb 100644 --- a/cli/merge.py +++ b/cli/merge.py @@ -39,7 +39,7 @@ def merge(args): if args.delta == "subtract": if any([kw in name for kw in ignore_keywords]): #delta_model.model.state_dict()[name].copy_(param) - pass + print(f"Ignoring {name}") else: delta_model.model.state_dict()[name].copy_( param + delta_model.model.state_dict()[name] diff --git a/deltazip/core/sparsegpt.py b/deltazip/core/sparsegpt.py index a346378..08feb96 100644 --- a/deltazip/core/sparsegpt.py +++ b/deltazip/core/sparsegpt.py @@ -77,6 +77,7 @@ def fasterprune( perm = torch.argsort(torch.diag(H), descending=True) W = W[:, perm] H = H[perm][:, perm] + invperm = torch.argsort(perm) Losses = torch.zeros(self.rows, device=self.dev) damp = percdamp * torch.mean(torch.diag(H)) diag = torch.arange(self.columns, device=self.dev) @@ -162,7 +163,6 @@ def fasterprune( g_idx = [i // self.columns for i in range(self.columns)] g_idx = torch.tensor(g_idx, dtype=torch.int32, device=W.device) if actorder: - invperm = torch.argsort(perm) Q = Q[:, invperm] g_idx = g_idx[invperm] W = W.reshape(self.layer.weight.shape).to(self.layer.weight.data.dtype) diff --git a/deltazip/modeling/_base.py b/deltazip/modeling/_base.py index 5bb4829..92a012c 100644 --- a/deltazip/modeling/_base.py +++ b/deltazip/modeling/_base.py @@ -263,6 +263,7 @@ def lossy_compress( base_model=None, ): assert self.compressed == False, "Model is already compressed." + logger.info(f"Compression Config: {self.compress_config}") device_map = self.hf_device_map if device_map: for name, device in device_map.items(): @@ -449,6 +450,7 @@ def tmp(_, inp, out): prunem=self.compress_config.prunem, percdamp=self.compress_config.damp_percent, blocksize=self.compress_config.block_size, + actorder=self.compress_config.desc_act, base_weight=base_weight if base_model is not None else None, ) if self.compress_config.bits < 16: diff --git a/deltazip/modeling/_utils.py b/deltazip/modeling/_utils.py index 91e7e0c..d18e3e6 100644 --- a/deltazip/modeling/_utils.py +++ b/deltazip/modeling/_utils.py @@ -112,7 +112,8 @@ def deltazip_post_init( device = submodule.qweight.device scratch_fixed = submodule.scratch_space_fixed() fixed_bytes[device] = max(scratch_fixed, fixed_bytes.get(device, 0)) - print(f"fixed bytes: {fixed_bytes}") + + if model_uses_exllamav2: from deltazip.nn_modules.exllama_utils import ExLlamaV2DeviceTensors