From ea31bab93410788dfb7f8fc3dcb34dd8bd85145d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Pablo=20Mu=C3=B1oz?= Date: Thu, 12 Dec 2024 13:16:11 -0800 Subject: [PATCH] Add MultiPruner results and improvements to install and readme Co-authored-by: Yuan, Jinjie --- MultiPruner/README.md | 60 +++++++------ MultiPruner/install.sh | 21 +++-- MultiPruner/requirements.txt | 9 ++ .../results/Llama-2-7B/ratio_10/eval.res.json | 12 +++ .../Llama-2-7B/ratio_10/pruning_config.json | 78 +++++++++++++++++ .../results/Llama-2-7B/ratio_12/eval.res.json | 12 +++ .../Llama-2-7B/ratio_12/pruning_config.json | 79 +++++++++++++++++ .../results/Llama-2-7B/ratio_14/eval.res.json | 12 +++ .../Llama-2-7B/ratio_14/pruning_config.json | 80 ++++++++++++++++++ .../results/Llama-2-7B/ratio_15/eval.res.json | 12 +++ .../Llama-2-7B/ratio_15/pruning_config.json | 80 ++++++++++++++++++ .../results/Llama-2-7B/ratio_18/eval.res.json | 12 +++ .../Llama-2-7B/ratio_18/pruning_config.json | 81 ++++++++++++++++++ .../results/Llama-2-7B/ratio_22/eval.res.json | 12 +++ .../Llama-2-7B/ratio_22/pruning_config.json | 84 +++++++++++++++++++ .../results/Llama-2-7B/ratio_7/eval.res.json | 12 +++ .../Llama-2-7B/ratio_7/pruning_config.json | 77 +++++++++++++++++ MultiPruner/results/Qwen1.5-7B/eval.res.json | 12 +++ .../results/Qwen1.5-7B/pruning_config.json | 84 +++++++++++++++++++ 19 files changed, 798 insertions(+), 31 deletions(-) create mode 100644 MultiPruner/requirements.txt create mode 100644 MultiPruner/results/Llama-2-7B/ratio_10/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_10/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_12/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_12/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_14/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_14/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_15/eval.res.json create mode 
100644 MultiPruner/results/Llama-2-7B/ratio_15/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_18/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_18/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_22/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_22/pruning_config.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_7/eval.res.json create mode 100644 MultiPruner/results/Llama-2-7B/ratio_7/pruning_config.json create mode 100644 MultiPruner/results/Qwen1.5-7B/eval.res.json create mode 100644 MultiPruner/results/Qwen1.5-7B/pruning_config.json diff --git a/MultiPruner/README.md b/MultiPruner/README.md index e286e94..6323968 100644 --- a/MultiPruner/README.md +++ b/MultiPruner/README.md @@ -3,22 +3,22 @@ Official implementation of [Fine-Grained Training-Free Structure Removal in Foundation Models](). This repo contains the code for **MultiPruner**, a novel pruning approach that surpasses recent training-free pruning -methods by adopting a multidimensional, iterative, fine-grained pruning strategy. +methods, e.g., BlockPruner (Zhong et al., 2024) and ShortGPT (Men et al., 2024), by adopting a multidimensional, iterative, fine-grained pruning strategy. Please refer to our paper for more details. ## News -- **[2025.xx.xx]** Release the code for **MultiPruner**. :tada: +- **[2024.12.14]** Release the code for **MultiPruner**. :tada: + +## Supported Models + +- Llama: [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) +- Qwen: [Qwen/Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B), [Qwen/Qwen1.5-7B](https://huggingface.co/Qwen/Qwen1.5-7B) ## Setup -Here is an installation script developed from scratch. +Use the following instructions to create a virtual environment with the required dependencies. 
``` -pip install virtualenv -virtualenv multipruner-env -source multipruner-env/bin/activate -pip install torch==2.3.1 - # install dependencies bash install.sh ``` @@ -115,25 +115,22 @@ This investigation may facilitate practical applications. The results of Llama-2 | MultiPruner w/ finetune | 18% | 66.16 | -2.80% | 95.94% | -## Released Pruned Models 🤗 +## Released Pruned Models and Configurations 🤗 -We have released several compressed models by MultiPruner: +We have released several compressed models or pruning configurations to reproduce the results in the paper: -| Source Model | Pruning Ratio | Recovery Tuning | Pruned Model | -|-----------------------------------------------------------------------------------------|---------------|-----------------|---------------------------------------------------------------------------------------------------------------| -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 7% | ✘ | [IntelLabs/MultiPruner-Llama-2-6.3b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-6.3b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 10% | ✘ | [IntelLabs/MultiPruner-Llama-2-6.1b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-6.1b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 12% | ✘ | [IntelLabs/MultiPruner-Llama-2-5.9b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.9b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 12% | ✔ | [IntelLabs/MultiPruner-Llama-2-5.9b-alpaca](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.9b-alpaca) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 14% | ✘ | [IntelLabs/MultiPruner-Llama-2-5.8b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.8b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 15% | ✘ | 
[IntelLabs/MultiPruner-Llama-2-5.7b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.7b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 15% | ✔ | [IntelLabs/MultiPruner-Llama-2-5.7b-alpaca](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.7b-alpaca) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 18% | ✘ | [IntelLabs/MultiPruner-Llama-2-5.5b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.5b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 18% | ✔ | [IntelLabs/MultiPruner-Llama-2-5.5b-alpaca](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.5b-alpaca) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 22% | ✘ | [IntelLabs/MultiPruner-Llama-2-5.3b](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.3b) | -| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 22% | ✔ | [IntelLabs/MultiPruner-Llama-2-5.3b-alpaca](https://huggingface.co/IntelLabs/MultiPruner-Llama-2-5.3b-alpaca) | -| [Qwen/Qwen1.5-7B](https://huggingface.co/Qwen/Qwen1.5-7B) | 22% | ✘ | [IntelLabs/MultiPruner-Qwen1.5-6b](https://huggingface.co/IntelLabs/MultiPruner-Qwen1.5-6b) | -| [baichuan-inc/Baichuan2-7B-Base](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base) | 22% | ✘ | [IntelLabs/MultiPruner-Baichuan2-5.8b](https://huggingface.co/IntelLabs/MultiPruner-Baichuan2-5.8b) | +| Source Model | Pruning Ratio | Pruned Model Configuration / HF link | +|-----------------------------------------------------------------------------------------|-----------------|-----------------------------------------------------------------------------------------------------| +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 7% | [MultiPruner-Llama-2-6.3b Config File](./results/Llama-2-7B/ratio_7) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 10% | 
[MultiPruner-Llama-2-6.1b Config File](./results/Llama-2-7B/ratio_10) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 12% | [MultiPruner-Llama-2-5.9b Config File](./results/Llama-2-7B/ratio_12) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 14% | [MultiPruner-Llama-2-5.8b Config File](./results/Llama-2-7B/ratio_14) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 15% | [MultiPruner-Llama-2-5.7b Config File](./results/Llama-2-7B/ratio_15) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 18% | [MultiPruner-Llama-2-5.5b Config File](./results/Llama-2-7B/ratio_18) | +| [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 22% | [MultiPruner-Llama-2-5.3b Config File](./results/Llama-2-7B/ratio_22) | +| [Qwen/Qwen1.5-7B](https://huggingface.co/Qwen/Qwen1.5-7B) | 22% | [IntelLabs/MultiPruner-Qwen1.5-6b](https://huggingface.co/IntelLabs/MultiPruner-Qwen1.5-6b) | +| [baichuan-inc/Baichuan2-7B-Base](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base) | 22% | [IntelLabs/MultiPruner-Baichuan2-5.8b](https://huggingface.co/IntelLabs/MultiPruner-Baichuan2-5.8b) | +* *For Llama models, we provide the pruning configuration files to reproduce the results in the paper.* ### Loading the compressed model for evaluation @@ -141,6 +138,19 @@ We have released several compressed models by MultiPruner: python eval.py --model_path --output_path ``` +## Acknowledgement + +MultiPruner benefits from the following work: + +```bibtex +@article{zhong2024blockpruner, + title={BlockPruner: Fine-grained Pruning for Large Language Models}, + author={Zhong, Longguang and Wan, Fanqi and Chen, Ruijun and Quan, Xiaojun and Li, Liangzhi}, + journal={arXiv preprint arXiv:2406.10594}, + year={2024} +} +``` + ## Citation If you find MultiPruner's code and paper helpful, please kindly cite: ```bibtex diff --git a/MultiPruner/install.sh 
b/MultiPruner/install.sh index e0c764e..cf2a93e 100644 --- a/MultiPruner/install.sh +++ b/MultiPruner/install.sh @@ -3,13 +3,22 @@ set -e set -x MULTIPRUNER_PATH=$PWD -mkdir third_party && cd third_party -pip install 'numpy<2.0.0' setuptools==69.5.1 +python3.10 -m venv venv +source venv/bin/activate + +mkdir -pv third_party +pushd third_party -mkdir third_party && cd third_party git clone https://github.com/huggingface/transformers.git -cd transformers && git checkout v4.42.4 && git apply --ignore-space-change --ignore-whitespace ${MULTIPRUNER_PATH}/patches/transformers-v4.42.4.patch && pip install -e . && cd .. +pushd transformers +git checkout v4.42.4 +git apply --ignore-space-change --ignore-whitespace ${MULTIPRUNER_PATH}/patches/transformers-v4.42.4.patch +pip install -e . + +pushd ${MULTIPRUNER_PATH} + +pip install -r requirements.txt + +echo "Environment all ready. execute 'source venv/bin/activate' to run" -pip install datasets accelerate sentencepiece protobuf bitsandbytes -pip install lm-eval==0.4.2 diff --git a/MultiPruner/requirements.txt b/MultiPruner/requirements.txt new file mode 100644 index 0000000..4f86cfa --- /dev/null +++ b/MultiPruner/requirements.txt @@ -0,0 +1,9 @@ +numpy<2.0.0 +setuptools==69.5.1 +datasets +accelerate +sentencepiece +protobuf +bitsandbytes +lm-eval==0.4.2 +torch==2.3.1 diff --git a/MultiPruner/results/Llama-2-7B/ratio_10/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_10/eval.res.json new file mode 100644 index 0000000..1bb5926 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_10/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 6063132672, + "ratio": 10.02139052385812, + "ppl_wikitext2": 6.55, + "5cs_acc_avg": 67.02, + "arc_challenge": 44.45, + "arc_easy": 71.0, + "hellaswag": 74.07000000000001, + "winogrande": 68.19, + "piqa": 77.37 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_10/pruning_config.json 
b/MultiPruner/results/Llama-2-7B/ratio_10/pruning_config.json new file mode 100644 index 0000000..90702ce --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_10/pruning_config.json @@ -0,0 +1,78 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23, + 24 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 3840, + "2": 3840, + "3": 4096, + "4": 4096, + "5": 3968, + "6": 4096, + "7": 4096, + "8": 3968, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 4096, + "17": 3968, + "18": 4096, + "19": 3968, + "20": 3968, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 3968, + "29": 4096, + "30": 3968, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 9984, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 9984, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 11008, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 11008, + "26": 11008, + "27": 1792, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_12/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_12/eval.res.json new file mode 100644 index 0000000..ce792e1 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_12/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 5931012096, + "ratio": 11.982097365482536, + "ppl_wikitext2": 7.1, + "5cs_acc_avg": 66.47999999999999, + "arc_challenge": 44.03, + "arc_easy": 69.82000000000001, + "hellaswag": 73.77, + "winogrande": 68.43, + "piqa": 76.33 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_12/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_12/pruning_config.json new file mode 100644 
index 0000000..73a0dce --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_12/pruning_config.json @@ -0,0 +1,79 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23, + 24, + 29 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 4096, + "2": 3840, + "3": 3968, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 4096, + "8": 3968, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 3968, + "17": 3968, + "18": 4096, + "19": 3968, + "20": 3968, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 3712, + "29": 4096, + "30": 3968, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 9984, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 11008, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 11008, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 1792, + "26": 11008, + "27": 4864, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_14/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_14/eval.res.json new file mode 100644 index 0000000..cf330ea --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_14/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 5796794368, + "ratio": 13.973926537926385, + "ppl_wikitext2": 7.56, + "5cs_acc_avg": 65.93, + "arc_challenge": 43.519999999999996, + "arc_easy": 68.64, + "hellaswag": 72.27, + "winogrande": 67.96, + "piqa": 77.25999999999999 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_14/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_14/pruning_config.json new file mode 100644 index 0000000..eddd06b --- /dev/null +++ 
b/MultiPruner/results/Llama-2-7B/ratio_14/pruning_config.json @@ -0,0 +1,80 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23, + 24, + 29, + 28 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 3712, + "2": 3840, + "3": 3840, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 4096, + "8": 3968, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 3968, + "17": 3712, + "18": 3968, + "19": 3968, + "20": 4096, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 4096, + "30": 4096, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 9984, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 1792, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 11008, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 1792, + "26": 11008, + "27": 8960, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_15/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_15/eval.res.json new file mode 100644 index 0000000..d30f522 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_15/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 5727588352, + "ratio": 15.000963454967753, + "ppl_wikitext2": 7.66, + "5cs_acc_avg": 65.25999999999999, + "arc_challenge": 42.24, + "arc_easy": 68.10000000000001, + "hellaswag": 71.82, + "winogrande": 67.4, + "piqa": 76.77000000000001 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_15/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_15/pruning_config.json new file mode 100644 index 0000000..88fa40f --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_15/pruning_config.json 
@@ -0,0 +1,80 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23, + 24, + 29, + 28 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 3712, + "2": 3456, + "3": 3840, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 4096, + "8": 3968, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 3968, + "17": 3712, + "18": 3968, + "19": 3968, + "20": 4096, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 4096, + "30": 4096, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 9984, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 1792, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 11008, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 1792, + "26": 11008, + "27": 3840, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_18/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_18/eval.res.json new file mode 100644 index 0000000..4326768 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_18/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 5524164608, + "ratio": 18.01982954445296, + "ppl_wikitext2": 8.62, + "5cs_acc_avg": 64.2, + "arc_challenge": 41.89, + "arc_easy": 65.11, + "hellaswag": 71.45, + "winogrande": 66.61, + "piqa": 75.94999999999999 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_18/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_18/pruning_config.json new file mode 100644 index 0000000..586b89b --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_18/pruning_config.json @@ -0,0 +1,81 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23, + 24, + 29, + 28, + 
18 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 3712, + "2": 3456, + "3": 3712, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 4096, + "8": 3968, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 3840, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 3968, + "17": 3712, + "18": 4096, + "19": 3840, + "20": 3968, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 4096, + "30": 3968, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 9984, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 768, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 3840, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 1792, + "26": 11008, + "27": 1792, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_22/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_22/eval.res.json new file mode 100644 index 0000000..80733f4 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_22/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 5258874880, + "ratio": 21.956804393111508, + "ppl_wikitext2": 9.33, + "5cs_acc_avg": 62.83, + "arc_challenge": 41.13, + "arc_easy": 64.77000000000001, + "hellaswag": 68.94, + "winogrande": 64.64, + "piqa": 74.65 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_22/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_22/pruning_config.json new file mode 100644 index 0000000..95f35ca --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_22/pruning_config.json @@ -0,0 +1,84 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 24, + 23, + 21, + 29, + 28, + 18, + 8 + ], + "pruned_mlp_idx": [ + 8 + ], + "pruned_attn_width": { + "0": 4096, 
+ "1": 3328, + "2": 3328, + "3": 4096, + "4": 4096, + "5": 3968, + "6": 4096, + "7": 4096, + "8": 4096, + "9": 4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 3968, + "17": 3584, + "18": 4096, + "19": 3840, + "20": 3968, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 4096, + "30": 3968, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 5888, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 6912, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 11008, + "12": 5888, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 3840, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 1792, + "24": 11008, + "25": 1792, + "26": 3840, + "27": 1792, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_7/eval.res.json b/MultiPruner/results/Llama-2-7B/ratio_7/eval.res.json new file mode 100644 index 0000000..5d92088 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_7/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 6738415616, + "pruned_params": 6268653568, + "ratio": 6.971402103553482, + "ppl_wikitext2": 6.33, + "5cs_acc_avg": 67.94, + "arc_challenge": 44.62, + "arc_easy": 73.44000000000001, + "hellaswag": 74.32, + "winogrande": 69.46, + "piqa": 77.86 +} \ No newline at end of file diff --git a/MultiPruner/results/Llama-2-7B/ratio_7/pruning_config.json b/MultiPruner/results/Llama-2-7B/ratio_7/pruning_config.json new file mode 100644 index 0000000..0818000 --- /dev/null +++ b/MultiPruner/results/Llama-2-7B/ratio_7/pruning_config.json @@ -0,0 +1,77 @@ +{ + "pruned_attn_idx": [ + 25, + 27, + 21, + 23 + ], + "pruned_mlp_idx": [], + "pruned_attn_width": { + "0": 4096, + "1": 4096, + "2": 3968, + "3": 3712, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 4096, + "8": 4096, + "9": 
4096, + "10": 4096, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 4096, + "16": 4096, + "17": 4096, + "18": 4096, + "19": 4096, + "20": 4096, + "21": 4096, + "22": 3968, + "23": 4096, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 4096, + "30": 3968, + "31": 4096 + }, + "pruned_mlp_width": { + "0": 11008, + "1": 11008, + "2": 8960, + "3": 11008, + "4": 11008, + "5": 11008, + "6": 11008, + "7": 11008, + "8": 11008, + "9": 11008, + "10": 11008, + "11": 11008, + "12": 11008, + "13": 11008, + "14": 11008, + "15": 11008, + "16": 11008, + "17": 11008, + "18": 11008, + "19": 11008, + "20": 11008, + "21": 11008, + "22": 11008, + "23": 6912, + "24": 11008, + "25": 11008, + "26": 11008, + "27": 1792, + "28": 11008, + "29": 11008, + "30": 11008, + "31": 11008 + } +} \ No newline at end of file diff --git a/MultiPruner/results/Qwen1.5-7B/eval.res.json b/MultiPruner/results/Qwen1.5-7B/eval.res.json new file mode 100644 index 0000000..3f19c90 --- /dev/null +++ b/MultiPruner/results/Qwen1.5-7B/eval.res.json @@ -0,0 +1,12 @@ +{ + "total_params": 7721324544, + "pruned_params": 6037311488, + "ratio": 21.809898630780832, + "ppl_wikitext2": 18.22, + "5cs_acc_avg": 57.379999999999995, + "arc_challenge": 35.92, + "arc_easy": 59.01, + "hellaswag": 60.6, + "winogrande": 59.589999999999996, + "piqa": 71.76 +} \ No newline at end of file diff --git a/MultiPruner/results/Qwen1.5-7B/pruning_config.json b/MultiPruner/results/Qwen1.5-7B/pruning_config.json new file mode 100644 index 0000000..24f9115 --- /dev/null +++ b/MultiPruner/results/Qwen1.5-7B/pruning_config.json @@ -0,0 +1,84 @@ +{ + "pruned_attn_idx": [ + 19, + 6, + 27, + 24, + 26, + 5, + 22, + 25 + ], + "pruned_mlp_idx": [ + 6, + 5 + ], + "pruned_attn_width": { + "0": 3968, + "1": 4096, + "2": 3968, + "3": 3968, + "4": 4096, + "5": 4096, + "6": 4096, + "7": 3968, + "8": 4096, + "9": 3712, + "10": 3968, + "11": 4096, + "12": 4096, + "13": 4096, + "14": 4096, + "15": 3968, + "16": 4096, + 
"17": 4096, + "18": 3584, + "19": 4096, + "20": 4096, + "21": 3968, + "22": 4096, + "23": 3968, + "24": 4096, + "25": 4096, + "26": 4096, + "27": 4096, + "28": 4096, + "29": 2560, + "30": 4096, + "31": 3968 + }, + "pruned_mlp_width": { + "32": 11008, + "33": 11008, + "34": 11008, + "35": 1792, + "36": 11008, + "37": 11008, + "38": 11008, + "39": 11008, + "40": 11008, + "41": 11008, + "42": 4864, + "43": 11008, + "44": 11008, + "45": 2816, + "46": 11008, + "47": 11008, + "48": 11008, + "49": 11008, + "50": 768, + "51": 11008, + "52": 11008, + "53": 1792, + "54": 11008, + "55": 11008, + "56": 11008, + "57": 7936, + "58": 768, + "59": 768, + "60": 11008, + "61": 11008, + "62": 11008, + "63": 11008 + } +} \ No newline at end of file