Add NEFTune models to AlpacaEval (#146)

Note: we only added the models where the baseline was already on the leaderboard: LLaMA-1 Alpaca and LLaMA-2-Chat 7B. Co-authored-by: neelsjain <[email protected]>
tatsu-lab · Oct 19, 2023 · a957814 · a957814
1 parent e060c6d
commit a957814
Show file tree

Hide file tree

Showing 9 changed files with 25,799 additions and 1 deletion.
diff --git a/results/alpaca-7b-neft/annotations.json b/results/alpaca-7b-neft/annotations.json
diff --git a/results/alpaca-7b-neft/model_outputs.json b/results/alpaca-7b-neft/model_outputs.json
diff --git a/results/llama-2-chat-7b-evol70k-neft/annotations.json b/results/llama-2-chat-7b-evol70k-neft/annotations.json
diff --git a/results/llama-2-chat-7b-evol70k-neft/model_outputs.json b/results/llama-2-chat-7b-evol70k-neft/model_outputs.json
diff --git a/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv b/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv
@@ -22,6 +22,7 @@ openchat-v2-13b,84.9689441,1.257297984,683,120,2,805,community,1564
 humpback-llama-65b,83.70646766,1.307103474,672,130,2,804,community,1269
 ultralm-13b-v2.0,83.60248447,1.305781745,673,132,0,805,community,1399
 vicuna-13b-v1.3,82.11180124,1.348769958,660,143,2,805,verified,1132
+llama-2-chat-7b-evol70k-neft,82.08955223880598,1.3531295937660437,660,144,0,804,community,1612
 platolm-7b,81.94271482,1.35256737,656,143,4,803,community,1344
 gpt35_turbo_instruct,81.71036205,1.330613333,642,134,25,801,community,1018
 openbuddy-llama-30b-v7.1,81.54613466,1.370658001,654,148,0,802,community,968
@@ -49,9 +50,10 @@ guanaco-33b,65.96273292,1.671085371,531,274,0,805,verified,1311
 nous-hermes-13b,65.46583851,1.669962276,524,275,6,805,verified,844
 vicuna-7b,64.40993789,1.685110726,517,285,3,805,verified,1044
 baize-v2-7b,63.85093168,1.694598186,514,291,0,805,community,1127
+alpaca-7b-neft,61.92259675405742,1.7167744779965055,496,305,0,801,community,1067
 oasst-sft-llama-33b,54.9689441,1.740266793,436,356,13,805,verified,748
 guanaco-13b,52.60869565,1.75766903,422,380,3,805,verified,1774
-text_davinci_003,50,0,0,0,805,805,minimal,307
+text_davinci_003,50.0,0.0,0,0,805,805,minimal,307
 chatglm2-6b,47.12858926,1.759314322,375,421,5,801,community,1027
 guanaco-7b,46.58385093,1.757046491,374,429,2,805,verified,1364
 falcon-40b-instruct,45.71428571,1.752471706,366,435,4,805,minimal,662

diff --git a/src/alpaca_eval/models_configs/alpaca-7b-neft/configs.yaml b/src/alpaca_eval/models_configs/alpaca-7b-neft/configs.yaml
@@ -0,0 +1,8 @@
+alpaca-7b-neft:
+  prompt_template: "alpaca-7b-neft/prompt.txt"
+  model_kwargs:
+    torch_dtype: 'bfloat16'
+  pretty_name: "Alpaca-7B-NEFT"
+  link: https://github.com/neelsjain/NEFTune
+  # Completions were precomputed following the GitHub repo linked above, in particular: https://github.com/neelsjain/NEFTune/blob/main/experiment_code/eval_generate.py
+  # Note: this is a LLaMA-1 base model
diff --git a/src/alpaca_eval/models_configs/alpaca-7b-neft/prompt.txt b/src/alpaca_eval/models_configs/alpaca-7b-neft/prompt.txt
@@ -0,0 +1,6 @@
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{instruction}
+
+### Response:
diff --git a/src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/configs.yaml b/src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/configs.yaml
@@ -0,0 +1,8 @@
+llama-2-chat-7b-evol70k-neft:
+  prompt_template: "llama-2-chat-7b-evol70k-neft/prompt.txt"
+  model_kwargs:
+    torch_dtype: 'bfloat16'
+  pretty_name: "LLaMA2 Chat 7B Evol70k-NEFT"
+  link: https://github.com/neelsjain/NEFTune
+  # Completions were precomputed following the GitHub repo linked above, in particular: https://github.com/neelsjain/NEFTune/blob/main/experiment_code/eval_generate.py
+  # Note: this is a LLaMA-2-chat base model
diff --git a/src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/prompt.txt b/src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/prompt.txt
@@ -0,0 +1,6 @@
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{instruction}
+
+### Response: