Skip to content

Commit

Permalink
fix: neft config
Browse files Browse the repository at this point in the history
  • Loading branch information
YannDubs committed Oct 19, 2023
1 parent a957814 commit 9d0eef9
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 13 deletions.
2 changes: 2 additions & 0 deletions docs/alpaca_eval_gpt4_leaderboard.csv
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ OpenChat V2 13B,84.9689441,1564,https://github.com/imoneoi/openchat,https://gith
Humpback LLaMa 65B,83.70646766,1269,https://arxiv.org/abs/2308.06259,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/humpback-llama-65b/model_outputs.json,community
UltraLM 13B V2.0,83.60248447,1399,https://github.com/thunlp/UltraChat,,community
Vicuna 13B v1.3,82.11180124,1132,https://huggingface.co/lmsys/vicuna-13b-v1.3,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/vicuna-13b-v1.3/model_outputs.json,verified
LLaMA2 Chat 7B Evol70k-NEFT,82.08955223880598,1612,https://github.com/neelsjain/NEFTune,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/llama-2-chat-7b-evol70k-neft/model_outputs.json,community
PlatoLM 7B,81.94271482,1344,https://huggingface.co/FreedomIntelligence/PlatoLM-7B,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/platolm-7b/model_outputs.json,community
GPT-3.5,81.71036205,1018,,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/gpt35_turbo_instruct/model_outputs.json,community
OpenBuddy-LLaMA-30B-v7.1,81.54613466,968,https://huggingface.co/OpenBuddy/openbuddy-llama-30b-v7.1-bf16,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/openbuddy-llama-30b-v7.1/model_outputs.json,community
Expand Down Expand Up @@ -49,6 +50,7 @@ Guanaco 33B,65.96273292,1311,https://huggingface.co/timdettmers/guanaco-33b,http
Nous Hermes 13B,65.46583851,844,https://huggingface.co/NousResearch/Nous-Hermes-13b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/nous-hermes-13b/model_outputs.json,verified
Vicuna 7B,64.40993789,1044,https://huggingface.co/lmsys/vicuna-7b-delta-v1.1,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/vicuna-7b/model_outputs.json,verified
Baize-v2 7B,63.85093168,1127,https://huggingface.co/project-baize/baize-v2-7b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/baize-v2-7b/model_outputs.json,community
Alpaca-7B-NEFT,61.92259675405742,1067,https://github.com/neelsjain/NEFTune,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/alpaca-7b-neft/model_outputs.json,community
LLaMA 33B OASST SFT,54.9689441,748,https://huggingface.co/OpenAssistant/oasst-sft-7-llama-30b-xor,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/oasst-sft-llama-33b/model_outputs.json,verified
Guanaco 13B,52.60869565,1774,https://huggingface.co/timdettmers/guanaco-13b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/guanaco-13b/model_outputs.json,verified
Davinci003,50.0,307,,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/text_davinci_003/model_outputs.json,minimal
Expand Down
2 changes: 1 addition & 1 deletion docs/claude_leaderboard.csv
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ Guanaco 65B,62.60869565217392,1249,https://huggingface.co/timdettmers/guanaco-65
Vicuna 7B v1.3,62.54658385093168,1110,https://huggingface.co/lmsys/vicuna-7b-v1.3,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/vicuna-7b-v1.3/model_outputs.json,verified
Nous Hermes 13B,60.86956521739131,844,https://huggingface.co/NousResearch/Nous-Hermes-13b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/nous-hermes-13b/model_outputs.json,verified
Guanaco 33B,57.88819875776397,1311,https://huggingface.co/timdettmers/guanaco-33b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/guanaco-33b/model_outputs.json,verified
Vicuna 7B,57.329192546583855,1044,https://huggingface.co/lmsys/vicuna-7b-delta-v1.1,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/vicuna-7b/model_outputs.json,verified
LLaMA 33B OASST RLHF,57.329192546583855,1079,https://huggingface.co/OpenAssistant/oasst-rlhf-2-llama-30b-7k-steps-xor,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/oasst-rlhf-llama-33b/model_outputs.json,minimal
Vicuna 7B,57.329192546583855,1044,https://huggingface.co/lmsys/vicuna-7b-delta-v1.1,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/vicuna-7b/model_outputs.json,verified
LLaMA2 Chat 13B,56.14906832298136,1513,https://ai.meta.com/llama/,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/llama-2-13b-chat-hf/model_outputs.json,minimal
Guanaco 13B,53.36239103362392,1774,https://huggingface.co/timdettmers/guanaco-13b,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/guanaco-13b/model_outputs.json,verified
LLaMA2 Chat 7B,51.98757763975155,1479,https://ai.meta.com/llama/,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/llama-2-7b-chat-hf/model_outputs.json,minimal
Expand Down
10 changes: 5 additions & 5 deletions src/alpaca_eval/decoders/cohere.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import copy
import functools
import logging
import math
import multiprocessing
import os
import random
from typing import Optional, Sequence, Tuple

Expand Down Expand Up @@ -50,7 +48,9 @@ def cohere_completions(

with utils.Timer() as t:
if num_procs == 1:
completions_and_token_counts = [_cohere_completion_helper(prompt, **kwargs) for prompt in tqdm.tqdm(prompts, desc="prompts")]
completions_and_token_counts = [
_cohere_completion_helper(prompt, **kwargs) for prompt in tqdm.tqdm(prompts, desc="prompts")
]
else:
with multiprocessing.Pool(num_procs) as p:
partial_completion_helper = functools.partial(_cohere_completion_helper, **kwargs)
Expand All @@ -63,7 +63,7 @@ def cohere_completions(
)
logging.info(f"Completed {n_examples} examples in {t}.")
completions, num_tokens = zip(*completions_and_token_counts)
price_per_token = 0.000015 # cohere charges $0.000015 per token.
price_per_token = 0.000015 # cohere charges $0.000015 per token.
price_per_example = [price_per_token * n for n in num_tokens]
avg_time = [t.duration / n_examples] * len(completions)

Expand All @@ -77,7 +77,7 @@ def _cohere_completion_helper(
temperature: Optional[float] = 0.7,
max_tries=5,
**kwargs,
) -> Tuple[str,int]:
) -> Tuple[str, int]:
cohere_api_key = random.choice(cohere_api_keys)
client = cohere.Client(cohere_api_key)

Expand Down
6 changes: 3 additions & 3 deletions src/alpaca_eval/models_configs/alpaca-7b-neft/configs.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
alpaca-7b:
alpaca-7b-neft:
prompt_template: "alpaca-7b/prompt.txt"
model_kwargs:
torch_dtype: 'bfloat16'
model_kwargs:
torch_dtype: 'bfloat16'
pretty_name: "Alpaca-7B-NEFT"
link: https://github.com/neelsjain/NEFTune
# Completions were precomputed as described in the linked GitHub repo, specifically: https://github.com/neelsjain/NEFTune/blob/main/experiment_code/eval_generate.py.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
alpaca-7b:
llama-2-chat-7b-evol70k-neft:
prompt_template: "llama-2-chat-7b-Evol70k-neft/prompt.txt"
model_kwargs:
torch_dtype: 'bfloat16'
model_kwargs:
torch_dtype: 'bfloat16'
pretty_name: "LLaMA2 Chat 7B Evol70k-NEFT"
link: https://github.com/neelsjain/NEFTune
# Completions were precomputed as described in the linked GitHub repo, specifically: https://github.com/neelsjain/NEFTune/blob/main/experiment_code/eval_generate.py.
Expand Down
1 change: 1 addition & 0 deletions src/alpaca_eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ def load_configs(configs: Union[AnyPath, dict], relative_to: Optional[AnyPath] =
configs = yaml.safe_load(stream)
except yaml.YAMLError as exc:
logging.exception(exc)
assert isinstance(configs, dict)

return configs

Expand Down
2 changes: 1 addition & 1 deletion tests/test_decoders_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_anthropic_completions(mocker):
def test_cohere_completions(mocker):
mocker.patch(
"alpaca_eval.decoders.cohere._cohere_completion_helper",
return_value=["Mocked completion text",42],
return_value=["Mocked completion text", 42],
)
result = cohere_completions(["Prompt 1", "Prompt 2"], num_procs=1)
_run_all_asserts_completions(result)
Expand Down

0 comments on commit 9d0eef9

Please sign in to comment.