[ENH] add fixed gpt4 version annotator #127

Merged: 2 commits, Aug 23, 2023
6 changes: 2 additions & 4 deletions src/alpaca_eval/decoders/__init__.py
@@ -71,18 +71,16 @@ def get_fn_completions(name: Union[str, Callable]) -> Callable:
        from .jinachat import jina_chat_completions

        return jina_chat_completions

    elif name == "vllm_local_completions":
        try:
            from .vllm_local import vllm_local_completions

            return vllm_local_completions
        except ImportError as e:
            packages = ["vllm", "ray", "transformers"]
            logging.exception(f"You need {packages} to use vllm_completions. Error:")
            raise e




    else:
        raise ValueError(f"Unknown decoder: {name}")
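For reference, a minimal sketch of how this dispatcher is typically exercised (not part of the PR); the decoder name comes from the branch above, while the model name is only an example and the call assumes vllm and its dependencies are installed:

from alpaca_eval.decoders import get_fn_completions

# Resolve a decoder by name; unknown names raise ValueError, as in the diff above.
fn_completions = get_fn_completions("vllm_local_completions")

# The returned callable follows the usual completions interface of this module.
outputs = fn_completions(
    prompts=["Say hello."],
    model_name="OpenBuddy/openbuddy-openllama-3b-v10-bf16",  # example model from the integration test below
    max_new_tokens=16,
    model_kwargs={"tp": 1},
)
print(outputs["completions"])
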
33 changes: 13 additions & 20 deletions src/alpaca_eval/decoders/vllm_local.py
@@ -1,28 +1,24 @@
import logging
from typing import Optional, Sequence
from typing import Sequence

import numpy as np
import torch
from vllm import LLM, SamplingParams


from .. import constants, utils
from .. import utils

__all__ = ["vllm_local_completions"]

llm = None
llmModelName = None


def vllm_local_completions(
    prompts: Sequence[str],
    model_name: str,
    max_new_tokens: int,
    do_sample: bool = False,
    batch_size: int = 1,
    model_kwargs=None,
    cache_dir: Optional[str] = constants.DEFAULT_CACHE_DIR,
    is_fast_tokenizer: bool = True,
    adapters_name: Optional[str] = None,
    **kwargs,
) -> dict[str, list]:
"""Decode locally using vllm transformers pipeline.
Expand All @@ -44,16 +40,13 @@ def vllm_local_completions(
    model_kwargs : dict, optional
        Additional kwargs to pass to from_pretrained.

    cache_dir : str, optional
        Directory to use for caching the model.

    kwargs :
        Additional kwargs to pass to `InferenceApi.__call__`.
    """
    global llm, llmModelName
    tp = 1
    if 'tp' in model_kwargs:
        tp = model_kwargs['tp']
    if "tp" in model_kwargs:
        tp = model_kwargs["tp"]
    if llm is None:
        logging.info("vllm: loading model: %s, tp=%d", model_name, tp)
        llm = LLM(model=model_name, tokenizer=model_name, tensor_parallel_size=tp)
Expand All @@ -62,21 +55,21 @@ def vllm_local_completions(
assert False, "vllm_local_completions can only be used with a single model"

    sampling_params = SamplingParams(max_tokens=max_new_tokens)
    if 'temperature' in kwargs:
        sampling_params.temperature = kwargs['temperature']
    if 'top_p' in kwargs:
        sampling_params.top_p = kwargs['top_p']
    if 'top_k' in kwargs:
        sampling_params.top_k = kwargs['top_k']
    if "temperature" in kwargs:
        sampling_params.temperature = kwargs["temperature"]
    if "top_p" in kwargs:
        sampling_params.top_p = kwargs["top_p"]
    if "top_k" in kwargs:
        sampling_params.top_k = kwargs["top_k"]
    if do_sample:
        sampling_params.use_beam_search = True
    completions = []
    with utils.Timer() as t:
        for i in range(0, len(prompts), batch_size):
            batch = prompts[i:i + batch_size]
            batch = prompts[i : i + batch_size]
            outputs = llm.generate(batch, sampling_params)
            for j in range(0, len(batch)):
                completions.append(outputs[j].outputs[0].text)
    price = [np.nan] * len(completions)
    avg_time = [t.duration / len(prompts)] * len(completions)
    return dict(completions=completions, price_per_example=price, time_per_example=avg_time)
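A hedged usage sketch of the function as it stands after this change (not part of the PR): sampling kwargs such as temperature, top_p, and top_k are copied onto vllm's SamplingParams, and model_kwargs={"tp": N} sets tensor parallelism; the checkpoint name is only an example and running this requires a GPU with vllm installed:

from alpaca_eval.decoders.vllm_local import vllm_local_completions

out = vllm_local_completions(
    prompts=["Explain tensor parallelism in one sentence."],
    model_name="OpenBuddy/openbuddy-openllama-3b-v10-bf16",  # example checkpoint, as in the integration test
    max_new_tokens=64,
    model_kwargs={"tp": 1},  # forwarded to LLM(tensor_parallel_size=...)
    temperature=0.7,         # copied onto SamplingParams
    top_p=0.9,
)
print(out["completions"][0], out["time_per_example"][0])
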
@@ -0,0 +1,9 @@
alpaca_eval_gpt4_0314:
  prompt_template: "alpaca_eval_gpt4/alpaca_eval.txt"
  fn_completions: "openai_completions"
  completions_kwargs:
    model_name: "gpt-4-0314"
    max_tokens: 100
    temperature: 0
  fn_completion_parser: "ranking_parser"
  batch_size: 1
@@ -0,0 +1,9 @@
alpaca_eval_gpt4_0613:
  prompt_template: "alpaca_eval_gpt4/alpaca_eval.txt"
  fn_completions: "openai_completions"
  completions_kwargs:
    model_name: "gpt-4-0613"
    max_tokens: 100
    temperature: 0
  fn_completion_parser: "ranking_parser"
  batch_size: 1
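A rough sketch of how the two new pinned annotators (defined in the configuration files above; their paths are not shown in this view) could be selected. This assumes the library's evaluate entry point and its annotators_config argument; exact import path and argument names may differ, and the output path is hypothetical:

from alpaca_eval import evaluate

# Pin the judge to a fixed GPT-4 snapshot instead of the floating "gpt-4" alias.
evaluate(
    model_outputs="example/outputs.json",       # hypothetical path to model outputs
    annotators_config="alpaca_eval_gpt4_0314",  # or "alpaca_eval_gpt4_0613"
)
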
@@ -2,7 +2,7 @@ alpaca_eval_gpt4_fn:
  prompt_template: "alpaca_eval_gpt4_fn/alpaca_eval_fn.txt"
  fn_completions: "openai_completions"
  completions_kwargs:
    model_name: "gpt-4" # TODO: need to update to gpt-4 on the 26th of june
    model_name: "gpt-4"
    max_tokens: 100
    temperature: 0
    function_call:
7 changes: 5 additions & 2 deletions tests/integration_tests/test_decoders_integration.py
@@ -66,6 +66,9 @@ def test_huggingface_local_completions_integration():
@pytest.mark.slow
def test_vllm_local_completions_integration():
    from alpaca_eval.decoders.vllm_local import vllm_local_completions

    prompts = _get_formatted_prompts("text_davinci_003")  # nor formatting
    results = vllm_local_completions(prompts, model_name="OpenBuddy/openbuddy-openllama-3b-v10-bf16", max_new_tokens=100)
    assert len(results["completions"]) == len(prompts)
    results = vllm_local_completions(
        prompts, model_name="OpenBuddy/openbuddy-openllama-3b-v10-bf16", max_new_tokens=100
    )
    assert len(results["completions"]) == len(prompts)