Skip to content

Commit

Permalink
Merge pull request #163 from leseb/http-client-endpoint
Browse files Browse the repository at this point in the history
feat: use custom http_client
  • Loading branch information
mergify[bot] authored Oct 31, 2024
2 parents 7e4fc36 + 010ff8c commit bd42ab8
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 5 deletions.
1 change: 1 addition & 0 deletions .spellcheck-en-custom.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dr
eval
gpt
hoc
http
instructlab
jsonl
justfile
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
## v0.4

* Added the ability to pass a custom HTTP client (`httpx.Client`) to the MT-Bench evaluators

## v0.2
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ accelerate
pandas
pandas-stubs
lm-eval>=0.4.4
httpx
15 changes: 15 additions & 0 deletions src/instructlab/eval/mt_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import multiprocessing
import os

# Third Party
import httpx

# First Party
from instructlab.eval import (
mt_bench_answers,
Expand Down Expand Up @@ -110,6 +113,7 @@ def gen_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> None:
"""
Asks questions to model
Expand All @@ -119,6 +123,7 @@ def gen_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
"""
logger.debug(locals())
mt_bench_answers.generate_answers(
Expand All @@ -127,6 +132,7 @@ def gen_answers(
api_key=api_key,
output_dir=self.output_dir,
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
http_client=http_client,
)

def judge_answers(
Expand All @@ -135,6 +141,7 @@ def judge_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> tuple:
"""
Runs MT-Bench judgment
Expand All @@ -144,6 +151,7 @@ def judge_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
Returns:
overall_score MT-Bench score for the overall model evaluation
Expand All @@ -160,6 +168,7 @@ def judge_answers(
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
output_dir=self.output_dir,
merge_system_user_message=self.merge_system_user_message,
http_client=http_client,
)


Expand Down Expand Up @@ -202,6 +211,7 @@ def gen_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> None:
"""
Asks questions to model
Expand All @@ -211,6 +221,7 @@ def gen_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
"""
logger.debug(locals())
mt_bench_branch_generator.generate(
Expand All @@ -228,6 +239,7 @@ def gen_answers(
data_dir=self.output_dir,
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
bench_name="mt_bench_branch",
http_client=http_client,
)

def judge_answers(
Expand All @@ -236,6 +248,7 @@ def judge_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> tuple:
"""
Runs MT-Bench-Branch judgment. Judgments can be compared across runs with consistent question_id -> qna file name.
Expand All @@ -245,6 +258,7 @@ def judge_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
Returns:
overall_score Overall score from the evaluation
Expand All @@ -263,5 +277,6 @@ def judge_answers(
data_dir=self.output_dir,
bench_name="mt_bench_branch",
merge_system_user_message=self.merge_system_user_message,
http_client=http_client,
)
return overall_score, qa_pairs, error_rate
3 changes: 2 additions & 1 deletion src/instructlab/eval/mt_bench_answers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ def generate_answers(
max_tokens=1024,
max_workers=1,
bench_name="mt_bench",
http_client=None,
):
"""Generate model answers to be judged"""
logger.debug(locals())

openai_client = get_openai_client(model_api_base, api_key)
openai_client = get_openai_client(model_api_base, api_key, http_client)

if data_dir is None:
data_dir = os.path.join(os.path.dirname(__file__), "data")
Expand Down
11 changes: 9 additions & 2 deletions src/instructlab/eval/mt_bench_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import time

# Third Party
import httpx
import openai

# First Party
Expand Down Expand Up @@ -365,8 +366,14 @@ def get_model_list(answer_file):
return [os.path.splitext(os.path.basename(answer_file))[0]]


def get_openai_client(model_api_base, api_key):
def get_openai_client(
model_api_base,
api_key,
http_client: httpx.Client | None = None,
):
if api_key is None:
api_key = "NO_API_KEY"
openai_client = openai.OpenAI(base_url=model_api_base, api_key=api_key)
openai_client = openai.OpenAI(
base_url=model_api_base, api_key=api_key, http_client=http_client
)
return openai_client
3 changes: 2 additions & 1 deletion src/instructlab/eval/mt_bench_judgment.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,12 @@ def generate_judgment(
max_workers=1,
first_n=None,
merge_system_user_message=False,
http_client=None,
):
"""Generate judgment with scores and qa_pairs for a model"""
logger.debug(locals())

openai_client = get_openai_client(model_api_base, api_key)
openai_client = get_openai_client(model_api_base, api_key, http_client)

first_n_env = os.environ.get("INSTRUCTLAB_EVAL_FIRST_N_QUESTIONS")
if first_n_env is not None and first_n is None:
Expand Down
8 changes: 7 additions & 1 deletion tests/test_branch_gen_answers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Third Party
import httpx

# First Party
from instructlab.eval.mt_bench import MTBenchBranchEvaluator

Expand All @@ -7,4 +10,7 @@
"../taxonomy",
"main",
)
mt_bench_branch.gen_answers("http://localhost:8000/v1")
mt_bench_branch.gen_answers(
"http://localhost:8000/v1",
http_client=httpx.Client(verify=False),
)

0 comments on commit bd42ab8

Please sign in to comment.