Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use custom http_client #163

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .spellcheck-en-custom.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dr
eval
gpt
hoc
http
instructlab
jsonl
justfile
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
## v0.4

* Added the ability to specify a custom HTTP client for MT-Bench

## v0.2
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ accelerate
pandas
pandas-stubs
lm-eval>=0.4.4
httpx
15 changes: 15 additions & 0 deletions src/instructlab/eval/mt_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import multiprocessing
import os

# Third Party
import httpx

# First Party
from instructlab.eval import (
mt_bench_answers,
Expand Down Expand Up @@ -110,6 +113,7 @@ def gen_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> None:
"""
Asks questions to model
Expand All @@ -119,6 +123,7 @@ def gen_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
"""
logger.debug(locals())
mt_bench_answers.generate_answers(
Expand All @@ -127,6 +132,7 @@ def gen_answers(
api_key=api_key,
output_dir=self.output_dir,
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
http_client=http_client,
)

def judge_answers(
Expand All @@ -135,6 +141,7 @@ def judge_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> tuple:
"""
Runs MT-Bench judgment
Expand All @@ -144,6 +151,7 @@ def judge_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
Returns:
overall_score MT-Bench score for the overall model evaluation
Expand All @@ -160,6 +168,7 @@ def judge_answers(
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
output_dir=self.output_dir,
merge_system_user_message=self.merge_system_user_message,
http_client=http_client,
)


Expand Down Expand Up @@ -202,6 +211,7 @@ def gen_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> None:
"""
Asks questions to model
Expand All @@ -211,6 +221,7 @@ def gen_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
"""
logger.debug(locals())
mt_bench_branch_generator.generate(
Expand All @@ -228,6 +239,7 @@ def gen_answers(
data_dir=self.output_dir,
max_workers=self._get_effective_max_workers(max_workers, serving_gpus),
bench_name="mt_bench_branch",
http_client=http_client,
)

def judge_answers(
Expand All @@ -236,6 +248,7 @@ def judge_answers(
api_key: str | None = None,
max_workers: int | str | None = None,
serving_gpus: int | None = None,
http_client: httpx.Client | None = None,
) -> tuple:
"""
Runs MT-Bench-Branch judgment. Judgments can be compared across runs with consistent question_id -> qna file name.
Expand All @@ -245,6 +258,7 @@ def judge_answers(
api_key API token for authenticating with model server
max_workers Max parallel workers to run the evaluation with (int or "auto"). None indicates to use value specified in constructor.
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto. None indicates to use value specified in constructor.
http_client Custom http client to use for requests
Returns:
overall_score Overall score from the evaluation
Expand All @@ -263,5 +277,6 @@ def judge_answers(
data_dir=self.output_dir,
bench_name="mt_bench_branch",
merge_system_user_message=self.merge_system_user_message,
http_client=http_client,
)
return overall_score, qa_pairs, error_rate
3 changes: 2 additions & 1 deletion src/instructlab/eval/mt_bench_answers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ def generate_answers(
max_tokens=1024,
max_workers=1,
bench_name="mt_bench",
http_client=None,
):
"""Generate model answers to be judged"""
logger.debug(locals())

openai_client = get_openai_client(model_api_base, api_key)
openai_client = get_openai_client(model_api_base, api_key, http_client)

if data_dir is None:
data_dir = os.path.join(os.path.dirname(__file__), "data")
Expand Down
11 changes: 9 additions & 2 deletions src/instructlab/eval/mt_bench_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import time

# Third Party
import httpx
import openai

# First Party
Expand Down Expand Up @@ -365,8 +366,14 @@ def get_model_list(answer_file):
return [os.path.splitext(os.path.basename(answer_file))[0]]


def get_openai_client(model_api_base, api_key):
def get_openai_client(
model_api_base,
api_key,
http_client: httpx.Client | None = None,
):
if api_key is None:
api_key = "NO_API_KEY"
openai_client = openai.OpenAI(base_url=model_api_base, api_key=api_key)
openai_client = openai.OpenAI(
base_url=model_api_base, api_key=api_key, http_client=http_client
)
return openai_client
3 changes: 2 additions & 1 deletion src/instructlab/eval/mt_bench_judgment.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,12 @@ def generate_judgment(
max_workers=1,
first_n=None,
merge_system_user_message=False,
http_client=None,
):
"""Generate judgment with scores and qa_pairs for a model"""
logger.debug(locals())

openai_client = get_openai_client(model_api_base, api_key)
openai_client = get_openai_client(model_api_base, api_key, http_client)

first_n_env = os.environ.get("INSTRUCTLAB_EVAL_FIRST_N_QUESTIONS")
if first_n_env is not None and first_n is None:
Expand Down
8 changes: 7 additions & 1 deletion tests/test_branch_gen_answers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Third Party
import httpx

# First Party
from instructlab.eval.mt_bench import MTBenchBranchEvaluator

Expand All @@ -7,4 +10,7 @@
"../taxonomy",
"main",
)
mt_bench_branch.gen_answers("http://localhost:8000/v1")
mt_bench_branch.gen_answers(
"http://localhost:8000/v1",
http_client=httpx.Client(verify=False),
)