Skip to content

Commit

Permalink
Updating to > 1.0 openai
Browse files Browse the repository at this point in the history
Signed-off-by: Dan McPherson <[email protected]>
  • Loading branch information
danmcp committed Jun 25, 2024
1 parent 68185e4 commit 841db16
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 18 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ source venv/bin/activate
pip install -r requirements.txt
pip install -r requirements-dev.txt
pip install -e .
pip install vllm==0.3.3
pip install vllm
python -m vllm.entrypoints.openai.api_server --model instructlab/granite-7b-lab --tensor-parallel-size 1
```

Expand Down Expand Up @@ -65,7 +65,7 @@ eval_output/
```

```shell
export INSTRUCT_LAB_EVAL_FIRST_N_QUESTIONS=10 # Optional if you want to shorten run times
export INSTRUCT_LAB_EVAL_FIRST_N_QUESTIONS=40 # Optional if you want to shorten run times
python3 tests/test_judge_answers.py
python3 tests/test_branch_judge_answers.py
```
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FastChat
GitPython>=3.1.42,<4.0.0
shortuuid
openai<1.0.0
openai>=1.13.3,<2.0.0
psutil
torch
transformers
Expand Down
10 changes: 8 additions & 2 deletions src/instructlab/eval/mt_bench_answers.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def get_answer(
max_tokens: int,
answer_file: str,
force_temperature: float,
openai_client,
):
"""Answer a question with the model"""
assert force_temperature is None or question.get("required_temperature") is None
Expand All @@ -62,7 +63,9 @@ def get_answer(
conv.append_message(conv.roles[0], question["turns"][j])
conv.append_message(conv.roles[1], None)

output = chat_completion_openai(model, conv, temperature, max_tokens)
output = chat_completion_openai(
openai_client, model, conv, temperature, max_tokens
)

conv.update_last_message(output)
turns.append(output)
Expand Down Expand Up @@ -99,7 +102,9 @@ def generate_answers(
):
"""Generate model answers to be judged"""
if model_api_base is not None:
openai.api_base = model_api_base
openai_client = openai.OpenAI(base_url=model_api_base)
else:
openai_client = openai.OpenAI()

if data_dir is None:
data_dir = os.path.join(os.path.dirname(__file__), "data")
Expand All @@ -125,6 +130,7 @@ def generate_answers(
max_tokens,
answer_file,
force_temperature,
openai_client,
)
futures.append(future)

Expand Down
20 changes: 10 additions & 10 deletions src/instructlab/eval/mt_bench_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def load_judge_prompts(prompt_file: str) -> dict:


def run_judge_single(
question, answer, judge, ref_answer, multi_turn=False, judgment=None
question, answer, judge, ref_answer, openai_client, multi_turn=False, judgment=None
):
kwargs = {}
model = judge.model_name
Expand Down Expand Up @@ -150,7 +150,9 @@ def run_judge_single(
conv.append_message(conv.roles[1], None)

if judgment is None:
judgment = chat_completion_openai(model, conv, temperature=0, max_tokens=2048)
judgment = chat_completion_openai(
openai_client, model, conv, temperature=0, max_tokens=2048
)

if judge.prompt_template["output_format"] == "[[rating]]":
match = re.search(one_score_pattern, judgment)
Expand All @@ -169,7 +171,7 @@ def run_judge_single(
return rating, user_prompt, judgment


def play_a_match_single(match: MatchSingle, output_file: str) -> dict:
def play_a_match_single(openai_client, match: MatchSingle, output_file: str) -> dict:
question, model, answer, judge, ref_answer, multi_turn = (
match.question,
match.model,
Expand All @@ -186,6 +188,7 @@ def play_a_match_single(match: MatchSingle, output_file: str) -> dict:
answer,
judge,
ref_answer,
openai_client,
multi_turn=multi_turn,
judgment=judgment,
)
Expand Down Expand Up @@ -215,10 +218,7 @@ def play_a_match_single(match: MatchSingle, output_file: str) -> dict:
return result


def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None) -> str:
if api_dict is not None:
openai.api_base = api_dict["api_base"]
openai.api_key = api_dict["api_key"]
def chat_completion_openai(openai_client, model, conv, temperature, max_tokens) -> str:
output = API_ERROR_OUTPUT
for _ in range(API_MAX_RETRY):
try:
Expand All @@ -232,16 +232,16 @@ def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None)
messages[0]["content"] + "\n" + messages[1]["content"]
)
messages = messages[1:]
response = openai.ChatCompletion.create(
response = openai_client.chat.completions.create(
model=model,
messages=messages,
n=1,
temperature=temperature,
max_tokens=max_tokens,
)
output = response["choices"][0]["message"]["content"]
output = response.choices[0].message.content
break
except openai.error.OpenAIError as e:
except openai.OpenAIError as e:
print(type(e), e)
time.sleep(API_RETRY_SLEEP)

Expand Down
10 changes: 7 additions & 3 deletions src/instructlab/eval/mt_bench_judgment.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def make_judgment(
def judge_model(
model_name,
judge_model_name,
openai_client,
branch=None,
bench_name="mt_bench",
output_dir="eval_output",
Expand Down Expand Up @@ -218,11 +219,11 @@ def judge_model(
# Play matches
if max_workers == 1:
for match in tqdm(matches):
play_a_match_single(match, output_file=output_file)
play_a_match_single(openai_client, match, output_file=output_file)
else:

def play_a_match_wrapper(match):
play_a_match_single(match, output_file=output_file)
play_a_match_single(openai_client, match, output_file=output_file)

np.random.seed(0)
np.random.shuffle(matches)
Expand Down Expand Up @@ -250,7 +251,9 @@ def generate_judgment(
):
"""Generate judgment with scores and qa_pairs for a model"""
if model_api_base is not None:
openai.api_base = model_api_base
openai_client = openai.OpenAI(base_url=model_api_base)
else:
openai_client = openai.OpenAI()

first_n_env = os.environ.get("INSTRUCT_LAB_EVAL_FIRST_N_QUESTIONS")
if first_n_env is not None and first_n is None:
Expand All @@ -259,6 +262,7 @@ def generate_judgment(
question_file, judgment_file, answer_file = judge_model(
model_name,
judge_model_name,
openai_client,
bench_name=bench_name,
output_dir=output_dir,
data_dir=data_dir,
Expand Down

0 comments on commit 841db16

Please sign in to comment.