
Commit

update
NGUYEN, Xuan Phi committed Apr 12, 2024
1 parent 14240e0 commit 7106d8c
Showing 3 changed files with 34 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
+.vscode
37 changes: 29 additions & 8 deletions evaluation/vmlu/vmlu_run.py
@@ -1,4 +1,4 @@
-# make sure to use vllm 0.2.7 and transformers 4.36+
+# make sure to use vllm 0.3.3 and transformers 4.40+
 
 import json
 from vllm import LLM, SamplingParams
@@ -15,16 +15,32 @@ def read_json(json_file):
 questions = read_json(jsonl_path)
 
-sampling_params = SamplingParams(temperature=0.0, max_tokens=1, stop=["</s>"])
-model_path = "SeaLLMs/SeaLLM-7B-v2"
-model = LLM(model_path, dtype="bfloat16")
+# SeaLLM-7B-v2.5
+# model_path = "SeaLLMs/SeaLLM-7B-v2"
+model_path = "SeaLLMs/SeaLLM-7B-v2.5"
 
 # the previous commit incorrectly put a \n between </s> & <|im_start|>; there should not be any \n between </s> and <|im_start|>
-CHAT_TEMPLATE = """<|im_start|>system
+if model_path == "SeaLLMs/SeaLLM-7B-v2":
+    eos_token = "</s>"
+    CHAT_TEMPLATE = """<|im_start|>system
 You are a helpful assistant.</s><|im_start|>user
 {prompt}</s><|im_start|>assistant
 """
+
+elif model_path == "SeaLLMs/SeaLLM-7B-v2.5":
+    eos_token = "<eos>"
+    CHAT_TEMPLATE = """<|im_start|>user
+{prompt}<eos>
+<|im_start|>assistant
+"""
+
+else:
+    raise ValueError('invalid model name', model_path)
+
+
+sampling_params = SamplingParams(temperature=0.0, max_tokens=5, stop=[eos_token])
+model = LLM(model_path, dtype="bfloat16")
 
 
 question_template = """Chỉ đưa ra chữ cái đứng trước câu trả lời đúng (A, B, C, D hoặc E) của câu hỏi trắc nghiệm sau:
 {question}
@@ -46,7 +62,12 @@ def to_prompt(item):
 
 generated = model.generate(prompts, sampling_params)
 responses = [g.outputs[0].text for g in generated]
-answers = [r.strip()[0] for r in responses]
+
+answers = [r.strip() for r in responses]
+# the first output token can be "A" or " A" (two different tokens)
+# take the first character, or '' if the response is empty
+answers = [(r[0] if len(r) > 0 else '') for r in answers]
+
 
 assert len(answers) == len(questions)
 print(answers[:10])
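For reference, here is a minimal sketch (not part of the commit) of how the updated vmlu_run.py pieces fit together for SeaLLM-7B-v2.5: fill a question into question_template, wrap it in CHAT_TEMPLATE, then take the first character of the stripped greedy completion. The build_prompt/extract_letter helpers and the simplified English stand-in for the Vietnamese question_template are illustrative assumptions, not code from the repository.

```python
# Minimal sketch, assuming the SeaLLM-7B-v2.5 branch of the diff above.

# Chat template for SeaLLM-7B-v2.5, as added in this commit.
CHAT_TEMPLATE = """<|im_start|>user
{prompt}<eos>
<|im_start|>assistant
"""

# Hypothetical English stand-in for the Vietnamese question_template,
# which asks for only the letter of the correct choice.
question_template = """Only give the letter (A, B, C, D or E) of the correct answer to the following multiple-choice question:
{question}
"""

def build_prompt(question_text: str) -> str:
    # Fill the question into the instruction template, then wrap the
    # result in the model's chat template.
    return CHAT_TEMPLATE.format(prompt=question_template.format(question=question_text))

def extract_letter(response: str) -> str:
    # The first generated token may be "A" or " A" (two different tokens),
    # so strip whitespace before taking the first character; return ''
    # when the model produced nothing usable.
    stripped = response.strip()
    return stripped[0] if stripped else ''

if __name__ == "__main__":
    print(build_prompt("1 + 1 = ?\nA. 1\nB. 2\nC. 3"))
    print(extract_letter(" B. 2"))  # -> 'B'
```

Stripping before indexing is what makes the "A" and " A" tokenizations equivalent, and the empty-string fallback keeps the later length assert from crashing on empty generations.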
4 changes: 4 additions & 0 deletions index.html
@@ -267,6 +267,10 @@ <h2 class="title is-4">World Knowledge</h2>
 We evaluate models on 3 benchmarks following the recommended default setups: 5-shot MMLU for Eng, 3-shot <a href="https://arxiv.org/pdf/2306.05179.pdf">M3Exam</a>
 for Eng, Zho, Vie, Ind, Tha, and zero-shot <a href="https://vmlu.ai/">VMLU</a> for Vie.
 </p>
+<p>
+M3Exam was evaluated using the <a href="https://github.com/DAMO-NLP-SG/M3Exam">standard prompting implementation</a>,
+while 0-shot VMLU was run with <a href="https://github.com/DAMO-NLP-SG/SeaLLMs/blob/main/evaluation/vmlu/vmlu_run.py">vmlu_run.py</a> for SeaLLMs.
+</p>
 <div class="table-container">
 <table class="table is-bordered is-striped is-narrow is-hoverable is-fullwidth">
 <!-- Your table content -->
