Skip to content

Commit

Permalink
Added more judge models.
Browse files (browse the repository at this point in the history)
  • Loading branch information
JoelNiklaus committed Feb 5, 2025
1 parent 186a6c8 commit c62647e
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions community_tasks/swiss_legal_evals.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -962,6 +962,8 @@ def prompt_fn(line: dict, task_name: str = None):


JUDGE_MODELS = {
"o1": "openai/o1-2024-12-17",
"o1-mini": "openai/o1-mini-2024-09-12",
"gpt-4o-mini": "openai/gpt-4o-mini-2024-07-18",
"gpt-4o": "openai/gpt-4o-2024-11-20",
# The Gemini models are not very good judges.
Expand All @@ -970,6 +972,8 @@ def prompt_fn(line: dict, task_name: str = None):
# The Claude models do not follow the required output format.
# "claude-3-5-haiku": "anthropic/claude-3-5-haiku-20241022",
# "claude-3-5-sonnet": "anthropic/claude-3-5-sonnet-20241022",
"llama-3-3-70b": "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
"llama-3-1-405b": "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
}

LEXICAL_METRICS = [
Expand Down

0 comments on commit c62647e

Please sign in to comment.