From 788fdbc9ef882d20f88a92b7bb09080f6d2eee2a Mon Sep 17 00:00:00 2001 From: Martin Krasser Date: Fri, 17 Jan 2025 16:53:16 +0100 Subject: [PATCH] Remove obsolete eval data file --- docs/eval/eval-data.csv | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 docs/eval/eval-data.csv diff --git a/docs/eval/eval-data.csv b/docs/eval/eval-data.csv deleted file mode 100644 index 1d7ad1e..0000000 --- a/docs/eval/eval-data.csv +++ /dev/null @@ -1,20 +0,0 @@ -agent,model,prompt,subset,eval_protocol,correct -smolagents,claude-3-5-sonnet-20241022,few-shot,GAIA,exact_match,43.8 -smolagents,claude-3-5-sonnet-20241022,few-shot,GSM8K,exact_match,91.4 -smolagents,claude-3-5-sonnet-20241022,few-shot,SimpleQA,exact_match,47.5 -freeact,claude-3-5-sonnet-20241022,zero-shot,GAIA,exact_match,53.1 -freeact,claude-3-5-sonnet-20241022,zero-shot,GSM8K,exact_match,95.7 -freeact,claude-3-5-sonnet-20241022,zero-shot,SimpleQA,exact_match,57.5 -freeact,claude-3-5-sonnet-20241022,zero-shot,SimpleQA,llm_as_judge,72.5 -freeact,claude-3-5-haiku-20241022,zero-shot,GAIA,exact_match,31.2 -freeact,claude-3-5-haiku-20241022,zero-shot,GSM8K,exact_match,90.0 -freeact,claude-3-5-haiku-20241022,zero-shot,SimpleQA,exact_match,52.5 -freeact,claude-3-5-haiku-20241022,zero-shot,SimpleQA,llm_as_judge,70.0 -freeact,gemini-2.0-flash-exp,zero-shot,GAIA,exact_match,34.4 -freeact,gemini-2.0-flash-exp,zero-shot,GSM8K,exact_match,95.7 -freeact,gemini-2.0-flash-exp,zero-shot,SimpleQA,exact_match,50.0 -freeact,gemini-2.0-flash-exp,zero-shot,SimpleQA,llm_as_judge,65.0 -freeact,qwen2p5-coder-32b-instruct,zero-shot,GAIA,exact_match,25.0 -freeact,qwen2p5-coder-32b-instruct,zero-shot,GSM8K,exact_match,95.7 -freeact,qwen2p5-coder-32b-instruct,zero-shot,SimpleQA,exact_match,52.5 -freeact,qwen2p5-coder-32b-instruct,zero-shot,SimpleQA,llm_as_judge,65.0