Skip to content

Commit

Permalink
update scripts and notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
semio committed Nov 21, 2024
1 parent 7d77230 commit b3abfa9
Show file tree
Hide file tree
Showing 4 changed files with 533 additions and 329 deletions.
32 changes: 26 additions & 6 deletions automation-api/yival_experiments/notebooks/compare_evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
conn = duckdb.connect()


conn.query("select * from df")
simple_eval_check = conn.query("select * from df where simple_evaluator_matching <> auto_mark_correctness")
simple_eval_check
simple_eval_check.to_csv("./simple_eval_check.csv")


# NEXT: review the query and begin to check results.
q = """select
*
from
Expand All @@ -22,19 +26,35 @@
not (
llama3_evaluator_correctness = vertex_ai_evaluator_correctness
and gpt4_evaluator_correctness = vertex_ai_evaluator_correctness
)
or (
auto_mark_correctness <> 0
and (
llama3_evaluator_correctness = vertex_ai_evaluator_correctness
and gpt4_evaluator_correctness = vertex_ai_evaluator_correctness
)
and auto_mark_correctness <> gpt4_evaluator_correctness
)
or (
simple_evaluator_matching <> 0
and (
simple_evaluator_matching <> 0
and simple_evaluator_matching = gpt4_evaluator_correctness
llama3_evaluator_correctness = vertex_ai_evaluator_correctness
and gpt4_evaluator_correctness = vertex_ai_evaluator_correctness
)
and simple_evaluator_matching <> gpt4_evaluator_correctness
)
and simple_evaluator_matching <> 0"""
"""

diffs = conn.query(q)

conn.query("select count(*) from df")
conn.query("select count(*) from diffs")

diffs.to_csv("to_check.csv")
diffs.to_csv("to_check_all.csv")

410 / 30780

1281 / 30780
# FIXME: the simple evaluator does not seem to be working very well.
# Maybe just use the exact-matching algorithm instead?

Loading

0 comments on commit b3abfa9

Please sign in to comment.