Skip to content

Commit

Permalink
deprecate old response_generation metrics
Browse files (browse the repository at this point in the history)
Signed-off-by: lilacheden <[email protected]>
  • Loading branch information
lilacheden committed Jan 16, 2025
1 parent 5f045d3 commit 06da986
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 10 deletions.
27 changes: 21 additions & 6 deletions prepare/metrics/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,24 @@
# metrics.rag.recall
# metrics.rag.bert_recall

for axis, base_metric, main_score in [
("correctness", "token_overlap", "f1"),
("correctness", "bert_score.deberta_large_mnli", "recall"),
("correctness", "bert_score.deberta_v3_base_mnli_xnli_ml", "recall"),
("faithfullness", "token_overlap", "precision"),
for axis, base_metric, main_score, new_metric in [
("correctness", "token_overlap", "f1", "answer_correctness.token_recall"),
(
"correctness",
"bert_score.deberta_large_mnli",
"recall",
"answer_correctness.bert_score_recall",
),
(
"correctness",
"bert_score.deberta_v3_base_mnli_xnli_ml",
"recall",
"answer_correctness.bert_score_recall_ml",
),
("faithfullness", "token_overlap", "precision", "faithfulness.token_k_precision"),
]:
deprecated_path = f"metrics.rag.response_generation.{axis}.{base_metric}"
new_metric_path = f"metrics.rag.response_generation.{new_metric}"
preprocess_steps = (
[
Copy(field="task_data/contexts", to_field="references"),
Expand All @@ -379,10 +391,13 @@
],
metric=f"metrics.{base_metric}",
prediction_type=str,
__deprecated_msg__=f"Metric {deprecated_path} is deprecated. Please use {new_metric_path} instead.",
)

add_to_catalog(
metric, f"metrics.rag.response_generation.{axis}.{base_metric}", overwrite=True
metric,
f"metrics.rag.response_generation.{axis}.{base_metric}",
overwrite=True,
)

# end to end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
}
],
"metric": "metrics.bert_score.deberta_large_mnli",
"prediction_type": "str"
"prediction_type": "str",
"__deprecated_msg__": "Metric metrics.rag.response_generation.correctness.bert_score.deberta_large_mnli is deprecated. Please use metrics.rag.response_generation.answer_correctness.bert_score_recall instead."
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
}
],
"metric": "metrics.bert_score.deberta_v3_base_mnli_xnli_ml",
"prediction_type": "str"
"prediction_type": "str",
"__deprecated_msg__": "Metric metrics.rag.response_generation.correctness.bert_score.deberta_v3_base_mnli_xnli_ml is deprecated. Please use metrics.rag.response_generation.answer_correctness.bert_score_recall_ml instead."
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
}
],
"metric": "metrics.token_overlap",
"prediction_type": "str"
"prediction_type": "str",
"__deprecated_msg__": "Metric metrics.rag.response_generation.correctness.token_overlap is deprecated. Please use metrics.rag.response_generation.answer_correctness.token_recall instead."
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
}
],
"metric": "metrics.token_overlap",
"prediction_type": "str"
"prediction_type": "str",
"__deprecated_msg__": "Metric metrics.rag.response_generation.faithfullness.token_overlap is deprecated. Please use metrics.rag.response_generation.faithfulness.token_k_precision instead."
}

0 comments on commit 06da986

Please sign in to comment.