Skip to content

Commit

Permalink
add additional step to pipeline to generate a metrics report
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Clifford <[email protected]>
  • Loading branch information
MichaelClifford committed Dec 19, 2024
1 parent cd8bce7 commit 50a0b31
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 50 deletions.
4 changes: 2 additions & 2 deletions eval/final/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .components import run_final_eval_op
from .components import generate_metrics_report_op, run_final_eval_op

# from . import faked

__all__ = ["run_final_eval_op"]
__all__ = ["run_final_eval_op", "generate_metrics_report_op"]
55 changes: 49 additions & 6 deletions eval/final/components.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# type: ignore
# pylint: disable=no-value-for-parameter,import-outside-toplevel,import-error

from kfp.dsl import Artifact, Output, component
from kfp.dsl import Artifact, Input, Metrics, Output, component

from utils.consts import RHELAI_IMAGE
from utils.consts import PYTHON_IMAGE, RHELAI_IMAGE


@component(base_image=RHELAI_IMAGE, install_kfp_package=False)
def run_final_eval_op(
mmlu_branch_output: Output[Artifact],
mt_bench_branch_output: Output[Artifact],
# mmlu_branch_output: Output[Artifact],
# mt_bench_branch_output: Output[Artifact],
base_model_dir: str,
base_branch: str,
candidate_branch: str,
Expand All @@ -20,6 +20,8 @@ def run_final_eval_op(
candidate_model: str = None,
taxonomy_path: str = "/input/taxonomy",
sdg_path: str = "/input/sdg",
mmlu_branch_output_path: str = "/output/mmlu_branch",
mt_bench_branch_output_path: str = "/output/mt_bench_branch",
):
import json
import os
Expand Down Expand Up @@ -336,8 +338,13 @@ def find_node_dataset_directories(base_dir: str):
"summary": summary,
}

with open(mmlu_branch_output.path, "w", encoding="utf-8") as f:
if not os.path.exists(mmlu_branch_output_path):
os.makedirs(mmlu_branch_output_path)
with open(
f"{mmlu_branch_output_path}/mmlu_branch_data.json", "w", encoding="utf-8"
) as f:
json.dump(mmlu_branch_data, f, indent=4)

else:
print("No MMLU tasks directories found, skipping MMLU_branch evaluation.")

Expand Down Expand Up @@ -478,5 +485,41 @@ def find_node_dataset_directories(base_dir: str):
"summary": summary,
}

with open(mt_bench_branch_output.path, "w", encoding="utf-8") as f:
if not os.path.exists(mt_bench_branch_output_path):
os.makedirs(mt_bench_branch_output_path)
with open(
f"{mt_bench_branch_output_path}/mt_bench_branch_data.json",
"w",
encoding="utf-8",
) as f:
json.dump(mt_bench_branch_data, f, indent=4)


@component(base_image=PYTHON_IMAGE, install_kfp_package=False)
def generate_metrics_report_op(
    metrics: Output[Metrics],
):
    """Log evaluation scores found under ``/output`` onto ``metrics``.

    Three result files are read, and selected fields from each are logged:

    * ``/output/mt_bench_data.json`` -- the first entry of the top-level
      sequence supplies ``mt_bench_best_model``, ``mt_bench_best_score``
      and ``mt_bench_best_model_error_rate``.
    * ``/output/mt_bench_branch/mt_bench_branch_data.json`` -- supplies
      ``mt_bench_branch_score`` and ``mt_bench_branch_base_score``.
    * ``/output/mmlu_branch/mmlu_branch_data.json`` -- supplies
      ``mmlu_branch_score`` and ``mmlu_branch_base_score``.

    Args:
        metrics: Output metrics artifact that receives the logged values.

    Raises:
        FileNotFoundError: If any of the three result files is missing.
        KeyError: If an expected field is absent from a result file.
    """
    # Imports and helpers stay inside the function body, as in the original,
    # so the component function remains self-contained.
    import ast
    import json

    def _load_json(path):
        # The writers in this module emit UTF-8 JSON; read it back the same way.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    with open("/output/mt_bench_data.json", "r", encoding="utf-8") as f:
        raw_mt_bench = f.read()
    try:
        # Parse as JSON first: ast.literal_eval rejects the JSON literals
        # true / false / null, which would make a valid .json file fail.
        mt_bench_entries = json.loads(raw_mt_bench)
    except json.JSONDecodeError:
        # Fallback kept for backward compatibility in case the file holds a
        # Python literal rather than strict JSON -- the writer of this file
        # is not visible here (TODO confirm and drop if unnecessary).
        mt_bench_entries = ast.literal_eval(raw_mt_bench)
    # NOTE(review): the first entry is presumably the best-scoring model,
    # going by the metric names below -- verify against the writer.
    mt_bench_data = mt_bench_entries[0]

    metrics.log_metric("mt_bench_best_model", mt_bench_data["model"])
    metrics.log_metric("mt_bench_best_score", mt_bench_data["overall_score"])
    metrics.log_metric("mt_bench_best_model_error_rate", mt_bench_data["error_rate"])

    mt_bench_branch_data = _load_json(
        "/output/mt_bench_branch/mt_bench_branch_data.json"
    )

    metrics.log_metric("mt_bench_branch_score", mt_bench_branch_data["overall_score"])
    metrics.log_metric(
        "mt_bench_branch_base_score", mt_bench_branch_data["base_overall_score"]
    )

    mmlu_branch_data = _load_json("/output/mmlu_branch/mmlu_branch_data.json")

    metrics.log_metric("mmlu_branch_score", mmlu_branch_data["model_score"])
    metrics.log_metric("mmlu_branch_base_score", mmlu_branch_data["base_model_score"])
24 changes: 18 additions & 6 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def ilab_pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]):
)

# Imports for evaluation
from eval.final import run_final_eval_op
from eval.final import generate_metrics_report_op, run_final_eval_op
from eval.mt_bench import run_mt_bench_op

@dsl.pipeline(
Expand Down Expand Up @@ -417,17 +417,29 @@ def pipeline(
mount_path="/output",
)

output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
output_pvc_delete_task.after(
output_model_task, output_mt_bench_task, final_eval_task
)

sdg_pvc_delete_task = DeletePVC(pvc_name=sdg_input_pvc_task.output)
sdg_pvc_delete_task.after(final_eval_task)

model_pvc_delete_task = DeletePVC(pvc_name=model_pvc_task.output)
model_pvc_delete_task.after(final_eval_task)

generate_metrics_report_task = generate_metrics_report_op()
generate_metrics_report_task.after(output_mt_bench_task, final_eval_task)
generate_metrics_report_task.set_caching_options(False)
mount_pvc(
task=generate_metrics_report_task,
pvc_name=output_pvc_task.output,
mount_path="/output",
)

output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
output_pvc_delete_task.after(
output_model_task,
output_mt_bench_task,
final_eval_task,
generate_metrics_report_task,
)

return

return pipeline
Expand Down
Loading

0 comments on commit 50a0b31

Please sign in to comment.