diff --git a/src/components/aggregate_eval_results/Dockerfile b/src/components/aggregate_eval_results/Dockerfile new file mode 100644 index 0000000..43bc363 --- /dev/null +++ b/src/components/aggregate_eval_results/Dockerfile @@ -0,0 +1,18 @@ +FROM --platform=linux/amd64 python:3.8-slim as base + +# System dependencies +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install git -y + +# Install requirements +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt + +# Set the working directory to the component folder +WORKDIR /component/src + +# Copy over src-files +COPY src/ . + +ENTRYPOINT ["fondant", "execute", "main"] \ No newline at end of file diff --git a/src/components/aggregate_eval_results/fondant_component.yaml b/src/components/aggregate_eval_results/fondant_component.yaml new file mode 100644 index 0000000..4215b5c --- /dev/null +++ b/src/components/aggregate_eval_results/fondant_component.yaml @@ -0,0 +1,20 @@ +# metadata: to be matched with the docker image +name: aggregate_eval_results +description: Component that aggregates results of the evaluation of the retriever +image: ghcr.io/ml6team/aggregate_eval_results:dev + +consumes: + text: # TODO: add or retrieve the metrics to consider + fields: + context+precision: + type: float32 + context+relevancy: + type: float32 + +produces: + text: + fields: + metric: + type: string + score: + type: float32 diff --git a/src/components/aggregate_eval_results/requirements.txt b/src/components/aggregate_eval_results/requirements.txt new file mode 100644 index 0000000..2f5a3a1 --- /dev/null +++ b/src/components/aggregate_eval_results/requirements.txt @@ -0,0 +1 @@ +fondant[component]==0.7.0 \ No newline at end of file diff --git a/src/components/aggregate_eval_results/src/main.py b/src/components/aggregate_eval_results/src/main.py new file mode 100644 index 0000000..067f10e --- /dev/null +++ b/src/components/aggregate_eval_results/src/main.py @@ -0,0 +1,19 @@ +import dask.dataframe as dd +from 
fondant.component import DaskTransformComponent + + +class AggregateResults(DaskTransformComponent): + def __init__(self, *_) -> None: + return None + + def transform(self, dataframe: dd.DataFrame) -> dd.DataFrame: + metrics = list(dataframe.select_dtypes(["float", "int"]).columns) + agg = dataframe[metrics].mean() + agg_df = agg.to_frame(name="score") + agg_df["metric"] = agg.index + agg_results_df = agg_df[["metric", "score"]] + agg_results_df = agg_results_df.reset_index(drop=True).rename( + columns={"metric": "text_metric", "score": "text_score"} + ) + + return agg_results_df