Skip to content

Commit

Permalink
aggregate component (#26)
Browse files Browse the repository at this point in the history
Dask transform component that aggregates the evaluation results. It might
have to be adapted to the new dataset format.
  • Loading branch information
Hakimovich99 authored Nov 29, 2023
1 parent 2629689 commit b47ab9a
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 0 deletions.
18 changes: 18 additions & 0 deletions src/components/aggregate_eval_results/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Base image: slim Python 3.8, pinned to the linux/amd64 platform.
FROM --platform=linux/amd64 python:3.8-slim AS base

# System dependencies
# The apt package lists are removed in the same layer so they are not
# persisted in the final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install git -y && \
    rm -rf /var/lib/apt/lists/*

# Install requirements
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt

# Set the working directory to the component folder
WORKDIR /component/src

# Copy over src-files
COPY src/ .

# Start the component through the Fondant CLI; "main" presumably refers to
# the main.py module copied into the working directory above.
ENTRYPOINT ["fondant", "execute", "main"]
20 changes: 20 additions & 0 deletions src/components/aggregate_eval_results/fondant_component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Metadata: to be kept in sync with the Docker image
name: aggregate_eval_results
description: Component that aggregates results of the evaluation of the retriever
image: ghcr.io/ml6team/aggregate_eval_results:dev

# Input fields: float32 evaluation scores, one field per metric
consumes:
text: # TODO: add/retrieve the metrics to consider
fields:
context+precision:
type: float32
context+relevancy:
type: float32

# Output fields: a metric name (string) and its score (float32)
produces:
text:
fields:
metric:
type: string
score:
type: float32
1 change: 1 addition & 0 deletions src/components/aggregate_eval_results/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Fondant with the "component" extra, pinned to an exact release
fondant[component]==0.7.0
19 changes: 19 additions & 0 deletions src/components/aggregate_eval_results/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import dask.dataframe as dd
from fondant.component import DaskTransformComponent


class AggregateResults(DaskTransformComponent):
    """Reduce the numeric columns of a dataframe to one mean score per column."""

    def __init__(self, *_) -> None:
        """Accept and ignore any positional arguments; no state is stored."""

    def transform(self, dataframe: dd.DataFrame) -> dd.DataFrame:
        """Average every float/int column and return the result in long form.

        Args:
            dataframe: Input dataframe; only its columns of dtype "float" or
                "int" take part in the aggregation.

        Returns:
            A dataframe with a fresh default index and two columns:
            ``text_metric`` (taken from the index of the column means, i.e.
            the aggregated column labels) and ``text_score`` (the mean value).
        """
        numeric_columns = dataframe.select_dtypes(["float", "int"]).columns
        column_means = dataframe[list(numeric_columns)].mean()

        # Turn the series of means into a frame and expose its index (the
        # source column labels) as a regular "metric" column.
        scores = column_means.to_frame(name="score")
        scores["metric"] = column_means.index

        output_names = {"metric": "text_metric", "score": "text_score"}
        ordered = scores[["metric", "score"]]
        return ordered.reset_index(drop=True).rename(columns=output_names)

0 comments on commit b47ab9a

Please sign in to comment.