From ee6f03f8160efa90ba954e4bc4c7438a824b2b2d Mon Sep 17 00:00:00 2001 From: Riccardo Orlando Date: Tue, 6 Aug 2024 15:28:07 +0200 Subject: [PATCH] chore: Add skip_metadata parameter to `RelikServer` --- README.md | 14 +++++++++++++- relik/inference/serve/backend/fastapi_be.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 666ad15..f9542b3 100644 --- a/README.md +++ b/README.md @@ -407,7 +407,19 @@ docker pull sapienzanlp/relik:latest and run the image with: ```bash -docker run -p 12345:8000 sapienzanlp/relik:latest -c relik-ie/relik-cie-small +docker run -p 12345:8000 sapienzanlp/relik:latest --config relik-ie/relik-cie-small ``` + +`sapienzanlp/relik:latest` points to the latest CUDA-enabled version of the image. To run with GPU support: + +```bash +docker run -p 12345:8000 --gpus all sapienzanlp/relik:latest --config relik-ie/relik-cie-small --device cuda +``` + +Tip: you can mount your `.cache/huggingface` folder into the Docker container to avoid downloading the model weights every time you run the container. + +```bash +docker run -p 12345:8000 -v ~/.cache/huggingface:/home/relik-user/.cache/huggingface sapienzanlp/relik:latest --config relik-ie/relik-cie-small ``` The API will be available at `http://localhost:12345`. It exposes a single endpoint `/relik` with several parameters that can be passed to the model. diff --git a/relik/inference/serve/backend/fastapi_be.py b/relik/inference/serve/backend/fastapi_be.py index d6e0a3d..6698bfe 100644 --- a/relik/inference/serve/backend/fastapi_be.py +++ b/relik/inference/serve/backend/fastapi_be.py @@ -95,6 +95,7 @@ def __init__( retriever_precision=self.retriever_precision, document_index_precision=self.document_index_precision, reader_precision=self.reader_precision, + skip_metadata=True, ) self.router = APIRouter()