From ee6f03f8160efa90ba954e4bc4c7438a824b2b2d Mon Sep 17 00:00:00 2001 From: Riccardo Orlando Date: Tue, 6 Aug 2024 15:28:07 +0200 Subject: [PATCH] chore: Add skip_metadata parameter to `RelikServer` --- README.md | 14 +++++++++++++- relik/inference/serve/backend/fastapi_be.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 666ad15..f9542b3 100644 --- a/README.md +++ b/README.md @@ -407,7 +407,19 @@ docker pull sapienzanlp/relik:latest and run the image with: ```bash -docker run -p 12345:8000 sapienzanlp/relik:latest -c relik-ie/relik-cie-small +docker run -p 12345:8000 sapienzanlp/relik:latest --config relik-ie/relik-cie-small ``` + +`sapienzanlp/relik:latest` points to the latest CUDA-enabled version of the image. To run with GPU support: + +```bash +docker run -p 12345:8000 --gpus all sapienzanlp/relik:latest --config relik-ie/relik-cie-small --device cuda +``` + +Tip: you can mount your `.cache/huggingface` folder into the Docker container to avoid downloading the model weights every time you run the container. + +```bash +docker run -p 12345:8000 -v ~/.cache/huggingface:/home/relik-user/.cache/huggingface sapienzanlp/relik:latest --config relik-ie/relik-cie-small ``` The API will be available at `http://localhost:12345`. It exposes a single endpoint `/relik` with several parameters that can be passed to the model. diff --git a/relik/inference/serve/backend/fastapi_be.py b/relik/inference/serve/backend/fastapi_be.py index d6e0a3d..6698bfe 100644 --- a/relik/inference/serve/backend/fastapi_be.py +++ b/relik/inference/serve/backend/fastapi_be.py @@ -95,6 +95,7 @@ def __init__( retriever_precision=self.retriever_precision, document_index_precision=self.document_index_precision, reader_precision=self.reader_precision, + skip_metadata=True, ) self.router = APIRouter()