diff --git a/cuda/gguf.Dockerfile b/cuda/gguf.Dockerfile
index 4b53eed..1a2f4f8 100644
--- a/cuda/gguf.Dockerfile
+++ b/cuda/gguf.Dockerfile
@@ -18,10 +18,10 @@ ENV CUDA_DOCKER_ARCH=all
 ENV LLAMA_CUBLAS=1
 
 # Install depencencies
-RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python 0.1.80 which has GGUF support (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.80
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.2.6
 
 # Run the server
 CMD python3 -m llama_cpp.server
diff --git a/docker-compose-gguf.yml b/docker-compose-gguf.yml
index 1d21353..b74170f 100644
--- a/docker-compose-gguf.yml
+++ b/docker-compose-gguf.yml
@@ -3,7 +3,7 @@ version: '3.6'
 services:
   llama-gpt-api:
     # Pin to llama-cpp-python 0.1.80 with GGUF support
-    image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
+    image: ghcr.io/abetlen/llama-cpp-python:latest #@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
     restart: on-failure
     volumes:
       - './models:/models'