diff --git a/cuda/gguf.Dockerfile b/cuda/gguf.Dockerfile
index 4b53eed..1a2f4f8 100644
--- a/cuda/gguf.Dockerfile
+++ b/cuda/gguf.Dockerfile
@@ -18,10 +18,10 @@ ENV CUDA_DOCKER_ARCH=all
 ENV LLAMA_CUBLAS=1
 
 # Install depencencies
-RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python 0.1.80 which has GGUF support (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.80
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.2.6
 
 # Run the server
 CMD python3 -m llama_cpp.server
diff --git a/docker-compose-gguf.yml b/docker-compose-gguf.yml
index 1d21353..b74170f 100644
--- a/docker-compose-gguf.yml
+++ b/docker-compose-gguf.yml
@@ -3,7 +3,7 @@ version: '3.6'
 services:
   llama-gpt-api:
     # Pin to llama-cpp-python 0.1.80 with GGUF support
-    image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
+    image: ghcr.io/abetlen/llama-cpp-python:latest #@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
     restart: on-failure
     volumes:
       - './models:/models'