From be96b7e0eb2268e68e713328bc6d7904d72fe12d Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Wed, 20 Mar 2024 09:05:15 +0800
Subject: [PATCH] use raw-loader to embed files

---
 .../{index.md => index.mdx}                   | 56 ++++---------------
 1 file changed, 10 insertions(+), 46 deletions(-)
 rename website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/{index.md => index.mdx} (78%)

diff --git a/website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.md b/website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.mdx
similarity index 78%
rename from website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.md
rename to website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.mdx
index ead579a4b242..f3b7f180bcbf 100644
--- a/website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.md
+++ b/website/blog/2024-03-19-deploy-tabby-with-replicas-behind-reverse-proxy/index.mdx
@@ -3,6 +3,10 @@
 authors: [meng]
 tags: [deployment, reverse proxy]
 ---
+import CodeBlock from '@theme/CodeBlock';
+import Caddyfile from "raw-loader!./Caddyfile"
+import DockerComposeYaml from "raw-loader!./docker-compose.yml"
+
 # Deploying Tabby with Replicas and a Reverse Proxy
 
 Welcome to our tutorial on how to set up Tabby, the self-hosted AI coding assistant, with Caddy serving as a reverse proxy (load balancer). This guide assumes that you have a Linux machine with Docker, CUDA drivers, and the [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) already installed.
@@ -13,13 +17,9 @@ Let's dive in!
 
 Before configuring our services, we need to create a `Caddyfile` that will define how Caddy should handle incoming requests and reverse proxy them to Tabby:
 
-```
-http://*:8080 {
-  handle_path /* {
-    reverse_proxy worker-0:8080 worker-1:8080
-  }
-}
-```
+<CodeBlock>
+{Caddyfile}
+</CodeBlock>
 
 Note that we are assuming we have two GPUs in the machine; therefore, we should redirect traffic to two worker nodes.
 
@@ -39,45 +39,9 @@ Since we are only downloading the model file, we override the entrypoint to `tab
 
 Next, create a `docker-compose.yml` file to orchestrate the Tabby and Caddy services. Here is the configuration for both services:
 
-```yaml
-version: '3.5'
-
-services:
-  worker-0:
-    restart: always
-    image: tabbyml/tabby
-    command: serve --model StarCoder-1B --device cuda
-    volumes:
-      - "$HOME/.tabby:/data"
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              device_ids: ["0"]
-              capabilities: [gpu]
-
-  worker-1:
-    restart: always
-    image: tabbyml/tabby
-    command: serve --model StarCoder-1B --device cuda
-    volumes:
-      - "$HOME/.tabby:/data"
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              device_ids: ["1"]
-              capabilities: [gpu]
-
-  web:
-    image: caddy
-    volumes:
-      - "./Caddyfile:/etc/caddy/Caddyfile:ro"
-    ports:
-      - "8080:8080"
-```
+<CodeBlock language="yaml">
+{DockerComposeYaml}
+</CodeBlock>
 
 Note that we have two worker nodes, and we are using the same model for both of them, with each assigned to a different GPU (0 and 1, respectively). If you have more GPUs, you can add more worker nodes and assign them to the available GPUs (remember to update the `Caddyfile` accordingly!).
 
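A note on the technique this patch adopts: the `raw-loader!` prefix is an inline webpack loader request. It tells Docusaurus's bundler to import the referenced file verbatim as a string rather than parsing it as a module, and the theme's `<CodeBlock>` component then renders that string with syntax highlighting. This keeps the published snippets in lockstep with the actual `Caddyfile` and `docker-compose.yml` sitting next to `index.mdx` — which the two imports assume exist as sibling files — so the post cannot drift out of sync with the configs it documents. A minimal sketch of the pattern in any MDX page (the `config.toml` file name and path are hypothetical stand-ins):

```mdx
import CodeBlock from '@theme/CodeBlock';
import ConfigToml from 'raw-loader!./config.toml';

{/* Renders the imported string with the theme's TOML highlighting. */}
<CodeBlock language="toml" title="config.toml">{ConfigToml}</CodeBlock>
```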
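On the load-balancing behavior the post relies on: when `reverse_proxy` is given multiple upstreams, Caddy spreads requests across them using its default `random` selection policy. A hypothetical extension of the `Caddyfile` — not part of this patch — that makes the policy explicit and adds active health checks, so a worker that crashes is taken out of rotation (the `/v1/health` probe path assumes Tabby's health endpoint):

```
http://*:8080 {
  handle_path /* {
    reverse_proxy worker-0:8080 worker-1:8080 {
      # Make the policy explicit; Caddy's default is `random`.
      lb_policy round_robin
      # Actively probe each worker and drop failing ones from
      # rotation until they recover.
      health_uri /v1/health
      health_interval 10s
    }
  }
}
```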
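For completeness, a sketch of bringing the stack up once the companion files are in place. It assumes the compose file shown in the removed fence, that the image's entrypoint invokes the `tabby` binary (which the `command: serve ...` lines imply), and that `/v1/health` is a valid smoke-test path against Tabby's HTTP API:

```sh
# Fetch the model weights once; both workers mount $HOME/.tabby and share them.
docker run --rm -v "$HOME/.tabby:/data" tabbyml/tabby download --model StarCoder-1B

# Start worker-0, worker-1, and the Caddy load balancer in the background.
docker compose up -d

# Smoke-test through Caddy; repeated requests are spread across both workers.
curl http://localhost:8080/v1/health
```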