diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
new file mode 100644
index 0000000..31439ed
--- /dev/null
+++ b/.github/workflows/build-docker.yml
@@ -0,0 +1,68 @@
+name: Build and Publish Docker Image
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - 'main'
+  release:
+    types: [published]
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: write
+
+    env:
+      # Set up environment variables for the job
+      DOCKER_REGISTRY: ghcr.io
+      IMAGE_NAME: ${{ github.repository }}
+      TAG: ${{ github.sha }}
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          install: true
+
+      # Log in to the GitHub Container Registry
+      - name: Login to Docker Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.DOCKER_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      # Build and push the Docker image to GHCR for the main branch
+      - name: Build and Push Docker Image
+        if: github.ref == 'refs/heads/main'
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
+          labels: version=${{ github.run_id }}
+
+      # For tagged releases, build and push the Docker image with the corresponding tag
+      - name: Build and Push Docker Image (Tagged)
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
+          labels: version=${{ github.run_id }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 68bc17f..7504e96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Hugging Face model cache
+hf_home/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..bfa36a3
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3-slim
+
+RUN mkdir -p /app
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+COPY *.py .
+COPY backend /app/backend
+CMD python vision.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f975113
--- /dev/null
+++ b/README.md
@@ -0,0 +1,70 @@
+OpenedAI Vision
+---------------
+
+An OpenAI API compatible vision server. It works like `gpt-4-vision-preview` and lets you chat about the contents of an image.
+
+- Compatible with the OpenAI Vision API (aka "chat with images")
+- Does not connect to the OpenAI API and does not require an OpenAI API Key
+- Not affiliated with OpenAI in any way
+
+Backend Model support:
+- [X] Moondream2 [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2) *(only a single image and a single question are currently supported)*
+- [ ] Deepseek-VL (in progress) [deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)
+- [ ] ...
+
+Version: 0.1.0
+
+
+API Documentation
+-----------------
+
+* [OpenAI Vision guide](https://platform.openai.com/docs/guides/vision)
+
+Installation instructions
+-------------------------
+
+```shell
+# install the python dependencies
+pip install -r requirements.txt
+# run the server
+python vision.py
+```
+
+Usage
+-----
+
+```
+usage: vision.py [-h] [-m MODEL] [-b BACKEND] [-d DEVICE] [-P PORT] [-H HOST] [--preload]
+
+OpenedAI Vision API Server
+
+options:
+  -h, --help            show this help message and exit
+  -m MODEL, --model MODEL
+                        The model to use, Ex. deepseek-ai/deepseek-vl-7b-chat (default: vikhyatk/moondream2)
+  -b BACKEND, --backend BACKEND
+                        The backend to use (moondream, deepseek) (default: moondream)
+  -d DEVICE, --device DEVICE
+                        Set the torch device for the model. Ex. cuda:1 (default: auto)
+  -P PORT, --port PORT  Server tcp port (default: 5006)
+  -H HOST, --host HOST  Host to listen on, Ex. 0.0.0.0 (default: localhost)
+  --preload             Preload model and exit. (default: False)
+```
+
+Docker support
+--------------
+
+You can run the server with Docker Compose like so:
+```shell
+docker compose up
+```
+
+Sample API Usage
+----------------
+
+`test_vision.py` has a sample of how to use the API.
+Example:
+```
+$ test_vision.py https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg
+The image features a long wooden boardwalk running through a lush green field. The boardwalk is situated in a grassy area with trees in the background, creating a serene and picturesque scene. The sky above is filled with clouds, adding to the beauty of the landscape. The boardwalk appears to be a peaceful path for people to walk or hike along, providing a connection between the grassy field and the surrounding environment.
+```
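For reference, the request shape the server accepts can also be exercised without the `openai` client. The sketch below assumes a server already running locally on the default port 5006 and reuses the sample image URL from the README; the payload mirrors the `ImageChatRequest` model defined later in `vision.py`.

```python
# Minimal sketch: call /v1/chat/completions directly, without the openai client.
# Assumes the server is already running on localhost:5006 (the default).
import requests

payload = {
    "model": "gpt-4-vision-preview",  # accepted for compatibility; the loaded backend answers
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe the image"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                    },
                },
            ],
        }
    ],
    "max_tokens": 300,
}

resp = requests.post("http://localhost:5006/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])
```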
diff --git a/backend/moondream.py b/backend/moondream.py
new file mode 100644
index 0000000..4e5fa36
--- /dev/null
+++ b/backend/moondream.py
@@ -0,0 +1,31 @@
+
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from vision_qna import VisionQnABase
+
+class VisionQnA(VisionQnABase):
+    model_name: str = "moondream2"
+    revision: str = '2024-03-13'
+
+    def __init__(self, model_id: str, device: str):
+        if device == 'auto':
+            device = self.select_device()
+
+        params = {
+            'pretrained_model_name_or_path': model_id,
+            'trust_remote_code': True,
+            'revision': self.revision,
+            'torch_dtype': torch.float32 if device == 'cpu' else torch.float16,
+        }
+
+        self.model = AutoModelForCausalLM.from_pretrained(**params).to(device)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    def select_device(self):
+        return 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
+
+    async def single_question(self, image_url: str, prompt: str) -> str:
+        image = await self.url_to_image(image_url)
+        encoded_image = self.model.encode_image(image)
+        return self.model.answer_question(encoded_image, prompt, self.tokenizer)
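The backend contract that `vision.py` relies on is small: a module under `backend/` exposing a `VisionQnA` class with a `model_name` attribute, an `__init__(model_id, device)` constructor, and an async `single_question(image_url, prompt)` method, with `VisionQnABase.url_to_image()` available for fetching images. A rough skeleton for a future backend (for instance the planned Deepseek-VL one) might look like the sketch below; the model loading and inference steps are placeholders, not the real Deepseek-VL API.

```python
# backend/example.py -- hypothetical skeleton for an additional backend.
# Only the interface comes from this repo; loading/inference details are placeholders.
import torch

from vision_qna import VisionQnABase

class VisionQnA(VisionQnABase):
    model_name: str = "example"

    def __init__(self, model_id: str, device: str):
        if device == 'auto':
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device
        self.model = None  # TODO: load the processor/tokenizer and model for model_id

    async def single_question(self, image_url: str, prompt: str) -> str:
        image = await self.url_to_image(image_url)  # helper inherited from VisionQnABase
        # TODO: run the model on (image, prompt) and return the generated answer
        raise NotImplementedError("model inference goes here")
```

Saved as `backend/example.py`, it would be selected with `--backend example`.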
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..db376d1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,23 @@
+services:
+  server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    tty: true
+    image: ghcr.io/matatonic/openedai-vision
+    environment:
+      - HF_HOME=/app/hf_home
+    volumes:
+      - ./hf_home:/app/hf_home
+    ports:
+      - 5006:5006
+    command: ["python", "vision.py", "--host", "0.0.0.0", "--port", "5006"]
+    runtime: nvidia
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              #device_ids: ['0', '1'] # select specific GPUs, or use count: all
+              count: all
+              capabilities: [gpu]
diff --git a/hf_home/hf_home.txt b/hf_home/hf_home.txt
new file mode 100644
index 0000000..e69de29
diff --git a/openedai.py b/openedai.py
new file mode 100644
index 0000000..0de55b3
--- /dev/null
+++ b/openedai.py
@@ -0,0 +1,66 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import PlainTextResponse
+
+class OpenAIStub(FastAPI):
+    def __init__(self) -> None:
+        super().__init__()
+        self.models = {}
+
+        self.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"]
+        )
+
+        @self.get('/v1/billing/usage')
+        @self.get('/v1/dashboard/billing/usage')
+        async def handle_billing_usage():
+            return { 'total_usage': 0 }
+
+        @self.get("/", response_class=PlainTextResponse)
+        @self.head("/", response_class=PlainTextResponse)
+        @self.options("/", response_class=PlainTextResponse)
+        async def root():
+            return PlainTextResponse(content="", status_code=200 if self.models else 503)
+
+        @self.get("/health")
+        async def health():
+            return {"status": "ok" if self.models else "unk" }
+
+        @self.get("/v1/models")
+        async def get_model_list():
+            return self.model_list()
+
+        @self.get("/v1/models/{model_id}")
+        async def get_model_info(model_id: str):
+            return self.model_info(model_id)
+
+    def register_model(self, name: str, model: str = None) -> None:
+        self.models[name] = model if model else name
+
+    def deregister_model(self, name: str) -> None:
+        if name in self.models:
+            del self.models[name]
+
+    def model_info(self, model: str) -> dict:
+        result = {
+            "id": model,
+            "object": "model",
+            "created": 0,
+            "owned_by": "user"
+        }
+        return result
+
+    def model_list(self) -> dict:
+        if not self.models:
+            return {}
+
+        result = {
+            "object": "list",
+            "data": [ self.model_info(model) for model in list(set(self.models.keys() | self.models.values())) if model ]
+        }
+
+        return result
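Once `register_model()` has been called, the stub answers `/v1/models` in the same shape as the OpenAI API. A quick check against a locally running server (default port 5006 assumed) might look like this; note that `model_list()` merges registered aliases and their target model ids, so both appear in the listing.

```python
# Sketch: list the models the stub reports; assumes the server is running on localhost:5006.
import requests

models = requests.get("http://localhost:5006/v1/models").json()
for m in models["data"]:
    print(m["id"], m["owned_by"])
# With the defaults this prints both 'gpt-4-vision-preview' and the underlying
# model id (e.g. 'vikhyatk/moondream2'), each reported as owned_by 'user'.
```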
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b73db69
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+python-datauri
+requests
+uvicorn
+fastapi
+
+# moondream
+timm
+einops
+transformers>=4.39.0
\ No newline at end of file
diff --git a/test_vision.py b/test_vision.py
new file mode 100755
index 0000000..747a50b
--- /dev/null
+++ b/test_vision.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+import argparse
+from datauri import DataURI
+from openai import OpenAI
+
+# Initialize argparse
+parser = argparse.ArgumentParser(description='Test vision using OpenAI')
+parser.add_argument('image_url', type=str, help='URL or image file to be tested')
+parser.add_argument('question', type=str, nargs='?', default='Describe the image', help='The question to ask the image')
+args = parser.parse_args()
+
+client = OpenAI(base_url='http://localhost:5006/v1', api_key='skip')
+
+image_url = args.image_url
+question = args.question
+
+if not image_url.startswith('http'):
+    image_url = str(DataURI.from_file(image_url))
+
+response = client.chat.completions.create(
+    model="gpt-4-vision-preview",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": question},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url,
+                    },
+                },
+            ],
+        }
+    ],
+    max_tokens=300,
+)
+
+print(response.choices[0].message.content)
\ No newline at end of file
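`test_vision.py` leans on `python-datauri` to turn a local image file into a `data:` URI before sending it. A roughly equivalent stdlib-only helper (illustrative only, not part of the repo) would be:

```python
# Illustrative stdlib-only equivalent of DataURI.from_file(); not part of this repo.
import base64
import mimetypes

def file_to_data_uri(path: str) -> str:
    mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime};base64,{encoded}"
```

The server can consume such URLs because `VisionQnABase.url_to_image()` (below) decodes `data:` URIs with `DataURI(img_url).data`.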
diff --git a/vision.py b/vision.py
new file mode 100644
index 0000000..3955c5f
--- /dev/null
+++ b/vision.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+import sys
+import time
+import argparse
+import importlib
+
+from typing import Optional, List, Literal
+import uvicorn
+from pydantic import BaseModel
+
+import openedai
+
+
+app = openedai.OpenAIStub()
+
+class ImageURL(BaseModel):
+    url: str
+    detail: Optional[str] = "auto" # auto -> low (512) or high (Nx512) based on res.
+
+class Content(BaseModel):
+    type: Literal["text", "image_url"]
+    text: Optional[str] = None
+    image_url: Optional[ImageURL] = None
+
+class Message(BaseModel):
+    role: str
+    content: List[Content]
+
+class ImageChatRequest(BaseModel):
+    model: str # = "gpt-4-vision-preview"
+    messages: List[Message]
+    max_tokens: int = 300
+
+@app.post(path="/v1/chat/completions")
+async def chat_with_images(request: ImageChatRequest):
+
+    # XXX only single image & prompt for now
+    for c in request.messages[0].content:
+        if c.image_url:
+            image_url = c.image_url.url
+        elif c.text:
+            prompt = c.text
+
+    text = await vision_qna.single_question(image_url, prompt)
+
+    t_id = int(time.time() * 1e9)
+
+    vis_chat_resp = {
+        "id": f"chatcmpl-{t_id}",
+        "object": "chat.completion",
+        "created": t_id,
+        "model": vision_qna.model_name,
+        "system_fingerprint": "fp_111111111",
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": text,
+            },
+            "logprobs": None,
+            "finish_reason": "stop"
+        }],
+        "usage": {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0
+        }
+    }
+
+    return vis_chat_resp
+
+def parse_args(argv=None):
+    parser = argparse.ArgumentParser(
+        description='OpenedAI Vision API Server',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-m', '--model', action='store', default="vikhyatk/moondream2", help="The model to use, Ex. deepseek-ai/deepseek-vl-7b-chat")
+    parser.add_argument('-b', '--backend', action='store', default="moondream", help="The backend to use (moondream, deepseek)")
+    parser.add_argument('-d', '--device', action='store', default="auto", help="Set the torch device for the model. Ex. cuda:1")
+    parser.add_argument('-P', '--port', action='store', default=5006, type=int, help="Server tcp port")
+    parser.add_argument('-H', '--host', action='store', default='localhost', help="Host to listen on, Ex. 0.0.0.0")
+    parser.add_argument('--preload', action='store_true', help="Preload model and exit.")
+    return parser.parse_args(argv)
+
+if __name__ == "__main__":
+    args = parse_args(sys.argv[1:])
+
+    print(f"Loading VisionQnA[{args.backend}] with {args.model}")
+    backend = importlib.import_module(f'backend.{args.backend}')
+    vision_qna = backend.VisionQnA(args.model, args.device)
+
+    if args.preload:
+        sys.exit(0)
+
+    app.register_model('gpt-4-vision-preview', args.model)
+
+    uvicorn.run(app, host=args.host, port=args.port)
diff --git a/vision_qna.py b/vision_qna.py
new file mode 100644
index 0000000..ceeb1a9
--- /dev/null
+++ b/vision_qna.py
@@ -0,0 +1,24 @@
+
+import io
+import requests
+from datauri import DataURI
+from PIL import Image
+
+class VisionQnABase:
+    model_name: str = None
+
+    def __init__(self, model_id: str, device: str):
+        pass
+
+    async def url_to_image(self, img_url: str) -> Image.Image:
+        if img_url.startswith('http'):
+            response = requests.get(img_url)
+
+            img_data = response.content
+        elif img_url.startswith('data:'):
+            img_data = DataURI(img_url).data
+
+        return Image.open(io.BytesIO(img_data)).convert("RGB")
+
+    async def single_question(self, image_url: str, prompt: str) -> str:
+        pass
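`url_to_image()` is the only image plumbing a backend needs: it fetches `http(s)` URLs with `requests`, decodes `data:` URIs, and returns an RGB `PIL.Image`. A quick standalone check, assuming the requirements are installed and the script is run from the repo root, could look like:

```python
# Quick standalone check of VisionQnABase.url_to_image(); the URL is just the README sample image.
import asyncio

from vision_qna import VisionQnABase

async def main():
    qna = VisionQnABase(model_id="", device="cpu")  # the base class loads no model
    img = await qna.url_to_image(
        "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
        "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
        "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
    )
    print(img.mode, img.size)  # RGB plus the pixel dimensions

asyncio.run(main())
```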