-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
matatonic
committed
Mar 31, 2024
1 parent
7fba7e2
commit 93bc2bc
Showing
12 changed files
with
439 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Builds the Docker image from ./Dockerfile and publishes it to the GitHub
# Container Registry (ghcr.io).
#   * push to main      -> image tagged `latest`
#   * published release -> image tagged with the release tag name
#   * manual dispatch   -> runs, but only pushes when the ref matches one of the above
name: Build and Publish Docker Image

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'
  release:
    types: [published]

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest

    # GITHUB_TOKEN needs package write access to push to ghcr.io
    permissions:
      contents: read
      packages: write

    env:
      # Set up environment variables for the job
      DOCKER_REGISTRY: ghcr.io
      IMAGE_NAME: ${{ github.repository }}

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          install: true

      # Log in to the GitHub Container Registry. This workflow has no
      # pull_request trigger, so the login is unconditional.
      - name: Login to Docker Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.DOCKER_REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Generates OCI image labels (source, revision, ...) consumed by the build steps below
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}

      # Build and push the Docker image to GHCR as `latest` for the main branch
      - name: Build and Push Docker Image
        if: github.ref == 'refs/heads/main'
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile
          push: true
          tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
          labels: |
            ${{ steps.meta.outputs.labels }}
            version=${{ github.run_id }}

      # For tagged releases, build and push the Docker image with the corresponding tag
      - name: Build and Push Docker Image (Tagged)
        if: startsWith(github.ref, 'refs/tags/')
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile
          push: true
          tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
          labels: |
            ${{ steps.meta.outputs.labels }}
            version=${{ github.run_id }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
# Local Hugging Face cache directory (bind-mounted into the container by docker-compose) | ||
hf_home/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Image for the OpenedAI Vision API server.
# NOTE(review): `python:3-slim` is a floating tag; consider pinning a minor
# version (and digest) once the supported Python version is confirmed.
FROM python:3-slim

# WORKDIR creates the directory when it does not exist, so no `mkdir` is needed.
WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes. --no-cache-dir keeps pip's download cache
# out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY *.py ./
COPY backend /app/backend

# Exec form: python runs as PID 1 and receives SIGTERM from `docker stop`
# instead of being hidden behind `/bin/sh -c`.
CMD ["python", "vision.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
OpenedAI Vision | ||
--------------- | ||
|
||
An OpenAI API compatible vision server. It functions like `gpt-4-vision-preview` and lets you chat about the contents of an image. | ||
|
||
- Compatible with the OpenAI Vision API (aka "chat with images") | ||
- Does not connect to the OpenAI API and does not require an OpenAI API Key | ||
- Not affiliated with OpenAI in any way | ||
|
||
Backend Model support: | ||
- [X] Moondream2 [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2) *(only a single image and single question currently supported)* | ||
- [ ] Deepseek-VL - (in progress) [deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat) | ||
- [ ] ... | ||
|
||
Version: 0.1.0 | ||
|
||
|
||
API Documentation | ||
----------------- | ||
|
||
* [OpenAI Vision guide](https://platform.openai.com/docs/guides/vision) | ||
|
||
Installation instructions | ||
------------------------- | ||
|
||
```shell | ||
# install the python dependencies | ||
pip install -r requirements.txt | ||
# run the server | ||
python vision.py | ||
``` | ||
|
||
Usage | ||
----- | ||
|
||
``` | ||
usage: vision.py [-h] [-m MODEL] [-b BACKEND] [-d DEVICE] [-P PORT] [-H HOST] [--preload] | ||
OpenedAI Vision API Server | ||
options: | ||
-h, --help show this help message and exit | ||
-m MODEL, --model MODEL | ||
The model to use, Ex. deepseek-ai/deepseek-vl-7b-chat (default: vikhyatk/moondream2) | ||
-b BACKEND, --backend BACKEND | ||
The backend to use (moondream, deepseek) (default: moondream) | ||
-d DEVICE, --device DEVICE | ||
Set the torch device for the model. Ex. cuda:1 (default: auto) | ||
-P PORT, --port PORT Server tcp port (default: 5006) | ||
-H HOST, --host HOST Host to listen on, Ex. 0.0.0.0 (default: localhost) | ||
--preload Preload model and exit. (default: False) | ||
``` | ||
|
||
Docker support | ||
-------------- | ||
|
||
You can run the server via docker like so: | ||
```shell | ||
docker compose up | ||
``` | ||
|
||
Sample API Usage | ||
---------------- | ||
|
||
`test_vision.py` has a sample of how to use the API. | ||
Example: | ||
``` | ||
$ test_vision.py https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg | ||
The image features a long wooden boardwalk running through a lush green field. The boardwalk is situated in a grassy area with trees in the background, creating a serene and picturesque scene. The sky above is filled with clouds, adding to the beauty of the landscape. The boardwalk appears to be a peaceful path for people to walk or hike along, providing a connection between the grassy field and the surrounding environment. | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
|
||
import torch | ||
from transformers import AutoTokenizer, AutoModelForCausalLM | ||
|
||
from vision_qna import VisionQnABase | ||
|
||
class VisionQnA(VisionQnABase):
    """Vision question-answering backend for the moondream2 model.

    Loads the model and its tokenizer through `transformers` and answers a
    single question about a single image.
    """
    model_name: str = "moondream2"
    # Hub revision used for both the model and the tokenizer.
    revision: str = '2024-03-13'

    def __init__(self, model_id: str, device: str):
        """Load the model and tokenizer.

        Args:
            model_id: Model identifier passed to `from_pretrained`.
            device: Torch device string, or 'auto' to pick one via
                `select_device()`.
        """
        if device == 'auto':
            device = self.select_device()

        params = {
            'pretrained_model_name_or_path': model_id,
            # The model's code is loaded from the model repository.
            'trust_remote_code': True,
            'revision': self.revision,
            # Full precision on CPU, half precision on every other device.
            'torch_dtype': torch.float32 if device == 'cpu' else torch.float16,
        }

        self.model = AutoModelForCausalLM.from_pretrained(**params).to(device)
        # Load the tokenizer at the same revision as the model so the two
        # cannot drift apart when the upstream repository is updated.
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, revision=self.revision)

    def select_device(self):
        """Return 'cuda' if available, else 'mps' if available, else 'cpu'."""
        if torch.cuda.is_available():
            return 'cuda'
        if torch.backends.mps.is_available():
            return 'mps'
        return 'cpu'

    async def single_question(self, image_url: str, prompt: str) -> str:
        """Answer `prompt` about the image referenced by `image_url`.

        Args:
            image_url: Image location handed to `self.url_to_image`
                (not defined in this class; presumably inherited from
                VisionQnABase -- confirm accepted URL forms there).
            prompt: The question to ask about the image.

        Returns:
            The result of the model's `answer_question` call.
        """
        image = await self.url_to_image(image_url)
        encoded_image = self.model.encode_image(image)
        return self.model.answer_question(encoded_image, prompt, self.tokenizer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Compose definition for the OpenedAI Vision API server.
services:
  server:
    image: ghcr.io/matatonic/openedai-vision
    build:
      context: .
      dockerfile: Dockerfile
    tty: true
    environment:
      # Keep downloaded models in the bind-mounted directory below
      HF_HOME: /app/hf_home
    volumes:
      - ./hf_home:/app/hf_home
    ports:
      - "5006:5006"
    # Listen on all interfaces so the published port is reachable from the host
    command:
      - "python"
      - "vision.py"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "5006"
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              #device_ids: ['0', '1'] # Select a gpu, or
              count: all
              capabilities:
                - gpu
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from fastapi import FastAPI | ||
from fastapi.middleware.cors import CORSMiddleware | ||
from fastapi.responses import PlainTextResponse | ||
|
||
class OpenAIStub(FastAPI):
    """FastAPI application that stubs out common OpenAI API endpoints.

    Provides permissive CORS, billing usage stubs, root/health probes and the
    `/v1/models` listing endpoints. Models are tracked in `self.models`, a
    dict mapping a registered name to a model id.
    """

    def __init__(self) -> None:
        super().__init__()
        # registered name -> model id (the name itself when no id was given)
        self.models = {}

        self.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"]
        )

        @self.get('/v1/billing/usage')
        @self.get('/v1/dashboard/billing/usage')
        async def handle_billing_usage():
            # Always reports zero usage.
            return { 'total_usage': 0 }

        @self.get("/", response_class=PlainTextResponse)
        @self.head("/", response_class=PlainTextResponse)
        @self.options("/", response_class=PlainTextResponse)
        async def root():
            # 200 once at least one model is registered, 503 otherwise.
            return PlainTextResponse(content="", status_code=200 if self.models else 503)

        @self.get("/health")
        async def health():
            return {"status": "ok" if self.models else "unk" }

        @self.get("/v1/models")
        async def get_model_list():
            return self.model_list()

        # The function parameter must carry the same name as the `{model}`
        # path placeholder; otherwise FastAPI treats it as a required query
        # parameter and the path value is never passed in.
        @self.get("/v1/models/{model}")
        async def get_model_info(model: str):
            return self.model_info(model)

    def register_model(self, name: str, model: str = None) -> None:
        """Register `name`, mapped to `model` (or to itself when `model` is falsy)."""
        self.models[name] = model if model else name

    def deregister_model(self, name: str) -> None:
        """Remove `name` from the registry; unknown names are ignored."""
        self.models.pop(name, None)

    def model_info(self, model: str) -> dict:
        """Return a model description dict for the given model id."""
        return {
            "id": model,
            "object": "model",
            "created": 0,
            "owned_by": "user"
        }

    def model_list(self) -> dict:
        """Return a list object covering every registered name and model id.

        Returns an empty dict when no models are registered. Entries are
        de-duplicated and sorted so the response order is deterministic.
        """
        if not self.models:
            return {}

        known_ids = {model for model in (self.models.keys() | self.models.values()) if model}

        return {
            "object": "list",
            "data": [ self.model_info(model) for model in sorted(known_ids) ]
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
python-datauri
requests
uvicorn
fastapi

# moondream
timm
einops
# PEP 440 only allows the ".*" wildcard with == and !=, so use a plain lower bound
transformers>=4.39.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/usr/bin/env python
"""Sample client: ask a question about an image via the local vision server.

Usage: test_vision.py IMAGE_URL_OR_FILE [QUESTION]
"""
import argparse
from datauri import DataURI
from openai import OpenAI

# Inputs starting with one of these prefixes are sent unchanged; anything else
# is treated as a local file path and embedded as a data URI.
URL_PREFIXES = ('http://', 'https://', 'data:')

# Initialize argparse
parser = argparse.ArgumentParser(description='Test vision using OpenAI')
parser.add_argument('image_url', type=str, help='URL or image file to be tested')
parser.add_argument('question', type=str, nargs='?', default='Describe the image', help='The question to ask the image')
args = parser.parse_args()

# The api_key is a placeholder value; the client points at the local server.
client = OpenAI(base_url='http://localhost:5006/v1', api_key='skip')

image_url = args.image_url
question = args.question

# Match full URL schemes rather than a bare 'http' prefix, so a local file
# such as "http_logo.png" is still read from disk, and an existing data URI
# is not mistaken for a file path.
if not image_url.lower().startswith(URL_PREFIXES):
    image_url = str(DataURI.from_file(image_url))

response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": image_url,
                    },
                },
            ],
        }
    ],
    max_tokens=300,
)

print(response.choices[0].message.content)
Oops, something went wrong.