
Commit

Merge pull request #6 from matatonic/dev
0.27.0
matatonic authored Jul 15, 2024
2 parents 4970f2b + 719c0b5 commit cc8a8eb
Showing 17 changed files with 207 additions and 220 deletions.
Dockerfile (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ RUN git clone https://github.com/togethercomputer/Dragonfly --single-branch /app

COPY requirements.txt .
ARG VERSION=latest
RUN if [ "$VERSION" = "alt" ]; then echo "transformers==4.36.2" >> requirements.txt; else echo "transformers>=4.41.2\nautoawq>=0.2.5" >> requirements.txt ; fi
RUN if [ "$VERSION" = "alt" ]; then echo "transformers==4.36.2" >> requirements.txt; else echo "transformers==4.41.2\nautoawq>=0.2.5" >> requirements.txt ; fi
# TODO: nvidia apex wheel
RUN --mount=type=cache,target=/root/.cache/pip pip install -U -r requirements.txt

README.md (13 changes: 12 additions & 1 deletion)
@@ -10,6 +10,13 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
## Model support

- [X] [OpenGVLab](https://huggingface.co/OpenGVLab)
+- - [X] [InternVL2-40B](https://huggingface.co/OpenGVLab/InternVL2-40B)
+- - [X] [InternVL2-26B](https://huggingface.co/OpenGVLab/InternVL2-26B)
+- - [X] [InternVL2-8B](https://huggingface.co/OpenGVLab/InternVL2-8B)
+- - [X] [InternVL2-4B](https://huggingface.co/OpenGVLab/InternVL2-4B) (alternate docker only)
+- - [X] [InternVL2-2B](https://huggingface.co/OpenGVLab/InternVL2-2B)
+- - [X] [InternVL2-2B-AWQ](https://huggingface.co/OpenGVLab/InternVL2-2B-AWQ)
+- - [X] [InternVL2-1B](https://huggingface.co/OpenGVLab/InternVL2-1B)
- - [X] [InternVL-Chat-V1-5](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5) (won't GPU split yet)
- - [X] [InternVL-Chat-V1-5-Int8](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5-Int8) (won't GPU split yet)
- - [X] [Mini-InternVL-Chat-4B-V1-5](https://huggingface.co/OpenGVLab/Mini-InternVL-Chat-4B-V1-5)
@@ -42,7 +49,7 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- - [X] [360VL-8B](https://huggingface.co/qihoo360/360VL-8B)
- - [X] [360VL-70B](https://huggingface.co/qihoo360/360VL-70B) (untested)
- [X] [LlavaNext](https://huggingface.co/llava-hf)
-- - [X] [llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf) (currently errors, use an image before 0.26.0)
+- - [X] [llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf)
- - [X] [llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)
- - [X] [llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)
- - [X] [llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)
@@ -106,6 +113,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le

## Recent updates

+Version 0.27.0
+
+- new model support: OpenGVLab/InternVL2 series of models (1B, 2B, 4B, 8B*, 26B*, 40B*) - *(current top open source models)
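
As a usage example, one of the new models can be launched with the project's usual CLI form (a sketch; this exact model id and flag pairing appears in model_conf_tests.json later in this commit):

    python vision.py -m OpenGVLab/InternVL2-8B --device-map cuda:0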

Version 0.26.0

- new model support: cognitivecomputations/dolphin-vision-72b
backend/idefics2.py (1 change: 1 addition & 0 deletions)
@@ -9,6 +9,7 @@
# HuggingFaceM4/idefics2-8b-chatty-AWQ

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "idefics2"
vision_layers: List[str] = ['vision_model', 'connector']
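
This release adds an explicit format class attribute to several backends. A hedged sketch of the pattern these one-line changes follow, with simplified stand-ins rather than the repo's actual vision_qna implementation (the 'vicuna' default and the dispatch table are assumptions for illustration):

    # Sketch only: 'internal' is taken to mean the model applies its own
    # chat template, so the raw messages are passed through untouched.
    class VisionQnABase:
        format: str = 'vicuna'            # assumed default, overridden per backend

    class VisionQnA(VisionQnABase):
        format: str = 'internal'          # as added to idefics2.py above

    def chatml(messages):
        return '\n'.join(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>"
                         for m in messages)

    FORMATTERS = {'chatml': chatml}

    def prompt_from_messages(messages, fmt):
        if fmt == 'internal':
            return messages               # model handles its own template
        return FORMATTERS[fmt](messages)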

backend/internvl-chat-v1-5.py (24 changes: 15 additions & 9 deletions)
@@ -9,7 +9,16 @@
# OpenGVLab/InternVL-Chat-V1-5
# OpenGVLab/InternVL-Chat-V1-5-Int8
# OpenGVLab/Mini-InternVL-Chat-2B-V1-5
-# OpenGVLab/Mini-InternVL-Chat-4B-V1-5
+# OpenGVLab/Mini-InternVL-Chat-4B-V1-5 (phintern)
+# OpenGVLab/InternVL2-1B
+# OpenGVLab/InternVL2-2B-AWQ (empty response)
+# OpenGVLab/InternVL2-2B
+# OpenGVLab/InternVL2-4B
+# OpenGVLab/InternVL2-4B (phintern)
+# OpenGVLab/InternVL2-8B
+# OpenGVLab/InternVL2-26B
+# OpenGVLab/InternVL2-40B (yi-34- nous-hermes-2)


MAX_TILES = 6

@@ -118,10 +127,7 @@ def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_p


async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGenerator[str, None]:
-if self.format == 'phintern':
-    images, prompt = await phintern_prompt_from_messages(request.messages, img_tok='')
-else:
-    images, prompt = await chatml_prompt_from_messages(request.messages, img_tok='')
+images, prompt = await prompt_from_messages(request.messages, self.format)

# TODO: use detail to set max tiles if detail=low (=512)
# if .detail == 'low': max_num=1
@@ -134,11 +140,11 @@
pixel_values = None

if pixel_values is not None:
-    image_tokens = '<img>' + '<IMG_CONTEXT>' * self.model.num_image_token * pixel_values.shape[0] + '</img>\n'
-else:
-    image_tokens = ''
+    for img in images:
+        image_tokens = '<img>' + '<IMG_CONTEXT>' * self.model.num_image_token * img.size(0) + '</img>'
+        prompt = prompt.replace('<image>', image_tokens, 1)

-model_inputs = self.tokenizer(image_tokens + prompt, return_tensors='pt')
+model_inputs = self.tokenizer(prompt, return_tensors='pt')
input_ids = model_inputs['input_ids'].cuda()
attention_mask = model_inputs['attention_mask'].cuda()
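
The loop added above expands each <image> placeholder in the prompt into the model's image-context tokens, one image at a time. A minimal self-contained sketch of that substitution (the tile counts and token budget are illustrative; in the diff, num_image_token comes from the model and the per-image tile count from img.size(0)):

    # Stand-alone illustration of the <image> placeholder expansion.
    num_image_token = 256        # context slots per tile (illustrative value)
    tile_counts = [7, 1]         # e.g. first image: 6 tiles plus a thumbnail
    prompt = "Compare <image> with <image>."

    for n_tiles in tile_counts:
        image_tokens = '<img>' + '<IMG_CONTEXT>' * num_image_token * n_tiles + '</img>'
        prompt = prompt.replace('<image>', image_tokens, 1)  # fill placeholders in order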

backend/mantis.py (1 change: 1 addition & 0 deletions)
@@ -4,6 +4,7 @@
from vision_qna import *

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "mantis"
vision_layers: List[str] = ["vision_tower", "multi_modal_projector"]

backend/minicpm.py (1 change: 1 addition & 0 deletions)
@@ -7,6 +7,7 @@
# openbmb/MiniCPM-V aka OmniLMM-3B

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "minicpm"
vision_layers: List[str] = ["resampler", "vpm"]

backend/omnilmm12b.py (1 change: 1 addition & 0 deletions)
@@ -5,6 +5,7 @@
# openbmb/OmniLMM-12B

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "omnilmm12b"

def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
backend/phi3.py (1 change: 1 addition & 0 deletions)
@@ -6,6 +6,7 @@
# failspy/Phi-3-vision-128k-instruct-abliterated-alpha

class VisionQnA(VisionQnABase):
+format: str = 'phi3'
model_name: str = "phi3"
vision_layers: List[str] = ["vision_embed_tokens"]

backend/xcomposer2-vl.py (1 change: 1 addition & 0 deletions)
@@ -31,6 +31,7 @@ class InternLMXComposer2QForCausalLM(auto_gptq.modeling.BaseGPTQForCausalLM):
]

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "xcomposer2-vl"
vision_layers: List[str] = ['vit', 'vision_proj']

backend/xcomposer2.py (1 change: 1 addition & 0 deletions)
@@ -26,6 +26,7 @@ class InternLMXComposer2QForCausalLM(auto_gptq.modeling.BaseGPTQForCausalLM):
]

class VisionQnA(VisionQnABase):
+format: str = 'internal'
model_name: str = "xcomposer2"
vision_layers: List[str] = ['vit', 'vision_proj']

backend/yi-vl.py (2 changes: 1 addition & 1 deletion)
@@ -18,7 +18,7 @@

class VisionQnA(VisionQnABase):
model_name: str = "qwen-vl"
-format: 'chatml'
+format: str = 'chatml'

def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
super().__init__(model_id, device, device_map, extra_params, format)
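
The one-line fix above is more than cosmetic: the old spelling is a bare annotation (with 'chatml' sitting in the type position), so Python records it in __annotations__ but never binds a class attribute. A quick demonstration:

    class Broken:
        format: 'chatml'             # annotation only: nothing is assigned

    class Fixed:
        format: str = 'chatml'       # a real class attribute

    print(hasattr(Broken, 'format'))   # False
    print(Fixed.format)                # chatml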
model_conf_tests.alt.json (43 changes: 5 additions & 38 deletions)
@@ -1,52 +1,19 @@
[
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"],
["Qwen/Qwen-VL-Chat"],
["THUDM/cogagent-chat-hf", "--load-in-4bit"],
["THUDM/cogagent-chat-hf"],
["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogvlm2-llama3-chat-19B"],
["THUDM/cogvlm2-llama3-chinese-chat-19B"],
["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--use-flash-attn"],
["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B", "--use-flash-attn"],
["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B-HD", "--use-flash-attn"],
["YanweiLi/MGM-2B", "--use-flash-attn"],
["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B", "--use-flash-attn"],
["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B-HD", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--use-flash-attn"],
["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B", "--use-flash-attn"],
["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B-HD", "--use-flash-attn"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["internlm/internlm-xcomposer2-4khd-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn", "--device", "cuda:0"],
["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn", "--device", "cuda:0"],
["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-Llama3-V-2_5", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"],
["qihoo360/360VL-8B", "--use-flash-attn"],
["vikhyatk/moondream1"],
["vikhyatk/moondream2", "--use-flash-attn"]
["vikhyatk/moondream1"]
]
model_conf_tests.json (12 changes: 12 additions & 0 deletions)
@@ -25,6 +25,18 @@
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0", "--max-tiles", "40"],
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-1B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-1B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-2B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-2B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-4B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-4B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-8B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-8B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-26B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-26B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-40B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-40B", "--device-map", "cuda:0"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5", "--load-in-4bit"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5", "--max-tiles", "40", "--load-in-4bit"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5", "--max-tiles", "40"],
test_models.py (16 changes: 10 additions & 6 deletions)
@@ -309,14 +309,18 @@ def no_image_response(prompt):

print(f"### End tests.")

print("""# This sample env file can be used to set environment variables for the docker-compose.yml
fname = f"sample.env-{time.time()}"
with open(fname,'w') as results_file:
print("""# This sample env file can be used to set environment variables for the docker-compose.yml
# Copy this file to vision.env and uncomment the model of your choice.
HF_HOME=hf_home
HF_HUB_ENABLE_HF_TRANSFER=1
#HF_TOKEN=hf-...
#CUDA_VISIBLE_DEVICES=1,0""")
#CUDA_VISIBLE_DEVICES=1,0""", file=results_file)

-for r in all_results:
-    cmdl = ' '.join(r['args'])
-    result = all(r['results'])
-    print(f"#CLI_COMMAND=\"python vision.py -m {cmdl}\" # test {green_pass if result else red_fail}, time: {r['time']:.1f}s, mem: {r['mem']:.1f}GB, {r['note']}")
+    for r in all_results:
+        cmdl = ' '.join(r['args'])
+        result = all(r['results'])
+        print(f"#CLI_COMMAND=\"python vision.py -m {cmdl}\" # test {green_pass if result else red_fail}, time: {r['time']:.1f}s, mem: {r['mem']:.1f}GB, {r['note']}", file=results_file)
+
+print(open(fname,'r').read())
