Skip to content

Commit

Permalink
fix convert
Browse files Browse the repository at this point in the history
  • Loading branch information
tc-mb committed Aug 12, 2024
1 parent 1123376 commit f30c5e1
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions examples/llava/minicpmv-convert-image-encoder-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def get_input_embeddings(self) -> nn.Module:

import numpy as np
from gguf import *
from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionTransformer, Idefics2VisionConfig

TEXT = "clip.text"
VISION = "clip.vision"
Expand Down Expand Up @@ -542,6 +543,15 @@ def bytes_to_unicode():
# model = CLIPModel.from_pretrained(dir_model)
# processor = CLIPProcessor.from_pretrained(dir_model)

# Pick emb_dim from the MiniCPM-V version given on the command line.
# Versions 1, 2 and 3 map to 2304, 4096 and 3584; any other value falls
# back to 4096.
# NOTE(review): emb_dim is presumably the embedding width consumed further
# down the script -- confirm against its use there.
minicpmv_version = args.minicpmv_version
_emb_dim_by_version = {1: 2304, 2: 4096, 3: 3584}
emb_dim = _emb_dim_by_version.get(minicpmv_version, 4096)

default_vision_config = {
"hidden_size": 1152,
"image_size": 980,
Expand All @@ -552,8 +562,12 @@ def bytes_to_unicode():
"patch_size": 14,
}

vision_config = SiglipVisionConfig(**default_vision_config)
model = SiglipVisionTransformer(vision_config)
# Build the (untrained) vision tower that matches the requested version.
# The previous form tested `minicpmv_version == 3` in both the `if` and the
# `elif`, so the second branch could never run and `vision_config` / `model`
# stayed undefined for every version other than 3. Using `else` guarantees
# both names are always bound.
# NOTE(review): version 3 keeps the Idefics2 classes exactly as written here
# and every other version gets the Siglip classes that were used
# unconditionally before this change -- confirm this mapping is the intended
# one for each MiniCPM-V release.
if minicpmv_version == 3:
    vision_config = Idefics2VisionConfig(**default_vision_config)
    model = Idefics2VisionTransformer(vision_config)
else:
    vision_config = SiglipVisionConfig(**default_vision_config)
    model = SiglipVisionTransformer(vision_config)

processor = None
# if model.attn_pool is not None:
Expand All @@ -566,14 +580,7 @@ def bytes_to_unicode():
# Flags describing which components are present; they start at these values
# and may be adjusted by the argument handling that follows.
has_text_encoder = True
has_vision_encoder = True
has_minicpmv_projector = False

# Pick emb_dim from the requested MiniCPM-V version: 1 -> 2304, 2 -> 4096,
# 3 -> 3584, and 4096 for any other value.
minicpmv_version = args.minicpmv_version
emb_dim = {1: 2304, 2: 4096, 3: 3584}.get(minicpmv_version, 4096)


if args.text_only:
fname_middle = "text-"
Expand Down

0 comments on commit f30c5e1

Please sign in to comment.