Backporting to Python 3.9 #100

Open · wants to merge 1 commit into base: main
117 changes: 59 additions & 58 deletions nodes.py
@@ -3,20 +3,21 @@
import io
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageDraw, ImageColor, ImageFont
import random
import numpy as np
import re
from pathlib import Path
+from typing import Union

#workaround for unnecessary flash_attn requirement
from unittest.mock import patch
from transformers.dynamic_module_utils import get_imports

-def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
+def fixed_get_imports(filename: Union[str, os.PathLike]) -> list[str]:
    try:
        if not str(filename).endswith("modeling_florence2.py"):
            return get_imports(filename)
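# Editor's note on why this hunk is the core of the backport (not part of the
# diff; takes_path below is a hypothetical example): this file does not use
# `from __future__ import annotations`, so annotations are evaluated at import
# time, and `str | os.PathLike` invokes the `|` operator on `type`, which only
# exists on Python 3.10+ (PEP 604). On 3.9 the module fails to import with a
# TypeError. The typing.Union spelling is equivalent and works on 3.9:
import os
from typing import Union

def takes_path(p: Union[str, os.PathLike]) -> str:
    # fine on Python 3.9; `str | os.PathLike` here would raise at import time
    return os.fspath(p)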
@@ -41,7 +42,7 @@ class DownloadAndLoadFlorence2Model:
    def INPUT_TYPES(s):
        return {"required": {
            "model": (
                [
                    'microsoft/Florence-2-base',
                    'microsoft/Florence-2-base-ft',
                    'microsoft/Florence-2-large',
@@ -86,14 +87,14 @@ def loadmodel(self, model, precision, attention, lora=None):

model_name = model.rsplit('/', 1)[-1]
model_path = os.path.join(folder_paths.models_dir, "LLM", model_name)

if not os.path.exists(model_path):
    print(f"Downloading Florence2 model to: {model_path}")
    from huggingface_hub import snapshot_download
    snapshot_download(repo_id=model,
                      local_dir=model_path,
                      local_dir_use_symlinks=False)

print(f"using {attention} for attention")
with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
    model = AutoModelForCausalLM.from_pretrained(model_path, attn_implementation=attention, device_map=device, torch_dtype=dtype, trust_remote_code=True)
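# Note: the patch() context above swaps get_imports for fixed_get_imports only
# while from_pretrained() resolves the model's remote code, so flash_attn is
# never demanded even though modeling_florence2.py declares it as an import.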
@@ -103,26 +104,26 @@ def loadmodel(self, model, precision, attention, lora=None):
    from peft import PeftModel
    adapter_name = lora
    model = PeftModel.from_pretrained(model, adapter_name, trust_remote_code=True)

florence2_model = {
    'model': model,
    'processor': processor,
    'dtype': dtype
    }

return (florence2_model,)
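# The FL2MODEL dict is the unit handed to downstream nodes: the (optionally
# LoRA-wrapped) model, its processor, and the inference dtype travel together.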

class DownloadAndLoadFlorence2Lora:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                "model": (
                    [
                        'NikshepShetty/Florence-2-pixelprose',
                    ],
                ),
            },
        }

    RETURN_TYPES = ("PEFTLORA",)
@@ -133,15 +134,15 @@ def INPUT_TYPES(s):
    def loadmodel(self, model):
        model_name = model.rsplit('/', 1)[-1]
        model_path = os.path.join(folder_paths.models_dir, "LLM", model_name)

        if not os.path.exists(model_path):
            print(f"Downloading Florence2 lora model to: {model_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id=model,
                              local_dir=model_path,
                              local_dir_use_symlinks=False)
        return (model_path,)

class Florence2ModelLoader:

    @classmethod
@@ -179,15 +180,15 @@ def loadmodel(self, model, precision, attention, lora=None):
    from peft import PeftModel
    adapter_name = lora
    model = PeftModel.from_pretrained(model, adapter_name, trust_remote_code=True)

florence2_model = {
    'model': model,
    'processor': processor,
    'dtype': dtype
    }

return (florence2_model,)

class Florence2Run:
    @classmethod
    def INPUT_TYPES(s):
@@ -197,7 +198,7 @@ def INPUT_TYPES(s):
        "florence2_model": ("FL2MODEL", ),
        "text_input": ("STRING", {"default": "", "multiline": True}),
        "task": (
            [
                'region_caption',
                'dense_region_caption',
                'region_proposal',
@@ -226,9 +227,9 @@ def INPUT_TYPES(s):
        "seed": ("INT", {"default": 1, "min": 1, "max": 0xffffffffffffffff}),
        }
    }

RETURN_TYPES = ("IMAGE", "MASK", "STRING", "JSON")
RETURN_NAMES = ("image", "mask", "caption", "data")
FUNCTION = "encode"
CATEGORY = "Florence2"

@@ -243,7 +244,7 @@ def hash_seed(self, seed):
    # Ensure the hashed seed is within the acceptable range for set_seed
    return hashed_seed % (2**32)
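# Editor's sketch of the truncated helper above: only the two visible lines
# come from the diff; the SHA-256 step is an assumption. numpy's RNG (used by
# transformers' set_seed) only accepts seeds below 2**32, hence the modulo.
import hashlib

def hash_seed_sketch(seed: int) -> int:
    # Hash the seed so adjacent integer seeds give unrelated RNG streams
    hashed = int(hashlib.sha256(str(seed).encode("utf-8")).hexdigest(), 16)
    # Ensure the hashed seed is within the acceptable range for set_seed
    return hashed % (2**32)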

def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
           num_beams=3, max_new_tokens=1024, do_sample=True, output_mask_select="", seed=None):
    device = mm.get_torch_device()
    _, height, width, _ = image.shape
@@ -254,7 +255,7 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
model = florence2_model['model']
dtype = florence2_model['dtype']
model.to(device)

if seed:
    set_seed(self.hash_seed(seed))

@@ -289,7 +290,7 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
prompt = task_prompt

image = image.permute(0, 3, 1, 2)
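# (the permute above converts ComfyUI's BHWC image batch to BCHW, the layout
# torchvision's functional transforms expect downstream)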

out = []
out_masks = []
out_results = []
@@ -311,11 +312,11 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
print(results)
# cleanup the special tokens from the final list
if task == 'ocr_with_region':
    clean_results = str(results)
    cleaned_string = re.sub(r'</?s>|<[^>]*>', '\n', clean_results)
    clean_results = re.sub(r'\n+', '\n', cleaned_string)
else:
    clean_results = str(results)
    clean_results = clean_results.replace('</s>', '')
    clean_results = clean_results.replace('<s>', '')
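# Illustrative effect of the ocr_with_region branch above (made-up input; real
# Florence-2 output interleaves text with <loc_*>-style location tokens):
#   "<s>Hello<loc_12>World</s>"  ->  "\nHello\n\nWorld\n"  ->  "\nHello\nWorld\n"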

@@ -326,10 +327,10 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
out_results.append(clean_results)

W, H = image_pil.size

parsed_answer = processor.post_process_generation(results, task=task_prompt, image_size=(W, H))

if task == 'region_caption' or task == 'dense_region_caption' or task == 'caption_to_phrase_grounding' or task == 'region_proposal':
    fig, ax = plt.subplots(figsize=(W / 100, H / 100), dpi=100)
    fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
    ax.imshow(image_pil)
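    # figsize is in inches, so (W / 100, H / 100) at dpi=100 yields a canvas of
    # exactly W x H pixels; with the margins zeroed out by subplots_adjust, the
    # drawn overlay stays pixel-aligned with the input image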
@@ -352,7 +353,7 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
for index, (bbox, label) in enumerate(zip(bboxes, labels)):
    # Modify the label to include the index
    indexed_label = f"{index}.{label}"

    if fill_mask:
        if str(index) in mask_indexes:
            print("match index:", str(index), "in mask_indexes:", mask_indexes)
@@ -401,20 +402,20 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
        fontsize=12,
        bbox=dict(facecolor=facecolor, alpha=0.5)
    )
if fill_mask:
    mask_tensor = F.to_tensor(mask_layer)
    mask_tensor = mask_tensor.unsqueeze(0).permute(0, 2, 3, 1).cpu().float()
    mask_tensor = mask_tensor.mean(dim=0, keepdim=True)
    mask_tensor = mask_tensor.repeat(1, 1, 1, 3)
    mask_tensor = mask_tensor[:, :, :, 0]
    out_masks.append(mask_tensor)
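    # Net effect of the conversions above: a (1, H, W) float tensor in
    # ComfyUI's MASK layout, taken from the first channel of the RGBA overlay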

# Remove axis and padding around the image
ax.axis('off')
ax.margins(0, 0)
ax.get_xaxis().set_major_locator(plt.NullLocator())
ax.get_yaxis().set_major_locator(plt.NullLocator())
fig.canvas.draw()
buf = io.BytesIO()
plt.savefig(buf, format='png', pad_inches=0)
buf.seek(0)
@@ -423,34 +424,34 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
    annotated_image_tensor = F.to_tensor(annotated_image_pil)
    out_tensor = annotated_image_tensor[:3, :, :].unsqueeze(0).permute(0, 2, 3, 1).cpu().float()
    out.append(out_tensor)

    out_data.append(bboxes)

    pbar.update(1)

    plt.close(fig)

elif task == 'referring_expression_segmentation':
    # Create a new black image
    mask_image = Image.new('RGB', (W, H), 'black')
    mask_draw = ImageDraw.Draw(mask_image)

    predictions = parsed_answer[task_prompt]

    # Iterate over polygons and labels
    for polygons, label in zip(predictions['polygons'], predictions['labels']):
        color = random.choice(colormap)
        for _polygon in polygons:
            _polygon = np.array(_polygon).reshape(-1, 2)
            # Clamp polygon points to image boundaries
            _polygon = np.clip(_polygon, [0, 0], [W - 1, H - 1])
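            # np.clip broadcasts the [0, 0] / [W - 1, H - 1] bounds over the
            # (N, 2) point array, clamping x and y independently to the frame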
            if len(_polygon) < 3:
                print('Invalid polygon:', _polygon)
                continue

            _polygon = _polygon.reshape(-1).tolist()

            # Draw the polygon
            if fill_mask:
                overlay = Image.new('RGBA', image_pil.size, (255, 255, 255, 0))
@@ -465,9 +466,9 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,

            #draw mask
            mask_draw.polygon(_polygon, outline="white", fill="white")

    image_tensor = F.to_tensor(image_pil)
    image_tensor = image_tensor[:3, :, :].unsqueeze(0).permute(0, 2, 3, 1).cpu().float()
    out.append(image_tensor)

    mask_tensor = F.to_tensor(mask_image)
@@ -489,36 +490,36 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
overlay = Image.new('RGBA', image_pil.size, (255, 255, 255, 0))
draw = ImageDraw.Draw(overlay)
bboxes, labels = predictions['quad_boxes'], predictions['labels']

# Create a new black image for the mask
mask_image = Image.new('RGB', (W, H), 'black')
mask_draw = ImageDraw.Draw(mask_image)

for box, label in zip(bboxes, labels):
    scaled_box = [v / (width if idx % 2 == 0 else height) for idx, v in enumerate(box)]
    out_data.append({"label": label, "box": scaled_box})
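    # quad_boxes arrive as flat [x1, y1, ..., x4, y4] pixel coordinates: even
    # indices are x (divided by width), odd are y (divided by height), so the
    # "box" stored in out_data is normalized to the 0-1 range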

    color = random.choice(colormap)
    new_box = (np.array(box) * scale).tolist()

    if fill_mask:
        color_with_opacity = ImageColor.getrgb(color) + (180,)
        draw.polygon(new_box, outline=color, fill=color_with_opacity, width=3)
    else:
        draw.polygon(new_box, outline=color, width=3)

    draw.text((new_box[0]+8, new_box[1]+2),
              "{}".format(label),
              align="right",
              font=font,
              fill=color)

    # Draw the mask
    mask_draw.polygon(new_box, outline="white", fill="white")

image_pil = Image.alpha_composite(image_pil, overlay)
image_pil = image_pil.convert('RGB')

image_tensor = F.to_tensor(image_pil)
image_tensor = image_tensor[:3, :, :].unsqueeze(0).permute(0, 2, 3, 1).cpu().float()
out.append(image_tensor)
@@ -532,7 +533,7 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
    out_masks.append(mask_tensor)

    pbar.update(1)

elif task == 'docvqa':
    if text_input == "":
        raise ValueError("Text input (prompt) is required for 'docvqa'")
@@ -549,16 +550,16 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,

results = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
clean_results = results.replace('</s>', '').replace('<s>', '')

if len(image) == 1:
    out_results = clean_results
else:
    out_results.append(clean_results)

out.append(F.to_tensor(image_pil).unsqueeze(0).permute(0, 2, 3, 1).cpu().float())

pbar.update(1)

if len(out) > 0:
    out_tensor = torch.cat(out, dim=0)
else:
@@ -572,9 +573,9 @@ def encode(self, image, text_input, florence2_model, task, fill_mask, keep_model_loaded=False,
    print("Offloading model...")
    model.to(offload_device)
    mm.soft_empty_cache()

return (out_tensor, out_mask_tensor, out_results, out_data)
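# Tuple order matches RETURN_TYPES above: IMAGE, MASK, STRING (caption), JSON (data)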

NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadFlorence2Model": DownloadAndLoadFlorence2Model,
    "DownloadAndLoadFlorence2Lora": DownloadAndLoadFlorence2Lora,