From 73d4c4d56ddc323f54342cf9b4a9651b7505b987 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 16 Oct 2024 16:22:35 +0000 Subject: [PATCH 01/30] Naive implementation of CFG for FLUX. --- invokeai/app/invocations/flux_denoise.py | 54 ++++++++++++++++++------ invokeai/backend/flux/denoise.py | 22 ++++++++++ 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index f075ea8c9db..7fba862a455 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -49,7 +49,7 @@ title="FLUX Denoise", tags=["image", "flux"], category="image", - version="3.1.0", + version="3.2.0", classification=Classification.Prototype, ) class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): @@ -82,6 +82,12 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): positive_text_conditioning: FluxConditioningField = InputField( description=FieldDescriptions.positive_cond, input=Input.Connection ) + negative_text_conditioning: FluxConditioningField = InputField( + description=FieldDescriptions.negative_cond, input=Input.Connection + ) + # TODO(ryand): Add support for cfg_scale to be a list of floats: one for each step. + # TODO(ryand): Add cfg_scale range validation. + cfg_scale: float = InputField(default=3.0, description=FieldDescriptions.cfg_scale, title="CFG Scale") width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") num_steps: int = InputField( @@ -108,6 +114,19 @@ def invoke(self, context: InvocationContext) -> LatentsOutput: name = context.tensors.save(tensor=latents) return LatentsOutput.build(latents_name=name, latents=latents, seed=None) + def _load_text_conditioning( + self, context: InvocationContext, conditioning_name: str, dtype: torch.dtype + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Load the conditioning data. + cond_data = context.conditioning.load(conditioning_name) + assert len(cond_data.conditionings) == 1 + flux_conditioning = cond_data.conditionings[0] + assert isinstance(flux_conditioning, FLUXConditioningInfo) + flux_conditioning = flux_conditioning.to(dtype=dtype) + t5_embeddings = flux_conditioning.t5_embeds + clip_embeddings = flux_conditioning.clip_embeds + return t5_embeddings, clip_embeddings + def _run_diffusion( self, context: InvocationContext, @@ -115,13 +134,12 @@ def _run_diffusion( inference_dtype = torch.bfloat16 # Load the conditioning data. - cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name) - assert len(cond_data.conditionings) == 1 - flux_conditioning = cond_data.conditionings[0] - assert isinstance(flux_conditioning, FLUXConditioningInfo) - flux_conditioning = flux_conditioning.to(dtype=inference_dtype) - t5_embeddings = flux_conditioning.t5_embeds - clip_embeddings = flux_conditioning.clip_embeds + pos_t5_embeddings, pos_clip_embeddings = self._load_text_conditioning( + context, self.positive_text_conditioning.conditioning_name, inference_dtype + ) + neg_t5_embeddings, neg_clip_embeddings = self._load_text_conditioning( + context, self.negative_text_conditioning.conditioning_name, inference_dtype + ) # Load the input latents, if provided. 
init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None @@ -182,8 +200,14 @@ def _run_diffusion( b, _c, latent_h, latent_w = x.shape img_ids = generate_img_ids(h=latent_h, w=latent_w, batch_size=b, device=x.device, dtype=x.dtype) - bs, t5_seq_len, _ = t5_embeddings.shape - txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()) + pos_bs, pos_t5_seq_len, _ = pos_t5_embeddings.shape + pos_txt_ids = torch.zeros( + pos_bs, pos_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device() + ) + neg_bs, neg_t5_seq_len, _ = neg_t5_embeddings.shape + neg_txt_ids = torch.zeros( + neg_bs, neg_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device() + ) # Pack all latent tensors. init_latents = pack(init_latents) if init_latents is not None else None @@ -256,12 +280,16 @@ def _run_diffusion( model=transformer, img=x, img_ids=img_ids, - txt=t5_embeddings, - txt_ids=txt_ids, - vec=clip_embeddings, + txt=pos_t5_embeddings, + txt_ids=pos_txt_ids, + vec=pos_clip_embeddings, + neg_txt=neg_t5_embeddings, + neg_txt_ids=neg_txt_ids, + neg_vec=neg_clip_embeddings, timesteps=timesteps, step_callback=self._build_step_callback(context), guidance=self.guidance, + cfg_scale=self.cfg_scale, inpaint_extension=inpaint_extension, controlnet_extensions=controlnet_extensions, ) diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index 14fafb6e1d8..b524d67e7cd 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -16,13 +16,19 @@ def denoise( # model input img: torch.Tensor, img_ids: torch.Tensor, + # positive text conditioning txt: torch.Tensor, txt_ids: torch.Tensor, vec: torch.Tensor, + # negative text conditioning + neg_txt: torch.Tensor, + neg_txt_ids: torch.Tensor, + neg_vec: torch.Tensor, # sampling parameters timesteps: list[float], step_callback: Callable[[PipelineIntermediateState], None], guidance: float, + cfg_scale: float, inpaint_extension: InpaintExtension | None, controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension], ): @@ -78,6 +84,22 @@ def denoise( controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals, ) + # TODO(ryand): Add option to apply controlnet to negative conditioning as well. + # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance on + # systems with sufficient VRAM. + neg_pred = model( + img=img, + img_ids=img_ids, + txt=neg_txt, + txt_ids=neg_txt_ids, + y=neg_vec, + timesteps=t_vec, + guidance=guidance_vec, + controlnet_double_block_residuals=None, + controlnet_single_block_residuals=None, + ) + pred = neg_pred + cfg_scale * (pred - neg_pred) + preview_img = img - t_curr * pred img = img + (t_prev - t_curr) * pred From 371742d8f91208984746e3612b8f4d52ed636dda Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 18 Oct 2024 20:14:47 +0000 Subject: [PATCH 02/30] Add support for cfg_scale list on FLUX Denoise node. 
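When cfg_scale is given as a list, it is treated as a per-step schedule: one value for each of the len(timesteps) - 1 denoising steps, indexed by step_index in the denoise loop. The per-step combination is the usual classifier-free guidance formula. A minimal sketch of that update (illustrative only; the helper and argument names here are hypothetical, not part of the patch):

    import torch

    def cfg_combine(pos_pred: torch.Tensor, neg_pred: torch.Tensor, scale: float) -> torch.Tensor:
        # A scale of 1.0 reduces to pos_pred, so the negative model call can be skipped for that step.
        return neg_pred + scale * (pos_pred - neg_pred)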
--- invokeai/app/invocations/flux_denoise.py | 3 +- invokeai/backend/flux/denoise.py | 44 +++++++++++++----------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index 7fba862a455..e87c2ff3de9 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -85,9 +85,8 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): negative_text_conditioning: FluxConditioningField = InputField( description=FieldDescriptions.negative_cond, input=Input.Connection ) - # TODO(ryand): Add support for cfg_scale to be a list of floats: one for each step. # TODO(ryand): Add cfg_scale range validation. - cfg_scale: float = InputField(default=3.0, description=FieldDescriptions.cfg_scale, title="CFG Scale") + cfg_scale: float | list[float] = InputField(default=1.0, description=FieldDescriptions.cfg_scale, title="CFG Scale") width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") num_steps: int = InputField( diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index b524d67e7cd..bcdb15a18f9 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -1,3 +1,4 @@ +import math from typing import Callable import torch @@ -28,7 +29,7 @@ def denoise( timesteps: list[float], step_callback: Callable[[PipelineIntermediateState], None], guidance: float, - cfg_scale: float, + cfg_scale: float | list[float], inpaint_extension: InpaintExtension | None, controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension], ): @@ -43,10 +44,9 @@ def denoise( latents=img, ), ) - step = 1 # guidance_vec is ignored for schnell. guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype) - for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))): + for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))): t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device) # Run ControlNet models. @@ -54,7 +54,7 @@ def denoise( for controlnet_extension in controlnet_extensions: controlnet_residuals.append( controlnet_extension.run_controlnet( - timestep_index=step - 1, + timestep_index=step_index, total_num_timesteps=total_steps, img=img, img_ids=img_ids, @@ -84,21 +84,24 @@ def denoise( controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals, ) - # TODO(ryand): Add option to apply controlnet to negative conditioning as well. - # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance on - # systems with sufficient VRAM. - neg_pred = model( - img=img, - img_ids=img_ids, - txt=neg_txt, - txt_ids=neg_txt_ids, - y=neg_vec, - timesteps=t_vec, - guidance=guidance_vec, - controlnet_double_block_residuals=None, - controlnet_single_block_residuals=None, - ) - pred = neg_pred + cfg_scale * (pred - neg_pred) + step_cfg_scale = cfg_scale[step_index] if isinstance(cfg_scale, list) else cfg_scale + + # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction. + if not math.isclose(step_cfg_scale, 1.0): + # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance on + # systems with sufficient VRAM. 
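+            # The negative prediction computed here is combined with the positive prediction using the standard CFG formula.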
+ neg_pred = model( + img=img, + img_ids=img_ids, + txt=neg_txt, + txt_ids=neg_txt_ids, + y=neg_vec, + timesteps=t_vec, + guidance=guidance_vec, + controlnet_double_block_residuals=None, + controlnet_single_block_residuals=None, + ) + pred = neg_pred + step_cfg_scale * (pred - neg_pred) preview_img = img - t_curr * pred img = img + (t_prev - t_curr) * pred @@ -109,13 +112,12 @@ def denoise( step_callback( PipelineIntermediateState( - step=step, + step=step_index + 1, order=1, total_steps=total_steps, timestep=int(t_curr), latents=preview_img, ), ) - step += 1 return img From 6df4ee5fc8bcb2317361e9b01de4d1d05186e6af Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 18 Oct 2024 20:31:27 +0000 Subject: [PATCH 03/30] Make negative_text_conditioning nullable on FLUX Denoise invocation. --- invokeai/app/invocations/flux_denoise.py | 25 +++++++++++++++--------- invokeai/backend/flux/denoise.py | 10 +++++++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index e87c2ff3de9..81e2f28a4fe 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -82,8 +82,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): positive_text_conditioning: FluxConditioningField = InputField( description=FieldDescriptions.positive_cond, input=Input.Connection ) - negative_text_conditioning: FluxConditioningField = InputField( - description=FieldDescriptions.negative_cond, input=Input.Connection + negative_text_conditioning: FluxConditioningField | None = InputField( + default=None, + description="Negative conditioning tensor. Can be None if cfg_scale is 1.0.", + input=Input.Connection, ) # TODO(ryand): Add cfg_scale range validation. cfg_scale: float | list[float] = InputField(default=1.0, description=FieldDescriptions.cfg_scale, title="CFG Scale") @@ -136,9 +138,12 @@ def _run_diffusion( pos_t5_embeddings, pos_clip_embeddings = self._load_text_conditioning( context, self.positive_text_conditioning.conditioning_name, inference_dtype ) - neg_t5_embeddings, neg_clip_embeddings = self._load_text_conditioning( - context, self.negative_text_conditioning.conditioning_name, inference_dtype - ) + neg_t5_embeddings: torch.Tensor | None = None + neg_clip_embeddings: torch.Tensor | None = None + if self.negative_text_conditioning is not None: + neg_t5_embeddings, neg_clip_embeddings = self._load_text_conditioning( + context, self.negative_text_conditioning.conditioning_name, inference_dtype + ) # Load the input latents, if provided. init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None @@ -203,10 +208,12 @@ def _run_diffusion( pos_txt_ids = torch.zeros( pos_bs, pos_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device() ) - neg_bs, neg_t5_seq_len, _ = neg_t5_embeddings.shape - neg_txt_ids = torch.zeros( - neg_bs, neg_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device() - ) + neg_txt_ids: torch.Tensor | None = None + if neg_t5_embeddings is not None: + neg_bs, neg_t5_seq_len, _ = neg_t5_embeddings.shape + neg_txt_ids = torch.zeros( + neg_bs, neg_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device() + ) # Pack all latent tensors. 
init_latents = pack(init_latents) if init_latents is not None else None diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index bcdb15a18f9..92811f76f64 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -22,9 +22,9 @@ def denoise( txt_ids: torch.Tensor, vec: torch.Tensor, # negative text conditioning - neg_txt: torch.Tensor, - neg_txt_ids: torch.Tensor, - neg_vec: torch.Tensor, + neg_txt: torch.Tensor | None, + neg_txt_ids: torch.Tensor | None, + neg_vec: torch.Tensor | None, # sampling parameters timesteps: list[float], step_callback: Callable[[PipelineIntermediateState], None], @@ -90,6 +90,10 @@ def denoise( if not math.isclose(step_cfg_scale, 1.0): # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance on # systems with sufficient VRAM. + + if neg_txt is None or neg_txt_ids is None or neg_vec is None: + raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.") + neg_pred = model( img=img, img_ids=img_ids, From 32c7cdd856520aee9f22195406e45289377d2d7d Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 21 Oct 2024 14:52:02 +0000 Subject: [PATCH 04/30] Add cfg_scale_start_step and cfg_scale_end_step to FLUX Denoise node. --- invokeai/app/invocations/flux_denoise.py | 71 +++++++++++++++++++++- invokeai/backend/flux/denoise.py | 4 +- tests/app/invocations/test_flux_denoise.py | 62 +++++++++++++++++++ 3 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 tests/app/invocations/test_flux_denoise.py diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index 81e2f28a4fe..8120ac400f5 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -87,8 +87,19 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): description="Negative conditioning tensor. Can be None if cfg_scale is 1.0.", input=Input.Connection, ) - # TODO(ryand): Add cfg_scale range validation. cfg_scale: float | list[float] = InputField(default=1.0, description=FieldDescriptions.cfg_scale, title="CFG Scale") + cfg_scale_start_step: int = InputField( + default=0, + title="CFG Scale Start Step", + description="Index of the first step to apply cfg_scale. Negative indices count backwards from the " + + "the last step (e.g. a value of -1 refers to the final step).", + ) + cfg_scale_end_step: int = InputField( + default=-1, + title="CFG Scale End Step", + description="Index of the last step to apply cfg_scale. Negative indices count backwards from the " + + "last step (e.g. a value of -1 refers to the final step).", + ) width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") num_steps: int = InputField( @@ -234,6 +245,13 @@ def _run_diffusion( noise=noise, ) + cfg_scale = self.prep_cfg_scale( + cfg_scale=self.cfg_scale, + timesteps=timesteps, + cfg_scale_start_step=self.cfg_scale_start_step, + cfg_scale_end_step=self.cfg_scale_end_step, + ) + with ExitStack() as exit_stack: # Prepare ControlNet extensions. # Note: We do this before loading the transformer model to minimize peak memory (see implementation). 
@@ -295,7 +313,7 @@ def _run_diffusion( timesteps=timesteps, step_callback=self._build_step_callback(context), guidance=self.guidance, - cfg_scale=self.cfg_scale, + cfg_scale=cfg_scale, inpaint_extension=inpaint_extension, controlnet_extensions=controlnet_extensions, ) @@ -303,6 +321,55 @@ def _run_diffusion( x = unpack(x.float(), self.height, self.width) return x + @classmethod + def prep_cfg_scale( + cls, cfg_scale: float | list[float], timesteps: list[float], cfg_scale_start_step: int, cfg_scale_end_step: int + ) -> list[float]: + """Prepare the cfg_scale schedule. + + - Clips the cfg_scale schedule based on cfg_scale_start_step and cfg_scale_end_step. + - If cfg_scale is a list, then it is assumed to be a schedule and is returned as-is. + - If cfg_scale is a scalar, then a linear schedule is created from cfg_scale_start_step to cfg_scale_end_step. + """ + # num_steps is the number of denoising steps, which is one less than the number of timesteps. + num_steps = len(timesteps) - 1 + + # Normalize cfg_scale to a list if it is a scalar. + cfg_scale_list: list[float] + if isinstance(cfg_scale, float): + cfg_scale_list = [cfg_scale] * num_steps + elif isinstance(cfg_scale, list): + cfg_scale_list = cfg_scale + else: + raise ValueError(f"Unsupported cfg_scale type: {type(cfg_scale)}") + assert len(cfg_scale_list) == num_steps + + # Handle negative indices for cfg_scale_start_step and cfg_scale_end_step. + start_step_index = cfg_scale_start_step + if start_step_index < 0: + start_step_index = num_steps + start_step_index + end_step_index = cfg_scale_end_step + if end_step_index < 0: + end_step_index = num_steps + end_step_index + + # Validate the start and end step indices. + if not (0 <= start_step_index < num_steps): + raise ValueError(f"Invalid cfg_scale_start_step. Out of range: {cfg_scale_start_step}.") + if not (0 <= end_step_index < num_steps): + raise ValueError(f"Invalid cfg_scale_end_step. Out of range: {cfg_scale_end_step}.") + if start_step_index > end_step_index: + raise ValueError( + f"cfg_scale_start_step ({cfg_scale_start_step}) must be before cfg_scale_end_step " + + f"({cfg_scale_end_step})." + ) + + # Set values outside the start and end step indices to 1.0. This is equivalent to disabling cfg_scale for those + # steps. + clipped_cfg_scale = [1.0] * num_steps + clipped_cfg_scale[start_step_index : end_step_index + 1] = cfg_scale_list[start_step_index : end_step_index + 1] + + return clipped_cfg_scale + def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None: """Prepare the inpaint mask. diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index 92811f76f64..7ce375f4a24 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -29,7 +29,7 @@ def denoise( timesteps: list[float], step_callback: Callable[[PipelineIntermediateState], None], guidance: float, - cfg_scale: float | list[float], + cfg_scale: list[float], inpaint_extension: InpaintExtension | None, controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension], ): @@ -84,7 +84,7 @@ def denoise( controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals, ) - step_cfg_scale = cfg_scale[step_index] if isinstance(cfg_scale, list) else cfg_scale + step_cfg_scale = cfg_scale[step_index] # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction. 
if not math.isclose(step_cfg_scale, 1.0): diff --git a/tests/app/invocations/test_flux_denoise.py b/tests/app/invocations/test_flux_denoise.py new file mode 100644 index 00000000000..412ef7a490a --- /dev/null +++ b/tests/app/invocations/test_flux_denoise.py @@ -0,0 +1,62 @@ +import pytest + +from invokeai.app.invocations.flux_denoise import FluxDenoiseInvocation + +TIMESTEPS = [1.0, 0.75, 0.5, 0.25, 0.0] + + +@pytest.mark.parametrize( + ["cfg_scale", "timesteps", "cfg_scale_start_step", "cfg_scale_end_step", "expected"], + [ + # Test scalar cfg_scale. + (2.0, TIMESTEPS, 0, -1, [2.0, 2.0, 2.0, 2.0]), + # Test list cfg_scale. + ([1.0, 2.0, 3.0, 4.0], TIMESTEPS, 0, -1, [1.0, 2.0, 3.0, 4.0]), + # Test positive cfg_scale_start_step. + (2.0, TIMESTEPS, 1, -1, [1.0, 2.0, 2.0, 2.0]), + # Test positive cfg_scale_end_step. + (2.0, TIMESTEPS, 0, 2, [2.0, 2.0, 2.0, 1.0]), + # Test negative cfg_scale_start_step. + (2.0, TIMESTEPS, -3, -1, [1.0, 2.0, 2.0, 2.0]), + # Test negative cfg_scale_end_step. + (2.0, TIMESTEPS, 0, -2, [2.0, 2.0, 2.0, 1.0]), + # Test single step application. + (2.0, TIMESTEPS, 2, 2, [1.0, 1.0, 2.0, 1.0]), + ], +) +def test_prep_cfg_scale( + cfg_scale: float | list[float], + timesteps: list[float], + cfg_scale_start_step: int, + cfg_scale_end_step: int, + expected: list[float], +): + result = FluxDenoiseInvocation.prep_cfg_scale(cfg_scale, timesteps, cfg_scale_start_step, cfg_scale_end_step) + assert result == expected + + +def test_prep_cfg_scale_invalid_type(): + with pytest.raises(ValueError, match="Unsupported cfg_scale type"): + FluxDenoiseInvocation.prep_cfg_scale("invalid", [1.0, 0.5], 0, -1) # type: ignore + + +@pytest.mark.parametrize("cfg_scale_start_step", [4, -5]) +def test_prep_cfg_scale_invalid_start_step(cfg_scale_start_step: int): + with pytest.raises(ValueError, match="Invalid cfg_scale_start_step"): + FluxDenoiseInvocation.prep_cfg_scale(2.0, TIMESTEPS, cfg_scale_start_step, -1) + + +@pytest.mark.parametrize("cfg_scale_end_step", [4, -5]) +def test_prep_cfg_scale_invalid_end_step(cfg_scale_end_step: int): + with pytest.raises(ValueError, match="Invalid cfg_scale_end_step"): + FluxDenoiseInvocation.prep_cfg_scale(2.0, TIMESTEPS, 0, cfg_scale_end_step) + + +def test_prep_cfg_scale_start_after_end(): + with pytest.raises(ValueError, match="cfg_scale_start_step .* must be before cfg_scale_end_step"): + FluxDenoiseInvocation.prep_cfg_scale(2.0, TIMESTEPS, 3, 2) + + +def test_prep_cfg_scale_list_length_mismatch(): + with pytest.raises(AssertionError): + FluxDenoiseInvocation.prep_cfg_scale([1.0, 2.0, 3.0], TIMESTEPS, 0, -1) From 7bf5927c43342c4413ba39f3e1df220fe7e2cb28 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 11 Oct 2024 13:12:04 +0000 Subject: [PATCH 05/30] Add XLabs IP-Adapter state dict for unit tests. --- .../xlabs_flux_ip_adapter_state_dict.py | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 tests/backend/flux/ip_adapter/xlabs_flux_ip_adapter_state_dict.py diff --git a/tests/backend/flux/ip_adapter/xlabs_flux_ip_adapter_state_dict.py b/tests/backend/flux/ip_adapter/xlabs_flux_ip_adapter_state_dict.py new file mode 100644 index 00000000000..9d1453aa512 --- /dev/null +++ b/tests/backend/flux/ip_adapter/xlabs_flux_ip_adapter_state_dict.py @@ -0,0 +1,85 @@ +# State dict keys and shapes for an XLabs FLUX IP-Adapter model. Intended to be used for unit tests. 
+# These keys were extracted from: +# https://huggingface.co/XLabs-AI/flux-ip-adapter/blob/ad16be50d78a07ea83d8c4bde44ff9753235182e/flux-ip-adapter.safetensors +xlabs_sd_shapes = { + "double_blocks.0.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.0.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.0.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.1.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.1.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.1.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.1.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.10.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.10.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.10.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.10.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.11.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.11.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.11.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.11.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.12.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.12.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.12.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.12.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.13.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.13.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.13.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.13.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.14.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.14.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.14.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.14.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.15.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.15.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.15.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.15.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.16.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.16.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.16.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.16.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.17.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.17.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.17.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.17.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.18.processor.ip_adapter_double_stream_k_proj.bias": [3072], + 
"double_blocks.18.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.18.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.18.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.2.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.2.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.2.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.2.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.3.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.3.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.3.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.3.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.4.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.4.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.4.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.4.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.5.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.5.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.5.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.5.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.6.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.6.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.6.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.6.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.7.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.7.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.7.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.7.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.8.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.8.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.8.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.8.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "double_blocks.9.processor.ip_adapter_double_stream_k_proj.bias": [3072], + "double_blocks.9.processor.ip_adapter_double_stream_k_proj.weight": [3072, 4096], + "double_blocks.9.processor.ip_adapter_double_stream_v_proj.bias": [3072], + "double_blocks.9.processor.ip_adapter_double_stream_v_proj.weight": [3072, 4096], + "ip_adapter_proj_model.norm.bias": [4096], + "ip_adapter_proj_model.norm.weight": [4096], + "ip_adapter_proj_model.proj.bias": [16384], + "ip_adapter_proj_model.proj.weight": [16384, 768], +} From 9c9af312fe87ac8ccfa96689ebc5c0adce13f0ec Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 11 Oct 2024 14:11:11 +0000 Subject: [PATCH 06/30] Copy IPDoubleStreamBlockProcessor from https://github.com/XLabs-AI/x-flux/blob/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/modules/layers.py#L221. 
--- invokeai/backend/flux/ip_adapter/__init__.py | 0 .../ip_double_stream_block_processor.py | 75 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 invokeai/backend/flux/ip_adapter/__init__.py create mode 100644 invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py diff --git a/invokeai/backend/flux/ip_adapter/__init__.py b/invokeai/backend/flux/ip_adapter/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py b/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py new file mode 100644 index 00000000000..0b75b2a52fb --- /dev/null +++ b/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py @@ -0,0 +1,75 @@ +# This file is based on: +# https://github.com/XLabs-AI/x-flux/blob/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/modules/layers.py#L221 + + +class IPDoubleStreamBlockProcessor(nn.Module): + """Attention processor for handling IP-adapter with double stream block.""" + + def __init__(self, context_dim, hidden_dim): + super().__init__() + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("IPDoubleStreamBlockProcessor requires PyTorch 2.0 or higher. Please upgrade PyTorch.") + + # Ensure context_dim matches the dimension of image_proj + self.context_dim = context_dim + self.hidden_dim = hidden_dim + + # Initialize projections for IP-adapter + self.ip_adapter_double_stream_k_proj = nn.Linear(context_dim, hidden_dim, bias=True) + self.ip_adapter_double_stream_v_proj = nn.Linear(context_dim, hidden_dim, bias=True) + + nn.init.zeros_(self.ip_adapter_double_stream_k_proj.weight) + nn.init.zeros_(self.ip_adapter_double_stream_k_proj.bias) + + nn.init.zeros_(self.ip_adapter_double_stream_v_proj.weight) + nn.init.zeros_(self.ip_adapter_double_stream_v_proj.bias) + + def __call__(self, attn, img, txt, vec, pe, image_proj, ip_scale=1.0, **attention_kwargs): + # Prepare image for attention + img_mod1, img_mod2 = attn.img_mod(vec) + txt_mod1, txt_mod2 = attn.txt_mod(vec) + + img_modulated = attn.img_norm1(img) + img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift + img_qkv = attn.img_attn.qkv(img_modulated) + img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim) + img_q, img_k = attn.img_attn.norm(img_q, img_k, img_v) + + txt_modulated = attn.txt_norm1(txt) + txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift + txt_qkv = attn.txt_attn.qkv(txt_modulated) + txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim) + txt_q, txt_k = attn.txt_attn.norm(txt_q, txt_k, txt_v) + + q = torch.cat((txt_q, img_q), dim=2) + k = torch.cat((txt_k, img_k), dim=2) + v = torch.cat((txt_v, img_v), dim=2) + + attn1 = attention(q, k, v, pe=pe) + txt_attn, img_attn = attn1[:, : txt.shape[1]], attn1[:, txt.shape[1] :] + + # print(f"txt_attn shape: {txt_attn.size()}") + # print(f"img_attn shape: {img_attn.size()}") + + img = img + img_mod1.gate * attn.img_attn.proj(img_attn) + img = img + img_mod2.gate * attn.img_mlp((1 + img_mod2.scale) * attn.img_norm2(img) + img_mod2.shift) + + txt = txt + txt_mod1.gate * attn.txt_attn.proj(txt_attn) + txt = txt + txt_mod2.gate * attn.txt_mlp((1 + txt_mod2.scale) * attn.txt_norm2(txt) + txt_mod2.shift) + + # IP-adapter processing + ip_query = img_q # latent sample query + ip_key = self.ip_adapter_double_stream_k_proj(image_proj) + ip_value = 
self.ip_adapter_double_stream_v_proj(image_proj) + + # Reshape projections for multi-head attention + ip_key = rearrange(ip_key, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) + ip_value = rearrange(ip_value, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) + + # Compute attention between IP projections and the latent query + ip_attention = F.scaled_dot_product_attention(ip_query, ip_key, ip_value, dropout_p=0.0, is_causal=False) + ip_attention = rearrange(ip_attention, "B H L D -> B L (H D)", H=attn.num_heads, D=attn.head_dim) + + img = img + ip_scale * ip_attention + + return img, txt From ac7441e606a2d4bfda9b0924bd268ac56113e26f Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 11 Oct 2024 14:19:37 +0000 Subject: [PATCH 07/30] Fixup typing/imports for IPDoubleStreamBlockProcessor. --- .../ip_double_stream_block_processor.py | 58 ++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py b/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py index 0b75b2a52fb..9b1bef7f707 100644 --- a/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py +++ b/invokeai/backend/flux/ip_adapter/ip_double_stream_block_processor.py @@ -1,30 +1,42 @@ # This file is based on: # https://github.com/XLabs-AI/x-flux/blob/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/modules/layers.py#L221 +import einops +import torch +from invokeai.backend.flux.math import attention +from invokeai.backend.flux.modules.layers import DoubleStreamBlock -class IPDoubleStreamBlockProcessor(nn.Module): + +class IPDoubleStreamBlockProcessor(torch.nn.Module): """Attention processor for handling IP-adapter with double stream block.""" - def __init__(self, context_dim, hidden_dim): + def __init__(self, context_dim: int, hidden_dim: int): super().__init__() - if not hasattr(F, "scaled_dot_product_attention"): - raise ImportError("IPDoubleStreamBlockProcessor requires PyTorch 2.0 or higher. 
Please upgrade PyTorch.") # Ensure context_dim matches the dimension of image_proj self.context_dim = context_dim self.hidden_dim = hidden_dim # Initialize projections for IP-adapter - self.ip_adapter_double_stream_k_proj = nn.Linear(context_dim, hidden_dim, bias=True) - self.ip_adapter_double_stream_v_proj = nn.Linear(context_dim, hidden_dim, bias=True) - - nn.init.zeros_(self.ip_adapter_double_stream_k_proj.weight) - nn.init.zeros_(self.ip_adapter_double_stream_k_proj.bias) - - nn.init.zeros_(self.ip_adapter_double_stream_v_proj.weight) - nn.init.zeros_(self.ip_adapter_double_stream_v_proj.bias) - - def __call__(self, attn, img, txt, vec, pe, image_proj, ip_scale=1.0, **attention_kwargs): + self.ip_adapter_double_stream_k_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) + self.ip_adapter_double_stream_v_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) + + torch.nn.init.zeros_(self.ip_adapter_double_stream_k_proj.weight) + torch.nn.init.zeros_(self.ip_adapter_double_stream_k_proj.bias) + + torch.nn.init.zeros_(self.ip_adapter_double_stream_v_proj.weight) + torch.nn.init.zeros_(self.ip_adapter_double_stream_v_proj.bias) + + def __call__( + self, + attn: DoubleStreamBlock, + img: torch.Tensor, + txt: torch.Tensor, + vec: torch.Tensor, + pe: torch.Tensor, + image_proj: torch.Tensor, + ip_scale: float = 1.0, + ): # Prepare image for attention img_mod1, img_mod2 = attn.img_mod(vec) txt_mod1, txt_mod2 = attn.txt_mod(vec) @@ -32,13 +44,17 @@ def __call__(self, attn, img, txt, vec, pe, image_proj, ip_scale=1.0, **attentio img_modulated = attn.img_norm1(img) img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift img_qkv = attn.img_attn.qkv(img_modulated) - img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim) + img_q, img_k, img_v = einops.rearrange( + img_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim + ) img_q, img_k = attn.img_attn.norm(img_q, img_k, img_v) txt_modulated = attn.txt_norm1(txt) txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift txt_qkv = attn.txt_attn.qkv(txt_modulated) - txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim) + txt_q, txt_k, txt_v = einops.rearrange( + txt_qkv, "B L (K H D) -> K B H L D", K=3, H=attn.num_heads, D=attn.head_dim + ) txt_q, txt_k = attn.txt_attn.norm(txt_q, txt_k, txt_v) q = torch.cat((txt_q, img_q), dim=2) @@ -63,12 +79,14 @@ def __call__(self, attn, img, txt, vec, pe, image_proj, ip_scale=1.0, **attentio ip_value = self.ip_adapter_double_stream_v_proj(image_proj) # Reshape projections for multi-head attention - ip_key = rearrange(ip_key, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) - ip_value = rearrange(ip_value, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) + ip_key = einops.rearrange(ip_key, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) + ip_value = einops.rearrange(ip_value, "B L (H D) -> B H L D", H=attn.num_heads, D=attn.head_dim) # Compute attention between IP projections and the latent query - ip_attention = F.scaled_dot_product_attention(ip_query, ip_key, ip_value, dropout_p=0.0, is_causal=False) - ip_attention = rearrange(ip_attention, "B H L D -> B L (H D)", H=attn.num_heads, D=attn.head_dim) + ip_attention = torch.nn.functional.scaled_dot_product_attention( + ip_query, ip_key, ip_value, dropout_p=0.0, is_causal=False + ) + ip_attention = einops.rearrange(ip_attention, "B H L D -> B L (H D)", H=attn.num_heads, 
D=attn.head_dim) img = img + ip_scale * ip_attention From 95c30f6a8be18777d4f0d48ecfe1928cfe2bc552 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 11 Oct 2024 17:15:10 +0000 Subject: [PATCH 08/30] Add initial logic for inferring FLUX IP-Adapter params from a state_dict. --- .../flux/ip_adapter/xlabs_ip_adapter_flux.py | 60 +++++++++++++++++++ .../ip_adapter/test_xlabs_ip_adapter_flux.py | 17 ++++++ 2 files changed, 77 insertions(+) create mode 100644 invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py create mode 100644 tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py diff --git a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py new file mode 100644 index 00000000000..63fd1212215 --- /dev/null +++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py @@ -0,0 +1,60 @@ +from dataclasses import dataclass + +import torch + +from invokeai.backend.ip_adapter.ip_adapter import ImageProjModel + + +class IPDoubleStreamBlock(torch.nn.Module): + def __init__(self, context_dim: int, hidden_dim: int): + super().__init__() + + self.context_dim = context_dim + self.hidden_dim = hidden_dim + + self.ip_adapter_double_stream_k_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) + self.ip_adapter_double_stream_v_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) + + +class XlabsIpAdapterFlux: + def __init__(self, image_proj: ImageProjModel, double_blocks: list[IPDoubleStreamBlock]): + self.image_proj = image_proj + self.double_blocks = double_blocks + + @classmethod + def from_state_dict(cls, state_dict: dict[str, torch.Tensor]) -> "XlabsIpAdapterFlux": + # TODO + + return cls() + + +@dataclass +class XlabsIpAdapterParams: + num_double_blocks: int + context_dim: int + hidden_dim: int + + clip_embeddings_dim: int + + +def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterParams: + num_double_blocks = 0 + context_dim = 0 + hidden_dim = 0 + + # Count the number of double blocks. + double_block_index = 0 + while f"double_blocks.{double_block_index}.processor.ip_adapter_double_stream_k_proj.weight" in state_dict: + double_block_index += 1 + num_double_blocks = double_block_index + + hidden_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[0] + context_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[1] + clip_embeddings_dim = state_dict["ip_adapter_proj_model.proj.weight"].shape[1] + + return XlabsIpAdapterParams( + num_double_blocks=num_double_blocks, + context_dim=context_dim, + hidden_dim=hidden_dim, + clip_embeddings_dim=clip_embeddings_dim, + ) diff --git a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py new file mode 100644 index 00000000000..a4ca8180d03 --- /dev/null +++ b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py @@ -0,0 +1,17 @@ +import torch + +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import infer_xlabs_ip_adapter_params_from_state_dict +from tests.backend.flux.ip_adapter.xlabs_flux_ip_adapter_state_dict import xlabs_sd_shapes + + +def test_infer_xlabs_ip_adapter_params_from_state_dict(): + # Construct a dummy state_dict with tensors of the correct shape on the meta device. 
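+    # torch.device("meta") allocates tensors with shape/dtype metadata but no real storage, so the dummy weights are cheap to build.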
+ with torch.device("meta"): + sd = {k: torch.zeros(v) for k, v in xlabs_sd_shapes.items()} + + params = infer_xlabs_ip_adapter_params_from_state_dict(sd) + + assert params.num_double_blocks == 19 + assert params.context_dim == 4096 + assert params.hidden_dim == 3072 + assert params.clip_embeddings_dim == 768 From 24a0ca86f5878595886e5b13eabd32c8b738566a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 13:52:07 +0000 Subject: [PATCH 09/30] Add logic for loading an Xlabs IP-Adapter from a state dict. --- .../flux/ip_adapter/xlabs_ip_adapter_flux.py | 71 +++++++++++++++---- .../ip_adapter/test_xlabs_ip_adapter_flux.py | 21 +++++- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py index 63fd1212215..182c8249c0f 100644 --- a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py +++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py @@ -1,5 +1,6 @@ from dataclasses import dataclass +import accelerate import torch from invokeai.backend.ip_adapter.ip_adapter import ImageProjModel @@ -16,18 +17,6 @@ def __init__(self, context_dim: int, hidden_dim: int): self.ip_adapter_double_stream_v_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) -class XlabsIpAdapterFlux: - def __init__(self, image_proj: ImageProjModel, double_blocks: list[IPDoubleStreamBlock]): - self.image_proj = image_proj - self.double_blocks = double_blocks - - @classmethod - def from_state_dict(cls, state_dict: dict[str, torch.Tensor]) -> "XlabsIpAdapterFlux": - # TODO - - return cls() - - @dataclass class XlabsIpAdapterParams: num_double_blocks: int @@ -37,6 +26,54 @@ class XlabsIpAdapterParams: clip_embeddings_dim: int +class XlabsIpAdapterFlux(torch.nn.Module): + def __init__(self, params: XlabsIpAdapterParams): + super().__init__() + self.image_proj = ImageProjModel( + cross_attention_dim=params.context_dim, clip_embeddings_dim=params.clip_embeddings_dim + ) + self.double_blocks = torch.nn.ModuleList( + [IPDoubleStreamBlock(params.context_dim, params.hidden_dim) for _ in range(params.num_double_blocks)] + ) + + def load_xlabs_state_dict(self, state_dict: dict[str, torch.Tensor], assign: bool = False): + """We need this custom function to load state dicts rather than using .load_state_dict(...) because the model + structure does not match the state_dict structure. + """ + # Split the state_dict into the image projection model and the double blocks. + image_proj_sd: dict[str, torch.Tensor] = {} + double_blocks_sd: dict[str, torch.Tensor] = {} + for k, v in state_dict.items(): + if k.startswith("ip_adapter_proj_model."): + image_proj_sd[k] = v + elif k.startswith("double_blocks."): + double_blocks_sd[k] = v + else: + raise ValueError(f"Unexpected key: {k}") + + # Initialize the image projection model. + image_proj_sd = {k.replace("ip_adapter_proj_model.", ""): v for k, v in image_proj_sd.items()} + self.image_proj.load_state_dict(image_proj_sd, assign=assign) + + # Initialize the double blocks. 
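+        # Each IPDoubleStreamBlock holds only the extra K/V projections that attend over the projected image embedding.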
+ for i, double_block in enumerate(self.double_blocks): + double_block_sd: dict[str, torch.Tensor] = { + "ip_adapter_double_stream_k_proj.bias": double_blocks_sd[ + f"double_blocks.{i}.processor.ip_adapter_double_stream_k_proj.bias" + ], + "ip_adapter_double_stream_k_proj.weight": double_blocks_sd[ + f"double_blocks.{i}.processor.ip_adapter_double_stream_k_proj.weight" + ], + "ip_adapter_double_stream_v_proj.bias": double_blocks_sd[ + f"double_blocks.{i}.processor.ip_adapter_double_stream_v_proj.bias" + ], + "ip_adapter_double_stream_v_proj.weight": double_blocks_sd[ + f"double_blocks.{i}.processor.ip_adapter_double_stream_v_proj.weight" + ], + } + double_block.load_state_dict(double_block_sd, assign=assign) + + def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterParams: num_double_blocks = 0 context_dim = 0 @@ -58,3 +95,13 @@ def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Te hidden_dim=hidden_dim, clip_embeddings_dim=clip_embeddings_dim, ) + + +def load_xlabs_ip_adapter_flux(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterFlux: + params = infer_xlabs_ip_adapter_params_from_state_dict(state_dict) + + with accelerate.init_empty_weights(): + model = XlabsIpAdapterFlux(params=params) + + model.load_xlabs_state_dict(state_dict) + return model diff --git a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py index a4ca8180d03..c893fec2b81 100644 --- a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py +++ b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py @@ -1,6 +1,10 @@ +import accelerate import torch -from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import infer_xlabs_ip_adapter_params_from_state_dict +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import ( + XlabsIpAdapterFlux, + infer_xlabs_ip_adapter_params_from_state_dict, +) from tests.backend.flux.ip_adapter.xlabs_flux_ip_adapter_state_dict import xlabs_sd_shapes @@ -15,3 +19,18 @@ def test_infer_xlabs_ip_adapter_params_from_state_dict(): assert params.context_dim == 4096 assert params.hidden_dim == 3072 assert params.clip_embeddings_dim == 768 + + +def test_initialize_xlabs_ip_adapter_flux_from_state_dict(): + # Construct a dummy state_dict with tensors of the correct shape on the meta device. + with torch.device("meta"): + sd = {k: torch.zeros(v) for k, v in xlabs_sd_shapes.items()} + + # Initialize the XLabs IP-Adapter from the state_dict. + params = infer_xlabs_ip_adapter_params_from_state_dict(sd) + + with accelerate.init_empty_weights(): + model = XlabsIpAdapterFlux(params=params) + + # Smoke test state_dict loading. + model.load_xlabs_state_dict(sd) From f939dbdc339b3ecab5a1e69282f58c378b976a66 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 14:49:00 +0000 Subject: [PATCH 10/30] Add is_state_dict_xlabs_ip_adapter() utility function. 
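The detector is purely key-based: it checks for a small set of keys that are characteristic of the XLabs checkpoint layout rather than inspecting tensor shapes or values. A hypothetical usage sketch (the checkpoint path is illustrative):

    from safetensors.torch import load_file

    from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlabs_ip_adapter

    sd = load_file("flux-ip-adapter.safetensors")
    if is_state_dict_xlabs_ip_adapter(sd):
        ...  # route to the FLUX IP-Adapter loading path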
--- .../flux/ip_adapter/state_dict_utils.py | 23 +++++++++++++++++++ .../ip_adapter/test_xlabs_ip_adapter_flux.py | 8 +++++++ 2 files changed, 31 insertions(+) create mode 100644 invokeai/backend/flux/ip_adapter/state_dict_utils.py diff --git a/invokeai/backend/flux/ip_adapter/state_dict_utils.py b/invokeai/backend/flux/ip_adapter/state_dict_utils.py new file mode 100644 index 00000000000..96d724f242f --- /dev/null +++ b/invokeai/backend/flux/ip_adapter/state_dict_utils.py @@ -0,0 +1,23 @@ +from typing import Any, Dict + + +def is_state_dict_xlabs_ip_adapter(sd: Dict[str, Any]) -> bool: + """Is the state dict for an XLabs FLUX IP-Adapter model? + + This is intended to be a reasonably high-precision detector, but it is not guaranteed to have perfect precision. + """ + # If all of the expected keys are present, then this is very likely an XLabs IP-Adapter model. + expected_keys = { + "double_blocks.0.processor.ip_adapter_double_stream_k_proj.bias", + "double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight", + "double_blocks.0.processor.ip_adapter_double_stream_v_proj.bias", + "double_blocks.0.processor.ip_adapter_double_stream_v_proj.weight", + "ip_adapter_proj_model.norm.bias", + "ip_adapter_proj_model.norm.weight", + "ip_adapter_proj_model.proj.bias", + "ip_adapter_proj_model.proj.weight", + } + + if expected_keys.issubset(sd.keys()): + return True + return False diff --git a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py index c893fec2b81..6ffb36aeeb8 100644 --- a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py +++ b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py @@ -1,6 +1,7 @@ import accelerate import torch +from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlabs_ip_adapter from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import ( XlabsIpAdapterFlux, infer_xlabs_ip_adapter_params_from_state_dict, @@ -8,6 +9,13 @@ from tests.backend.flux.ip_adapter.xlabs_flux_ip_adapter_state_dict import xlabs_sd_shapes +def test_is_state_dict_xlabs_ip_adapter(): + # Construct a dummy state_dict. + sd = {k: None for k in xlabs_sd_shapes} + + assert is_state_dict_xlabs_ip_adapter(sd) + + def test_infer_xlabs_ip_adapter_params_from_state_dict(): # Construct a dummy state_dict with tensors of the correct shape on the meta device. with torch.device("meta"): From 412e79d8e6b16340363aac4567897f505a2dd387 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 14:58:04 +0000 Subject: [PATCH 11/30] Add model probing for XLabs FLUX IP-Adapter. --- invokeai/backend/model_manager/probe.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index fe8b5669991..c7f3062aa32 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -14,6 +14,7 @@ is_state_dict_instantx_controlnet, is_state_dict_xlabs_controlnet, ) +from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlabs_ip_adapter from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils import ( is_state_dict_likely_in_flux_diffusers_format, ) @@ -243,8 +244,6 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C "cond_stage_model.", "first_stage_model.", "model.diffusion_model.", - # FLUX models in the official BFL format contain keys with the "double_blocks." prefix. 
- "double_blocks.", # Some FLUX checkpoint files contain transformer keys prefixed with "model.diffusion_model". # This prefix is typically used to distinguish between multiple models bundled in a single file. "model.diffusion_model.double_blocks.", @@ -252,6 +251,10 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C ): # Keys starting with double_blocks are associated with Flux models return ModelType.Main + # FLUX models in the official BFL format contain keys with the "double_blocks." prefix, but we must be + # careful to avoid false positives on XLabs FLUX IP-Adapter models. + elif key.startswith("double_blocks.") and "ip_adapter" not in key: + return ModelType.Main elif key.startswith(("encoder.conv_in", "decoder.conv_in")): return ModelType.VAE elif key.startswith(("lora_te_", "lora_unet_")): @@ -274,7 +277,14 @@ def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: Optional[C ) ): return ModelType.ControlNet - elif key.startswith(("image_proj.", "ip_adapter.")): + elif key.startswith( + ( + "image_proj.", + "ip_adapter.", + # XLabs FLUX IP-Adapter models have keys startinh with "ip_adapter_proj_model.". + "ip_adapter_proj_model.", + ) + ): return ModelType.IPAdapter elif key in {"emb_params", "string_to_param"}: return ModelType.TextualInversion @@ -672,6 +682,10 @@ class IPAdapterCheckpointProbe(CheckpointProbeBase): def get_base_type(self) -> BaseModelType: checkpoint = self.checkpoint + + if is_state_dict_xlabs_ip_adapter(checkpoint): + return BaseModelType.Flux + for key in checkpoint.keys(): if not key.startswith(("image_proj.", "ip_adapter.")): continue From d6643d726376f4ff1ef0cd491e7e864c272bbe3d Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 14:58:37 +0000 Subject: [PATCH 12/30] Add model loading code for xlabs FLUX IP-Adapter (not tested). 
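The loader follows the same pattern as the existing FLUX checkpoint loaders in this file: infer the architecture params from the state dict, build the module with empty (meta) weights via accelerate, then materialize the real weights with assign=True. Roughly (a simplified sketch of the loader added below; the checkpoint path is illustrative):

    import accelerate
    from safetensors.torch import load_file

    from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import (
        XlabsIpAdapterFlux,
        infer_xlabs_ip_adapter_params_from_state_dict,
    )

    sd = load_file("flux-ip-adapter.safetensors")
    params = infer_xlabs_ip_adapter_params_from_state_dict(sd)
    with accelerate.init_empty_weights():
        model = XlabsIpAdapterFlux(params=params)
    model.load_xlabs_state_dict(sd, assign=True)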
--- .../flux/ip_adapter/xlabs_ip_adapter_flux.py | 11 -------- .../model_manager/load/model_loaders/flux.py | 28 +++++++++++++++++++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py index 182c8249c0f..3de2ed2a157 100644 --- a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py +++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py @@ -1,6 +1,5 @@ from dataclasses import dataclass -import accelerate import torch from invokeai.backend.ip_adapter.ip_adapter import ImageProjModel @@ -95,13 +94,3 @@ def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Te hidden_dim=hidden_dim, clip_embeddings_dim=clip_embeddings_dim, ) - - -def load_xlabs_ip_adapter_flux(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterFlux: - params = infer_xlabs_ip_adapter_params_from_state_dict(state_dict) - - with accelerate.init_empty_weights(): - model = XlabsIpAdapterFlux(params=params) - - model.load_xlabs_state_dict(state_dict) - return model diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index b82a17c69a1..8d9c3f6f432 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -19,6 +19,10 @@ is_state_dict_xlabs_controlnet, ) from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import ( + XlabsIpAdapterFlux, + infer_xlabs_ip_adapter_params_from_state_dict, +) from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.flux.util import ae_params, params @@ -35,6 +39,7 @@ CLIPEmbedDiffusersConfig, ControlNetCheckpointConfig, ControlNetDiffusersConfig, + IPAdapterCheckpointConfig, MainBnbQuantized4bCheckpointConfig, MainCheckpointConfig, MainGGUFCheckpointConfig, @@ -352,3 +357,26 @@ def _load_instantx_controlnet(self, sd: dict[str, torch.Tensor]) -> AnyModel: model.load_state_dict(sd, assign=True) return model + + +@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.IPAdapter, format=ModelFormat.Checkpoint) +class FluxIpAdapterModel(ModelLoader): + """Class to load FLUX IP-Adapter models.""" + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if not isinstance(config, IPAdapterCheckpointConfig): + raise ValueError(f"Unexpected model config type: {type(config)}.") + + sd = load_file(Path(config.path)) + + params = infer_xlabs_ip_adapter_params_from_state_dict(sd) + + with accelerate.init_empty_weights(): + model = XlabsIpAdapterFlux(params=params) + + model.load_xlabs_state_dict(sd, assign=True) + return model From c2a8fbd8d65ccdcadf305b25804591739c400f1a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 15:02:03 +0000 Subject: [PATCH 13/30] (minor) Move infer_xlabs_ip_adapter_params_from_state_dict(...) to state_dict_utils.py. 
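After this move, both the format detector and the param-inference helper live in state_dict_utils.py, so callers import them from one place:

    from invokeai.backend.flux.ip_adapter.state_dict_utils import (
        infer_xlabs_ip_adapter_params_from_state_dict,
        is_state_dict_xlabs_ip_adapter,
    )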
--- .../flux/ip_adapter/state_dict_utils.py | 27 +++++++++++++++++++ .../flux/ip_adapter/xlabs_ip_adapter_flux.py | 23 ---------------- .../model_manager/load/model_loaders/flux.py | 2 +- .../ip_adapter/test_xlabs_ip_adapter_flux.py | 6 +++-- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/invokeai/backend/flux/ip_adapter/state_dict_utils.py b/invokeai/backend/flux/ip_adapter/state_dict_utils.py index 96d724f242f..dff4978480f 100644 --- a/invokeai/backend/flux/ip_adapter/state_dict_utils.py +++ b/invokeai/backend/flux/ip_adapter/state_dict_utils.py @@ -1,5 +1,9 @@ from typing import Any, Dict +import torch + +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import XlabsIpAdapterParams + def is_state_dict_xlabs_ip_adapter(sd: Dict[str, Any]) -> bool: """Is the state dict for an XLabs FLUX IP-Adapter model? @@ -21,3 +25,26 @@ def is_state_dict_xlabs_ip_adapter(sd: Dict[str, Any]) -> bool: if expected_keys.issubset(sd.keys()): return True return False + + +def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterParams: + num_double_blocks = 0 + context_dim = 0 + hidden_dim = 0 + + # Count the number of double blocks. + double_block_index = 0 + while f"double_blocks.{double_block_index}.processor.ip_adapter_double_stream_k_proj.weight" in state_dict: + double_block_index += 1 + num_double_blocks = double_block_index + + hidden_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[0] + context_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[1] + clip_embeddings_dim = state_dict["ip_adapter_proj_model.proj.weight"].shape[1] + + return XlabsIpAdapterParams( + num_double_blocks=num_double_blocks, + context_dim=context_dim, + hidden_dim=hidden_dim, + clip_embeddings_dim=clip_embeddings_dim, + ) diff --git a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py index 3de2ed2a157..152c391059e 100644 --- a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py +++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py @@ -71,26 +71,3 @@ def load_xlabs_state_dict(self, state_dict: dict[str, torch.Tensor], assign: boo ], } double_block.load_state_dict(double_block_sd, assign=assign) - - -def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Tensor]) -> XlabsIpAdapterParams: - num_double_blocks = 0 - context_dim = 0 - hidden_dim = 0 - - # Count the number of double blocks. 
- double_block_index = 0 - while f"double_blocks.{double_block_index}.processor.ip_adapter_double_stream_k_proj.weight" in state_dict: - double_block_index += 1 - num_double_blocks = double_block_index - - hidden_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[0] - context_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[1] - clip_embeddings_dim = state_dict["ip_adapter_proj_model.proj.weight"].shape[1] - - return XlabsIpAdapterParams( - num_double_blocks=num_double_blocks, - context_dim=context_dim, - hidden_dim=hidden_dim, - clip_embeddings_dim=clip_embeddings_dim, - ) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 8d9c3f6f432..af1101f62da 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -19,9 +19,9 @@ is_state_dict_xlabs_controlnet, ) from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux +from invokeai.backend.flux.ip_adapter.state_dict_utils import infer_xlabs_ip_adapter_params_from_state_dict from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import ( XlabsIpAdapterFlux, - infer_xlabs_ip_adapter_params_from_state_dict, ) from invokeai.backend.flux.model import Flux from invokeai.backend.flux.modules.autoencoder import AutoEncoder diff --git a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py index 6ffb36aeeb8..1c88304ea13 100644 --- a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py +++ b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py @@ -1,10 +1,12 @@ import accelerate import torch -from invokeai.backend.flux.ip_adapter.state_dict_utils import is_state_dict_xlabs_ip_adapter +from invokeai.backend.flux.ip_adapter.state_dict_utils import ( + infer_xlabs_ip_adapter_params_from_state_dict, + is_state_dict_xlabs_ip_adapter, +) from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import ( XlabsIpAdapterFlux, - infer_xlabs_ip_adapter_params_from_state_dict, ) from tests.backend.flux.ip_adapter.xlabs_flux_ip_adapter_state_dict import xlabs_sd_shapes From 3fa10128794d23bbb7ecd9b7e74c610fbad548a6 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 15:31:26 +0000 Subject: [PATCH 14/30] Add IPAdapterDoubleBlocks wrapper to tidy FLUX ip-adapter handling. 
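With the wrapper in place, loading the double-block weights reduces to one key rewrite plus a single load_state_dict() call instead of a per-block loop. The XLabs checkpoint keys look like double_blocks.{i}.processor.ip_adapter_double_stream_k_proj.weight, and after dropping the "processor." segment they line up one-to-one with the keys of the IPAdapterDoubleBlocks ModuleList, for example:

    double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight
        -> double_blocks.0.ip_adapter_double_stream_k_proj.weight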
--- .../flux/ip_adapter/xlabs_ip_adapter_flux.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py index 152c391059e..cfe72eb54b9 100644 --- a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py +++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py @@ -16,6 +16,14 @@ def __init__(self, context_dim: int, hidden_dim: int): self.ip_adapter_double_stream_v_proj = torch.nn.Linear(context_dim, hidden_dim, bias=True) +class IPAdapterDoubleBlocks(torch.nn.Module): + def __init__(self, num_double_blocks: int, context_dim: int, hidden_dim: int): + super().__init__() + self.double_blocks = torch.nn.ModuleList( + [IPDoubleStreamBlock(context_dim, hidden_dim) for _ in range(num_double_blocks)] + ) + + @dataclass class XlabsIpAdapterParams: num_double_blocks: int @@ -31,8 +39,8 @@ def __init__(self, params: XlabsIpAdapterParams): self.image_proj = ImageProjModel( cross_attention_dim=params.context_dim, clip_embeddings_dim=params.clip_embeddings_dim ) - self.double_blocks = torch.nn.ModuleList( - [IPDoubleStreamBlock(params.context_dim, params.hidden_dim) for _ in range(params.num_double_blocks)] + self.ip_adapter_double_blocks = IPAdapterDoubleBlocks( + num_double_blocks=params.num_double_blocks, context_dim=params.context_dim, hidden_dim=params.hidden_dim ) def load_xlabs_state_dict(self, state_dict: dict[str, torch.Tensor], assign: bool = False): @@ -55,19 +63,5 @@ def load_xlabs_state_dict(self, state_dict: dict[str, torch.Tensor], assign: boo self.image_proj.load_state_dict(image_proj_sd, assign=assign) # Initialize the double blocks. - for i, double_block in enumerate(self.double_blocks): - double_block_sd: dict[str, torch.Tensor] = { - "ip_adapter_double_stream_k_proj.bias": double_blocks_sd[ - f"double_blocks.{i}.processor.ip_adapter_double_stream_k_proj.bias" - ], - "ip_adapter_double_stream_k_proj.weight": double_blocks_sd[ - f"double_blocks.{i}.processor.ip_adapter_double_stream_k_proj.weight" - ], - "ip_adapter_double_stream_v_proj.bias": double_blocks_sd[ - f"double_blocks.{i}.processor.ip_adapter_double_stream_v_proj.bias" - ], - "ip_adapter_double_stream_v_proj.weight": double_blocks_sd[ - f"double_blocks.{i}.processor.ip_adapter_double_stream_v_proj.weight" - ], - } - double_block.load_state_dict(double_block_sd, assign=assign) + double_blocks_sd = {k.replace("processor.", ""): v for k, v in double_blocks_sd.items()} + self.ip_adapter_double_blocks.load_state_dict(double_blocks_sd, assign=assign) From 31ffd734233c6a917b33dbdc95adb736a854bbcb Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 15 Oct 2024 22:28:59 +0000 Subject: [PATCH 15/30] Initial draft of integrating FLUX IP-Adapter inference support. 
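High-level flow: the denoise node encodes the IP-Adapter image prompt(s) with the CLIP vision model, wraps the adapter weights in an XLabsIPAdapterExtension, and threads the extensions through denoise() into a custom double-stream block forward. Inside each double block, the extension projects the image embedding to per-block keys and values and attends against that block's image-stream query. A sketch of the math (the names image_proj_model, k_proj and v_proj are shorthand for the projection layers in the diff, not the actual attribute names):

    # once per denoise() call:
    image_proj = image_proj_model(clip_image_embeds)
    # inside each double-stream block:
    ip_k = k_proj(image_proj)
    ip_v = v_proj(image_proj)
    img = img + weight * scaled_dot_product_attention(img_q, ip_k, ip_v)

where weight comes from the begin/end step schedule of the extension.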
--- invokeai/app/invocations/flux_denoise.py | 92 +++++++++++++++++++ .../backend/flux/custom_block_processor.py | 83 +++++++++++++++++ invokeai/backend/flux/denoise.py | 5 + .../extensions/xlabs_ip_adapter_extension.py | 89 ++++++++++++++++++ invokeai/backend/flux/model.py | 19 +++- 5 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 invokeai/backend/flux/custom_block_processor.py create mode 100644 invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index 8120ac400f5..1b7dea7b607 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -4,12 +4,14 @@ import torch import torchvision.transforms as tv_transforms from torchvision.transforms.functional import resize as tv_resize +from transformers import CLIPVisionModelWithProjection from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import ( DenoiseMaskField, FieldDescriptions, FluxConditioningField, + ImageField, Input, InputField, LatentsField, @@ -17,6 +19,7 @@ WithMetadata, ) from invokeai.app.invocations.flux_controlnet import FluxControlNetField +from invokeai.app.invocations.ip_adapter import IPAdapterField from invokeai.app.invocations.model import TransformerField, VAEField from invokeai.app.invocations.primitives import LatentsOutput from invokeai.app.services.shared.invocation_context import InvocationContext @@ -26,6 +29,8 @@ from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension +from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import XlabsIpAdapterFlux from invokeai.backend.flux.model import Flux from invokeai.backend.flux.sampling_utils import ( clip_timestep_schedule_fractional, @@ -118,6 +123,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard): input=Input.Connection, ) + ip_adapter: IPAdapterField | list[IPAdapterField] | None = InputField( + description=FieldDescriptions.ip_adapter, title="IP-Adapter", default=None, input=Input.Connection + ) + @torch.no_grad() def invoke(self, context: InvocationContext) -> LatentsOutput: latents = self._run_diffusion(context) @@ -245,6 +254,12 @@ def _run_diffusion( noise=noise, ) + # Compute the IP-Adapter image prompt clip embeddings. + # We do this before loading other models to minimize peak memory. + # TODO(ryand): We should really do this in a separate invocation to benefit from caching. + ip_adapter_fields = self._normalize_ip_adapter_fields() + image_prompt_clip_embeds = self._prep_ip_adapter_image_prompt_clip_embeds(ip_adapter_fields, context) + cfg_scale = self.prep_cfg_scale( cfg_scale=self.cfg_scale, timesteps=timesteps, @@ -300,6 +315,15 @@ def _run_diffusion( else: raise ValueError(f"Unsupported model format: {config.format}") + # Prepare IP-Adapter extensions. 
+ ip_adapter_extensions = self._prep_ip_adapter_extensions( + image_prompt_clip_embeds=image_prompt_clip_embeds, + ip_adapter_fields=ip_adapter_fields, + context=context, + exit_stack=exit_stack, + dtype=inference_dtype, + ) + x = denoise( model=transformer, img=x, @@ -316,6 +340,7 @@ def _run_diffusion( cfg_scale=cfg_scale, inpaint_extension=inpaint_extension, controlnet_extensions=controlnet_extensions, + ip_adapter_extensions=ip_adapter_extensions, ) x = unpack(x.float(), self.height, self.width) @@ -509,6 +534,73 @@ def _prep_controlnet_extensions( return controlnet_extensions + def _normalize_ip_adapter_fields(self) -> list[IPAdapterField]: + if self.ip_adapter is None: + return [] + elif isinstance(self.ip_adapter, IPAdapterField): + return [self.ip_adapter] + elif isinstance(self.ip_adapter, list): + return self.ip_adapter + else: + raise ValueError(f"Unsupported IP-Adapter type: {type(self.ip_adapter)}") + + def _prep_ip_adapter_image_prompt_clip_embeds( + self, + ip_adapter_fields: list[IPAdapterField], + context: InvocationContext, + ) -> list[torch.Tensor]: + """Run the IPAdapter CLIPVisionModel, returning image prompt embeddings.""" + image_prompt_clip_embeds: list[torch.Tensor] = [] + for ip_adapter_field in ip_adapter_fields: + # `ip_adapter_field.image` could be a list or a single ImageField. Normalize to a list here. + ipa_image_fields: list[ImageField] + if isinstance(ip_adapter_field.image, ImageField): + ipa_image_fields = [ip_adapter_field.image] + elif isinstance(ip_adapter_field.image, list): + ipa_image_fields = ip_adapter_field.image + else: + raise ValueError(f"Unsupported IP-Adapter image type: {type(ip_adapter_field.image)}") + + ipa_images = [context.images.get_pil(image.image_name) for image in ipa_image_fields] + + with context.models.load(ip_adapter_field.image_encoder_model) as image_encoder_model: + assert isinstance(image_encoder_model, CLIPVisionModelWithProjection) + image_prompt_clip_embeds.append( + XLabsIPAdapterExtension.run_clip_image_encoder( + pil_image=ipa_images, + image_encoder=image_encoder_model, + ) + ) + return image_prompt_clip_embeds + + def _prep_ip_adapter_extensions( + self, + ip_adapter_fields: list[IPAdapterField], + image_prompt_clip_embeds: list[torch.Tensor], + context: InvocationContext, + exit_stack: ExitStack, + dtype: torch.dtype, + ) -> list[XLabsIPAdapterExtension]: + ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] + for ip_adapter_field, image_prompt_clip_embed in zip(ip_adapter_fields, image_prompt_clip_embeds, strict=True): + ip_adapter_model = exit_stack.enter_context(context.models.load(ip_adapter_field.ip_adapter_model)) + assert isinstance(ip_adapter_model, XlabsIpAdapterFlux) + ip_adapter_model = ip_adapter_model.to(dtype=dtype) + if ip_adapter_field.mask is not None: + raise ValueError("IP-Adapter masks are not yet supported in Flux.") + ip_adapter_extension = XLabsIPAdapterExtension( + model=ip_adapter_model, + image_prompt_clip_embed=image_prompt_clip_embed, + weight=ip_adapter_field.weight, + begin_step_percent=ip_adapter_field.begin_step_percent, + end_step_percent=ip_adapter_field.end_step_percent, + ) + + ip_adapter_extension.run_image_proj(dtype=dtype) + ip_adapter_extensions.append(ip_adapter_extension) + + return ip_adapter_extensions + def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[LoRAModelRaw, float]]: for lora in self.transformer.loras: lora_info = context.models.load(lora.lora) diff --git a/invokeai/backend/flux/custom_block_processor.py 
b/invokeai/backend/flux/custom_block_processor.py new file mode 100644 index 00000000000..e0c7779e935 --- /dev/null +++ b/invokeai/backend/flux/custom_block_processor.py @@ -0,0 +1,83 @@ +import einops +import torch + +from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension +from invokeai.backend.flux.math import attention +from invokeai.backend.flux.modules.layers import DoubleStreamBlock + + +class CustomDoubleStreamBlockProcessor: + """A class containing a custom implementation of DoubleStreamBlock.forward() with additional features + (IP-Adapter, etc.). + """ + + @staticmethod + def _double_stream_block_forward( + block: DoubleStreamBlock, img: torch.Tensor, txt: torch.Tensor, vec: torch.Tensor, pe: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """This function is a direct copy of DoubleStreamBlock.forward(), but it returns some of the intermediate + values. + """ + img_mod1, img_mod2 = block.img_mod(vec) + txt_mod1, txt_mod2 = block.txt_mod(vec) + + # prepare image for attention + img_modulated = block.img_norm1(img) + img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift + img_qkv = block.img_attn.qkv(img_modulated) + img_q, img_k, img_v = einops.rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=block.num_heads) + img_q, img_k = block.img_attn.norm(img_q, img_k, img_v) + + # prepare txt for attention + txt_modulated = block.txt_norm1(txt) + txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift + txt_qkv = block.txt_attn.qkv(txt_modulated) + txt_q, txt_k, txt_v = einops.rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=block.num_heads) + txt_q, txt_k = block.txt_attn.norm(txt_q, txt_k, txt_v) + + # run actual attention + q = torch.cat((txt_q, img_q), dim=2) + k = torch.cat((txt_k, img_k), dim=2) + v = torch.cat((txt_v, img_v), dim=2) + + attn = attention(q, k, v, pe=pe) + txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :] + + # calculate the img bloks + img = img + img_mod1.gate * block.img_attn.proj(img_attn) + img = img + img_mod2.gate * block.img_mlp((1 + img_mod2.scale) * block.img_norm2(img) + img_mod2.shift) + + # calculate the txt bloks + txt = txt + txt_mod1.gate * block.txt_attn.proj(txt_attn) + txt = txt + txt_mod2.gate * block.txt_mlp((1 + txt_mod2.scale) * block.txt_norm2(txt) + txt_mod2.shift) + return img, txt, img_q + + @staticmethod + def custom_double_block_forward( + timestep_index: int, + total_num_timesteps: int, + block_index: int, + block: DoubleStreamBlock, + img: torch.Tensor, + txt: torch.Tensor, + vec: torch.Tensor, + pe: torch.Tensor, + ip_adapter_extensions: list[XLabsIPAdapterExtension], + ) -> tuple[torch.Tensor, torch.Tensor]: + """A custom implementation of DoubleStreamBlock.forward() with additional features: + - IP-Adapter support + """ + img, txt, img_q = CustomDoubleStreamBlockProcessor._double_stream_block_forward(block, img, txt, vec, pe) + + # Apply IP-Adapter conditioning. 
b/invokeai/backend/flux/custom_block_processor.py new file mode 100644 index 00000000000..e0c7779e935 --- /dev/null +++ b/invokeai/backend/flux/custom_block_processor.py @@ -0,0 +1,83 @@ +import einops +import torch + +from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension +from invokeai.backend.flux.math import attention +from invokeai.backend.flux.modules.layers import DoubleStreamBlock + + +class CustomDoubleStreamBlockProcessor: + """A class containing a custom implementation of DoubleStreamBlock.forward() with additional features + (IP-Adapter, etc.). + """ + + @staticmethod + def _double_stream_block_forward( + block: DoubleStreamBlock, img: torch.Tensor, txt: torch.Tensor, vec: torch.Tensor, pe: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """This function is a direct copy of DoubleStreamBlock.forward(), but it returns some of the intermediate + values. + """ + img_mod1, img_mod2 = block.img_mod(vec) + txt_mod1, txt_mod2 = block.txt_mod(vec) + + # prepare image for attention + img_modulated = block.img_norm1(img) + img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift + img_qkv = block.img_attn.qkv(img_modulated) + img_q, img_k, img_v = einops.rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=block.num_heads) + img_q, img_k = block.img_attn.norm(img_q, img_k, img_v) + + # prepare txt for attention + txt_modulated = block.txt_norm1(txt) + txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift + txt_qkv = block.txt_attn.qkv(txt_modulated) + txt_q, txt_k, txt_v = einops.rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=block.num_heads) + txt_q, txt_k = block.txt_attn.norm(txt_q, txt_k, txt_v) + + # run actual attention + q = torch.cat((txt_q, img_q), dim=2) + k = torch.cat((txt_k, img_k), dim=2) + v = torch.cat((txt_v, img_v), dim=2) + + attn = attention(q, k, v, pe=pe) + txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :] + + # calculate the img blocks + img = img + img_mod1.gate * block.img_attn.proj(img_attn) + img = img + img_mod2.gate * block.img_mlp((1 + img_mod2.scale) * block.img_norm2(img) + img_mod2.shift) + + # calculate the txt blocks + txt = txt + txt_mod1.gate * block.txt_attn.proj(txt_attn) + txt = txt + txt_mod2.gate * block.txt_mlp((1 + txt_mod2.scale) * block.txt_norm2(txt) + txt_mod2.shift) + return img, txt, img_q + + @staticmethod + def custom_double_block_forward( + timestep_index: int, + total_num_timesteps: int, + block_index: int, + block: DoubleStreamBlock, + img: torch.Tensor, + txt: torch.Tensor, + vec: torch.Tensor, + pe: torch.Tensor, + ip_adapter_extensions: list[XLabsIPAdapterExtension], + ) -> tuple[torch.Tensor, torch.Tensor]: + """A custom implementation of DoubleStreamBlock.forward() with additional features: + - IP-Adapter support + """ + img, txt, img_q = CustomDoubleStreamBlockProcessor._double_stream_block_forward(block, img, txt, vec, pe) + + # Apply IP-Adapter conditioning.
+ for ip_adapter_extension in ip_adapter_extensions: + img = ip_adapter_extension.run_ip_adapter( + timestep_index=timestep_index, + total_num_timesteps=total_num_timesteps, + block_index=block_index, + block=block, + img_q=img_q, + img=img, + ) + + return img, txt diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index 7ce375f4a24..025586f4e02 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -8,6 +8,7 @@ from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension +from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension from invokeai.backend.flux.model import Flux from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState @@ -32,6 +33,7 @@ def denoise( cfg_scale: list[float], inpaint_extension: InpaintExtension | None, controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension], + ip_adapter_extensions: list[XLabsIPAdapterExtension], ): # step 0 is the initial state total_steps = len(timesteps) - 1 @@ -80,8 +82,11 @@ def denoise( y=vec, timesteps=t_vec, guidance=guidance_vec, + timestep_index=step_index, + total_num_timesteps=total_steps, controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals, controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals, + ip_adapter_extensions=ip_adapter_extensions, ) step_cfg_scale = cfg_scale[step_index] diff --git a/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py b/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py new file mode 100644 index 00000000000..13ebb1451f2 --- /dev/null +++ b/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py @@ -0,0 +1,89 @@ +import math +from typing import List, Union + +import einops +import torch +from PIL import Image +from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection + +from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import XlabsIpAdapterFlux +from invokeai.backend.flux.modules.layers import DoubleStreamBlock + + +class XLabsIPAdapterExtension: + def __init__( + self, + model: XlabsIpAdapterFlux, + image_prompt_clip_embed: torch.Tensor, + weight: Union[float, List[float]], + begin_step_percent: float, + end_step_percent: float, + ): + self._model = model + self._image_prompt_clip_embed = image_prompt_clip_embed + self._weight = weight + self._begin_step_percent = begin_step_percent + self._end_step_percent = end_step_percent + + self._image_proj: torch.Tensor | None = None + + def _get_weight(self, timestep_index: int, total_num_timesteps: int) -> float: + first_step = math.floor(self._begin_step_percent * total_num_timesteps) + last_step = math.ceil(self._end_step_percent * total_num_timesteps) + + if timestep_index < first_step or timestep_index > last_step: + return 0.0 + + if isinstance(self._weight, list): + return self._weight[timestep_index] + + return self._weight + + @staticmethod + def run_clip_image_encoder( + pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection + ) -> torch.Tensor: + clip_image_processor = CLIPImageProcessor() + clip_image: torch.Tensor = clip_image_processor(images=pil_image, return_tensors="pt").pixel_values + clip_image = 
clip_image.to(device=image_encoder.device, dtype=image_encoder.dtype) + clip_image_embeds = image_encoder(clip_image).image_embeds + return clip_image_embeds + + def run_image_proj(self, dtype: torch.dtype): + image_prompt_clip_embed = self._image_prompt_clip_embed.to(dtype=dtype) + self._image_proj = self._model.image_proj(image_prompt_clip_embed) + + def run_ip_adapter( + self, + timestep_index: int, + total_num_timesteps: int, + block_index: int, + block: DoubleStreamBlock, + img_q: torch.Tensor, + img: torch.Tensor, + ) -> torch.Tensor: + """The logic in this function is based on: + https://github.com/XLabs-AI/x-flux/blob/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/modules/layers.py#L245-L301 + """ + weight = self._get_weight(timestep_index=timestep_index, total_num_timesteps=total_num_timesteps) + if weight < 1e-6: + return img + + ip_adapter_block = self._model.ip_adapter_double_blocks.double_blocks[block_index] + + ip_key = ip_adapter_block.ip_adapter_double_stream_k_proj(self._image_proj) + ip_value = ip_adapter_block.ip_adapter_double_stream_v_proj(self._image_proj) + + # Reshape projections for multi-head attention. + ip_key = einops.rearrange(ip_key, "B L (H D) -> B H L D", H=block.num_heads, D=block.head_dim) + ip_value = einops.rearrange(ip_value, "B L (H D) -> B H L D", H=block.num_heads, D=block.head_dim) + + # Compute attention between IP projections and the latent query. + ip_attn = torch.nn.functional.scaled_dot_product_attention( + img_q, ip_key, ip_value, dropout_p=0.0, is_causal=False + ) + ip_attn = einops.rearrange(ip_attn, "B H L D -> B L (H D)", H=block.num_heads, D=block.head_dim) + + img = img + weight * ip_attn + + return img diff --git a/invokeai/backend/flux/model.py b/invokeai/backend/flux/model.py index 3ec4c3922a2..0dadacd8fe1 100644 --- a/invokeai/backend/flux/model.py +++ b/invokeai/backend/flux/model.py @@ -5,6 +5,8 @@ import torch from torch import Tensor, nn +from invokeai.backend.flux.custom_block_processor import CustomDoubleStreamBlockProcessor +from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension from invokeai.backend.flux.modules.layers import ( DoubleStreamBlock, EmbedND, @@ -88,8 +90,11 @@ def forward( timesteps: Tensor, y: Tensor, guidance: Tensor | None, + timestep_index: int, + total_num_timesteps: int, controlnet_double_block_residuals: list[Tensor] | None, controlnet_single_block_residuals: list[Tensor] | None, + ip_adapter_extensions: list[XLabsIPAdapterExtension], ) -> Tensor: if img.ndim != 3 or txt.ndim != 3: raise ValueError("Input img and txt tensors must have 3 dimensions.") @@ -111,7 +116,19 @@ def forward( if controlnet_double_block_residuals is not None: assert len(controlnet_double_block_residuals) == len(self.double_blocks) for block_index, block in enumerate(self.double_blocks): - img, txt = block(img=img, txt=txt, vec=vec, pe=pe) + assert isinstance(block, DoubleStreamBlock) + + img, txt = CustomDoubleStreamBlockProcessor.custom_double_block_forward( + timestep_index=timestep_index, + total_num_timesteps=total_num_timesteps, + block_index=block_index, + block=block, + img=img, + txt=txt, + vec=vec, + pe=pe, + ip_adapter_extensions=ip_adapter_extensions, + ) if controlnet_double_block_residuals is not None: img += controlnet_double_block_residuals[block_index] From fdccdd52d507817fd8a994a758a12425a3002934 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 16 Oct 2024 01:39:48 +0000 Subject: [PATCH 16/30] Fixes to get XLabsIpAdapterExtension running. 
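The rearrange patterns only need the number of heads: einops infers the head dimension D from the size of the packed axis once H is given, so the explicit D=block.head_dim argument (an attribute DoubleStreamBlock does not appear to expose) is dropped. For example (shapes here are illustrative):

    import einops
    import torch

    t = torch.randn(2, 10, 24 * 128)  # (B, L, H*D) with H=24 heads
    out = einops.rearrange(t, "B L (H D) -> B H L D", H=24)
    assert out.shape == (2, 24, 10, 128)  # D=128 inferred automatically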
--- .../backend/flux/extensions/xlabs_ip_adapter_extension.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py b/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py index 13ebb1451f2..b7a2bd85a6e 100644 --- a/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py +++ b/invokeai/backend/flux/extensions/xlabs_ip_adapter_extension.py @@ -75,14 +75,14 @@ def run_ip_adapter( ip_value = ip_adapter_block.ip_adapter_double_stream_v_proj(self._image_proj) # Reshape projections for multi-head attention. - ip_key = einops.rearrange(ip_key, "B L (H D) -> B H L D", H=block.num_heads, D=block.head_dim) - ip_value = einops.rearrange(ip_value, "B L (H D) -> B H L D", H=block.num_heads, D=block.head_dim) + ip_key = einops.rearrange(ip_key, "B L (H D) -> B H L D", H=block.num_heads) + ip_value = einops.rearrange(ip_value, "B L (H D) -> B H L D", H=block.num_heads) # Compute attention between IP projections and the latent query. ip_attn = torch.nn.functional.scaled_dot_product_attention( img_q, ip_key, ip_value, dropout_p=0.0, is_causal=False ) - ip_attn = einops.rearrange(ip_attn, "B H L D -> B L (H D)", H=block.num_heads, D=block.head_dim) + ip_attn = einops.rearrange(ip_attn, "B H L D -> B L (H D)", H=block.num_heads) img = img + weight * ip_attn From f70a8e2c1a653084d07d5a9bf2454fdcb96157c6 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 16 Oct 2024 01:41:02 +0000 Subject: [PATCH 17/30] A bunch of HACKS to get ViT-L CLIP vision encoder working for FLUX IP-Adapter. Need to revisit how to clean this all up long term. --- invokeai/app/invocations/ip_adapter.py | 25 +++++++---- invokeai/app/invocations/metadata.py | 2 +- .../load/model_loaders/clip_vision.py | 41 +++++++++++++++++++ .../load/model_loaders/generic_diffusers.py | 1 - 4 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 invokeai/backend/model_manager/load/model_loaders/clip_vision.py diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index de40879eef8..2f18da4530e 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -9,6 +9,7 @@ from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.invocations.primitives import ImageField from invokeai.app.invocations.util import validate_begin_end_step, validate_weights +from invokeai.app.services.model_records.model_records_base import ModelRecordChanges from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.model_manager.config import ( AnyModelConfig, @@ -55,10 +56,14 @@ class IPAdapterOutput(BaseInvocationOutput): ip_adapter: IPAdapterField = OutputField(description=FieldDescriptions.ip_adapter, title="IP-Adapter") -CLIP_VISION_MODEL_MAP = {"ViT-H": "ip_adapter_sd_image_encoder", "ViT-G": "ip_adapter_sdxl_image_encoder"} +CLIP_VISION_MODEL_MAP = { + "ViT-L": ("InvokeAI/clip-vit-large-patch14", "clip-vit-large-patch14-full"), + "ViT-H": ("InvokeAI/ip_adapter_sd_image_encoder", "ip_adapter_sd_image_encoder"), + "ViT-G": ("InvokeAI/ip_adapter_sdxl_image_encoder", "ip_adapter_sdxl_image_encoder"), +} -@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.4.1") +@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.5.0") class IPAdapterInvocation(BaseInvocation): """Collects IP-Adapter info to pass to other 
nodes.""" @@ -70,7 +75,7 @@ class IPAdapterInvocation(BaseInvocation): ui_order=-1, ui_type=UIType.IPAdapterModel, ) - clip_vision_model: Literal["ViT-H", "ViT-G"] = InputField( + clip_vision_model: Literal["ViT-L", "ViT-H", "ViT-G"] = InputField( description="CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.", default="ViT-H", ui_order=2, @@ -111,9 +116,9 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: image_encoder_model_id = ip_adapter_info.image_encoder_model_id image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() else: - image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] - image_encoder_model = self._get_image_encoder(context, image_encoder_model_name) + image_encoder_model = self._get_image_encoder(context, image_encoder_model_id, image_encoder_model_name) if self.method == "style": if ip_adapter_info.base == "sd-1": @@ -147,7 +152,9 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: ), ) - def _get_image_encoder(self, context: InvocationContext, image_encoder_model_name: str) -> AnyModelConfig: + def _get_image_encoder( + self, context: InvocationContext, image_encoder_model_id: str, image_encoder_model_name: str + ) -> AnyModelConfig: image_encoder_models = context.models.search_by_attrs( name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision ) @@ -159,7 +166,11 @@ def _get_image_encoder(self, context: InvocationContext, image_encoder_model_nam ) installer = context._services.model_manager.install - job = installer.heuristic_import(f"InvokeAI/{image_encoder_model_name}") + # Note: We hard-code the type to CLIPVision here because if the model contains both a CLIPVision and a + # CLIPText model, the probe may treat it as a CLIPText model. 
+ job = installer.heuristic_import( + image_encoder_model_id, ModelRecordChanges(name=image_encoder_model_name, type=ModelType.CLIPVision) + ) installer.wait_for_job(job, timeout=600) # Wait for up to 10 minutes image_encoder_models = context.models.search_by_attrs( name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index 19e75036035..c3142c824ae 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -40,7 +40,7 @@ class IPAdapterMetadataField(BaseModel): image: ImageField = Field(description="The IP-Adapter image prompt.") ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model.") - clip_vision_model: Literal["ViT-H", "ViT-G"] = Field(description="The CLIP Vision model") + clip_vision_model: Literal["ViT-L", "ViT-H", "ViT-G"] = Field(description="The CLIP Vision model") method: Literal["full", "style", "composition"] = Field(description="Method to apply IP Weights with") weight: Union[float, list[float]] = Field(description="The weight given to the IP-Adapter") begin_step_percent: float = Field(description="When the IP-Adapter is first applied (% of total steps)") diff --git a/invokeai/backend/model_manager/load/model_loaders/clip_vision.py b/invokeai/backend/model_manager/load/model_loaders/clip_vision.py new file mode 100644 index 00000000000..432e0f11756 --- /dev/null +++ b/invokeai/backend/model_manager/load/model_loaders/clip_vision.py @@ -0,0 +1,41 @@ +from pathlib import Path +from typing import Optional + +from transformers import CLIPVisionModelWithProjection + +from invokeai.backend.model_manager.config import ( + AnyModel, + AnyModelConfig, + BaseModelType, + DiffusersConfigBase, + ModelFormat, + ModelType, + SubModelType, +) +from invokeai.backend.model_manager.load.load_default import ModelLoader +from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry + + +@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPVision, format=ModelFormat.Diffusers) +class ClipVisionLoader(ModelLoader): + """Class to load CLIPVision models.""" + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if not isinstance(config, DiffusersConfigBase): + raise ValueError("Only DiffusersConfigBase models are currently supported here.") + + if submodel_type is not None: + raise Exception(f"There are no submodels in models of type {model_class}") + + model_path = Path(config.path) + + model = CLIPVisionModelWithProjection.from_pretrained( + model_path, torch_dtype=self._torch_dtype, local_files_only=True + ) + assert isinstance(model, CLIPVisionModelWithProjection) + + return model diff --git a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py index f1691ec4d4b..4ce51a56d04 100644 --- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py @@ -22,7 +22,6 @@ from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry -@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPVision, format=ModelFormat.Diffusers) @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T2IAdapter, format=ModelFormat.Diffusers) class GenericDiffusersLoader(ModelLoader): """Class to 
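The negative branch of CFG now gets its own IP-Adapter conditioning: the CLIP vision encoder is run twice (once on the image prompt and, in this draft, once on a zeroed copy of the processed pixel tensor), and separate positive/negative extension lists are passed into denoise() so the unconditional prediction sees an "empty" image prompt. The combination step itself is unchanged; schematically, with cfg_scale = 3.0:

    pred = neg_pred + 3.0 * (pos_pred - neg_pred)

so the result is pushed away from the prediction conditioned on the negative text and negative image prompt.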
load simple diffusers models.""" From dde54740c5773a1acb927ae554cdd6fbc772c581 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 16 Oct 2024 18:11:48 +0000 Subject: [PATCH 18/30] Test out IP-Adapter with CFG. --- invokeai/app/invocations/flux_denoise.py | 62 ++++++++++++++++-------- invokeai/backend/flux/denoise.py | 14 ++++-- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index 1b7dea7b607..c3167cfd51d 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -4,7 +4,7 @@ import torch import torchvision.transforms as tv_transforms from torchvision.transforms.functional import resize as tv_resize -from transformers import CLIPVisionModelWithProjection +from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation from invokeai.app.invocations.fields import ( @@ -258,7 +258,9 @@ def _run_diffusion( # We do this before loading other models to minimize peak memory. # TODO(ryand): We should really do this in a separate invocation to benefit from caching. ip_adapter_fields = self._normalize_ip_adapter_fields() - image_prompt_clip_embeds = self._prep_ip_adapter_image_prompt_clip_embeds(ip_adapter_fields, context) + pos_image_prompt_clip_embeds, neg_image_prompt_clip_embeds = self._prep_ip_adapter_image_prompt_clip_embeds( + ip_adapter_fields, context + ) cfg_scale = self.prep_cfg_scale( cfg_scale=self.cfg_scale, @@ -316,8 +318,9 @@ def _run_diffusion( raise ValueError(f"Unsupported model format: {config.format}") # Prepare IP-Adapter extensions. - ip_adapter_extensions = self._prep_ip_adapter_extensions( - image_prompt_clip_embeds=image_prompt_clip_embeds, + pos_ip_adapter_extensions, neg_ip_adapter_extensions = self._prep_ip_adapter_extensions( + pos_image_prompt_clip_embeds=pos_image_prompt_clip_embeds, + neg_image_prompt_clip_embeds=neg_image_prompt_clip_embeds, ip_adapter_fields=ip_adapter_fields, context=context, exit_stack=exit_stack, @@ -340,7 +343,8 @@ def _run_diffusion( cfg_scale=cfg_scale, inpaint_extension=inpaint_extension, controlnet_extensions=controlnet_extensions, - ip_adapter_extensions=ip_adapter_extensions, + pos_ip_adapter_extensions=pos_ip_adapter_extensions, + neg_ip_adapter_extensions=neg_ip_adapter_extensions, ) x = unpack(x.float(), self.height, self.width) @@ -548,9 +552,12 @@ def _prep_ip_adapter_image_prompt_clip_embeds( self, ip_adapter_fields: list[IPAdapterField], context: InvocationContext, - ) -> list[torch.Tensor]: + ) -> tuple[list[torch.Tensor], list[torch.Tensor]]: """Run the IPAdapter CLIPVisionModel, returning image prompt embeddings.""" - image_prompt_clip_embeds: list[torch.Tensor] = [] + clip_image_processor = CLIPImageProcessor() + + pos_image_prompt_clip_embeds: list[torch.Tensor] = [] + neg_image_prompt_clip_embeds: list[torch.Tensor] = [] for ip_adapter_field in ip_adapter_fields: # `ip_adapter_field.image` could be a list or a single ImageField. Normalize to a list here. 
ipa_image_fields: list[ImageField] @@ -565,24 +572,30 @@ def _prep_ip_adapter_image_prompt_clip_embeds( with context.models.load(ip_adapter_field.image_encoder_model) as image_encoder_model: assert isinstance(image_encoder_model, CLIPVisionModelWithProjection) - image_prompt_clip_embeds.append( - XLabsIPAdapterExtension.run_clip_image_encoder( - pil_image=ipa_images, - image_encoder=image_encoder_model, - ) - ) - return image_prompt_clip_embeds + clip_image: torch.Tensor = clip_image_processor(images=ipa_images, return_tensors="pt").pixel_values + clip_image = clip_image.to(device=image_encoder_model.device, dtype=image_encoder_model.dtype) + pos_clip_image_embeds = image_encoder_model(clip_image).image_embeds + neg_clip_image_embeds = image_encoder_model(torch.zeros_like(clip_image)).image_embeds + + pos_image_prompt_clip_embeds.append(pos_clip_image_embeds) + neg_image_prompt_clip_embeds.append(neg_clip_image_embeds) + + return pos_image_prompt_clip_embeds, neg_image_prompt_clip_embeds def _prep_ip_adapter_extensions( self, ip_adapter_fields: list[IPAdapterField], - image_prompt_clip_embeds: list[torch.Tensor], + pos_image_prompt_clip_embeds: list[torch.Tensor], + neg_image_prompt_clip_embeds: list[torch.Tensor], context: InvocationContext, exit_stack: ExitStack, dtype: torch.dtype, ) -> list[XLabsIPAdapterExtension]: - ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] - for ip_adapter_field, image_prompt_clip_embed in zip(ip_adapter_fields, image_prompt_clip_embeds, strict=True): + pos_ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] + neg_ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] + for ip_adapter_field, pos_image_prompt_clip_embed, neg_image_prompt_clip_embed in zip( + ip_adapter_fields, pos_image_prompt_clip_embeds, neg_image_prompt_clip_embeds, strict=True + ): ip_adapter_model = exit_stack.enter_context(context.models.load(ip_adapter_field.ip_adapter_model)) assert isinstance(ip_adapter_model, XlabsIpAdapterFlux) ip_adapter_model = ip_adapter_model.to(dtype=dtype) @@ -590,16 +603,25 @@ def _prep_ip_adapter_extensions( raise ValueError("IP-Adapter masks are not yet supported in Flux.") ip_adapter_extension = XLabsIPAdapterExtension( model=ip_adapter_model, - image_prompt_clip_embed=image_prompt_clip_embed, + image_prompt_clip_embed=pos_image_prompt_clip_embed, weight=ip_adapter_field.weight, begin_step_percent=ip_adapter_field.begin_step_percent, end_step_percent=ip_adapter_field.end_step_percent, ) + ip_adapter_extension.run_image_proj(dtype=dtype) + pos_ip_adapter_extensions.append(ip_adapter_extension) + ip_adapter_extension = XLabsIPAdapterExtension( + model=ip_adapter_model, + image_prompt_clip_embed=neg_image_prompt_clip_embed, + weight=ip_adapter_field.weight, + begin_step_percent=ip_adapter_field.begin_step_percent, + end_step_percent=ip_adapter_field.end_step_percent, + ) ip_adapter_extension.run_image_proj(dtype=dtype) - ip_adapter_extensions.append(ip_adapter_extension) + neg_ip_adapter_extensions.append(ip_adapter_extension) - return ip_adapter_extensions + return pos_ip_adapter_extensions, neg_ip_adapter_extensions def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[LoRAModelRaw, float]]: for lora in self.transformer.loras: diff --git a/invokeai/backend/flux/denoise.py b/invokeai/backend/flux/denoise.py index 025586f4e02..bb0e60409a8 100644 --- a/invokeai/backend/flux/denoise.py +++ b/invokeai/backend/flux/denoise.py @@ -33,7 +33,8 @@ def denoise( cfg_scale: list[float], inpaint_extension: InpaintExtension | None, 
controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension], - ip_adapter_extensions: list[XLabsIPAdapterExtension], + pos_ip_adapter_extensions: list[XLabsIPAdapterExtension], + neg_ip_adapter_extensions: list[XLabsIPAdapterExtension], ): # step 0 is the initial state total_steps = len(timesteps) - 1 @@ -69,7 +70,7 @@ def denoise( ) # Merge the ControlNet residuals from multiple ControlNets. - # TODO(ryand): We may want to alculate the sum just-in-time to keep peak memory low. Keep in mind, that the + # TODO(ryand): We may want to calculate the sum just-in-time to keep peak memory low. Keep in mind, that the # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same # tensors. Calculating the sum materializes each tensor into its own instance. merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals) @@ -86,15 +87,15 @@ def denoise( total_num_timesteps=total_steps, controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals, controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals, - ip_adapter_extensions=ip_adapter_extensions, + ip_adapter_extensions=pos_ip_adapter_extensions, ) step_cfg_scale = cfg_scale[step_index] # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction. if not math.isclose(step_cfg_scale, 1.0): - # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance on - # systems with sufficient VRAM. + # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance + # on systems with sufficient VRAM. if neg_txt is None or neg_txt_ids is None or neg_vec is None: raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.") @@ -107,8 +108,11 @@ def denoise( y=neg_vec, timesteps=t_vec, guidance=guidance_vec, + timestep_index=step_index, + total_num_timesteps=total_steps, controlnet_double_block_residuals=None, controlnet_single_block_residuals=None, + ip_adapter_extensions=neg_ip_adapter_extensions, ) pred = neg_pred + step_cfg_scale * (pred - neg_pred) From 73bbb12f7a983bddbb264233e563b3c9ea5261b1 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 18 Oct 2024 18:52:12 +0000 Subject: [PATCH 19/30] Use a black image as the negative IP prompt for parity with X-Labs implementation. 
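Feeding a real black image through CLIPImageProcessor is not the same as zeroing the processed tensor: with the processor's default normalization, a black image maps to non-zero normalized pixel values, while zeros_like() on pixel_values corresponds to the dataset-mean color. The black-image scheme matches the X-Labs ComfyUI node linked in the code comment. A small sketch of the scheme kept here (image size is illustrative):

    import numpy as np
    from transformers import CLIPImageProcessor

    proc = CLIPImageProcessor()
    black = np.zeros((512, 512, 3), dtype=np.uint8)  # black RGB image
    neg_pixels = proc(images=[black], return_tensors="pt").pixel_values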
--- invokeai/app/invocations/flux_denoise.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index c3167cfd51d..e5413d05520 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -1,6 +1,8 @@ from contextlib import ExitStack from typing import Callable, Iterator, Optional, Tuple +import numpy as np +import numpy.typing as npt import torch import torchvision.transforms as tv_transforms from torchvision.transforms.functional import resize as tv_resize @@ -570,12 +572,28 @@ def _prep_ip_adapter_image_prompt_clip_embeds( ipa_images = [context.images.get_pil(image.image_name) for image in ipa_image_fields] + pos_images: list[npt.NDArray[np.uint8]] = [] + neg_images: list[npt.NDArray[np.uint8]] = [] + for ipa_image in ipa_images: + assert ipa_image.mode == "RGB" + pos_image = np.array(ipa_image) + # We use a black image as the negative image prompt for parity with + # https://github.com/XLabs-AI/x-flux-comfyui/blob/45c834727dd2141aebc505ae4b01f193a8414e38/nodes.py#L592-L593 + # An alternative scheme would be to apply zeros_like() after calling the clip_image_processor. + neg_image = np.zeros_like(pos_image) + pos_images.append(pos_image) + neg_images.append(neg_image) + with context.models.load(ip_adapter_field.image_encoder_model) as image_encoder_model: assert isinstance(image_encoder_model, CLIPVisionModelWithProjection) - clip_image: torch.Tensor = clip_image_processor(images=ipa_images, return_tensors="pt").pixel_values + + clip_image: torch.Tensor = clip_image_processor(images=pos_images, return_tensors="pt").pixel_values clip_image = clip_image.to(device=image_encoder_model.device, dtype=image_encoder_model.dtype) pos_clip_image_embeds = image_encoder_model(clip_image).image_embeds - neg_clip_image_embeds = image_encoder_model(torch.zeros_like(clip_image)).image_embeds + + clip_image = clip_image_processor(images=neg_images, return_tensors="pt").pixel_values + clip_image = clip_image.to(device=image_encoder_model.device, dtype=image_encoder_model.dtype) + neg_clip_image_embeds = image_encoder_model(clip_image).image_embeds pos_image_prompt_clip_embeds.append(pos_clip_image_embeds) neg_image_prompt_clip_embeds.append(neg_clip_image_embeds) @@ -590,7 +608,7 @@ def _prep_ip_adapter_extensions( context: InvocationContext, exit_stack: ExitStack, dtype: torch.dtype, - ) -> list[XLabsIPAdapterExtension]: + ) -> tuple[list[XLabsIPAdapterExtension], list[XLabsIPAdapterExtension]]: pos_ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] neg_ip_adapter_extensions: list[XLabsIPAdapterExtension] = [] for ip_adapter_field, pos_image_prompt_clip_embed, neg_image_prompt_clip_embed in zip( From 554611012785383ce27d573be66676b07fa1eb88 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 21 Oct 2024 18:23:12 +0000 Subject: [PATCH 20/30] Add FluxIPAdapterInvocation. 
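The new node mirrors IPAdapterInvocation but drops the features the FLUX path does not support yet (weight method, regional masks, per-layer target blocks) and emits a standard IPAdapterField, so it plugs straight into the FLUX Denoise node's ip_adapter input. The begin/end step percentages feed the extension's weight schedule; for example, with 30 denoising steps:

    begin_step_percent = 0.2  ->  first_step = floor(0.2 * 30) = 6
    end_step_percent   = 0.8  ->  last_step  = ceil(0.8 * 30)  = 24

so the adapter contributes nothing outside steps 6..24.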
--- invokeai/app/invocations/flux_ip_adapter.py | 94 +++++++++++++++++++++ invokeai/app/invocations/ip_adapter.py | 11 +-- 2 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 invokeai/app/invocations/flux_ip_adapter.py diff --git a/invokeai/app/invocations/flux_ip_adapter.py b/invokeai/app/invocations/flux_ip_adapter.py new file mode 100644 index 00000000000..d44006500a4 --- /dev/null +++ b/invokeai/app/invocations/flux_ip_adapter.py @@ -0,0 +1,94 @@ +from builtins import float +from typing import List, Literal, Union + +from pydantic import field_validator, model_validator +from typing_extensions import Self + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import InputField, UIType +from invokeai.app.invocations.ip_adapter import ( + CLIP_VISION_MODEL_MAP, + IPAdapterField, + IPAdapterInvocation, + IPAdapterOutput, +) +from invokeai.app.invocations.model import ModelIdentifierField +from invokeai.app.invocations.primitives import ImageField +from invokeai.app.invocations.util import validate_begin_end_step, validate_weights +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.config import ( + IPAdapterCheckpointConfig, + IPAdapterInvokeAIConfig, +) + + +@invocation( + "flux_ip_adapter", + title="FLUX IP-Adapter", + tags=["ip_adapter", "control"], + category="ip_adapter", + version="1.0.0", + classification=Classification.Prototype, +) +class FluxIPAdapterInvocation(BaseInvocation): + """Collects FLUX IP-Adapter info to pass to other nodes.""" + + # FLUXIPAdapterInvocation is based closely on IPAdapterInvocation, but with some unsupported features removed. + + image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).") + ip_adapter_model: ModelIdentifierField = InputField( + description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel + ) + clip_vision_model: Literal["ViT-L"] = InputField( + description="CLIP Vision model to use. Only applied if the correct CLIP Vision model cannot be detected from " + + "the model config.", + default="ViT-L", + ) + weight: Union[float, List[float]] = InputField( + default=1, description="The weight given to the IP-Adapter", title="Weight" + ) + begin_step_percent: float = InputField( + default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)" + ) + end_step_percent: float = InputField( + default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)" + ) + + @field_validator("weight") + @classmethod + def validate_ip_adapter_weight(cls, v: float) -> float: + validate_weights(v) + return v + + @model_validator(mode="after") + def validate_begin_end_step_percent(self) -> Self: + validate_begin_end_step(self.begin_step_percent, self.end_step_percent) + return self + + def invoke(self, context: InvocationContext) -> IPAdapterOutput: + # Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model. 
+ ip_adapter_info = context.models.get_config(self.ip_adapter_model.key) + assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig)) + + if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig): + image_encoder_model_id = ip_adapter_info.image_encoder_model_id + image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() + else: + image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + + image_encoder_model = IPAdapterInvocation.get_clip_image_encoder( + context, image_encoder_model_id, image_encoder_model_name + ) + + return IPAdapterOutput( + ip_adapter=IPAdapterField( + image=self.image, + ip_adapter_model=self.ip_adapter_model, + image_encoder_model=ModelIdentifierField.from_config(image_encoder_model), + weight=self.weight, + target_blocks=[], # target_blocks is currently unused for FLUX IP-Adapters. + begin_step_percent=self.begin_step_percent, + end_step_percent=self.end_step_percent, + mask=None, # mask is currently unused for FLUX IP-Adapters. + ), + ) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 2f18da4530e..63624398700 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -57,7 +57,7 @@ class IPAdapterOutput(BaseInvocationOutput): CLIP_VISION_MODEL_MAP = { - "ViT-L": ("InvokeAI/clip-vit-large-patch14", "clip-vit-large-patch14-full"), + "ViT-L": ("InvokeAI/clip-vit-large-patch14", "clip-vit-large-patch14"), "ViT-H": ("InvokeAI/ip_adapter_sd_image_encoder", "ip_adapter_sd_image_encoder"), "ViT-G": ("InvokeAI/ip_adapter_sdxl_image_encoder", "ip_adapter_sdxl_image_encoder"), } @@ -75,7 +75,7 @@ class IPAdapterInvocation(BaseInvocation): ui_order=-1, ui_type=UIType.IPAdapterModel, ) - clip_vision_model: Literal["ViT-L", "ViT-H", "ViT-G"] = InputField( + clip_vision_model: Literal["ViT-H", "ViT-G"] = InputField( description="CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.", default="ViT-H", ui_order=2, @@ -118,7 +118,7 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: else: image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] - image_encoder_model = self._get_image_encoder(context, image_encoder_model_id, image_encoder_model_name) + image_encoder_model = self.get_clip_image_encoder(context, image_encoder_model_id, image_encoder_model_name) if self.method == "style": if ip_adapter_info.base == "sd-1": @@ -152,8 +152,9 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: ), ) - def _get_image_encoder( - self, context: InvocationContext, image_encoder_model_id: str, image_encoder_model_name: str + @classmethod + def get_clip_image_encoder( + cls, context: InvocationContext, image_encoder_model_id: str, image_encoder_model_name: str ) -> AnyModelConfig: image_encoder_models = context.models.search_by_attrs( name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision From 90a906e203ff029f290f7b6d6caced0c021d51f5 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 21 Oct 2024 19:54:21 +0000 Subject: [PATCH 21/30] Simplify handling of CLIP ViT selection for FLUX IP-Adapter invocation. 
--- invokeai/app/invocations/flux_ip_adapter.py | 14 ++++---------- invokeai/backend/model_manager/config.py | 2 ++ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/flux_ip_adapter.py b/invokeai/app/invocations/flux_ip_adapter.py index d44006500a4..556c5703f28 100644 --- a/invokeai/app/invocations/flux_ip_adapter.py +++ b/invokeai/app/invocations/flux_ip_adapter.py @@ -39,11 +39,8 @@ class FluxIPAdapterInvocation(BaseInvocation): ip_adapter_model: ModelIdentifierField = InputField( description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel ) - clip_vision_model: Literal["ViT-L"] = InputField( - description="CLIP Vision model to use. Only applied if the correct CLIP Vision model cannot be detected from " - + "the model config.", - default="ViT-L", - ) + # Currently, the only known ViT model used by FLUX IP-Adapters is ViT-L. + clip_vision_model: Literal["ViT-L"] = InputField(description="CLIP Vision model to use.", default="ViT-L") weight: Union[float, List[float]] = InputField( default=1, description="The weight given to the IP-Adapter", title="Weight" ) @@ -70,11 +67,8 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: ip_adapter_info = context.models.get_config(self.ip_adapter_model.key) assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig)) - if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig): - image_encoder_model_id = ip_adapter_info.image_encoder_model_id - image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() - else: - image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + # Note: There is a IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy. + image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] image_encoder_model = IPAdapterInvocation.get_clip_image_encoder( context, image_encoder_model_id, image_encoder_model_name diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index f1c262df994..ab1ee46e9ff 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -394,6 +394,8 @@ class IPAdapterBaseConfig(ModelConfigBase): class IPAdapterInvokeAIConfig(IPAdapterBaseConfig): """Model config for IP Adapter diffusers format models.""" + # TODO(ryand): Should we deprecate this field? From what I can tell, it hasn't been probed correctly for a long + # time. Need to go through the history to make sure I'm understanding this fully. image_encoder_model_id: str format: Literal[ModelFormat.InvokeAI] From e8cd1bb3d88cbb84cae12b6a58ae84a86f19a866 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 21 Oct 2024 22:17:42 +0000 Subject: [PATCH 22/30] Add FLUX IP-Adapter starter models. 
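The FLUX IP-Adapter starter entry lists the ViT-L image encoder as a dependency, so installing the adapter from the starter models list also pulls the encoder it needs:

    ip_adapter_flux (ModelType.IPAdapter, base=Flux, source: XLabs-AI/flux-ip-adapter)
        depends on -> clip_vit_l_image_encoder (ModelType.CLIPVision, base=Any, source: InvokeAI/clip-vit-large-patch14)

The encoder shares its source repo with the existing CLIPEmbed starter model but gets its own CLIPVision record so the correct loader is used, and CLIP_VISION_MODEL_MAP now resolves its entries from these StarterModel definitions instead of hard-coded (source, name) tuples.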
--- invokeai/app/invocations/flux_ip_adapter.py | 5 +-- invokeai/app/invocations/ip_adapter.py | 18 +++++++--- .../backend/model_manager/starter_models.py | 36 ++++++++++++++----- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/invokeai/app/invocations/flux_ip_adapter.py b/invokeai/app/invocations/flux_ip_adapter.py index 556c5703f28..1b342b3c7a3 100644 --- a/invokeai/app/invocations/flux_ip_adapter.py +++ b/invokeai/app/invocations/flux_ip_adapter.py @@ -68,8 +68,9 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig)) # Note: There is a IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy. - image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] - + image_encoder_starter_model = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + image_encoder_model_id = image_encoder_starter_model.source + image_encoder_model_name = image_encoder_starter_model.name image_encoder_model = IPAdapterInvocation.get_clip_image_encoder( context, image_encoder_model_id, image_encoder_model_name ) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 63624398700..361e8f0a71b 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -18,6 +18,12 @@ IPAdapterInvokeAIConfig, ModelType, ) +from invokeai.backend.model_manager.starter_models import ( + StarterModel, + clip_vit_l_image_encoder, + ip_adapter_sd_image_encoder, + ip_adapter_sdxl_image_encoder, +) class IPAdapterField(BaseModel): @@ -56,10 +62,10 @@ class IPAdapterOutput(BaseInvocationOutput): ip_adapter: IPAdapterField = OutputField(description=FieldDescriptions.ip_adapter, title="IP-Adapter") -CLIP_VISION_MODEL_MAP = { - "ViT-L": ("InvokeAI/clip-vit-large-patch14", "clip-vit-large-patch14"), - "ViT-H": ("InvokeAI/ip_adapter_sd_image_encoder", "ip_adapter_sd_image_encoder"), - "ViT-G": ("InvokeAI/ip_adapter_sdxl_image_encoder", "ip_adapter_sdxl_image_encoder"), +CLIP_VISION_MODEL_MAP: dict[Literal["ViT-L", "ViT-H", "ViT-G"], StarterModel] = { + "ViT-L": clip_vit_l_image_encoder, + "ViT-H": ip_adapter_sd_image_encoder, + "ViT-G": ip_adapter_sdxl_image_encoder, } @@ -116,7 +122,9 @@ def invoke(self, context: InvocationContext) -> IPAdapterOutput: image_encoder_model_id = ip_adapter_info.image_encoder_model_id image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip() else: - image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + image_encoder_starter_model = CLIP_VISION_MODEL_MAP[self.clip_vision_model] + image_encoder_model_id = image_encoder_starter_model.source + image_encoder_model_name = image_encoder_starter_model.name image_encoder_model = self.get_clip_image_encoder(context, image_encoder_model_id, image_encoder_model_name) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 66568f0a0dd..05b61d35e5b 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -25,6 +25,15 @@ class StarterModelBundles(BaseModel): models: list[StarterModel] +cyberrealistic_negative = StarterModel( + name="CyberRealistic Negative v3", + base=BaseModelType.StableDiffusion1, + source="https://huggingface.co/cyberdelia/CyberRealistic_Negative/resolve/main/CyberRealistic_Negative_v3.pt", + description="Negative embedding 
specifically for use with CyberRealistic.", + type=ModelType.TextualInversion, +) + +# region CLIP Image Encoders ip_adapter_sd_image_encoder = StarterModel( name="IP Adapter SD1.5 Image Encoder", base=BaseModelType.StableDiffusion1, @@ -32,7 +41,6 @@ class StarterModelBundles(BaseModel): description="IP Adapter SD Image Encoder", type=ModelType.CLIPVision, ) - ip_adapter_sdxl_image_encoder = StarterModel( name="IP Adapter SDXL Image Encoder", base=BaseModelType.StableDiffusionXL, @@ -40,14 +48,16 @@ class StarterModelBundles(BaseModel): description="IP Adapter SDXL Image Encoder", type=ModelType.CLIPVision, ) - -cyberrealistic_negative = StarterModel( - name="CyberRealistic Negative v3", - base=BaseModelType.StableDiffusion1, - source="https://huggingface.co/cyberdelia/CyberRealistic_Negative/resolve/main/CyberRealistic_Negative_v3.pt", - description="Negative embedding specifically for use with CyberRealistic.", - type=ModelType.TextualInversion, +# Note: This model is installed from the same source as the CLIPEmbed model below. The model contains both the image +# encoder and the text encoder, but we need separate model entries so that they get loaded correctly. +clip_vit_l_image_encoder = StarterModel( + name="clip-vit-large-patch14", + base=BaseModelType.Any, + source="InvokeAI/clip-vit-large-patch14", + description="CLIP ViT-L Image Encoder", + type=ModelType.CLIPVision, ) +# endregion # region TextEncoders t5_base_encoder = StarterModel( @@ -254,6 +264,14 @@ class StarterModelBundles(BaseModel): type=ModelType.IPAdapter, dependencies=[ip_adapter_sdxl_image_encoder], ) +ip_adapter_flux = StarterModel( + name="XLabs FLUX IP-Adapter", + base=BaseModelType.Flux, + source="https://huggingface.co/XLabs-AI/flux-ip-adapter/resolve/main/flux-ip-adapter.safetensors", + description="FLUX IP-Adapter", + type=ModelType.IPAdapter, + dependencies=[clip_vit_l_image_encoder], +) # endregion # region ControlNet qr_code_cnet_sd1 = StarterModel( @@ -555,6 +573,7 @@ class StarterModelBundles(BaseModel): ip_adapter_plus_sd1, ip_adapter_plus_face_sd1, ip_adapter_sdxl, + ip_adapter_flux, qr_code_cnet_sd1, qr_code_cnet_sdxl, canny_sd1, @@ -642,6 +661,7 @@ class StarterModelBundles(BaseModel): t5_8b_quantized_encoder, clip_l_encoder, union_cnet_flux, + ip_adapter_flux, ] STARTER_BUNDLES: dict[str, list[StarterModel]] = { From e545f18a45b17b9a6bcc7afb53d71c4e57b5894e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 21 Oct 2024 22:38:06 +0000 Subject: [PATCH 23/30] (minor) Fix ruff. --- .../backend/model_manager/load/model_loaders/clip_vision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/clip_vision.py b/invokeai/backend/model_manager/load/model_loaders/clip_vision.py index 432e0f11756..cef1c962f9a 100644 --- a/invokeai/backend/model_manager/load/model_loaders/clip_vision.py +++ b/invokeai/backend/model_manager/load/model_loaders/clip_vision.py @@ -29,7 +29,7 @@ def _load_model( raise ValueError("Only DiffusersConfigBase models are currently supported here.") if submodel_type is not None: - raise Exception(f"There are no submodels in models of type {model_class}") + raise Exception("There are no submodels in CLIP Vision models.") model_path = Path(config.path) From 740f6eb19f8fc9def544cb96e7bc5535fb1b22ad Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 22 Oct 2024 15:56:49 +0000 Subject: [PATCH 24/30] Skip tests that use the meta device - they fail on the MacOS CI runners. 
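
The skipped tests build dummy state dicts on PyTorch's meta device so that parameter shapes can be inferred without allocating real weights; on the macOS CI runners these meta-device operations currently fail. A minimal, self-contained sketch of the pattern (illustrative only; the test name and tensor shape are placeholders, not values from the diff below):

    # Illustrative sketch, not part of the patch: the skip + meta-device pattern used below.
    import sys

    import pytest
    import torch

    @pytest.mark.skipif(sys.platform == "darwin", reason="Meta-device ops fail on macOS CI runners")
    def test_meta_device_shapes():
        # Tensors created under torch.device("meta") carry shape/dtype only, no storage.
        with torch.device("meta"):
            dummy = torch.empty(16, 768)  # no real memory is allocated
        assert dummy.shape == (16, 768)
        assert dummy.device.type == "meta"
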
--- tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py index 1c88304ea13..93012684b7e 100644 --- a/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py +++ b/tests/backend/flux/ip_adapter/test_xlabs_ip_adapter_flux.py @@ -1,4 +1,7 @@ +import sys + import accelerate +import pytest import torch from invokeai.backend.flux.ip_adapter.state_dict_utils import ( @@ -18,6 +21,7 @@ def test_is_state_dict_xlabs_ip_adapter(): assert is_state_dict_xlabs_ip_adapter(sd) +@pytest.mark.skipif(sys.platform == "darwin", reason="Skipping on macOS") def test_infer_xlabs_ip_adapter_params_from_state_dict(): # Construct a dummy state_dict with tensors of the correct shape on the meta device. with torch.device("meta"): @@ -31,6 +35,7 @@ def test_infer_xlabs_ip_adapter_params_from_state_dict(): assert params.clip_embeddings_dim == 768 +@pytest.mark.skipif(sys.platform == "darwin", reason="Skipping on macOS") def test_initialize_xlabs_ip_adapter_flux_from_state_dict(): # Construct a dummy state_dict with tensors of the correct shape on the meta device. with torch.device("meta"): From e48cab02768b039db121cfb27c924544bd7e6532 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Tue, 22 Oct 2024 16:32:01 +0000 Subject: [PATCH 25/30] Only allow a single image prompt for FLUX IP-Adapters (haven't really looked into this much, but punting on it for now). --- invokeai/app/invocations/flux_denoise.py | 5 +++++ invokeai/app/invocations/flux_ip_adapter.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index e5413d05520..27f8ee02858 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -570,6 +570,11 @@ def _prep_ip_adapter_image_prompt_clip_embeds( else: raise ValueError(f"Unsupported IP-Adapter image type: {type(ip_adapter_field.image)}") + if len(ipa_image_fields) != 1: + raise ValueError( + f"FLUX IP-Adapter only supports a single image prompt (received {len(ipa_image_fields)})." + ) + ipa_images = [context.images.get_pil(image.image_name) for image in ipa_image_fields] pos_images: list[npt.NDArray[np.uint8]] = [] diff --git a/invokeai/app/invocations/flux_ip_adapter.py b/invokeai/app/invocations/flux_ip_adapter.py index 1b342b3c7a3..9653f859ad0 100644 --- a/invokeai/app/invocations/flux_ip_adapter.py +++ b/invokeai/app/invocations/flux_ip_adapter.py @@ -35,7 +35,7 @@ class FluxIPAdapterInvocation(BaseInvocation): # FLUXIPAdapterInvocation is based closely on IPAdapterInvocation, but with some unsupported features removed. 
- image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).") + image: ImageField = InputField(description="The IP-Adapter image prompt(s).") ip_adapter_model: ModelIdentifierField = InputField( description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel ) From 0a96466b60bbf610a79287c2ff7199273b28a032 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Tue, 22 Oct 2024 15:22:56 -0400 Subject: [PATCH 26/30] feat(ui): add IP adapters to FLUX in linear UI --- invokeai/app/invocations/ip_adapter.py | 2 +- .../components/CanvasAddEntityButtons.tsx | 1 - .../EntityListGlobalActionBarAddLayerMenu.tsx | 2 +- .../components/IPAdapter/IPAdapterModel.tsx | 28 ++++- .../IPAdapter/IPAdapterSettings.tsx | 5 +- .../src/features/controlLayers/store/types.ts | 2 +- .../util/graph/generation/buildFLUXGraph.ts | 35 ++++++ .../frontend/web/src/services/api/schema.ts | 112 ++++++++++++++++-- 8 files changed, 169 insertions(+), 18 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 361e8f0a71b..e3d92374c75 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -81,7 +81,7 @@ class IPAdapterInvocation(BaseInvocation): ui_order=-1, ui_type=UIType.IPAdapterModel, ) - clip_vision_model: Literal["ViT-H", "ViT-G"] = InputField( + clip_vision_model: Literal["ViT-H", "ViT-G", "ViT-L"] = InputField( description="CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.", default="ViT-H", ui_order=2, diff --git a/invokeai/frontend/web/src/features/controlLayers/components/CanvasAddEntityButtons.tsx b/invokeai/frontend/web/src/features/controlLayers/components/CanvasAddEntityButtons.tsx index 76c7d88fdbd..4fc2fb8b347 100644 --- a/invokeai/frontend/web/src/features/controlLayers/components/CanvasAddEntityButtons.tsx +++ b/invokeai/frontend/web/src/features/controlLayers/components/CanvasAddEntityButtons.tsx @@ -34,7 +34,6 @@ export const CanvasAddEntityButtons = memo(() => { justifyContent="flex-start" leftIcon={} onClick={addGlobalReferenceImage} - isDisabled={isFLUX} > {t('controlLayers.globalReferenceImage')} diff --git a/invokeai/frontend/web/src/features/controlLayers/components/CanvasEntityList/EntityListGlobalActionBarAddLayerMenu.tsx b/invokeai/frontend/web/src/features/controlLayers/components/CanvasEntityList/EntityListGlobalActionBarAddLayerMenu.tsx index 7a9cf30f678..ba5c8e6d037 100644 --- a/invokeai/frontend/web/src/features/controlLayers/components/CanvasEntityList/EntityListGlobalActionBarAddLayerMenu.tsx +++ b/invokeai/frontend/web/src/features/controlLayers/components/CanvasEntityList/EntityListGlobalActionBarAddLayerMenu.tsx @@ -40,7 +40,7 @@ export const EntityListGlobalActionBarAddLayerMenu = memo(() => { /> - } onClick={addGlobalReferenceImage} isDisabled={isFLUX}> + } onClick={addGlobalReferenceImage}> {t('controlLayers.globalReferenceImage')} diff --git a/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx b/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx index 5b5add0b854..218582535f3 100644 --- a/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx +++ b/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx @@ -2,7 +2,7 @@ import type { ComboboxOnChange } from '@invoke-ai/ui-library'; import { Combobox, Flex, FormControl, Tooltip } from 
'@invoke-ai/ui-library'; import { useAppSelector } from 'app/store/storeHooks'; import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox'; -import { selectBase } from 'features/controlLayers/store/paramsSlice'; +import { selectBase, selectIsFLUX } from 'features/controlLayers/store/paramsSlice'; import type { CLIPVisionModelV2 } from 'features/controlLayers/store/types'; import { isCLIPVisionModelV2 } from 'features/controlLayers/store/types'; import { memo, useCallback, useMemo } from 'react'; @@ -11,9 +11,13 @@ import { useIPAdapterModels } from 'services/api/hooks/modelsByType'; import type { AnyModelConfig, IPAdapterModelConfig } from 'services/api/types'; import { assert } from 'tsafe'; +// at this time, ViT-L is the only supported clip model for FLUX IP adapter +const FLUX_CLIP_VISION = 'ViT-L'; + const CLIP_VISION_OPTIONS = [ { label: 'ViT-H', value: 'ViT-H' }, { label: 'ViT-G', value: 'ViT-G' }, + { label: FLUX_CLIP_VISION, value: FLUX_CLIP_VISION }, ]; type Props = { @@ -47,6 +51,8 @@ export const IPAdapterModel = memo(({ modelKey, onChangeModel, clipVisionModel, [onChangeCLIPVisionModel] ); + const isFLUX = useAppSelector(selectIsFLUX); + const getIsDisabled = useCallback( (model: AnyModelConfig): boolean => { const isCompatible = currentBaseModel === model.base; @@ -64,10 +70,20 @@ export const IPAdapterModel = memo(({ modelKey, onChangeModel, clipVisionModel, isLoading, }); - const clipVisionModelValue = useMemo( - () => CLIP_VISION_OPTIONS.find((o) => o.value === clipVisionModel), - [clipVisionModel] - ); + const clipVisionOptions = useMemo(() => { + if (isFLUX) { + return CLIP_VISION_OPTIONS.map((option) => ({ ...option, isDisabled: option.value !== FLUX_CLIP_VISION })); + } else { + return CLIP_VISION_OPTIONS; + } + }, [isFLUX]); + + const clipVisionModelValue = useMemo(() => { + if (isFLUX) { + return CLIP_VISION_OPTIONS.find((o) => o.value === FLUX_CLIP_VISION); + } + return CLIP_VISION_OPTIONS.find((o) => o.value === clipVisionModel); + }, [clipVisionModel, isFLUX]); return ( @@ -85,7 +101,7 @@ export const IPAdapterModel = memo(({ modelKey, onChangeModel, clipVisionModel, {selectedModel?.format === 'checkpoint' && ( { const pullBboxIntoIPAdapter = usePullBboxIntoGlobalReferenceImage(entityIdentifier); const isBusy = useCanvasIsBusy(); + const isFLUX = useAppSelector(selectIsFLUX); + return ( @@ -113,7 +116,7 @@ export const IPAdapterSettings = memo(() => { - + {!isFLUX && } diff --git a/invokeai/frontend/web/src/features/controlLayers/store/types.ts b/invokeai/frontend/web/src/features/controlLayers/store/types.ts index aacfd630d53..1905b98cede 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/types.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/types.ts @@ -46,7 +46,7 @@ const zControlModeV2 = z.enum(['balanced', 'more_prompt', 'more_control', 'unbal export type ControlModeV2 = z.infer; export const isControlModeV2 = (v: unknown): v is ControlModeV2 => zControlModeV2.safeParse(v).success; -const zCLIPVisionModelV2 = z.enum(['ViT-H', 'ViT-G']); +const zCLIPVisionModelV2 = z.enum(['ViT-H', 'ViT-G', 'ViT-L']); export type CLIPVisionModelV2 = z.infer; export const isCLIPVisionModelV2 = (v: unknown): v is CLIPVisionModelV2 => zCLIPVisionModelV2.safeParse(v).success; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildFLUXGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildFLUXGraph.ts index cc8a7347fe2..88e99ae8ea1 100644 --- 
a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildFLUXGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildFLUXGraph.ts @@ -20,6 +20,7 @@ import { isNonRefinerMainModelConfig } from 'services/api/types'; import { assert } from 'tsafe'; import { addControlNets } from './addControlAdapters'; +import { addIPAdapters } from './addIPAdapters'; const log = logger('system'); @@ -198,6 +199,40 @@ export const buildFLUXGraph = async ( g.deleteNode(controlNetCollector.id); } + const ipAdapterCollector = g.addNode({ + type: 'collect', + id: getPrefixedId('ip_adapter_collector'), + }); + const ipAdapterResult = addIPAdapters(canvas.referenceImages.entities, g, ipAdapterCollector, modelConfig.base); + + const totalIPAdaptersAdded = ipAdapterResult.addedIPAdapters; + if (totalIPAdaptersAdded > 0) { + assert(steps > 2); + const cfg_scale_start_step = 1; + const cfg_scale_end_step = Math.ceil(steps / 2); + assert(cfg_scale_end_step > cfg_scale_start_step); + + const negCond = g.addNode({ + type: 'flux_text_encoder', + id: getPrefixedId('flux_text_encoder'), + prompt: '', + }); + + g.addEdge(modelLoader, 'clip', negCond, 'clip'); + g.addEdge(modelLoader, 't5_encoder', negCond, 't5_encoder'); + g.addEdge(modelLoader, 'max_seq_len', negCond, 't5_max_seq_len'); + g.addEdge(negCond, 'conditioning', noise, 'negative_text_conditioning'); + + g.updateNode(noise, { + cfg_scale: 3, + cfg_scale_start_step, + cfg_scale_end_step, + }); + g.addEdge(ipAdapterCollector, 'collection', noise, 'ip_adapter'); + } else { + g.deleteNode(ipAdapterCollector.id); + } + if (state.system.shouldUseNSFWChecker) { canvasOutput = addNSFWChecker(g, canvasOutput); } diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index e16aedccd23..458472f507f 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -6487,6 +6487,29 @@ export type components = { * @default null */ positive_text_conditioning?: components["schemas"]["FluxConditioningField"]; + /** + * @description Negative conditioning tensor. Can be None if cfg_scale is 1.0. + * @default null + */ + negative_text_conditioning?: components["schemas"]["FluxConditioningField"] | null; + /** + * CFG Scale + * @description Classifier-Free Guidance scale + * @default 1 + */ + cfg_scale?: number | number[]; + /** + * CFG Scale Start Step + * @description Index of the first step to apply cfg_scale. Negative indices count backwards from the the last step (e.g. a value of -1 refers to the final step). + * @default 0 + */ + cfg_scale_start_step?: number; + /** + * CFG Scale End Step + * @description Index of the last step to apply cfg_scale. Negative indices count backwards from the last step (e.g. a value of -1 refers to the final step). + * @default -1 + */ + cfg_scale_end_step?: number; /** * Width * @description Width of the generated image. @@ -6528,6 +6551,12 @@ export type components = { * @default null */ controlnet_vae?: components["schemas"]["VAEField"] | null; + /** + * IP-Adapter + * @description IP-Adapter to apply + * @default null + */ + ip_adapter?: components["schemas"]["IPAdapterField"] | components["schemas"]["IPAdapterField"][] | null; /** * type * @default flux_denoise @@ -6536,6 +6565,74 @@ export type components = { */ type: "flux_denoise"; }; + /** + * FLUX IP-Adapter + * @description Collects FLUX IP-Adapter info to pass to other nodes. 
+ */ + FluxIPAdapterInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Image + * @description The IP-Adapter image prompt(s). + * @default null + */ + image?: components["schemas"]["ImageField"] | components["schemas"]["ImageField"][]; + /** + * IP-Adapter Model + * @description The IP-Adapter model. + * @default null + */ + ip_adapter_model?: components["schemas"]["ModelIdentifierField"]; + /** + * Clip Vision Model + * @description CLIP Vision model to use. + * @default ViT-L + * @constant + * @enum {string} + */ + clip_vision_model?: "ViT-L"; + /** + * Weight + * @description The weight given to the IP-Adapter + * @default 1 + */ + weight?: number | number[]; + /** + * Begin Step Percent + * @description When the IP-Adapter is first applied (% of total steps) + * @default 0 + */ + begin_step_percent?: number; + /** + * End Step Percent + * @description When the IP-Adapter is last applied (% of total steps) + * @default 1 + */ + end_step_percent?: number; + /** + * type + * @default flux_ip_adapter + * @constant + * @enum {string} + */ + type: "flux_ip_adapter"; + }; /** * FLUX LoRA * @description Apply a LoRA model to a FLUX transformer and/or text encoder. @@ -6981,7 +7078,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | 
components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | 
components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | 
components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | 
components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | 
components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; }; /** * Edges @@ -7482,7 +7579,7 @@ export type components = { * @default ViT-H * @enum {string} */ - clip_vision_model?: "ViT-H" | "ViT-G"; + clip_vision_model?: "ViT-H" | "ViT-G" | "ViT-L"; /** * Weight * @description The weight given to the IP-Adapter @@ -7600,7 +7697,7 @@ export type components = { * @description The CLIP Vision model * @enum {string} */ - clip_vision_model: "ViT-H" | "ViT-G"; + clip_vision_model: "ViT-L" | "ViT-H" | "ViT-G"; /** * Method * @description Method to apply IP Weights with @@ -9466,7 +9563,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | 
components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | 
components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + 
invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | 
components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | 
components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -9524,7 +9621,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | 
components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | 
components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | 
components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | 
components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | 
components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -9607,6 +9704,7 @@ export type components = { float_to_int: components["schemas"]["IntegerOutput"]; flux_controlnet: components["schemas"]["FluxControlNetOutput"]; flux_denoise: components["schemas"]["LatentsOutput"]; + flux_ip_adapter: components["schemas"]["IPAdapterOutput"]; flux_lora_collection_loader: components["schemas"]["FluxLoRALoaderOutput"]; flux_lora_loader: components["schemas"]["FluxLoRALoaderOutput"]; flux_model_loader: components["schemas"]["FluxModelLoaderOutput"]; @@ -9772,7 +9870,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | 
components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | 
components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | 
components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | 
components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | 
components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -9841,7 +9939,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | 
components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] 
| components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; + 
invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | 
components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | 
components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node From f53823b45edff1dfa2c14b262f2283f163b0b71e Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:29:14 +1000 Subject: [PATCH 27/30] fix(ui): update CLIP Vision when ipa model changes --- .../features/controlLayers/store/canvasSlice.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/invokeai/frontend/web/src/features/controlLayers/store/canvasSlice.ts b/invokeai/frontend/web/src/features/controlLayers/store/canvasSlice.ts index b4b784e80d6..fd7b4958436 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/canvasSlice.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/canvasSlice.ts @@ -381,6 +381,13 @@ export const canvasSlice = createSlice({ return; } entity.ipAdapter.model = modelConfig ? zModelIdentifierField.parse(modelConfig) : null; + // Ensure that the IP Adapter model is compatible with the CLIP Vision model + if (entity.ipAdapter.model?.base === 'flux') { + entity.ipAdapter.clipVisionModel = 'ViT-L'; + } else if (entity.ipAdapter.clipVisionModel === 'ViT-L') { + // Fall back to ViT-H (ViT-G would also work) + entity.ipAdapter.clipVisionModel = 'ViT-H'; + } }, referenceImageIPAdapterCLIPVisionModelChanged: ( state, @@ -577,6 +584,13 @@ export const canvasSlice = createSlice({ return; } referenceImage.ipAdapter.model = modelConfig ? 
zModelIdentifierField.parse(modelConfig) : null; + // Ensure that the IP Adapter model is compatible with the CLIP Vision model + if (referenceImage.ipAdapter.model?.base === 'flux') { + referenceImage.ipAdapter.clipVisionModel = 'ViT-L'; + } else if (referenceImage.ipAdapter.clipVisionModel === 'ViT-L') { + // Fall back to ViT-H (ViT-G would also work) + referenceImage.ipAdapter.clipVisionModel = 'ViT-H'; + } }, rgIPAdapterCLIPVisionModelChanged: ( state, From bf3260446d1744a534cdc0a5d7077cefb7fc6242 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:30:11 +1000 Subject: [PATCH 28/30] fix(ui): use `flux_ip_adapter` for flux --- .../util/graph/generation/addIPAdapters.ts | 50 ++++++++++++++----- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts index c4c95d4a5a2..fe91b52f22b 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addIPAdapters.ts @@ -34,19 +34,43 @@ const addIPAdapter = (entity: CanvasReferenceImageState, g: Graph, collector: In assert(image, 'IP Adapter image is required'); assert(model, 'IP Adapter model is required'); - const ipAdapterNode = g.addNode({ - id: `ip_adapter_${id}`, - type: 'ip_adapter', - weight, - method, - ip_adapter_model: model, - clip_vision_model: clipVisionModel, - begin_step_percent: beginEndStepPct[0], - end_step_percent: beginEndStepPct[1], - image: { - image_name: image.image_name, - }, - }); + let ipAdapterNode: Invocation<'flux_ip_adapter' | 'ip_adapter'>; + + if (model.base === 'flux') { + assert(clipVisionModel === 'ViT-L', 'ViT-L is the only supported CLIP Vision model for FLUX IP adapter'); + ipAdapterNode = g.addNode({ + id: `ip_adapter_${id}`, + type: 'flux_ip_adapter', + weight, + ip_adapter_model: model, + clip_vision_model: clipVisionModel, + begin_step_percent: beginEndStepPct[0], + end_step_percent: beginEndStepPct[1], + image: { + image_name: image.image_name, + }, + }); + } else { + // model.base === SD1.5 or SDXL + assert( + clipVisionModel === 'ViT-H' || clipVisionModel === 'ViT-G', + 'ViT-G and ViT-H are the only supported CLIP Vision models for SD1.5 and SDXL IP adapters' + ); + ipAdapterNode = g.addNode({ + id: `ip_adapter_${id}`, + type: 'ip_adapter', + weight, + method, + ip_adapter_model: model, + clip_vision_model: clipVisionModel, + begin_step_percent: beginEndStepPct[0], + end_step_percent: beginEndStepPct[1], + image: { + image_name: image.image_name, + }, + }); + } + g.addEdge(ipAdapterNode, 'ip_adapter', collector, 'item'); }; From ee8975401aa6379dc858d089c049bbef7a2e2b2b Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:31:10 +1000 Subject: [PATCH 29/30] fix(ui): remove special handling for flux in `IPAdapterModel` This masked an issue w/ the CLIP Vision model. Issue is now handled in reducer/graph builder. 
--- .../components/IPAdapter/IPAdapterModel.tsx | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx b/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx index 218582535f3..682c272f892 100644 --- a/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx +++ b/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterModel.tsx @@ -71,19 +71,15 @@ export const IPAdapterModel = memo(({ modelKey, onChangeModel, clipVisionModel, }); const clipVisionOptions = useMemo(() => { - if (isFLUX) { - return CLIP_VISION_OPTIONS.map((option) => ({ ...option, isDisabled: option.value !== FLUX_CLIP_VISION })); - } else { - return CLIP_VISION_OPTIONS; - } + return CLIP_VISION_OPTIONS.map((option) => ({ + ...option, + isDisabled: isFLUX && option.value !== FLUX_CLIP_VISION, + })); }, [isFLUX]); const clipVisionModelValue = useMemo(() => { - if (isFLUX) { - return CLIP_VISION_OPTIONS.find((o) => o.value === FLUX_CLIP_VISION); - } return CLIP_VISION_OPTIONS.find((o) => o.value === clipVisionModel); - }, [clipVisionModel, isFLUX]); + }, [clipVisionModel]); return ( From 61496fdcbc657eb070916c152f3e014e3dfd240a Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Wed, 23 Oct 2024 08:34:15 +1000 Subject: [PATCH 30/30] fix(nodes): load IP Adapter images as RGB FLUX IP Adapter only works with RGB. Did the same for non-FLUX to be safe & consistent, though I don't think it's strictly necessary. --- invokeai/app/invocations/denoise_latents.py | 4 +++- invokeai/app/invocations/flux_denoise.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/denoise_latents.py b/invokeai/app/invocations/denoise_latents.py index 34295b5e229..0c3a8921547 100644 --- a/invokeai/app/invocations/denoise_latents.py +++ b/invokeai/app/invocations/denoise_latents.py @@ -547,7 +547,9 @@ def prep_ip_adapter_image_prompts( if not isinstance(single_ipa_image_fields, list): single_ipa_image_fields = [single_ipa_image_fields] - single_ipa_images = [context.images.get_pil(image.image_name) for image in single_ipa_image_fields] + single_ipa_images = [ + context.images.get_pil(image.image_name, mode="RGB") for image in single_ipa_image_fields + ] with image_encoder_model_info as image_encoder_model: assert isinstance(image_encoder_model, CLIPVisionModelWithProjection) # Get image embeddings from CLIP and ImageProjModel. diff --git a/invokeai/app/invocations/flux_denoise.py b/invokeai/app/invocations/flux_denoise.py index 2e7dba406c2..c9907ce4082 100644 --- a/invokeai/app/invocations/flux_denoise.py +++ b/invokeai/app/invocations/flux_denoise.py @@ -576,7 +576,7 @@ def _prep_ip_adapter_image_prompt_clip_embeds( f"FLUX IP-Adapter only supports a single image prompt (received {len(ipa_image_fields)})." ) - ipa_images = [context.images.get_pil(image.image_name) for image in ipa_image_fields] + ipa_images = [context.images.get_pil(image.image_name, mode="RGB") for image in ipa_image_fields] pos_images: list[npt.NDArray[np.uint8]] = [] neg_images: list[npt.NDArray[np.uint8]] = []
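Taken together, the three UI patches above converge on one pairing rule: a FLUX IP Adapter must use the ViT-L CLIP Vision encoder, while SD1.5/SDXL IP Adapters use ViT-H or ViT-G. The Python sketch below restates that rule in one place; the helper name and the plain-string model identifiers are illustrative assumptions, not code from the patches.

def pick_clip_vision_model(ip_adapter_base: str, current: str | None) -> str:
    """Return a CLIP Vision model compatible with the IP Adapter's base model."""
    if ip_adapter_base == "flux":
        # FLUX IP Adapters are only paired with ViT-L.
        return "ViT-L"
    if current in ("ViT-H", "ViT-G"):
        # SD1.5/SDXL adapters keep a valid existing choice.
        return current
    # Otherwise fall back to ViT-H, mirroring the reducer's fallback comment.
    return "ViT-H"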
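The last patch forces reference images to load in RGB because CLIP Vision encoders expect 3-channel input; an RGBA, palette, or grayscale image loaded as-is would reach the encoder with the wrong channel count. Below is a minimal sketch of the same normalization using plain Pillow/NumPy rather than InvokeAI's image service (the function name and direct Pillow usage are assumptions for illustration only).

import numpy as np
from PIL import Image

def load_reference_image_rgb(path: str) -> np.ndarray:
    # Mirror the intent of get_pil(image_name, mode="RGB"): always hand the
    # CLIP Vision preprocessor an (H, W, 3) uint8 array, regardless of the
    # source image's mode (RGBA, palette, grayscale, ...).
    img = Image.open(path).convert("RGB")
    return np.asarray(img, dtype=np.uint8)

# e.g. an RGBA PNG yields shape (H, W, 3) instead of (H, W, 4):
# arr = load_reference_image_rgb("reference.png")
# assert arr.ndim == 3 and arr.shape[-1] == 3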