diff --git a/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py b/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py
index fda7e5ef7e..4cf3baea90 100644
--- a/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py
+++ b/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py
@@ -382,10 +382,11 @@ def _split_inputs_into_batches(cls, batch_size, latents, prompt_embeds, pooled_p
 
             # Pad guidance if necessary
             if guidance is not None:
+                guidance_batches[-1] = guidance_batches[-1].unsqueeze(1)
                 sequence_to_stack = (guidance_batches[-1],) + tuple(
                     torch.zeros_like(guidance_batches[-1][0][None, :]) for _ in range(num_dummy_samples)
                 )
-                guidance_batches[-1] = torch.vstack(sequence_to_stack)
+                guidance_batches[-1] = torch.vstack(sequence_to_stack).squeeze(1)
 
         # Stack batches in the same tensor
         latents_batches = torch.stack(latents_batches)
@@ -623,14 +624,14 @@ def __call__(
                 f"After adjusting the num_inference_steps by strength parameter: {strength}, the number of pipeline"
                 f"steps is {num_inference_steps} which is < 1 and not appropriate for this pipeline."
             )
-        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
+        latent_timestep = timesteps[:1].repeat(num_prompts * num_images_per_prompt)
 
         # 6. Prepare latent variables
         num_channels_latents = self.transformer.config.in_channels // 4
         latents, latent_image_ids = self.prepare_latents(
             init_image,
             latent_timestep,
-            batch_size * num_images_per_prompt,
+            num_prompts * num_images_per_prompt,
             num_channels_latents,
             height,
             width,
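
For reviewers, a minimal standalone sketch (not part of the patch) of why the `unsqueeze(1)`/`squeeze(1)` round-trip is needed: `guidance` here is a 1-D tensor of shape `(batch,)`, so before this fix `guidance_batches[-1][0][None, :]` indexed a 0-d tensor and raised an `IndexError` when the last batch needed padding. The batch size and guidance value below are made up for illustration.

```python
import torch

# Hypothetical shapes for illustration: the last batch holds 3 real samples
# and must be padded with 2 dummy samples to reach the full batch size.
guidance = torch.full((3,), 3.5)  # 1-D guidance tensor, shape (3,)
num_dummy_samples = 2

# Before the fix: guidance[0] is a 0-d tensor, so guidance[0][None, :]
# raises "IndexError: too many indices for tensor of dimension 0".

# After the fix: lift to 2-D, pad with zero rows, then drop the helper dim.
guidance = guidance.unsqueeze(1)  # (3, 1)
sequence_to_stack = (guidance,) + tuple(
    torch.zeros_like(guidance[0][None, :])  # each dummy row: (1, 1)
    for _ in range(num_dummy_samples)
)
guidance = torch.vstack(sequence_to_stack).squeeze(1)  # (5, 1) -> (5,)
assert guidance.shape == (5,)
```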