From ee8f693e539841ed7cd2e45b668b0e86a67f00e2 Mon Sep 17 00:00:00 2001
From: Akshan Krithick <akshankrithick305@gmail.com>
Date: Wed, 17 Jun 2026 15:13:05 -0700
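Subject: [PATCH] fix(bria_fibo): fix guidance_embeds, prompt_embeds,
 tensor-image and multi-image crashes

* transformer: build `guidance_embed` with the same `time_theta` as
  `time_embed`, and check `guidance is not None` instead of relying on
  the truth value of the `guidance` tensor.
* pipelines: `encode_prompt` now raises a `ValueError` when
  `prompt_embeds` is passed on its own, because the per-layer embeddings
  are only computed from `prompt`.
* edit pipeline: always resize and preprocess a provided `image`;
  tensors whose `size(1)` equals `latent_channels` were previously
  skipped.
* decoding: collect one entry per decoded latent (`extend` for list
  outputs; `curr_image[0]` followed by `np.stack` for
  `output_type="np"`) instead of appending each postprocess result
  wholesale.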
---
 .../models/transformers/transformer_bria_fibo.py |  4 ++--
 .../pipelines/bria_fibo/pipeline_bria_fibo.py    | 14 ++++++++++----
 .../bria_fibo/pipeline_bria_fibo_edit.py         | 16 +++++++++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/diffusers/models/transformers/transformer_bria_fibo.py b/src/diffusers/models/transformers/transformer_bria_fibo.py
index 7ddbccfa47c5..eb7e6cf45720 100644
--- a/src/diffusers/models/transformers/transformer_bria_fibo.py
+++ b/src/diffusers/models/transformers/transformer_bria_fibo.py
@@ -470,7 +470,7 @@ def __init__(
         self.time_embed = BriaFiboTimestepProjEmbeddings(embedding_dim=self.inner_dim, time_theta=time_theta)
 
         if guidance_embeds:
-            self.guidance_embed = BriaFiboTimestepProjEmbeddings(embedding_dim=self.inner_dim)
+            self.guidance_embed = BriaFiboTimestepProjEmbeddings(embedding_dim=self.inner_dim, time_theta=time_theta)
 
         self.context_embedder = nn.Linear(self.config.joint_attention_dim, self.inner_dim)
         self.x_embedder = torch.nn.Linear(self.config.in_channels, self.inner_dim)
@@ -555,7 +555,7 @@ def forward(
 
         temb = self.time_embed(timestep, dtype=hidden_states.dtype)
 
-        if guidance:
+        if guidance is not None:
             temb += self.guidance_embed(guidance, dtype=hidden_states.dtype)
 
         encoder_hidden_states = self.context_embedder(encoder_hidden_states)
diff --git a/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo.py b/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo.py
index 1f178066b17d..5738c0044e92 100644
--- a/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo.py
+++ b/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo.py
@@ -260,6 +260,11 @@ def encode_prompt(
             )
             prompt_embeds = prompt_embeds.to(dtype=self.transformer.dtype)
             prompt_layers = [tensor.to(dtype=self.transformer.dtype) for tensor in prompt_layers]
+        else:
+            raise ValueError(
+                "`prompt_embeds` cannot be passed on its own; this pipeline also needs the per-layer embeddings "
+                "computed from `prompt`. Please pass `prompt` instead."
+            )
 
         if guidance_scale > 1:
             if isinstance(negative_prompt, list) and negative_prompt[0] is None:
@@ -773,10 +778,11 @@ def __call__(
             for scaled_latent in latents_scaled:
                 curr_image = self.vae.decode(scaled_latent.unsqueeze(0), return_dict=False)[0]
                 curr_image = self.image_processor.postprocess(curr_image.squeeze(dim=2), output_type=output_type)
-                image.append(curr_image)
-            if len(image) == 1:
-                image = image[0]
-            else:
+                if output_type == "np":
+                    image.append(curr_image[0])
+                else:
+                    image.extend(curr_image)
+            if output_type == "np":
                 image = np.stack(image, axis=0)
 
         # Offload all models
diff --git a/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py b/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py
index c2327bbce1c7..0e6bb7ae583b 100644
--- a/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py
+++ b/src/diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py
@@ -418,6 +418,11 @@ def encode_prompt(
             )
             prompt_embeds = prompt_embeds.to(dtype=self.transformer.dtype)
             prompt_layers = [tensor.to(dtype=self.transformer.dtype) for tensor in prompt_layers]
+        else:
+            raise ValueError(
+                "`prompt_embeds` cannot be passed on its own; this pipeline also needs the per-layer embeddings "
+                "computed from `prompt`. Please pass `prompt` instead."
+            )
 
         if guidance_scale > 1:
             if isinstance(negative_prompt, list) and negative_prompt[0] is None:
@@ -807,7 +812,7 @@ def __call__(
             prompt_layers = prompt_layers + [prompt_layers[-1]] * (total_num_layers_transformer - len(prompt_layers))
 
         # Preprocess image
-        if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
+        if image is not None:
             image = self.image_processor.resize(image, height, width)
             image = self.image_processor.preprocess(image, height, width)
 
@@ -996,10 +1001,11 @@ def __call__(
             for scaled_latent in latents_scaled:
                 curr_image = self.vae.decode(scaled_latent.unsqueeze(0), return_dict=False)[0]
                 curr_image = self.image_processor.postprocess(curr_image.squeeze(dim=2), output_type=output_type)
-                image.append(curr_image)
-            if len(image) == 1:
-                image = image[0]
-            else:
+                if output_type == "np":
+                    image.append(curr_image[0])
+                else:
+                    image.extend(curr_image)
+            if output_type == "np":
                 image = np.stack(image, axis=0)
 
         # Offload all models