From e84a3ef63c452800c2884fc195affedef07b9b31 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 1 Jun 2026 23:55:40 +0530
Subject: [PATCH 01/30] update

---
 src/diffusers/commands/diffusers_cli.py | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index a27ac24f2a3e..95b8dd5f3938 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -15,6 +15,7 @@
 
 from argparse import ArgumentParser
 
+from .agentic import register_agentic_commands
 from .custom_blocks import CustomBlocksCommand
 from .env import EnvironmentCommand
 from .fp16_safetensors import FP16SafetensorsCommand
@@ -28,6 +29,7 @@ def main():
     EnvironmentCommand.register_subcommand(commands_parser)
     FP16SafetensorsCommand.register_subcommand(commands_parser)
     CustomBlocksCommand.register_subcommand(commands_parser)
+    register_agentic_commands(commands_parser)
 
     # Let's go
     args = parser.parse_args()

From 59be75317e59bbb4b295a1c57fdf949843fa0866 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 2 Jun 2026 00:03:03 +0530
Subject: [PATCH 02/30] update

---
 src/diffusers/commands/agentic/README.md   | 246 ++++++++++
 src/diffusers/commands/agentic/__init__.py |  18 +
 src/diffusers/commands/agentic/_common.py  | 533 +++++++++++++++++++++
 src/diffusers/commands/agentic/app.py      |  38 ++
 src/diffusers/commands/agentic/audio.py    | 133 +++++
 src/diffusers/commands/agentic/image.py    | 170 +++++++
 src/diffusers/commands/agentic/modular.py  | 249 ++++++++++
 src/diffusers/commands/agentic/tasks.py    |  91 ++++
 src/diffusers/commands/agentic/video.py    | 147 ++++++
 9 files changed, 1625 insertions(+)
 create mode 100644 src/diffusers/commands/agentic/README.md
 create mode 100644 src/diffusers/commands/agentic/__init__.py
 create mode 100644 src/diffusers/commands/agentic/_common.py
 create mode 100644 src/diffusers/commands/agentic/app.py
 create mode 100644 src/diffusers/commands/agentic/audio.py
 create mode 100644 src/diffusers/commands/agentic/image.py
 create mode 100644 src/diffusers/commands/agentic/modular.py
 create mode 100644 src/diffusers/commands/agentic/tasks.py
 create mode 100644 src/diffusers/commands/agentic/video.py

diff --git a/src/diffusers/commands/agentic/README.md b/src/diffusers/commands/agentic/README.md
new file mode 100644
index 000000000000..07f261692cd3
--- /dev/null
+++ b/src/diffusers/commands/agentic/README.md
@@ -0,0 +1,246 @@
+# Agentic CLI for Diffusers
+
+Single-command access to common Diffusers use-cases. Designed for AI agents
+and humans who need to run image/video/audio generation **without writing
+Python scripts**.
+
+Every command below is reachable as `diffusers-cli <command>`. Run
+`diffusers-cli <command> --help` for full option documentation.
+
+## How it works
+
+The module integrates with the main CLI through a single function call in
+`diffusers_cli.py` — removing it disables everything with no side effects.
+
+```
+src/diffusers/commands/agentic/
+├── app.py          # register_agentic_commands(subparsers) — single integration point
+├── _common.py      # Shared helpers (arg groups, pipeline detection, loading, remote, I/O)
+├── image.py        # text-to-image, image-to-image, inpaint
+├── video.py        # text-to-video, image-to-video
+├── audio.py        # text-to-audio
+├── modular.py      # generic ModularPipeline runner with free-form inputs
+└── tasks.py        # `tasks` — list every registered agentic command
+```
+
+## Discovering tasks
+
+```bash
+diffusers-cli tasks            # human-readable
+diffusers-cli tasks --json     # for agents
+```
+
+## Pipeline detection (DiffusionPipeline vs ModularPipeline)
+
+Every inference command auto-detects whether the `--model` is a regular
+`DiffusionPipeline` repo (`model_index.json`) or a custom
+`ModularPipeline` repo (`modular_model_index.json`) via a single Hub
+listing — no weights are downloaded. If you point a task-shaped command at
+a modular repo, it exits with a hint to use `diffusers-cli modular`
+instead. The reverse is also true: `modular` rejects a regular repo and
+points back at the task-shaped command.
+
+## Pushing outputs to a bucket
+
+Every inference command (and `modular`) accepts `--push-to <bucket_id>`
+to upload the generated files to a Hugging Face **bucket** after they're
+saved locally. The bucket is created if it doesn't exist and files land
+under a prefix named after the task (e.g. `text-to-image/<filename>`).
+
+```bash
+diffusers-cli text-to-image \
+  --model stabilityai/stable-diffusion-xl-base-1.0 \
+  --prompt "a watercolor of a fox" \
+  --num-images 4 \
+  --push-to your-username/cli-generations
+```
+
+The upload is a single `batch_bucket_files` round-trip regardless of how
+many files were generated. The JSON payload reports `hf://buckets/...`
+URIs so an agent can pipe them into a follow-up tool.
+
+## Running remotely (HF Jobs) and fetching outputs back
+
+Every inference command supports `--remote`, which submits the same call
+to Hugging Face Jobs via `huggingface_hub.run_uv_job`, then by default
+**waits for the job to finish and downloads the outputs back to your
+local machine**.
+
+The flow:
+
+1. If `--push-to` isn't set, default it to `<your-user>/jobs-artifacts`
+   (the canonical jobs bucket — `https://huggingface.co/buckets/<you>/jobs-artifacts`).
+2. Generate a random `run_id` and pass it via `DIFFUSERS_CLI_RUN_ID` env
+   so the container writes its files under `<run_id>/` inside the bucket.
+3. Submit the job (your `HF_TOKEN` is forwarded as a secret).
+4. Poll `inspect_job` every `--poll-interval` seconds until the stage is
+   `COMPLETED` / `CANCELED` / `ERROR` / `DELETED`.
+5. List `<run_id>/` in the bucket and `download_bucket_files` everything
+   into the local `--output` directory (default `./outputs/`).
+
+Pass `--no-wait` to fire-and-forget — the command prints the job id and
+returns immediately; you can fetch later via `huggingface-cli buckets`.
+
+| Option | Description |
+|--------|-------------|
+| `--remote` | Run on HF Jobs instead of locally |
+| `--flavor` | Hardware flavor (default `a10g-small`) |
+| `--timeout` | Job timeout (e.g. `30m`, `2h`) |
+| `--dependencies` | Extra pip deps. Repeat for multiple |
+| `--namespace` | HF namespace (defaults to the current user) |
+| `--no-wait` | Skip polling/download — submit and exit |
+| `--poll-interval` | Seconds between job-status polls (default 5) |
+
+```bash
+# Submit text-to-image to HF Jobs on an A100, wait, download to ./outputs/
+diffusers-cli text-to-image \
+  --model stabilityai/stable-diffusion-xl-base-1.0 \
+  --prompt "a watercolor of a fox in autumn leaves" \
+  --num-images 4 \
+  --remote --flavor a100-large --timeout 30m
+```
+
+```bash
+# Same call, fire-and-forget
+diffusers-cli text-to-image ... --remote --no-wait
+```
+
+## Common options
+
+Every inference command supports:
+
+| Option | Description |
+|--------|-------------|
+| `--model` / `-m` | Model id on the Hub or local path |
+| `--device` | `cpu`, `cuda`, `cuda:0`, `mps` (defaults to best available) |
+| `--dtype` | `auto`, `float16`, `bfloat16`, `float32` |
+| `--variant` | Optional weight variant (e.g. `fp16`) |
+| `--revision` | Model revision (branch, tag, or SHA) |
+| `--token` | Hugging Face token for gated/private models |
+| `--trust-remote-code` | Allow custom code from the Hub |
+| `--output` / `-o` | Output file or directory |
+| `--json` | Machine-readable JSON summary on stdout |
+| `--seed` | Random seed for reproducibility |
+| `--pipeline-kwargs` | JSON object of extra kwargs forwarded to the pipeline call |
+
+## Commands
+
+### Image
+
+1. Generate an image from a text prompt
+   ```bash
+   diffusers-cli text-to-image \
+     --model stabilityai/stable-diffusion-xl-base-1.0 \
+     --prompt "a watercolor of a fox in autumn leaves" \
+     --output fox.png
+   ```
+
+2. Generate with explicit sampling controls
+   ```bash
+   diffusers-cli text-to-image \
+     --model stabilityai/stable-diffusion-xl-base-1.0 \
+     --prompt "studio portrait of a cyberpunk hacker" \
+     --negative-prompt "blurry, low quality" \
+     --num-inference-steps 30 \
+     --guidance-scale 7.5 \
+     --height 1024 --width 1024 \
+     --seed 42
+   ```
+
+3. Generate multiple variants at once
+   ```bash
+   diffusers-cli text-to-image \
+     --model black-forest-labs/FLUX.1-schnell \
+     --prompt "a still life with citrus and ceramics" \
+     --num-images 4 \
+     --output ./outputs/still-life/
+   ```
+
+4. Transform an existing image with a prompt (image-to-image)
+   ```bash
+   diffusers-cli image-to-image \
+     --model stabilityai/stable-diffusion-xl-refiner-1.0 \
+     --image input.jpg \
+     --prompt "make it look like an oil painting" \
+     --strength 0.6 \
+     --output painted.png
+   ```
+
+5. Inpaint a masked region of an image
+   ```bash
+   diffusers-cli inpaint \
+     --model stabilityai/stable-diffusion-2-inpainting \
+     --image photo.png \
+     --mask mask.png \
+     --prompt "a golden retriever sitting on the bench" \
+     --output filled.png
+   ```
+
+6. Emit JSON for downstream tooling
+   ```bash
+   diffusers-cli text-to-image \
+     --model stabilityai/sdxl-turbo \
+     --prompt "neon city at night" \
+     --json
+   ```
+
+### Video
+
+7. Generate a short clip from a text prompt
+   ```bash
+   diffusers-cli text-to-video \
+     --model THUDM/CogVideoX-2b \
+     --prompt "a panda surfing on a wave at sunset" \
+     --num-frames 49 \
+     --fps 8 \
+     --output panda.mp4
+   ```
+
+8. Animate a single still image
+   ```bash
+   diffusers-cli image-to-video \
+     --model stabilityai/stable-video-diffusion-img2vid-xt \
+     --image still.png \
+     --prompt "subtle camera dolly forward" \
+     --num-frames 25 \
+     --output animated.mp4
+   ```
+
+### Audio
+
+9. Generate music or a sound effect from a text prompt
+   ```bash
+   diffusers-cli text-to-audio \
+     --model cvssp/audioldm2 \
+     --prompt "a calm piano melody in a quiet room" \
+     --audio-length-in-s 10 \
+     --output music.wav
+   ```
+
+### Modular pipelines
+
+Modular pipelines have an open-ended input surface defined by the block
+graph, so the CLI doesn't try to predict it — pass inputs verbatim.
+
+10. Run a modular pipeline with free-form inputs
+    ```bash
+    diffusers-cli modular \
+      --model your-username/my-modular-pipeline \
+      --inputs prompt="a calm landscape" \
+      --inputs num_inference_steps=25 \
+      --inputs-json '{"guidance_scale": 4.5}' \
+      --output-key image \
+      --output out.png
+    ```
+
+The output type is auto-detected — a PIL image (or list of PIL images)
+becomes PNG(s), a sequence of frames becomes an MP4, a numpy audio array
+becomes a WAV, and anything else is JSON-serialized.
+
+### Roadmap
+
+Open an issue if you'd like to help land one:
+
+- **Video**: `video-to-video`
+- **Conditioning**: ControlNet, T2I-Adapter, instruction editing (Flux-Kontext, InstructPix2Pix)
+- **Quantization / export**: `convert` (fp16/safetensors/GGUF), `quantize` (bitsandbytes, torchao)
diff --git a/src/diffusers/commands/agentic/__init__.py b/src/diffusers/commands/agentic/__init__.py
new file mode 100644
index 000000000000..9076bc7ee4c9
--- /dev/null
+++ b/src/diffusers/commands/agentic/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .app import register_agentic_commands
+
+
+__all__ = ["register_agentic_commands"]
diff --git a/src/diffusers/commands/agentic/_common.py b/src/diffusers/commands/agentic/_common.py
new file mode 100644
index 000000000000..6910f9daa4db
--- /dev/null
+++ b/src/diffusers/commands/agentic/_common.py
@@ -0,0 +1,533 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared helpers for the agentic CLI surface.
+
+These utilities are intentionally small and dependency-light. Each diffusers
+agentic subcommand should be able to be read end-to-end by an agent without
+needing to follow many layers of indirection.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from argparse import ArgumentParser, Namespace
+from pathlib import Path
+from typing import Any, Optional
+
+
+# Directory used when --output is not given (a relative path, resolved against the CWD).
+DEFAULT_OUTPUT_DIR = "outputs"
+# Accepted values for --dtype, --cpu-offload and --attention-backend respectively.
+DTYPE_CHOICES = ("auto", "float16", "fp16", "bfloat16", "bf16", "float32", "fp32")
+CPU_OFFLOAD_CHOICES = ("model", "group")
+ATTENTION_BACKEND_CHOICES = (
+    "default",
+    "flash_hub",
+    "flash_varlen_hub",
+    "flash_4_hub",
+    "sage_hub",
+)
+
+
+def add_loading_arguments(parser: ArgumentParser) -> None:
+    """Register the model-loading options every inference subcommand accepts.
+
+    Covers the model id (required), device, dtype, weight variant, revision,
+    Hub token and the trust-remote-code switch.
+    """
+    add = parser.add_argument
+    add("--model", "-m", required=True, help="Model id on the Hugging Face Hub or local path.")
+    add("--device", default=None, help="Device to run on (e.g. cpu, cuda, cuda:0, mps).")
+    add("--dtype", default="auto", choices=DTYPE_CHOICES, help="Torch dtype for pipeline weights.")
+    add("--variant", default=None, help='Optional weight variant (e.g. "fp16").')
+    add("--revision", default=None, help="Model revision (branch, tag, or commit SHA).")
+    add("--token", default=None, help="Hugging Face token for gated/private models.")
+    add("--trust-remote-code", action="store_true", help="Allow custom code from the Hub.")
+
+
+def add_optimization_arguments(parser: ArgumentParser) -> None:
+    """Register the opt-in performance and memory flags.
+
+    Every flag is off unless passed (``--attention-backend`` defaults to
+    ``"default"``, which leaves the backend untouched):
+
+    - ``--cpu-offload``: ``model`` or ``group`` offloading.
+    - ``--attention-backend``: one of ``ATTENTION_BACKEND_CHOICES``.
+    - ``--vae-tiling`` / ``--vae-slicing``: boolean switches.
+    - ``--context-parallel``: boolean switch.
+    """
+    offload_help = (
+        "Offload pipeline components to CPU during inference. "
+        "'model' uses enable_model_cpu_offload, "
+        "'group' uses pipeline.enable_group_offload(leaf_level, use_stream=True)."
+    )
+    backend_help = (
+        "Override the attention backend on the transformer/UNet. "
+        "Only Hub-hosted kernels are exposed — they auto-download on first "
+        "use and avoid a local install. 'default' leaves the backend untouched."
+    )
+    parallel_help = (
+        "Enable Ulysses-style context parallelism (ulysses_anything mode, supports arbitrary "
+        "sequence lengths). Requires launching the CLI under torchrun with ≥2 GPUs."
+    )
+    # Registration order below is the order shown in --help.
+    add = parser.add_argument
+    add("--cpu-offload", choices=CPU_OFFLOAD_CHOICES, default=None, help=offload_help)
+    add("--attention-backend", choices=ATTENTION_BACKEND_CHOICES, default="default", help=backend_help)
+    add("--vae-tiling", action="store_true", help="Enable VAE tiling (lower peak VRAM).")
+    add("--vae-slicing", action="store_true", help="Enable VAE slicing (lower peak VRAM).")
+    add("--context-parallel", action="store_true", help=parallel_help)
+
+
+def add_generation_arguments(parser: ArgumentParser) -> None:
+    """Register the prompt and sampling options shared by image/video commands.
+
+    Sampling knobs default to ``None``; ``--num-images`` defaults to 1.
+    """
+    add = parser.add_argument
+    add("--prompt", "-p", default=None, help="Text prompt.")
+    add("--negative-prompt", default=None, help="Negative text prompt.")
+    add("--num-inference-steps", type=int, default=None, help="Number of denoising steps.")
+    add("--guidance-scale", type=float, default=None, help="Classifier-free guidance scale.")
+    add("--height", type=int, default=None, help="Output height in pixels.")
+    add("--width", type=int, default=None, help="Output width in pixels.")
+    add("--num-images", type=int, default=1, help="Number of images to generate.")
+    add("--seed", type=int, default=None, help="Random seed for reproducibility.")
+    add("--pipeline-kwargs", default=None, help="JSON object of extra kwargs forwarded to the pipeline call.")
+
+
+def add_output_arguments(parser: ArgumentParser) -> None:
+    """Register the options that control where results go.
+
+    - ``--output`` / ``-o``: local file or directory.
+    - ``--push-to``: HF bucket id to upload the saved files to.
+    - ``--json``: print a machine-readable summary instead of plain text.
+    """
+    output_help = "Output file or directory. Defaults to ./outputs/<task>-<index>.<ext>."
+    push_to_help = (
+        "Upload the generated files to this HF bucket id after saving "
+        "(created if missing). When --remote is set, defaults to "
+        "<user>/jobs-artifacts; remote runs always write to that bucket "
+        "and fetch the results back locally."
+    )
+
+    add = parser.add_argument
+    add("--output", "-o", default=None, help=output_help)
+    add("--push-to", default=None, help=push_to_help)
+    add("--json", action="store_true", help="Emit a machine-readable JSON summary on stdout.")
+
+
+def add_remote_arguments(parser: ArgumentParser) -> None:
+    """Register the optional HF Jobs flags accepted by every inference subcommand.
+
+    - ``--remote``: run on HF Jobs instead of locally.
+    - ``--flavor`` / ``--timeout`` / ``--dependencies`` / ``--namespace``:
+      describe the job (hardware, time limit, extra pip deps, owner).
+    - ``--no-wait`` / ``--poll-interval``: whether and how often the caller
+      polls for completion.
+
+    Flags are registered in the order above, which is the ``--help`` order.
+    """
+    no_wait_help = (
+        "Don't wait for the --remote job to finish — submit and print the job id. "
+        "Default behaviour is to poll until completion and download outputs locally."
+    )
+
+    add = parser.add_argument
+    add("--remote", action="store_true", help="Submit this command to Hugging Face Jobs instead of running locally.")
+    add(
+        "--flavor",
+        default="a10g-small",
+        help="HF Jobs hardware flavor for --remote (e.g. a10g-small, a100-large, cpu-basic).",
+    )
+    add("--timeout", default=None, help="HF Jobs timeout for --remote (e.g. 30m, 2h).")
+    add(
+        "--dependencies",
+        action="append",
+        default=None,
+        help="Extra pip dependencies for the --remote job. Repeat to add multiple.",
+    )
+    add(
+        "--namespace",
+        default=None,
+        help="HF namespace to run the --remote job under (defaults to the current user).",
+    )
+    add("--no-wait", action="store_true", help=no_wait_help)
+    add(
+        "--poll-interval",
+        type=float,
+        default=5.0,
+        help="Seconds between job-status polls when waiting for --remote completion.",
+    )
+
+
+def resolve_dtype(name: Optional[str]):
+    """Map a CLI dtype string to a torch dtype.
+
+    Accepts both the long (``float16``) and short (``fp16``) spellings listed in
+    ``DTYPE_CHOICES``. Returns ``"auto"`` when the user wants diffusers to pick.
+
+    Raises:
+        ValueError: If ``name`` is not a recognised dtype spelling.
+    """
+    if name in (None, "auto"):
+        return "auto"
+
+    import torch
+
+    canonical = {"fp32": "float32", "fp16": "float16", "bf16": "bfloat16"}.get(name, name)
+    if canonical not in ("float32", "float16", "bfloat16"):
+        raise ValueError(f"Unknown dtype: {name}")
+    return getattr(torch, canonical)
+
+
+def resolve_device(name: Optional[str]) -> str:
+    """Return ``name`` when given, else the best available torch device.
+
+    Auto-detection prefers ``cuda``, then ``mps``, then falls back to ``cpu``.
+    """
+    if name:
+        return name
+    import torch
+
+    mps = getattr(torch.backends, "mps", None)
+    if torch.cuda.is_available():
+        return "cuda"
+    return "mps" if mps is not None and mps.is_available() else "cpu"
+
+
+def load_pipeline(args: Namespace, pipeline_cls_name: str) -> Any:
+    """Instantiate ``diffusers.<pipeline_cls_name>`` from ``args.model`` and prepare it to run.
+
+    Loading options (dtype, trust-remote-code and, when set, variant/revision/token)
+    come from ``args``. After loading, the pipeline is placed via ``map_to_device``
+    and the opt-in VAE tiling/slicing, attention-backend and context-parallel
+    settings are applied in that order.
+    """
+    import diffusers
+
+    pipeline_cls = getattr(diffusers, pipeline_cls_name)
+    load_kwargs: dict[str, Any] = {
+        "torch_dtype": resolve_dtype(args.dtype),
+        "trust_remote_code": args.trust_remote_code,
+    }
+    # Optional selectors are forwarded only when the user supplied them.
+    for option in ("variant", "revision", "token"):
+        supplied = getattr(args, option)
+        if supplied:
+            load_kwargs[option] = supplied
+
+    pipe = pipeline_cls.from_pretrained(args.model, **load_kwargs)
+    pipe = map_to_device(pipe, args, resolve_device(args.device))
+    # VAE helpers are enabled only when requested and exposed by this pipeline.
+    for enabled, method in ((args.vae_tiling, "enable_vae_tiling"), (args.vae_slicing, "enable_vae_slicing")):
+        if enabled and hasattr(pipe, method):
+            getattr(pipe, method)()
+    if args.attention_backend != "default":
+        _set_attention_backend(pipe, args.attention_backend)
+    if args.context_parallel:
+        _enable_context_parallel(pipe)
+    return pipe
+
+
+def map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
+    """Get the pipeline ready to run on ``device``.
+
+    Calls ``.to(device)`` by default; ``--cpu-offload model``/``group`` delegate
+    placement to the matching offload helper. Any other mode raises ``ValueError``.
+    """
+    if args.cpu_offload is None:
+        return pipeline.to(device)
+    if args.cpu_offload == "model":
+        pipeline.enable_model_cpu_offload(device=device)
+    elif args.cpu_offload == "group":
+        import torch
+
+        pipeline.enable_group_offload(
+            onload_device=torch.device(device), offload_type="leaf_level", use_stream=device.startswith("cuda")
+        )
+    else:
+        raise ValueError(f"Unknown --cpu-offload mode: {args.cpu_offload!r}")
+    return pipeline
+
+
+def _enable_context_parallel(pipeline: Any) -> None:
+    """Turn on Ulysses-style context parallelism for the pipeline's transformer/UNet.
+
+    The first of ``pipeline.transformer`` / ``pipeline.unet`` that exposes
+    ``enable_parallelism`` is configured; exits if torch.distributed is not initialized.
+    """
+    import torch
+
+    dist = torch.distributed
+    if not (dist.is_available() and dist.is_initialized()):
+        raise SystemExit(
+            "--context-parallel requires torch.distributed to be initialized. "
+            "Launch the CLI under torchrun, e.g.: "
+            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli <task> ...`."
+        )
+
+    from diffusers import ContextParallelConfig
+
+    config = ContextParallelConfig(ulysses_degree=dist.get_world_size(), ring_degree=1, ulysses_anything=True)
+    candidates = (getattr(pipeline, attr, None) for attr in ("transformer", "unet"))
+    target = next((m for m in candidates if m is not None and hasattr(m, "enable_parallelism")), None)
+    if target is not None:
+        target.enable_parallelism(config=config)
+
+
+def _set_attention_backend(pipeline: Any, backend: str) -> None:
+    """Best-effort: set ``backend`` on the first of transformer/unet that supports it; warn on failure."""
+    for module in (getattr(pipeline, attr, None) for attr in ("transformer", "unet")):
+        if module is not None and hasattr(module, "set_attention_backend"):
+            try:
+                module.set_attention_backend(backend)
+            except (ValueError, ImportError, RuntimeError) as e:  # stderr keeps --json stdout clean
+                print(f"Warning: could not set attention backend {backend!r}: {e}", file=sys.stderr)
+            return
+
+
+def get_generator(seed: Optional[int], device: str):
+    """Return a torch ``Generator`` seeded with ``seed`` (on CPU when ``device`` is ``"mps"``), else ``None``."""
+    if seed is not None:
+        import torch
+
+        return torch.Generator(device="cpu" if device == "mps" else device).manual_seed(seed)
+    return None
+
+
+def parse_pipeline_kwargs(raw: Optional[str]) -> dict[str, Any]:
+    """Decode ``--pipeline-kwargs`` into a dict; empty input gives ``{}``, bad input exits."""
+    try:
+        decoded = json.loads(raw) if raw else {}
+    except json.JSONDecodeError as err:
+        raise SystemExit(f"--pipeline-kwargs must be valid JSON: {err}") from err
+    # Only a JSON object is accepted; lists, numbers, strings etc. are rejected.
+    if isinstance(decoded, dict):
+        return decoded
+    raise SystemExit("--pipeline-kwargs must decode to a JSON object.")
+
+
+def default_output_paths(task: str, num: int, explicit: Optional[str], ext: str = "png") -> list[Path]:
+    """Resolve output file paths for ``num`` generated artifacts.
+
+    - ``explicit`` is ``None``: ``./outputs/<task>-<i>.<ext>``.
+    - ``explicit`` is an existing directory or ends with ``/`` (or the platform
+      separator): ``<explicit>/<task>-<i>.<ext>``.
+    - ``explicit`` is a file and ``num == 1``: exactly that file.
+    - ``explicit`` is a file and ``num > 1``: ``-<i>`` is inserted before the suffix.
+
+    Target directories are created as a side effect.
+    """
+    if explicit is None or explicit.endswith(("/", os.sep)) or Path(explicit).is_dir():
+        # "/" is accepted on every platform; checking os.sep alone misses it where os.sep is "\\".
+        directory = Path(DEFAULT_OUTPUT_DIR if explicit is None else explicit)
+        directory.mkdir(parents=True, exist_ok=True)
+        return [directory / f"{task}-{i}.{ext}" for i in range(num)]
+
+    target = Path(explicit)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    if num == 1:
+        return [target]
+    suffix = target.suffix or f".{ext}"
+    return [target.with_name(f"{target.stem}-{i}{suffix}") for i in range(num)]
+
+
+# Source for the diffusers install used by --remote jobs. While iterating on a
+# feature branch, point at the branch URL; once merged, switch back to a release
+# pin. ``--dependencies "diffusers @ git+..."`` on the local command appends
+# additional dependencies but does not replace this default install.
+DIFFUSERS_SOURCE = "diffusers @ git+https://github.com/huggingface/diffusers@diffuser-cli-for-agent"
+_DEFAULT_REMOTE_DEPS = (DIFFUSERS_SOURCE, "accelerate", "transformers", "safetensors")
+
+# Entry point for ``uv run`` inside the container. ``uv run`` accepts a file path,
+# URL, or *command*; passing the ``diffusers-cli`` console script name makes UV
+# install the deps above (which register the entry point) and then exec the CLI.
+_UV_RUNNER_SCRIPT = "diffusers-cli"
+
+# Env var that carries the per-run id into the remote job; used as the bucket upload prefix.
+RUN_ID_ENV = "DIFFUSERS_CLI_RUN_ID"
+
+# Namespace keys that control *how* a remote job runs locally, not what runs
+# inside the container. They are stripped when forwarding argv to the container.
+HF_JOBS_KEYS = frozenset(
+    {"remote", "flavor", "timeout", "dependencies", "namespace", "no_wait", "poll_interval", "func"}
+)
+
+
+def _forward_args(args: Namespace, task: str) -> list[str]:
+    """Reconstruct argv for the remote container from a parsed Namespace.
+
+    Skips the local-only job-control keys in ``HF_JOBS_KEYS`` and also ``token``:
+    the caller already hands the token to the job as the ``HF_TOKEN`` secret, so
+    it should not be repeated in the job's plain-text argv. Boolean flags are
+    emitted only when True; ``None`` values are dropped. List values become
+    repeated ``--flag value`` pairs.
+    """
+    out: list[str] = [task]
+    for key, value in vars(args).items():
+        if key in HF_JOBS_KEYS or key == "token" or value is None or value is False:
+            continue
+        flag = "--" + key.replace("_", "-")
+        if value is True:
+            out.append(flag)
+            continue
+        # Scalars are treated as a one-item list so both cases share one code path.
+        for item in value if isinstance(value, list) else [value]:
+            out.extend([flag, str(item)])
+    return out
+
+
+def maybe_submit_remote(args: Namespace, task: str) -> bool:
+    """If ``--remote`` was set, submit this invocation to HF Jobs and return True.
+
+    The local ``run()`` should bail immediately when this returns True (False: no ``--remote``).
+
+    Auto-defaults ``--push-to`` to ``<user>/jobs-artifacts`` (written back onto
+    ``args``) so the remote container has somewhere to write before tear-down.
+    By default, polls the job until a terminal stage and downloads the artifacts
+    back to the local output directory; pass ``--no-wait`` to fire-and-forget.
+    """
+    if not args.remote:
+        return False
+
+    import uuid
+
+    from huggingface_hub import HfApi, get_token, run_uv_job
+
+    hf_token = args.token or get_token()
+    api = HfApi(token=hf_token)
+
+    if not args.push_to:
+        args.push_to = f"{api.whoami()['name']}/jobs-artifacts"
+    # Per-run id handed to the container via env; uploads land under <run_id>/ in the bucket.
+    run_id = uuid.uuid4().hex[:12]
+    # Built after the --push-to default above so the resolved bucket id is forwarded too.
+    forwarded = _forward_args(args, task)
+    dependencies = list(_DEFAULT_REMOTE_DEPS)
+    if args.dependencies:
+        dependencies.extend(args.dependencies)
+
+    secrets = {"HF_TOKEN": hf_token} if hf_token else None
+    env = {RUN_ID_ENV: run_id}
+
+    job = run_uv_job(
+        script=_UV_RUNNER_SCRIPT,
+        script_args=forwarded,
+        dependencies=dependencies,
+        flavor=args.flavor,
+        timeout=args.timeout,
+        namespace=args.namespace,
+        secrets=secrets,
+        env=env,
+        token=hf_token,
+    )
+
+    payload: dict[str, Any] = {
+        "task": "remote-submit",
+        "job_id": getattr(job, "id", None),
+        "job_status": str(getattr(job, "status", "")),
+        "flavor": args.flavor,
+        "push_to": args.push_to,
+        "run_id": run_id,
+    }
+
+    if args.no_wait:
+        format_result(args, payload)
+        return True
+    # Blocking path: poll to a terminal stage, then pull this run's files from the bucket.
+    final_status = _wait_for_job(api, job.id, args.namespace, args.poll_interval)
+    payload["job_status"] = final_status
+    payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
+    format_result(args, payload)
+    return True
+
+
+def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
+    """Poll ``inspect_job`` until the job reaches a terminal stage; return that stage as a string."""
+    import time
+
+    while True:
+        info = api.inspect_job(job_id=job_id, namespace=namespace)
+        # If the stage is an Enum member, str() gives "Class.MEMBER"; compare on its value instead.
+        stage = str(getattr(info.status.stage, "value", info.status.stage)) if info.status else "UNKNOWN"
+        if stage in {"COMPLETED", "CANCELED", "ERROR", "DELETED"}:
+            return stage
+        time.sleep(poll_interval)
+
+
+def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optional[str]) -> list[str]:
+    """Fetch the files stored under ``<run_id>/`` in ``bucket_id`` into a local directory.
+
+    ``output`` (default ``./outputs``) is always used as a directory and created if
+    missing. Files are saved flat under it by base name; returns the local paths
+    as strings, or ``[]`` when the prefix holds no files.
+    """
+    from huggingface_hub import BucketFile
+
+    destination = Path(output or DEFAULT_OUTPUT_DIR)
+    destination.mkdir(parents=True, exist_ok=True)
+
+    listing = api.list_bucket_tree(bucket_id, prefix=f"{run_id}/", recursive=True)
+    pairs: list[tuple[Any, Path]] = [
+        (entry, destination / Path(entry.path).name) for entry in listing if isinstance(entry, BucketFile)
+    ]
+
+    # Nothing listed means nothing to fetch; skip the download call entirely.
+    if pairs:
+        api.download_bucket_files(bucket_id, files=pairs)
+    return [str(local) for _, local in pairs]
+
+
+def push_outputs(args: Namespace, saved_paths: list[str], task: str) -> Optional[dict[str, Any]]:
+    """Upload ``saved_paths`` to the ``--push-to`` bucket and summarise the upload.
+
+    Does nothing and returns ``None`` when ``--push-to`` is unset. Otherwise the
+    bucket is created if needed and every file is uploaded in one batch under
+    ``<DIFFUSERS_CLI_RUN_ID>/`` when that env var is set (remote job), else under
+    ``<task>/``. Returns ``{"bucket_id": ..., "uploaded": [hf://buckets/... URIs]}``.
+    """
+    bucket_id = args.push_to
+    if not bucket_id:
+        return None
+
+    from huggingface_hub import HfApi
+
+    folder = os.environ.get(RUN_ID_ENV) or task
+    destinations = [f"{folder}/{Path(local).name}" for local in saved_paths]
+
+    client = HfApi(token=args.token)
+    client.create_bucket(bucket_id, exist_ok=True)
+    client.batch_bucket_files(bucket_id, add=list(zip(saved_paths, destinations)))
+
+    uploaded = [f"hf://buckets/{bucket_id}/{dest}" for dest in destinations]
+    return {"bucket_id": bucket_id, "uploaded": uploaded}
+
+
+def format_result(args: Namespace, payload: dict[str, Any]) -> None:
+    """Write ``payload`` to stdout as one JSON line (``--json``) or as plain text.
+
+    In text mode each entry of ``payload["outputs"]`` is printed on its own
+    line; when that list is missing or empty the whole payload dict is printed.
+    """
+    if not args.json:
+        # No saved files to list -> fall back to printing the raw payload.
+        for line in payload.get("outputs", []) or [payload]:
+            print(line)
+        return
+    json.dump(payload, sys.stdout, default=str)
+    sys.stdout.write("\n")
diff --git a/src/diffusers/commands/agentic/app.py b/src/diffusers/commands/agentic/app.py
new file mode 100644
index 000000000000..3ca7b50ae1ed
--- /dev/null
+++ b/src/diffusers/commands/agentic/app.py
@@ -0,0 +1,38 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Single integration point for the agentic CLI.
+
+Removing the call to ``register_agentic_commands`` from
+``diffusers_cli.py`` disables the entire surface with no side effects.
+"""
+
+from __future__ import annotations
+
+from argparse import _SubParsersAction
+
+from . import audio as audio_commands
+from . import image as image_commands
+from . import modular as modular_commands
+from . import tasks as tasks_commands
+from . import video as video_commands
+
+
+def register_agentic_commands(subparsers: _SubParsersAction) -> None:
+    """Attach all agentic subcommands to ``subparsers``.
+
+    Modules register in a fixed order: image, video, audio, modular, tasks.
+    """
+    for module in (image_commands, video_commands, audio_commands, modular_commands, tasks_commands):
+        module.register(subparsers)
diff --git a/src/diffusers/commands/agentic/audio.py b/src/diffusers/commands/agentic/audio.py
new file mode 100644
index 000000000000..42c2fd0da210
--- /dev/null
+++ b/src/diffusers/commands/agentic/audio.py
@@ -0,0 +1,133 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Audio-generation subcommands: text-to-audio."""
+
+from __future__ import annotations
+
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+
+from .. import BaseDiffusersCLICommand
+from . import _common
+
+
+def register(subparsers: _SubParsersAction) -> None:
+    Text2AudioCommand.register_subcommand(subparsers)
+
+
+def _save_audio(audios, sampling_rate: int, args: Namespace, task: str) -> list[str]:
+    """Save one or more audio arrays as WAV files; float data is stored as int16."""
+    import numpy as np
+    from scipy.io.wavfile import write as wavfile_write
+
+    paths = _common.default_output_paths(task, len(audios), args.output, ext="wav")
+    saved: list[str] = []
+    for audio, path in zip(audios, paths):
+        data = np.asarray(audio)
+        if data.dtype.kind == "f":
+            # Widen float16 first: there 1.0 * 32767 rounds to 32768 and overflows int16.
+            data = data.astype(np.result_type(data.dtype, np.float32), copy=False)
+            data = (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
+        if data.ndim > 1 and data.shape[0] < data.shape[-1]:
+            data = data.T  # ``(channels, samples)`` → ``(samples, channels)`` for scipy.
+        wavfile_write(str(path), sampling_rate, data)
+        saved.append(str(path))
+    return saved
+
+
+class Text2AudioCommand(BaseDiffusersCLICommand):
+    task = "text-to-audio"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "text-to-audio",
+            help="Generate an audio clip (music or sound) from a text prompt.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _common.add_remote_arguments(parser)
+        parser.add_argument(
+            "--audio-length-in-s",
+            type=float,
+            default=None,
+            help="Duration of the generated audio in seconds.",
+        )
+        parser.add_argument(
+            "--sampling-rate",
+            type=int,
+            default=None,
+            help="Override the sampling rate written to the WAV file.",
+        )
+        _common.add_output_arguments(parser)
+        parser.set_defaults(func=Text2AudioCommand)
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        if _common.maybe_submit_remote(self.args, self.task):
+            return
+        pipeline = _common.load_pipeline(self.args, "DiffusionPipeline")
+
+        call_kwargs: dict = {}
+        if self.args.prompt is not None:
+            call_kwargs["prompt"] = self.args.prompt
+        if self.args.negative_prompt is not None:
+            call_kwargs["negative_prompt"] = self.args.negative_prompt
+        if self.args.num_inference_steps is not None:
+            call_kwargs["num_inference_steps"] = self.args.num_inference_steps
+        if self.args.guidance_scale is not None:
+            call_kwargs["guidance_scale"] = self.args.guidance_scale
+        if self.args.audio_length_in_s is not None:
+            call_kwargs["audio_length_in_s"] = self.args.audio_length_in_s
+        if self.args.num_images != 1:
+            call_kwargs["num_waveforms_per_prompt"] = self.args.num_images
+
+        generator = _common.get_generator(self.args.seed, pipeline.device.type)
+        if generator is not None:
+            call_kwargs["generator"] = generator
+
+        call_kwargs.update(_common.parse_pipeline_kwargs(self.args.pipeline_kwargs))
+
+        result = pipeline(**call_kwargs)
+        audios = getattr(result, "audios", None)
+        if audios is None:
+            audios = result[0]
+
+        sampling_rate = self.args.sampling_rate
+        if sampling_rate is None:
+            pipeline_sr = getattr(pipeline, "sampling_rate", None)
+            if isinstance(pipeline_sr, int):
+                sampling_rate = pipeline_sr
+            else:
+                vocoder_config = getattr(getattr(pipeline, "vocoder", None), "config", None)
+                sampling_rate = getattr(vocoder_config, "sampling_rate", 16000) if vocoder_config else 16000
+
+        saved = _save_audio(audios, sampling_rate, self.args, self.task)
+        pushed = _common.push_outputs(self.args, saved, self.task)
+
+        _common.format_result(
+            self.args,
+            {
+                "task": self.task,
+                "model": self.args.model,
+                "device": pipeline.device.type,
+                "outputs": saved,
+                "pushed": pushed,
+                "sampling_rate": sampling_rate,
+                "seed": self.args.seed,
+            },
+        )
diff --git a/src/diffusers/commands/agentic/image.py b/src/diffusers/commands/agentic/image.py
new file mode 100644
index 000000000000..94fdd81d6953
--- /dev/null
+++ b/src/diffusers/commands/agentic/image.py
@@ -0,0 +1,170 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Image-generation subcommands: text-to-image, image-to-image, inpaint."""
+
+from __future__ import annotations
+
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+
+from diffusers.utils import load_image
+
+from .. import BaseDiffusersCLICommand
+from . import _common
+
+
+def register(subparsers: _SubParsersAction) -> None:
+    Text2ImageCommand.register_subcommand(subparsers)
+    Image2ImageCommand.register_subcommand(subparsers)
+    InpaintCommand.register_subcommand(subparsers)
+
+
+def _build_call_kwargs(args: Namespace, pipeline) -> dict:
+    """Translate parsed CLI flags into keyword arguments for the pipeline call.
+
+    Flags left at ``None`` are omitted; ``num_images_per_prompt`` is sent only
+    when ``args.num_images`` differs from 1. The generator returned by
+    ``_common.get_generator`` is added unless it is ``None``, and the parsed
+    ``args.pipeline_kwargs`` entries are merged last, so they win on key clashes.
+    """
+    forwarded = ("prompt", "negative_prompt", "num_inference_steps", "guidance_scale", "height", "width")
+    kwargs: dict = {}
+    for name in forwarded:
+        value = getattr(args, name)
+        if value is not None:
+            kwargs[name] = value
+    if args.num_images != 1:
+        kwargs["num_images_per_prompt"] = args.num_images
+
+    rng = _common.get_generator(args.seed, pipeline.device.type)
+    if rng is not None:
+        kwargs["generator"] = rng
+
+    kwargs.update(_common.parse_pipeline_kwargs(args.pipeline_kwargs))
+    return kwargs
+
+
+def _save_images(images, task: str, args: Namespace) -> list[str]:
+    """Save each image to its output path (resolved with ``ext="png"``); return the paths as strings."""
+    targets = _common.default_output_paths(task, len(images), args.output, ext="png")
+    pairs = list(zip(images, targets))
+    for picture, target in pairs:
+        picture.save(target)
+    return [str(target) for _, target in pairs]
+
+
+class _BaseImageCommand(BaseDiffusersCLICommand):
+    """Shared flow for the image subcommands.
+
+    Subclasses set ``task`` and ``auto_cls`` and may override ``_attach_inputs``.
+    """
+
+    task: str = ""
+    auto_cls: str = ""
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        """Generate images locally, save them, optionally push them, then report.
+
+        Returns early when ``_common.maybe_submit_remote`` returns a truthy value.
+        """
+        args, task = self.args, self.task
+        if _common.maybe_submit_remote(args, task):
+            return
+
+        pipeline = _common.load_pipeline(args, self.auto_cls)
+        call_kwargs = _build_call_kwargs(args, pipeline)
+        self._attach_inputs(call_kwargs)
+        saved = _save_images(pipeline(**call_kwargs).images, task, args)
+        pushed = _common.push_outputs(args, saved, task)
+
+        summary = {"task": task, "model": args.model, "device": pipeline.device.type}
+        summary.update(outputs=saved, pushed=pushed, seed=args.seed)
+        _common.format_result(args, summary)
+
+    def _attach_inputs(self, call_kwargs: dict) -> None:  # noqa: B027
+        """No-op hook; subclasses override it to add image/mask inputs to ``call_kwargs``."""
+
+
+class Text2ImageCommand(_BaseImageCommand):
+    task = "text-to-image"
+    auto_cls = "AutoPipelineForText2Image"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "text-to-image",
+            help="Generate an image from a text prompt.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+        parser.set_defaults(func=Text2ImageCommand)
+
+
+class Image2ImageCommand(_BaseImageCommand):
+    task = "image-to-image"
+    auto_cls = "AutoPipelineForImage2Image"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "image-to-image",
+            help="Transform an input image conditioned on a text prompt.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+
+        parser.add_argument("--image", required=True, help="Path or URL to the conditioning image.")
+        parser.add_argument("--strength", type=float, default=None, help="How much to transform the input (0-1).")
+        parser.set_defaults(func=Image2ImageCommand)
+
+    def _attach_inputs(self, call_kwargs: dict) -> None:
+        call_kwargs["image"] = load_image(self.args.image)
+        if self.args.strength is not None:
+            call_kwargs["strength"] = self.args.strength
+
+
+class InpaintCommand(_BaseImageCommand):
+    task = "inpaint"
+    auto_cls = "AutoPipelineForInpainting"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "inpaint",
+            help="Inpaint a region of an image defined by a mask.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+        parser.add_argument("--image", required=True, help="Path or URL to the base image.")
+        parser.add_argument("--mask", required=True, help="Path or URL to the mask image (white=inpaint).")
+        parser.add_argument("--strength", type=float, default=None, help="Strength of the inpainting transform (0-1).")
+        parser.set_defaults(func=InpaintCommand)
+
+    def _attach_inputs(self, call_kwargs: dict) -> None:
+        call_kwargs["image"] = load_image(self.args.image)
+        call_kwargs["mask_image"] = load_image(self.args.mask)
+        if self.args.strength is not None:
+            call_kwargs["strength"] = self.args.strength
diff --git a/src/diffusers/commands/agentic/modular.py b/src/diffusers/commands/agentic/modular.py
new file mode 100644
index 000000000000..304c8b17329f
--- /dev/null
+++ b/src/diffusers/commands/agentic/modular.py
@@ -0,0 +1,249 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""``diffusers-cli modular`` — run a custom ModularPipeline.
+
+Modular pipelines don't fit the ``task -> AutoPipelineFor*`` taxonomy: the
+pipeline blocks themselves define the surface. This command takes free-form
+``--inputs key=value`` (or a JSON blob) and forwards them to the modular
+pipeline call, then auto-detects the result type so the agent doesn't need
+to know whether it asked for an image, video, or audio output.
+"""
+
+from __future__ import annotations
+
+import json
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+from pathlib import Path
+from typing import Any
+
+from .. import BaseDiffusersCLICommand
+from . import _common
+
+
+def register(subparsers: _SubParsersAction) -> None:
+    ModularCommand.register_subcommand(subparsers)
+
+
+def _parse_inputs(args: Namespace) -> dict[str, Any]:
+    """Combine ``--inputs-json`` and repeated ``--inputs key=value`` into one dict.
+
+    ``--inputs`` values are JSON-decoded when possible, else kept as raw text,
+    and override ``--inputs-json`` keys. Raises ``SystemExit`` on invalid JSON,
+    a non-object JSON blob, or an entry with no ``=`` or an empty key.
+    """
+    out: dict[str, Any] = {}
+    if args.inputs_json:
+        try:
+            decoded = json.loads(args.inputs_json)
+        except json.JSONDecodeError as e:
+            raise SystemExit(f"--inputs-json must be valid JSON: {e}") from e
+        if not isinstance(decoded, dict):
+            raise SystemExit("--inputs-json must decode to a JSON object.")
+        out.update(decoded)
+
+    for pair in args.inputs or []:
+        key, sep, raw = pair.partition("=")
+        if not sep or not key:  # reject "novalue" and "=value" (empty key) alike
+            raise SystemExit(f"--inputs entries must look like key=value, got {pair!r}.")
+        try:
+            out[key] = json.loads(raw)
+        except json.JSONDecodeError:
+            out[key] = raw
+    return out
+
+
+def _save_auto(value: Any, args: Namespace, task: str) -> list[str]:
+    """Persist ``value`` in a format chosen from its runtime type; return the paths.
+
+    Tried in order: PIL image(s) → PNG, frame sequence → MP4, numpy audio → WAV,
+    otherwise a JSON dump. NOTE(review): PIL lists always match the first check.
+    """
+    images = _as_pil_list(value)
+    if images is not None:
+        targets = _common.default_output_paths(task, len(images), args.output, ext="png")
+        for image, target in zip(images, targets):
+            image.save(target)
+        return [str(target) for target in targets]
+
+    clip = _as_frame_sequence(value)
+    if clip is not None:
+        from diffusers.utils import export_to_video
+        target = str(_common.default_output_paths(task, 1, args.output, ext="mp4")[0])
+        export_to_video(clip, target, fps=args.fps)
+        return [target]
+
+    waveforms = _as_audio_arrays(value)
+    if waveforms is not None:
+        from .audio import _save_audio
+        return _save_audio(waveforms, args.sampling_rate or 16000, args, task)
+    target = _common.default_output_paths(task, 1, args.output, ext="json")[0]
+    Path(target).write_text(json.dumps(value, default=str, indent=2))
+    return [str(target)]
+
+
+def _as_pil_list(value: Any):
+    """Return ``value`` as a list of PIL images, or ``None`` if it is not image-like or PIL is missing."""
+    try:
+        from PIL.Image import Image as PILImage
+    except ImportError:
+        return None
+    items = [value] if isinstance(value, PILImage) else value
+    if not isinstance(items, (list, tuple)) or not items:
+        return None
+    return list(items) if all(isinstance(item, PILImage) for item in items) else None
+
+
+def _as_frame_sequence(value: Any):
+    """Return ``value`` as a list of video frames, or ``None`` if it is not a clip.
+
+    A clip is a list/tuple of >= 2 items whose first item is a PIL image or a numpy
+    array of rank >= 3; lower-rank arrays are audio (see ``_as_audio_arrays``).
+    """
+    try:
+        from PIL.Image import Image as PILImage
+    except ImportError:
+        PILImage = None  # type: ignore[assignment]
+
+    if isinstance(value, (list, tuple)) and len(value) >= 2:
+        first = value[0]
+        if PILImage is not None and isinstance(first, PILImage):
+            return list(value)
+        try:
+            import numpy as np
+        except ImportError:
+            return None
+        if isinstance(first, np.ndarray) and first.ndim >= 3:
+            return list(value)
+    return None
+
+
+def _as_audio_arrays(value: Any):
+    """Return ``value`` as a list of numpy arrays to save as audio, or ``None``.
+
+    Accepts a single array of rank <= 2, or a non-empty list/tuple made up
+    only of arrays. Also returns ``None`` when numpy cannot be imported.
+    """
+    try:
+        import numpy as np
+    except ImportError:
+        return None
+    if isinstance(value, np.ndarray):
+        return [value] if value.ndim <= 2 else None
+    is_batch = isinstance(value, (list, tuple)) and len(value) > 0
+    return list(value) if is_batch and all(isinstance(item, np.ndarray) for item in value) else None
+
+
+class ModularCommand(BaseDiffusersCLICommand):
+    task = "modular"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "modular",
+            help="Run a custom ModularPipeline with free-form inputs.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        parser.add_argument(
+            "--inputs",
+            action="append",
+            default=None,
+            help='Inputs as key=value (value JSON-decoded when possible). Repeat to add multiple.',
+        )
+        parser.add_argument(
+            "--inputs-json",
+            default=None,
+            help="Inputs as a single JSON object (merged with any --inputs entries).",
+        )
+        parser.add_argument(
+            "--output-key",
+            default=None,
+            help='Optional intermediate to extract (e.g. "image", "video"). '
+            "Forwarded to ModularPipeline as the ``output`` argument.",
+        )
+        parser.add_argument(
+            "--fps",
+            type=int,
+            default=8,
+            help="FPS used when the output happens to be a frame sequence.",
+        )
+        parser.add_argument(
+            "--sampling-rate",
+            type=int,
+            default=None,
+            help="Sample rate used when the output happens to be an audio array.",
+        )
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+        parser.set_defaults(func=ModularCommand)
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        if _common.maybe_submit_remote(self.args, self.task):
+            return
+
+        pipeline = self._load_modular()
+        call_kwargs = _parse_inputs(self.args)
+        if self.args.output_key is not None:
+            call_kwargs["output"] = self.args.output_key
+
+        result = pipeline(**call_kwargs)
+        saved = _save_auto(result, self.args, self.task)
+        pushed = _common.push_outputs(self.args, saved, self.task)
+
+        _common.format_result(
+            self.args,
+            {
+                "task": self.task,
+                "model": self.args.model,
+                "pipeline_class": type(pipeline).__name__,
+                "outputs": saved,
+                "pushed": pushed,
+                "output_key": self.args.output_key,
+            },
+        )
+
+    def _load_modular(self):
+        from diffusers import ModularPipeline
+
+        dtype = _common.resolve_dtype(self.args.dtype)
+        device = _common.resolve_device(self.args.device)
+
+        from_pretrained_kwargs: dict[str, Any] = {
+            "trust_remote_code": self.args.trust_remote_code,
+        }
+        if dtype != "auto":
+            from_pretrained_kwargs["torch_dtype"] = dtype
+        if self.args.revision:
+            from_pretrained_kwargs["revision"] = self.args.revision
+        if self.args.token:
+            from_pretrained_kwargs["token"] = self.args.token
+
+        pipeline = ModularPipeline.from_pretrained(self.args.model, **from_pretrained_kwargs)
+        if not hasattr(pipeline, "to"):
+            return pipeline
+
+        pipeline = _common.map_to_device(pipeline, self.args, device)
+        if self.args.vae_tiling and hasattr(pipeline, "enable_vae_tiling"):
+            pipeline.enable_vae_tiling()
+        if self.args.vae_slicing and hasattr(pipeline, "enable_vae_slicing"):
+            pipeline.enable_vae_slicing()
+        if self.args.attention_backend != "default":
+            _common._set_attention_backend(pipeline, self.args.attention_backend)
+        if self.args.context_parallel:
+            _common._enable_context_parallel(pipeline)
+        return pipeline
diff --git a/src/diffusers/commands/agentic/tasks.py b/src/diffusers/commands/agentic/tasks.py
new file mode 100644
index 000000000000..be2999469783
--- /dev/null
+++ b/src/diffusers/commands/agentic/tasks.py
@@ -0,0 +1,91 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""``diffusers-cli tasks`` — list every registered agentic subcommand.
+
+Designed so an agent can discover the surface area without parsing
+``--help`` output.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+
+from .. import BaseDiffusersCLICommand
+
+
+AGENTIC_TASK_NAMES: tuple[str, ...] = (
+    "text-to-image",
+    "image-to-image",
+    "inpaint",
+    "text-to-video",
+    "image-to-video",
+    "text-to-audio",
+    "modular",
+)
+
+
+def register(subparsers: _SubParsersAction) -> None:
+    ListTasksCommand.register_subcommand(subparsers, subparsers)
+
+
+def list_agentic_tasks(subparsers: _SubParsersAction) -> list[dict]:
+    """List the agentic tasks that are registered on ``subparsers``.
+
+    Walks ``AGENTIC_TASK_NAMES`` in order, keeps the names present in the
+    subparsers' ``choices``, and pairs each with the help text recorded in
+    ``_choices_actions`` (``""`` when no entry exists for that name).
+    """
+    registered = getattr(subparsers, "choices", {}) or {}
+    # ``_choices_actions`` is a private argparse attribute; it is read defensively via getattr.
+    help_by_name = {action.dest: action.help for action in getattr(subparsers, "_choices_actions", [])}
+
+    return [
+        {"name": name, "description": help_by_name.get(name, "")}
+        for name in AGENTIC_TASK_NAMES
+        if name in registered
+    ]
+
+
+class ListTasksCommand(BaseDiffusersCLICommand):
+    """``tasks`` subcommand: print the registered agentic tasks as text rows or JSON."""
+
+    task = "tasks"
+    # Captured by ``register_subcommand`` so ``run`` can introspect the parser without ``main``'s locals.
+    _root_subparsers: _SubParsersAction | None = None
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction, root_subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "tasks", help="List every registered agentic task with a one-line description."
+        )
+        parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")
+        parser.set_defaults(func=ListTasksCommand)
+        ListTasksCommand._root_subparsers = root_subparsers
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        """Print one ``name  description`` row per task, or ``{"tasks": [...]}`` with ``--json``."""
+        found = list_agentic_tasks(self._root_subparsers) if self._root_subparsers else []
+        if self.args.json:
+            json.dump({"tasks": found}, sys.stdout)
+            sys.stdout.write("\n")
+            return
+        pad = max((len(item["name"]) for item in found), default=0)
+        for item in found:
+            print(f"{item['name']:<{pad}}  {item['description'] or ''}")
diff --git a/src/diffusers/commands/agentic/video.py b/src/diffusers/commands/agentic/video.py
new file mode 100644
index 000000000000..e4dcdc4bb8a2
--- /dev/null
+++ b/src/diffusers/commands/agentic/video.py
@@ -0,0 +1,147 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Video-generation subcommands: text-to-video, image-to-video.
+
+There is no AutoPipeline for video, so these commands load via
+``DiffusionPipeline`` and rely on the repo's ``model_index.json`` to pick
+the right pipeline class (CogVideoX, Hunyuan, LTX, Wan, etc.).
+"""
+
+from __future__ import annotations
+
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+
+from diffusers.utils import load_image
+
+from .. import BaseDiffusersCLICommand
+from . import _common
+
+
+def register(subparsers: _SubParsersAction) -> None:
+    Text2VideoCommand.register_subcommand(subparsers)
+    Image2VideoCommand.register_subcommand(subparsers)
+
+
+def _add_video_arguments(parser: ArgumentParser) -> None:
+    parser.add_argument("--num-frames", type=int, default=None, help="Number of frames to generate.")
+    parser.add_argument("--fps", type=int, default=8, help="Frames per second for the output video.")
+
+
+def _build_call_kwargs(args: Namespace, pipeline) -> dict:
+    """Collect the keyword arguments for a video pipeline call from CLI flags.
+
+    Only flags that are not ``None`` are forwarded. The generator returned by
+    ``_common.get_generator`` is included unless it is ``None``; parsed
+    ``args.pipeline_kwargs`` entries are applied last and win on key clashes.
+    """
+    candidates = {
+        "prompt": args.prompt,
+        "negative_prompt": args.negative_prompt,
+        "num_inference_steps": args.num_inference_steps,
+        "guidance_scale": args.guidance_scale,
+        "height": args.height,
+        "width": args.width,
+        "num_frames": args.num_frames,
+        "generator": _common.get_generator(args.seed, pipeline.device.type),
+    }
+
+    # Unset flags are dropped, so the callee's own defaults apply for them.
+    kwargs: dict = {key: value for key, value in candidates.items() if value is not None}
+
+    kwargs.update(_common.parse_pipeline_kwargs(args.pipeline_kwargs))
+    return kwargs
+
+
+def _save_video(frames, args: Namespace, task: str) -> str:
+    """Export ``frames`` at ``args.fps`` to the output path resolved with ``ext="mp4"``; return that path."""
+    from diffusers.utils import export_to_video
+    target = str(_common.default_output_paths(task, 1, args.output, ext="mp4")[0])
+    export_to_video(frames, target, fps=args.fps)
+    return target
+
+
+class _BaseVideoCommand(BaseDiffusersCLICommand):
+    """Shared flow for the video subcommands.
+
+    Subclasses set ``task`` and may override ``_attach_inputs``.
+    """
+
+    task: str = ""
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        """Generate one clip, export it, optionally push it, and print a summary."""
+        if _common.maybe_submit_remote(self.args, self.task):
+            return
+
+        pipeline = _common.load_pipeline(self.args, "DiffusionPipeline")
+        call_kwargs = _build_call_kwargs(self.args, pipeline)
+        self._attach_inputs(call_kwargs)
+
+        result = pipeline(**call_kwargs)
+        # ``.frames`` and, for tuple results (``return_dict=False``), item 0 both hold
+        # the batch of clips; take the first clip in either case.
+        batch = result.frames if hasattr(result, "frames") else result[0]
+        out_path = _save_video(batch[0], self.args, self.task)
+        pushed = _common.push_outputs(self.args, [out_path], self.task)
+
+        payload = {"task": self.task, "model": self.args.model, "device": pipeline.device.type}
+        payload.update(outputs=[out_path], pushed=pushed, fps=self.args.fps, seed=self.args.seed)
+        _common.format_result(self.args, payload)
+
+    def _attach_inputs(self, call_kwargs: dict) -> None:  # noqa: B027
+        """Hook for subclasses to attach conditioning inputs."""
+
+
+class Text2VideoCommand(_BaseVideoCommand):
+    task = "text-to-video"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "text-to-video",
+            help="Generate a video clip from a text prompt.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _add_video_arguments(parser)
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+        parser.set_defaults(func=Text2VideoCommand)
+
+
+class Image2VideoCommand(_BaseVideoCommand):
+    task = "image-to-video"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "image-to-video",
+            help="Generate a video clip conditioned on an input image.",
+        )
+        _common.add_loading_arguments(parser)
+        _common.add_optimization_arguments(parser)
+        _common.add_generation_arguments(parser)
+        _add_video_arguments(parser)
+        _common.add_remote_arguments(parser)
+        _common.add_output_arguments(parser)
+        parser.add_argument("--image", required=True, help="Path or URL to the conditioning image.")
+        parser.set_defaults(func=Image2VideoCommand)
+
+    def _attach_inputs(self, call_kwargs: dict) -> None:
+        call_kwargs["image"] = load_image(self.args.image)

From 4194c3980256be65cbafc762705b27e095a94eb6 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 2 Jun 2026 00:22:19 +0530
Subject: [PATCH 03/30] update

---
 src/diffusers/commands/agentic/_common.py | 55 +++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/commands/agentic/_common.py b/src/diffusers/commands/agentic/_common.py
index 6910f9daa4db..d81937a25b76 100644
--- a/src/diffusers/commands/agentic/_common.py
+++ b/src/diffusers/commands/agentic/_common.py
@@ -369,6 +369,16 @@ def default_output_paths(task: str, num: int, explicit: Optional[str], ext: str
 )
 
 
+def _rewrite_model_arg(forwarded: list[str], new_path: str) -> list[str]:
+    """Return a copy of ``forwarded`` with the ``--model`` value replaced by ``new_path``."""
+    out = list(forwarded)
+    for i, token in enumerate(out):
+        if token in ("--model", "-m") and i + 1 < len(out):
+            out[i + 1] = new_path
+            return out
+    return out
+
+
 def _forward_args(args: Namespace, task: str) -> list[str]:
     """Reconstruct argv for the remote container from a parsed Namespace.
 
@@ -409,6 +419,11 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
 
     from huggingface_hub import HfApi, get_token, run_uv_job
 
+    try:
+        from huggingface_hub import Volume
+    except ImportError:
+        Volume = None
+
     hf_token = args.token or get_token()
     api = HfApi(token=hf_token)
 
@@ -423,9 +438,15 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
         dependencies.extend(args.dependencies)
 
     secrets = {"HF_TOKEN": hf_token} if hf_token else None
-    env = {RUN_ID_ENV: run_id}
+    env = {
+        RUN_ID_ENV: run_id,
+        "HF_ENABLE_PARALLEL_LOADING": "1",  # thread-pool the safetensors load step
+    }
 
-    job = run_uv_job(
+    # Mount the model repo into the job's filesystem so the container reads it
+    # from local disk instead of downloading on every run. Requires
+    # huggingface_hub >= 1.16. Falls back to the download path otherwise.
+    run_uv_job_kwargs: dict[str, Any] = dict(
         script=_UV_RUNNER_SCRIPT,
         script_args=forwarded,
         dependencies=dependencies,
@@ -436,6 +457,14 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
         env=env,
         token=hf_token,
     )
+    if Volume is not None and not Path(args.model).exists():
+        mount_path = "/model"
+        run_uv_job_kwargs["volumes"] = [
+            Volume(type="model", source=args.model, mount_path=mount_path)
+        ]
+        run_uv_job_kwargs["script_args"] = _rewrite_model_arg(forwarded, mount_path)
+
+    job = run_uv_job(**run_uv_job_kwargs)
 
     payload: dict[str, Any] = {
         "task": "remote-submit",
@@ -450,6 +479,12 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
         format_result(args, payload)
         return True
 
+    print(
+        f"[diffusers-cli] submitted job {job.id} (run_id={run_id}); "
+        f"watch at {getattr(job, 'url', 'https://huggingface.co/jobs')}",
+        file=sys.stderr,
+        flush=True,
+    )
     final_status = _wait_for_job(api, job.id, args.namespace, args.poll_interval)
     payload["job_status"] = final_status
     payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
@@ -458,14 +493,28 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
 
 
 def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
-    """Poll ``inspect_job`` until the job reaches a terminal stage; return that stage as a string."""
+    """Poll ``inspect_job`` until the job reaches a terminal stage; return that stage as a string.
+
+    Prints a heartbeat each poll and a labelled line on every stage transition so
+    the local terminal isn't silent for the multi-minute install/download/run
+    window of a remote inference job.
+    """
     import time
 
     terminal = {"COMPLETED", "CANCELED", "ERROR", "DELETED"}
+    last_stage: Optional[str] = None
     while True:
         info = api.inspect_job(job_id=job_id, namespace=namespace)
         stage = str(info.status.stage) if info.status else "UNKNOWN"
+        if stage != last_stage:
+            if last_stage is not None:
+                print("", file=sys.stderr, flush=True)
+            print(f"[diffusers-cli] job {job_id}: {stage}", file=sys.stderr, flush=True)
+            last_stage = stage
+        else:
+            print(".", end="", file=sys.stderr, flush=True)
         if stage in terminal:
+            print("", file=sys.stderr, flush=True)
             return stage
         time.sleep(poll_interval)
 

From d8eb952a1546cb881e8824250dd8f78669ed1dc5 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 2 Jun 2026 00:26:01 +0530
Subject: [PATCH 04/30] Add torch and torchvision to default remote deps

---
 src/diffusers/commands/agentic/_common.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/commands/agentic/_common.py b/src/diffusers/commands/agentic/_common.py
index d81937a25b76..bd0cb1fd0de4 100644
--- a/src/diffusers/commands/agentic/_common.py
+++ b/src/diffusers/commands/agentic/_common.py
@@ -352,7 +352,14 @@ def default_output_paths(task: str, num: int, explicit: Optional[str], ext: str
 # pin. ``--dependencies "diffusers @ git+..."`` on the local command appends
 # additional dependencies but does not replace this default install.
 DIFFUSERS_SOURCE = "diffusers @ git+https://github.com/huggingface/diffusers@diffuser-cli-for-agent"
-_DEFAULT_REMOTE_DEPS = (DIFFUSERS_SOURCE, "accelerate", "transformers", "safetensors")
+_DEFAULT_REMOTE_DEPS = (
+    DIFFUSERS_SOURCE,
+    "accelerate",
+    "transformers",
+    "safetensors",
+    "torch==2.10.*",
+    "torchvision",
+)
 
 # Entry point for ``uv run`` inside the container. ``uv run`` accepts a file path,
 # URL, or *command*; passing the ``diffusers-cli`` console script name makes UV
@@ -459,9 +466,7 @@ def maybe_submit_remote(args: Namespace, task: str) -> bool:
     )
     if Volume is not None and not Path(args.model).exists():
         mount_path = "/model"
-        run_uv_job_kwargs["volumes"] = [
-            Volume(type="model", source=args.model, mount_path=mount_path)
-        ]
+        run_uv_job_kwargs["volumes"] = [Volume(type="model", source=args.model, mount_path=mount_path)]
         run_uv_job_kwargs["script_args"] = _rewrite_model_arg(forwarded, mount_path)
 
     job = run_uv_job(**run_uv_job_kwargs)

From 95f33c7ddcde604ed576751cacdd3f67510e0560 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Wed, 3 Jun 2026 22:37:37 +0530
Subject: [PATCH 05/30] Remove agentic command package; add commands/inference.py

---
 src/diffusers/commands/agentic/README.md   | 246 ------
 src/diffusers/commands/agentic/__init__.py |  18 -
 src/diffusers/commands/agentic/_common.py  | 587 --------------
 src/diffusers/commands/agentic/app.py      |  38 -
 src/diffusers/commands/agentic/audio.py    | 133 ----
 src/diffusers/commands/agentic/image.py    | 170 -----
 src/diffusers/commands/agentic/modular.py  | 249 ------
 src/diffusers/commands/agentic/tasks.py    |  91 ---
 src/diffusers/commands/agentic/video.py    | 147 ----
 src/diffusers/commands/custom_blocks.py    | 114 ++-
 src/diffusers/commands/diffusers_cli.py    |   4 +-
 src/diffusers/commands/inference.py        | 846 +++++++++++++++++++++
 12 files changed, 898 insertions(+), 1745 deletions(-)
 delete mode 100644 src/diffusers/commands/agentic/README.md
 delete mode 100644 src/diffusers/commands/agentic/__init__.py
 delete mode 100644 src/diffusers/commands/agentic/_common.py
 delete mode 100644 src/diffusers/commands/agentic/app.py
 delete mode 100644 src/diffusers/commands/agentic/audio.py
 delete mode 100644 src/diffusers/commands/agentic/image.py
 delete mode 100644 src/diffusers/commands/agentic/modular.py
 delete mode 100644 src/diffusers/commands/agentic/tasks.py
 delete mode 100644 src/diffusers/commands/agentic/video.py
 create mode 100644 src/diffusers/commands/inference.py

diff --git a/src/diffusers/commands/agentic/README.md b/src/diffusers/commands/agentic/README.md
deleted file mode 100644
index 07f261692cd3..000000000000
--- a/src/diffusers/commands/agentic/README.md
+++ /dev/null
@@ -1,246 +0,0 @@
-# Agentic CLI for Diffusers
-
-Single-command access to common Diffusers use-cases. Designed for AI agents
-and humans who need to run image/video/audio generation **without writing
-Python scripts**.
-
-Every command below is reachable as `diffusers-cli <command>`. Run
-`diffusers-cli <command> --help` for full option documentation.
-
-## How it works
-
-The module integrates with the main CLI through a single function call in
-`diffusers_cli.py` — removing it disables everything with no side effects.
-
-```
-src/diffusers/commands/agentic/
-├── app.py          # register_agentic_commands(subparsers) — single integration point
-├── _common.py      # Shared helpers (arg groups, pipeline detection, loading, remote, I/O)
-├── image.py        # text-to-image, image-to-image, inpaint
-├── video.py        # text-to-video, image-to-video
-├── audio.py        # text-to-audio
-├── modular.py      # generic ModularPipeline runner with free-form inputs
-└── tasks.py        # `tasks` — list every registered agentic command
-```
-
-## Discovering tasks
-
-```bash
-diffusers-cli tasks            # human-readable
-diffusers-cli tasks --json     # for agents
-```
-
-## Pipeline detection (DiffusionPipeline vs ModularPipeline)
-
-Every inference command auto-detects whether the `--model` is a regular
-`DiffusionPipeline` repo (`model_index.json`) or a custom
-`ModularPipeline` repo (`modular_model_index.json`) via a single Hub
-listing — no weights are downloaded. If you point a task-shaped command at
-a modular repo, it exits with a hint to use `diffusers-cli modular`
-instead. The reverse is also true: `modular` rejects a regular repo and
-points back at the task-shaped command.
-
-## Pushing outputs to a bucket
-
-Every inference command (and `modular`) accepts `--push-to <bucket_id>`
-to upload the generated files to a Hugging Face **bucket** after they're
-saved locally. The bucket is created if it doesn't exist and files land
-under a prefix named after the task (e.g. `text-to-image/<filename>`).
-
-```bash
-diffusers-cli text-to-image \
-  --model stabilityai/stable-diffusion-xl-base-1.0 \
-  --prompt "a watercolor of a fox" \
-  --num-images 4 \
-  --push-to your-username/cli-generations
-```
-
-The upload is a single `batch_bucket_files` round-trip regardless of how
-many files were generated. The JSON payload reports `hf://buckets/...`
-URIs so an agent can pipe them into a follow-up tool.
-
-## Running remotely (HF Jobs) and fetching outputs back
-
-Every inference command supports `--remote`, which submits the same call
-to Hugging Face Jobs via `huggingface_hub.run_uv_job`, then by default
-**waits for the job to finish and downloads the outputs back to your
-local machine**.
-
-The flow:
-
-1. If `--push-to` isn't set, default it to `<your-user>/jobs-artifacts`
-   (the canonical jobs bucket — `https://huggingface.co/buckets/<you>/jobs-artifacts`).
-2. Generate a random `run_id` and pass it via `DIFFUSERS_CLI_RUN_ID` env
-   so the container writes its files under `<run_id>/` inside the bucket.
-3. Submit the job (your `HF_TOKEN` is forwarded as a secret).
-4. Poll `inspect_job` every `--poll-interval` seconds until the stage is
-   `COMPLETED` / `CANCELED` / `ERROR` / `DELETED`.
-5. List `<run_id>/` in the bucket and `download_bucket_files` everything
-   into the local `--output` directory (default `./outputs/`).
-
-Pass `--no-wait` to fire-and-forget — the command prints the job id and
-returns immediately; you can fetch later via `huggingface-cli buckets`.
-
-| Option | Description |
-|--------|-------------|
-| `--remote` | Run on HF Jobs instead of locally |
-| `--flavor` | Hardware flavor (default `a10g-small`) |
-| `--timeout` | Job timeout (e.g. `30m`, `2h`) |
-| `--dependencies` | Extra pip deps. Repeat for multiple |
-| `--namespace` | HF namespace (defaults to the current user) |
-| `--no-wait` | Skip polling/download — submit and exit |
-| `--poll-interval` | Seconds between job-status polls (default 5) |
-
-```bash
-# Submit text-to-image to HF Jobs on an A100, wait, download to ./outputs/
-diffusers-cli text-to-image \
-  --model stabilityai/stable-diffusion-xl-base-1.0 \
-  --prompt "a watercolor of a fox in autumn leaves" \
-  --num-images 4 \
-  --remote --flavor a100-large --timeout 30m
-```
-
-```bash
-# Same call, fire-and-forget
-diffusers-cli text-to-image ... --remote --no-wait
-```
-
-## Common options
-
-Every inference command supports:
-
-| Option | Description |
-|--------|-------------|
-| `--model` / `-m` | Model id on the Hub or local path |
-| `--device` | `cpu`, `cuda`, `cuda:0`, `mps` (defaults to best available) |
-| `--dtype` | `auto`, `float16`, `bfloat16`, `float32` |
-| `--variant` | Optional weight variant (e.g. `fp16`) |
-| `--revision` | Model revision (branch, tag, or SHA) |
-| `--token` | Hugging Face token for gated/private models |
-| `--trust-remote-code` | Allow custom code from the Hub |
-| `--output` / `-o` | Output file or directory |
-| `--json` | Machine-readable JSON summary on stdout |
-| `--seed` | Random seed for reproducibility |
-| `--pipeline-kwargs` | JSON object of extra kwargs forwarded to the pipeline call |
-
-## Commands
-
-### Image
-
-1. Generate an image from a text prompt
-   ```bash
-   diffusers-cli text-to-image \
-     --model stabilityai/stable-diffusion-xl-base-1.0 \
-     --prompt "a watercolor of a fox in autumn leaves" \
-     --output fox.png
-   ```
-
-2. Generate with explicit sampling controls
-   ```bash
-   diffusers-cli text-to-image \
-     --model stabilityai/stable-diffusion-xl-base-1.0 \
-     --prompt "studio portrait of a cyberpunk hacker" \
-     --negative-prompt "blurry, low quality" \
-     --num-inference-steps 30 \
-     --guidance-scale 7.5 \
-     --height 1024 --width 1024 \
-     --seed 42
-   ```
-
-3. Generate multiple variants at once
-   ```bash
-   diffusers-cli text-to-image \
-     --model black-forest-labs/FLUX.1-schnell \
-     --prompt "a still life with citrus and ceramics" \
-     --num-images 4 \
-     --output ./outputs/still-life/
-   ```
-
-4. Transform an existing image with a prompt (image-to-image)
-   ```bash
-   diffusers-cli image-to-image \
-     --model stabilityai/stable-diffusion-xl-refiner-1.0 \
-     --image input.jpg \
-     --prompt "make it look like an oil painting" \
-     --strength 0.6 \
-     --output painted.png
-   ```
-
-5. Inpaint a masked region of an image
-   ```bash
-   diffusers-cli inpaint \
-     --model stabilityai/stable-diffusion-2-inpainting \
-     --image photo.png \
-     --mask mask.png \
-     --prompt "a golden retriever sitting on the bench" \
-     --output filled.png
-   ```
-
-6. Emit JSON for downstream tooling
-   ```bash
-   diffusers-cli text-to-image \
-     --model stabilityai/sdxl-turbo \
-     --prompt "neon city at night" \
-     --json
-   ```
-
-### Video
-
-7. Generate a short clip from a text prompt
-   ```bash
-   diffusers-cli text-to-video \
-     --model THUDM/CogVideoX-2b \
-     --prompt "a panda surfing on a wave at sunset" \
-     --num-frames 49 \
-     --fps 8 \
-     --output panda.mp4
-   ```
-
-8. Animate a single still image
-   ```bash
-   diffusers-cli image-to-video \
-     --model stabilityai/stable-video-diffusion-img2vid-xt \
-     --image still.png \
-     --prompt "subtle camera dolly forward" \
-     --num-frames 25 \
-     --output animated.mp4
-   ```
-
-### Audio
-
-9. Generate music or a sound effect from a text prompt
-   ```bash
-   diffusers-cli text-to-audio \
-     --model cvssp/audioldm2 \
-     --prompt "a calm piano melody in a quiet room" \
-     --audio-length-in-s 10 \
-     --output music.wav
-   ```
-
-### Modular pipelines
-
-Modular pipelines have an open-ended input surface defined by the block
-graph, so the CLI doesn't try to predict it — pass inputs verbatim.
-
-14. Run a modular pipeline with free-form inputs
-    ```bash
-    diffusers-cli modular \
-      --model your-username/my-modular-pipeline \
-      --inputs prompt="a calm landscape" \
-      --inputs num_inference_steps=25 \
-      --inputs-json '{"guidance_scale": 4.5}' \
-      --output-key image \
-      --output out.png
-    ```
-
-The output type is auto-detected — a PIL image (or list of PIL images)
-becomes PNG(s), a sequence of frames becomes an MP4, a numpy audio array
-becomes a WAV, and anything else is JSON-serialized.
-
-### Roadmap
-
-Open an issue if you'd like to help land one:
-
-- **Video**: `video-to-video`
-- **Conditioning**: ControlNet, T2I-Adapter, instruction editing (Flux-Kontext, InstructPix2Pix)
-- **Quantization / export**: `convert` (fp16/safetensors/GGUF), `quantize` (bitsandbytes, torchao)
diff --git a/src/diffusers/commands/agentic/__init__.py b/src/diffusers/commands/agentic/__init__.py
deleted file mode 100644
index 9076bc7ee4c9..000000000000
--- a/src/diffusers/commands/agentic/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .app import register_agentic_commands
-
-
-__all__ = ["register_agentic_commands"]
diff --git a/src/diffusers/commands/agentic/_common.py b/src/diffusers/commands/agentic/_common.py
deleted file mode 100644
index bd0cb1fd0de4..000000000000
--- a/src/diffusers/commands/agentic/_common.py
+++ /dev/null
@@ -1,587 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Shared helpers for the agentic CLI surface.
-
-These utilities are intentionally small and dependency-light. Each diffusers
-agentic subcommand should be able to be read end-to-end by an agent without
-needing to follow many layers of indirection.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import sys
-from argparse import ArgumentParser, Namespace
-from pathlib import Path
-from typing import Any, Optional
-
-
-DEFAULT_OUTPUT_DIR = "outputs"
-
-
-DTYPE_CHOICES = ("auto", "float16", "fp16", "bfloat16", "bf16", "float32", "fp32")
-CPU_OFFLOAD_CHOICES = ("model", "group")
-ATTENTION_BACKEND_CHOICES = (
-    "default",
-    "flash_hub",
-    "flash_varlen_hub",
-    "flash_4_hub",
-    "sage_hub",
-)
-
-
-def add_loading_arguments(parser: ArgumentParser) -> None:
-    """Arguments shared by every inference subcommand."""
-    parser.add_argument("--model", "-m", required=True, help="Model id on the Hugging Face Hub or local path.")
-    parser.add_argument("--device", default=None, help="Device to run on (e.g. cpu, cuda, cuda:0, mps).")
-    parser.add_argument(
-        "--dtype",
-        default="auto",
-        choices=DTYPE_CHOICES,
-        help="Torch dtype for pipeline weights.",
-    )
-    parser.add_argument("--variant", default=None, help='Optional weight variant (e.g. "fp16").')
-    parser.add_argument("--revision", default=None, help="Model revision (branch, tag, or commit SHA).")
-    parser.add_argument("--token", default=None, help="Hugging Face token for gated/private models.")
-    parser.add_argument("--trust-remote-code", action="store_true", help="Allow custom code from the Hub.")
-
-
-def add_optimization_arguments(parser: ArgumentParser) -> None:
-    """Optional pipeline-optimization flags. All default to off."""
-    parser.add_argument(
-        "--cpu-offload",
-        choices=CPU_OFFLOAD_CHOICES,
-        default=None,
-        help=(
-            "Offload pipeline components to CPU during inference. "
-            "'model' uses enable_model_cpu_offload, "
-            "'group' uses pipeline.enable_group_offload(leaf_level, use_stream=True)."
-        ),
-    )
-    parser.add_argument(
-        "--attention-backend",
-        choices=ATTENTION_BACKEND_CHOICES,
-        default="default",
-        help=(
-            "Override the attention backend on the transformer/UNet. "
-            "Only Hub-hosted kernels are exposed — they auto-download on first "
-            "use and avoid a local install. 'default' leaves the backend untouched."
-        ),
-    )
-    parser.add_argument("--vae-tiling", action="store_true", help="Enable VAE tiling (lower peak VRAM).")
-    parser.add_argument("--vae-slicing", action="store_true", help="Enable VAE slicing (lower peak VRAM).")
-    parser.add_argument(
-        "--context-parallel",
-        action="store_true",
-        help=(
-            "Enable Ulysses-style context parallelism (ulysses_anything mode, supports arbitrary "
-            "sequence lengths). Requires launching the CLI under torchrun with ≥2 GPUs."
-        ),
-    )
-
-
-def add_generation_arguments(parser: ArgumentParser) -> None:
-    """Arguments shared by image/video generation subcommands."""
-    parser.add_argument("--prompt", "-p", default=None, help="Text prompt.")
-    parser.add_argument("--negative-prompt", default=None, help="Negative text prompt.")
-    parser.add_argument("--num-inference-steps", type=int, default=None, help="Number of denoising steps.")
-    parser.add_argument("--guidance-scale", type=float, default=None, help="Classifier-free guidance scale.")
-    parser.add_argument("--height", type=int, default=None, help="Output height in pixels.")
-    parser.add_argument("--width", type=int, default=None, help="Output width in pixels.")
-    parser.add_argument("--num-images", type=int, default=1, help="Number of images to generate.")
-    parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
-    parser.add_argument(
-        "--pipeline-kwargs",
-        default=None,
-        help="JSON object of extra kwargs forwarded to the pipeline call.",
-    )
-
-
-def add_output_arguments(parser: ArgumentParser) -> None:
-    """Output formatting arguments."""
-    parser.add_argument(
-        "--output",
-        "-o",
-        default=None,
-        help="Output file or directory. Defaults to ./outputs/<task>-<index>.<ext>.",
-    )
-    parser.add_argument(
-        "--push-to",
-        default=None,
-        help=(
-            "Upload the generated files to this HF bucket id after saving "
-            "(created if missing). When --remote is set, defaults to "
-            "<user>/jobs-artifacts; remote runs always write to that bucket "
-            "and fetch the results back locally."
-        ),
-    )
-    parser.add_argument("--json", action="store_true", help="Emit a machine-readable JSON summary on stdout.")
-
-
-def add_remote_arguments(parser: ArgumentParser) -> None:
-    """Optional HF Jobs arguments — works on every inference subcommand."""
-    parser.add_argument(
-        "--remote",
-        action="store_true",
-        help="Submit this command to Hugging Face Jobs instead of running locally.",
-    )
-    parser.add_argument(
-        "--flavor",
-        default="a10g-small",
-        help="HF Jobs hardware flavor for --remote (e.g. a10g-small, a100-large, cpu-basic).",
-    )
-    parser.add_argument(
-        "--timeout",
-        default=None,
-        help="HF Jobs timeout for --remote (e.g. 30m, 2h).",
-    )
-    parser.add_argument(
-        "--dependencies",
-        action="append",
-        default=None,
-        help="Extra pip dependencies for the --remote job. Repeat to add multiple.",
-    )
-    parser.add_argument(
-        "--namespace",
-        default=None,
-        help="HF namespace to run the --remote job under (defaults to the current user).",
-    )
-    parser.add_argument(
-        "--no-wait",
-        action="store_true",
-        help=(
-            "Don't wait for the --remote job to finish — submit and print the job id. "
-            "Default behaviour is to poll until completion and download outputs locally."
-        ),
-    )
-    parser.add_argument(
-        "--poll-interval",
-        type=float,
-        default=5.0,
-        help="Seconds between job-status polls when waiting for --remote completion.",
-    )
-
-
-def resolve_dtype(name: Optional[str]):
-    """Map a CLI dtype string to a torch dtype.
-
-    Returns ``"auto"`` when the user wants diffusers to pick.
-    """
-    if name in (None, "auto"):
-        return "auto"
-
-    import torch
-
-    mapping = {
-        "fp32": torch.float32,
-        "fp16": torch.float16,
-        "bf16": torch.bfloat16,
-    }
-    if name not in mapping:
-        raise ValueError(f"Unknown dtype: {name}")
-    return mapping[name]
-
-
-def resolve_device(name: Optional[str]) -> str:
-    """Pick a device, defaulting to the best available one."""
-    if name:
-        return name
-    import torch
-
-    if torch.cuda.is_available():
-        return "cuda"
-
-    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-        return "mps"
-
-    return "cpu"
-
-
-def load_pipeline(args: Namespace, pipeline_cls_name: str) -> Any:
-    """Load a diffusers pipeline class by name and move it to the chosen device.
-
-    ``pipeline_cls_name`` can be any class exported from ``diffusers`` —
-    typically one of ``AutoPipelineForText2Image``, ``AutoPipelineForImage2Image``,
-    ``AutoPipelineForInpainting``, or ``DiffusionPipeline`` for video/audio.
-    """
-    import diffusers
-
-    pipeline_cls = getattr(diffusers, pipeline_cls_name)
-    from_pretrained_kwargs: dict[str, Any] = {
-        "torch_dtype": resolve_dtype(args.dtype),
-        "trust_remote_code": args.trust_remote_code,
-    }
-    if args.variant:
-        from_pretrained_kwargs["variant"] = args.variant
-    if args.revision:
-        from_pretrained_kwargs["revision"] = args.revision
-    if args.token:
-        from_pretrained_kwargs["token"] = args.token
-
-    pipeline = pipeline_cls.from_pretrained(args.model, **from_pretrained_kwargs)
-    pipeline = map_to_device(pipeline, args, resolve_device(args.device))
-    if args.vae_tiling and hasattr(pipeline, "enable_vae_tiling"):
-        pipeline.enable_vae_tiling()
-    if args.vae_slicing and hasattr(pipeline, "enable_vae_slicing"):
-        pipeline.enable_vae_slicing()
-    if args.attention_backend != "default":
-        _set_attention_backend(pipeline, args.attention_backend)
-    if args.context_parallel:
-        _enable_context_parallel(pipeline)
-    return pipeline
-
-
-def map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
-    """Get the pipeline ready to run on ``device``.
-
-    Calls ``.to(device)`` by default; when ``--cpu-offload`` is set the chosen
-    offload helper (``model``, ``sequential``, or ``group``) handles placement instead.
-    """
-    if args.cpu_offload is None:
-        return pipeline.to(device)
-    if args.cpu_offload == "model":
-        pipeline.enable_model_cpu_offload(device=device)
-    elif args.cpu_offload == "group":
-        import torch
-
-        pipeline.enable_group_offload(
-            onload_device=torch.device(device),
-            offload_type="leaf_level",
-            use_stream=device.startswith("cuda"),
-        )
-    return pipeline
-
-
-def _enable_context_parallel(pipeline: Any) -> None:
-    """Enable Ulysses-style context-parallel inference on the transformer/UNet."""
-    import torch
-
-    if not torch.distributed.is_available() or not torch.distributed.is_initialized():
-        raise SystemExit(
-            "--context-parallel requires torch.distributed to be initialized. "
-            "Launch the CLI under torchrun, e.g.: "
-            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli <task> ...`."
-        )
-
-    from diffusers import ContextParallelConfig
-
-    cfg = ContextParallelConfig(
-        ulysses_degree=torch.distributed.get_world_size(),
-        ring_degree=1,
-        ulysses_anything=True,
-    )
-    for attr in ("transformer", "unet"):
-        module = getattr(pipeline, attr, None)
-        if module is not None and hasattr(module, "enable_parallelism"):
-            module.enable_parallelism(config=cfg)
-            return
-
-
-def _set_attention_backend(pipeline: Any, backend: str) -> None:
-    for attr in ("transformer", "unet"):
-        module = getattr(pipeline, attr, None)
-        if module is not None and hasattr(module, "set_attention_backend"):
-            try:
-                module.set_attention_backend(backend)
-            except (ValueError, ImportError, RuntimeError):
-                pass
-            return
-
-
-def get_generator(seed: Optional[int], device: str):
-    if seed is None:
-        return None
-    import torch
-
-    generator_device = "cpu" if device == "mps" else device
-    return torch.Generator(device=generator_device).manual_seed(seed)
-
-
-def parse_pipeline_kwargs(raw: Optional[str]) -> dict[str, Any]:
-    if not raw:
-        return {}
-    try:
-        parsed = json.loads(raw)
-    except json.JSONDecodeError as e:
-        raise SystemExit(f"--pipeline-kwargs must be valid JSON: {e}") from e
-    if not isinstance(parsed, dict):
-        raise SystemExit("--pipeline-kwargs must decode to a JSON object.")
-    return parsed
-
-
-def default_output_paths(task: str, num: int, explicit: Optional[str], ext: str = "png") -> list[Path]:
-    """Resolve output file paths for ``num`` generated artifacts.
-
-    - If ``explicit`` is a directory (or ends with /), write into it.
-    - If ``explicit`` is a file and ``num == 1``, write to that file.
-    - If ``explicit`` is a file template and ``num > 1``, append ``-<i>`` before the suffix.
-    - Otherwise default to ``./outputs/<task>-<i>.<ext>``.
-    """
-    if explicit is None:
-        base = Path(DEFAULT_OUTPUT_DIR)
-        base.mkdir(parents=True, exist_ok=True)
-        return [base / f"{task}-{i}.{ext}" for i in range(num)]
-
-    p = Path(explicit)
-    if explicit.endswith(os.sep) or p.is_dir():
-        p.mkdir(parents=True, exist_ok=True)
-        return [p / f"{task}-{i}.{ext}" for i in range(num)]
-
-    p.parent.mkdir(parents=True, exist_ok=True)
-    if num == 1:
-        return [p]
-    stem, suffix = p.stem, p.suffix or f".{ext}"
-    return [p.with_name(f"{stem}-{i}{suffix}") for i in range(num)]
-
-
-# Source for the diffusers install used by --remote jobs. While iterating on a
-# feature branch, point at the branch URL; once merged, switch back to a release
-# pin. ``--dependencies "diffusers @ git+..."`` on the local command appends
-# additional dependencies but does not replace this default install.
-DIFFUSERS_SOURCE = "diffusers @ git+https://github.com/huggingface/diffusers@diffuser-cli-for-agent"
-_DEFAULT_REMOTE_DEPS = (
-    DIFFUSERS_SOURCE,
-    "accelerate",
-    "transformers",
-    "safetensors",
-    "torch==2.10.*",
-    "torchvision",
-)
-
-# Entry point for ``uv run`` inside the container. ``uv run`` accepts a file path,
-# URL, or *command*; passing the ``diffusers-cli`` console script name makes UV
-# install the deps above (which register the entry point) and then exec the CLI.
-_UV_RUNNER_SCRIPT = "diffusers-cli"
-
-
-RUN_ID_ENV = "DIFFUSERS_CLI_RUN_ID"
-
-# Namespace keys that control *how* a remote job runs locally, not what runs
-# inside the container. They are stripped when forwarding argv to the container.
-HF_JOBS_KEYS = frozenset(
-    {"remote", "flavor", "timeout", "dependencies", "namespace", "no_wait", "poll_interval", "func"}
-)
-
-
-def _rewrite_model_arg(forwarded: list[str], new_path: str) -> list[str]:
-    """Return a copy of ``forwarded`` with the ``--model`` value replaced by ``new_path``."""
-    out = list(forwarded)
-    for i, token in enumerate(out):
-        if token in ("--model", "-m") and i + 1 < len(out):
-            out[i + 1] = new_path
-            return out
-    return out
-
-
-def _forward_args(args: Namespace, task: str) -> list[str]:
-    """Reconstruct argv for the remote container from a parsed Namespace.
-
-    Skips the local-only job-control keys above. Boolean flags are emitted
-    only when True. List values become repeated ``--flag value`` pairs.
-    """
-    out: list[str] = [task]
-    for key, value in vars(args).items():
-        if key in HF_JOBS_KEYS:
-            continue
-        if value is None or value is False:
-            continue
-        flag = "--" + key.replace("_", "-")
-        if value is True:
-            out.append(flag)
-        elif isinstance(value, list):
-            for item in value:
-                out.extend([flag, str(item)])
-        else:
-            out.extend([flag, str(value)])
-    return out
-
-
-def maybe_submit_remote(args: Namespace, task: str) -> bool:
-    """If ``--remote`` was set, submit this invocation to HF Jobs and return True.
-
-    The local ``run()`` should bail immediately when this returns True.
-
-    Auto-defaults ``--push-to`` to ``<user>/jobs-artifacts`` so the remote
-    container has somewhere to write before tear-down. By default, polls
-    the job until completion and downloads the artifacts back to the local
-    output directory; pass ``--no-wait`` to fire-and-forget.
-    """
-    if not args.remote:
-        return False
-
-    import uuid
-
-    from huggingface_hub import HfApi, get_token, run_uv_job
-
-    try:
-        from huggingface_hub import Volume
-    except ImportError:
-        Volume = None
-
-    hf_token = args.token or get_token()
-    api = HfApi(token=hf_token)
-
-    if not args.push_to:
-        args.push_to = f"{api.whoami()['name']}/jobs-artifacts"
-
-    run_id = uuid.uuid4().hex[:12]
-
-    forwarded = _forward_args(args, task)
-    dependencies = list(_DEFAULT_REMOTE_DEPS)
-    if args.dependencies:
-        dependencies.extend(args.dependencies)
-
-    secrets = {"HF_TOKEN": hf_token} if hf_token else None
-    env = {
-        RUN_ID_ENV: run_id,
-        "HF_ENABLE_PARALLEL_LOADING": "1",  # thread-pool the safetensors load step
-    }
-
-    # Mount the model repo into the job's filesystem so the container reads it
-    # from local disk instead of downloading on every run. Requires
-    # huggingface_hub >= 1.16. Falls back to the download path otherwise.
-    run_uv_job_kwargs: dict[str, Any] = dict(
-        script=_UV_RUNNER_SCRIPT,
-        script_args=forwarded,
-        dependencies=dependencies,
-        flavor=args.flavor,
-        timeout=args.timeout,
-        namespace=args.namespace,
-        secrets=secrets,
-        env=env,
-        token=hf_token,
-    )
-    if Volume is not None and not Path(args.model).exists():
-        mount_path = "/model"
-        run_uv_job_kwargs["volumes"] = [Volume(type="model", source=args.model, mount_path=mount_path)]
-        run_uv_job_kwargs["script_args"] = _rewrite_model_arg(forwarded, mount_path)
-
-    job = run_uv_job(**run_uv_job_kwargs)
-
-    payload: dict[str, Any] = {
-        "task": "remote-submit",
-        "job_id": getattr(job, "id", None),
-        "job_status": str(getattr(job, "status", "")),
-        "flavor": args.flavor,
-        "push_to": args.push_to,
-        "run_id": run_id,
-    }
-
-    if args.no_wait:
-        format_result(args, payload)
-        return True
-
-    print(
-        f"[diffusers-cli] submitted job {job.id} (run_id={run_id}); "
-        f"watch at {getattr(job, 'url', 'https://huggingface.co/jobs')}",
-        file=sys.stderr,
-        flush=True,
-    )
-    final_status = _wait_for_job(api, job.id, args.namespace, args.poll_interval)
-    payload["job_status"] = final_status
-    payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
-    format_result(args, payload)
-    return True
-
-
-def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
-    """Poll ``inspect_job`` until the job reaches a terminal stage; return that stage as a string.
-
-    Prints a heartbeat each poll and a labelled line on every stage transition so
-    the local terminal isn't silent for the multi-minute install/download/run
-    window of a remote inference job.
-    """
-    import time
-
-    terminal = {"COMPLETED", "CANCELED", "ERROR", "DELETED"}
-    last_stage: Optional[str] = None
-    while True:
-        info = api.inspect_job(job_id=job_id, namespace=namespace)
-        stage = str(info.status.stage) if info.status else "UNKNOWN"
-        if stage != last_stage:
-            if last_stage is not None:
-                print("", file=sys.stderr, flush=True)
-            print(f"[diffusers-cli] job {job_id}: {stage}", file=sys.stderr, flush=True)
-            last_stage = stage
-        else:
-            print(".", end="", file=sys.stderr, flush=True)
-        if stage in terminal:
-            print("", file=sys.stderr, flush=True)
-            return stage
-        time.sleep(poll_interval)
-
-
-def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optional[str]) -> list[str]:
-    """Download every file under ``<run_id>/`` from ``bucket_id`` to a local directory.
-
-    ``output`` is always treated as a directory (created if missing) — remote
-    runs produce many files, so a file-path target wouldn't make sense.
-    """
-    from huggingface_hub import BucketFile
-
-    local_dir = Path(output) if output else Path(DEFAULT_OUTPUT_DIR)
-    local_dir.mkdir(parents=True, exist_ok=True)
-
-    pairs: list[tuple[Any, Path]] = []
-    for entry in api.list_bucket_tree(bucket_id, prefix=f"{run_id}/", recursive=True):
-        if not isinstance(entry, BucketFile):
-            continue
-        pairs.append((entry, local_dir / Path(entry.path).name))
-
-    if not pairs:
-        return []
-    api.download_bucket_files(bucket_id, files=pairs)
-    return [str(local) for _, local in pairs]
-
-
-def push_outputs(args: Namespace, saved_paths: list[str], task: str) -> Optional[dict[str, Any]]:
-    """Upload ``saved_paths`` to the ``--push-to`` bucket, returning a summary.
-
-    Returns None when ``--push-to`` is unset. Creates the bucket if needed.
-    When ``DIFFUSERS_CLI_RUN_ID`` is set (i.e. we're inside a remote job),
-    files land under ``<run_id>/`` so the local side can isolate this run's
-    output; otherwise they land under ``<task>/``.
-    """
-    if not args.push_to:
-        return None
-    target = args.push_to
-
-    from huggingface_hub import HfApi
-
-    api = HfApi(token=args.token)
-    api.create_bucket(target, exist_ok=True)
-
-    prefix = os.environ.get(RUN_ID_ENV) or task
-    add = [(local, f"{prefix}/{Path(local).name}") for local in saved_paths]
-    api.batch_bucket_files(target, add=add)
-
-    uploaded = [f"hf://buckets/{target}/{dest}" for _, dest in add]
-    return {"bucket_id": target, "uploaded": uploaded}
-
-
-def format_result(args: Namespace, payload: dict[str, Any]) -> None:
-    """Print either a human-friendly summary or JSON, depending on --json."""
-    if args.json:
-        json.dump(payload, sys.stdout, default=str)
-        sys.stdout.write("\n")
-        return
-
-    outputs = payload.get("outputs", [])
-    if outputs:
-        for path in outputs:
-            print(path)
-    else:
-        print(payload)
diff --git a/src/diffusers/commands/agentic/app.py b/src/diffusers/commands/agentic/app.py
deleted file mode 100644
index 3ca7b50ae1ed..000000000000
--- a/src/diffusers/commands/agentic/app.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Single integration point for the agentic CLI.
-
-Removing the call to ``register_agentic_commands`` from
-``diffusers_cli.py`` disables the entire surface with no side effects.
-"""
-
-from __future__ import annotations
-
-from argparse import _SubParsersAction
-
-from . import audio as audio_commands
-from . import image as image_commands
-from . import modular as modular_commands
-from . import tasks as tasks_commands
-from . import video as video_commands
-
-
-def register_agentic_commands(subparsers: _SubParsersAction) -> None:
-    """Register every agentic subcommand on the top-level ``diffusers-cli`` parser."""
-    image_commands.register(subparsers)
-    video_commands.register(subparsers)
-    audio_commands.register(subparsers)
-    modular_commands.register(subparsers)
-    tasks_commands.register(subparsers)
diff --git a/src/diffusers/commands/agentic/audio.py b/src/diffusers/commands/agentic/audio.py
deleted file mode 100644
index 42c2fd0da210..000000000000
--- a/src/diffusers/commands/agentic/audio.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Audio-generation subcommands: text-to-audio."""
-
-from __future__ import annotations
-
-from argparse import ArgumentParser, Namespace, _SubParsersAction
-
-from .. import BaseDiffusersCLICommand
-from . import _common
-
-
-def register(subparsers: _SubParsersAction) -> None:
-    Text2AudioCommand.register_subcommand(subparsers)
-
-
-def _save_audio(audios, sampling_rate: int, args: Namespace, task: str) -> list[str]:
-    """Save one or more audio arrays as WAV files."""
-    import numpy as np
-    from scipy.io.wavfile import write as wavfile_write
-
-    paths = _common.default_output_paths(task, len(audios), args.output, ext="wav")
-    saved: list[str] = []
-    for audio, path in zip(audios, paths):
-        data = np.asarray(audio)
-        if data.dtype.kind == "f":
-            data = np.clip(data, -1.0, 1.0)
-            data = (data * 32767).astype(np.int16)
-        if data.ndim > 1 and data.shape[0] < data.shape[-1]:
-            # ``(channels, samples)`` → ``(samples, channels)`` for scipy.
-            data = data.T
-        wavfile_write(str(path), sampling_rate, data)
-        saved.append(str(path))
-    return saved
-
-
-class Text2AudioCommand(BaseDiffusersCLICommand):
-    task = "text-to-audio"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "text-to-audio",
-            help="Generate an audio clip (music or sound) from a text prompt.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _common.add_remote_arguments(parser)
-        parser.add_argument(
-            "--audio-length-in-s",
-            type=float,
-            default=None,
-            help="Duration of the generated audio in seconds.",
-        )
-        parser.add_argument(
-            "--sampling-rate",
-            type=int,
-            default=None,
-            help="Override the sampling rate written to the WAV file.",
-        )
-        _common.add_output_arguments(parser)
-        parser.set_defaults(func=Text2AudioCommand)
-
-    def __init__(self, args: Namespace):
-        self.args = args
-
-    def run(self) -> None:
-        if _common.maybe_submit_remote(self.args, self.task):
-            return
-        pipeline = _common.load_pipeline(self.args, "DiffusionPipeline")
-
-        call_kwargs: dict = {}
-        if self.args.prompt is not None:
-            call_kwargs["prompt"] = self.args.prompt
-        if self.args.negative_prompt is not None:
-            call_kwargs["negative_prompt"] = self.args.negative_prompt
-        if self.args.num_inference_steps is not None:
-            call_kwargs["num_inference_steps"] = self.args.num_inference_steps
-        if self.args.guidance_scale is not None:
-            call_kwargs["guidance_scale"] = self.args.guidance_scale
-        if self.args.audio_length_in_s is not None:
-            call_kwargs["audio_length_in_s"] = self.args.audio_length_in_s
-        if self.args.num_images != 1:
-            call_kwargs["num_waveforms_per_prompt"] = self.args.num_images
-
-        generator = _common.get_generator(self.args.seed, pipeline.device.type)
-        if generator is not None:
-            call_kwargs["generator"] = generator
-
-        call_kwargs.update(_common.parse_pipeline_kwargs(self.args.pipeline_kwargs))
-
-        result = pipeline(**call_kwargs)
-        audios = getattr(result, "audios", None)
-        if audios is None:
-            audios = result[0]
-
-        sampling_rate = self.args.sampling_rate
-        if sampling_rate is None:
-            pipeline_sr = getattr(pipeline, "sampling_rate", None)
-            if isinstance(pipeline_sr, int):
-                sampling_rate = pipeline_sr
-            else:
-                vocoder_config = getattr(getattr(pipeline, "vocoder", None), "config", None)
-                sampling_rate = getattr(vocoder_config, "sampling_rate", 16000) if vocoder_config else 16000
-
-        saved = _save_audio(audios, sampling_rate, self.args, self.task)
-        pushed = _common.push_outputs(self.args, saved, self.task)
-
-        _common.format_result(
-            self.args,
-            {
-                "task": self.task,
-                "model": self.args.model,
-                "device": pipeline.device.type,
-                "outputs": saved,
-                "pushed": pushed,
-                "sampling_rate": sampling_rate,
-                "seed": self.args.seed,
-            },
-        )
diff --git a/src/diffusers/commands/agentic/image.py b/src/diffusers/commands/agentic/image.py
deleted file mode 100644
index 94fdd81d6953..000000000000
--- a/src/diffusers/commands/agentic/image.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Image-generation subcommands: text-to-image, image-to-image, inpaint."""
-
-from __future__ import annotations
-
-from argparse import ArgumentParser, Namespace, _SubParsersAction
-
-from diffusers.utils import load_image
-
-from .. import BaseDiffusersCLICommand
-from . import _common
-
-
-def register(subparsers: _SubParsersAction) -> None:
-    Text2ImageCommand.register_subcommand(subparsers)
-    Image2ImageCommand.register_subcommand(subparsers)
-    InpaintCommand.register_subcommand(subparsers)
-
-
-def _build_call_kwargs(args: Namespace, pipeline) -> dict:
-    kwargs: dict = {}
-    if args.prompt is not None:
-        kwargs["prompt"] = args.prompt
-    if args.negative_prompt is not None:
-        kwargs["negative_prompt"] = args.negative_prompt
-    if args.num_inference_steps is not None:
-        kwargs["num_inference_steps"] = args.num_inference_steps
-    if args.guidance_scale is not None:
-        kwargs["guidance_scale"] = args.guidance_scale
-    if args.height is not None:
-        kwargs["height"] = args.height
-    if args.width is not None:
-        kwargs["width"] = args.width
-    if args.num_images != 1:
-        kwargs["num_images_per_prompt"] = args.num_images
-
-    generator = _common.get_generator(args.seed, pipeline.device.type)
-    if generator is not None:
-        kwargs["generator"] = generator
-
-    kwargs.update(_common.parse_pipeline_kwargs(args.pipeline_kwargs))
-    return kwargs
-
-
-def _save_images(images, task: str, args: Namespace) -> list[str]:
-    paths = _common.default_output_paths(task, len(images), args.output, ext="png")
-    saved: list[str] = []
-    for image, path in zip(images, paths):
-        image.save(path)
-        saved.append(str(path))
-    return saved
-
-
-class _BaseImageCommand(BaseDiffusersCLICommand):
-    task: str = ""
-    auto_cls: str = ""
-
-    def __init__(self, args: Namespace):
-        self.args = args
-
-    def run(self) -> None:
-        if _common.maybe_submit_remote(self.args, self.task):
-            return
-
-        pipeline = _common.load_pipeline(self.args, self.auto_cls)
-        call_kwargs = _build_call_kwargs(self.args, pipeline)
-        self._attach_inputs(call_kwargs)
-
-        result = pipeline(**call_kwargs)
-        saved = _save_images(result.images, self.task, self.args)
-        pushed = _common.push_outputs(self.args, saved, self.task)
-
-        _common.format_result(
-            self.args,
-            {
-                "task": self.task,
-                "model": self.args.model,
-                "device": pipeline.device.type,
-                "outputs": saved,
-                "pushed": pushed,
-                "seed": self.args.seed,
-            },
-        )
-
-    def _attach_inputs(self, call_kwargs: dict) -> None:  # noqa: B027
-        """Hook for subclasses to attach image/mask conditioning."""
-
-
-class Text2ImageCommand(_BaseImageCommand):
-    task = "text-to-image"
-    auto_cls = "AutoPipelineForText2Image"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "text-to-image",
-            help="Generate an image from a text prompt.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-        parser.set_defaults(func=Text2ImageCommand)
-
-
-class Image2ImageCommand(_BaseImageCommand):
-    task = "image-to-image"
-    auto_cls = "AutoPipelineForImage2Image"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "image-to-image",
-            help="Transform an input image conditioned on a text prompt.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-
-        parser.add_argument("--image", required=True, help="Path or URL to the conditioning image.")
-        parser.add_argument("--strength", type=float, default=None, help="How much to transform the input (0-1).")
-        parser.set_defaults(func=Image2ImageCommand)
-
-    def _attach_inputs(self, call_kwargs: dict) -> None:
-        call_kwargs["image"] = load_image(self.args.image)
-        if self.args.strength is not None:
-            call_kwargs["strength"] = self.args.strength
-
-
-class InpaintCommand(_BaseImageCommand):
-    task = "inpaint"
-    auto_cls = "AutoPipelineForInpainting"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "inpaint",
-            help="Inpaint a region of an image defined by a mask.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-        parser.add_argument("--image", required=True, help="Path or URL to the base image.")
-        parser.add_argument("--mask", required=True, help="Path or URL to the mask image (white=inpaint).")
-        parser.add_argument("--strength", type=float, default=None, help="Strength of the inpainting transform (0-1).")
-        parser.set_defaults(func=InpaintCommand)
-
-    def _attach_inputs(self, call_kwargs: dict) -> None:
-        call_kwargs["image"] = load_image(self.args.image)
-        call_kwargs["mask_image"] = load_image(self.args.mask)
-        if self.args.strength is not None:
-            call_kwargs["strength"] = self.args.strength
diff --git a/src/diffusers/commands/agentic/modular.py b/src/diffusers/commands/agentic/modular.py
deleted file mode 100644
index 304c8b17329f..000000000000
--- a/src/diffusers/commands/agentic/modular.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""``diffusers-cli modular`` — run a custom ModularPipeline.
-
-Modular pipelines don't fit the ``task -> AutoPipelineFor*`` taxonomy: the
-pipeline blocks themselves define the surface. This command takes free-form
-``--inputs key=value`` (or a JSON blob) and forwards them to the modular
-pipeline call, then auto-detects the result type so the agent doesn't need
-to know whether it asked for an image, video, or audio output.
-"""
-
-from __future__ import annotations
-
-import json
-from argparse import ArgumentParser, Namespace, _SubParsersAction
-from pathlib import Path
-from typing import Any
-
-from .. import BaseDiffusersCLICommand
-from . import _common
-
-
-def register(subparsers: _SubParsersAction) -> None:
-    ModularCommand.register_subcommand(subparsers)
-
-
-def _parse_inputs(args: Namespace) -> dict[str, Any]:
-    """Combine ``--inputs-json`` and repeated ``--inputs key=value`` into one dict.
-
-    Values from ``--inputs`` are JSON-decoded when possible (so booleans,
-    numbers, lists, and nested objects survive); plain strings fall back to
-    raw text.
-    """
-    out: dict[str, Any] = {}
-    if args.inputs_json:
-        try:
-            decoded = json.loads(args.inputs_json)
-        except json.JSONDecodeError as e:
-            raise SystemExit(f"--inputs-json must be valid JSON: {e}") from e
-        if not isinstance(decoded, dict):
-            raise SystemExit("--inputs-json must decode to a JSON object.")
-        out.update(decoded)
-
-    for pair in args.inputs or []:
-        if "=" not in pair:
-            raise SystemExit(f"--inputs entries must look like key=value, got {pair!r}.")
-        key, _, raw = pair.partition("=")
-        try:
-            out[key] = json.loads(raw)
-        except json.JSONDecodeError:
-            out[key] = raw
-    return out
-
-
-def _save_auto(value: Any, args: Namespace, task: str) -> list[str]:
-    """Save ``value`` based on its runtime type and return the written paths."""
-    pil_images = _as_pil_list(value)
-    if pil_images is not None:
-        paths = _common.default_output_paths(task, len(pil_images), args.output, ext="png")
-        for img, path in zip(pil_images, paths):
-            img.save(path)
-        return [str(p) for p in paths]
-
-    frames = _as_frame_sequence(value)
-    if frames is not None:
-        from diffusers.utils import export_to_video
-
-        path = _common.default_output_paths(task, 1, args.output, ext="mp4")[0]
-        export_to_video(frames, str(path), fps=args.fps)
-        return [str(path)]
-
-    audios = _as_audio_arrays(value)
-    if audios is not None:
-        from .audio import _save_audio
-
-        return _save_audio(audios, args.sampling_rate or 16000, args, task)
-
-    # Fallback: dump as JSON.
-    path = _common.default_output_paths(task, 1, args.output, ext="json")[0]
-    Path(path).write_text(json.dumps(value, default=str, indent=2))
-    return [str(path)]
-
-
-def _as_pil_list(value: Any):
-    try:
-        from PIL.Image import Image as PILImage
-    except ImportError:
-        return None
-    if isinstance(value, PILImage):
-        return [value]
-    if isinstance(value, (list, tuple)) and value and all(isinstance(v, PILImage) for v in value):
-        return list(value)
-    return None
-
-
-def _as_frame_sequence(value: Any):
-    """A frame sequence is a list of PIL images or numpy frames meant to be a single clip."""
-    try:
-        from PIL.Image import Image as PILImage
-    except ImportError:
-        PILImage = None  # type: ignore[assignment]
-
-    if isinstance(value, (list, tuple)) and len(value) >= 2:
-        first = value[0]
-        if PILImage is not None and isinstance(first, PILImage):
-            # Heuristic: distinguish "list of images we want as PNGs" from "frame sequence".
-            # The modular pipeline call already returned a single value, so we treat a
-            # homogeneous list of >=2 images as a clip.
-            return list(value)
-        try:
-            import numpy as np
-
-            if isinstance(first, np.ndarray):
-                return list(value)
-        except ImportError:
-            pass
-    return None
-
-
-def _as_audio_arrays(value: Any):
-    try:
-        import numpy as np
-    except ImportError:
-        return None
-    if isinstance(value, np.ndarray) and value.ndim <= 2:
-        return [value]
-    if (
-        isinstance(value, (list, tuple))
-        and value
-        and all(isinstance(v, np.ndarray) for v in value)
-    ):
-        return list(value)
-    return None
-
-
-class ModularCommand(BaseDiffusersCLICommand):
-    task = "modular"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "modular",
-            help="Run a custom ModularPipeline with free-form inputs.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        parser.add_argument(
-            "--inputs",
-            action="append",
-            default=None,
-            help='Inputs as key=value (value JSON-decoded when possible). Repeat to add multiple.',
-        )
-        parser.add_argument(
-            "--inputs-json",
-            default=None,
-            help="Inputs as a single JSON object (merged with any --inputs entries).",
-        )
-        parser.add_argument(
-            "--output-key",
-            default=None,
-            help='Optional intermediate to extract (e.g. "image", "video"). '
-            "Forwarded to ModularPipeline as the ``output`` argument.",
-        )
-        parser.add_argument(
-            "--fps",
-            type=int,
-            default=8,
-            help="FPS used when the output happens to be a frame sequence.",
-        )
-        parser.add_argument(
-            "--sampling-rate",
-            type=int,
-            default=None,
-            help="Sample rate used when the output happens to be an audio array.",
-        )
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-        parser.set_defaults(func=ModularCommand)
-
-    def __init__(self, args: Namespace):
-        self.args = args
-
-    def run(self) -> None:
-        if _common.maybe_submit_remote(self.args, self.task):
-            return
-
-        pipeline = self._load_modular()
-        call_kwargs = _parse_inputs(self.args)
-        if self.args.output_key is not None:
-            call_kwargs["output"] = self.args.output_key
-
-        result = pipeline(**call_kwargs)
-        saved = _save_auto(result, self.args, self.task)
-        pushed = _common.push_outputs(self.args, saved, self.task)
-
-        _common.format_result(
-            self.args,
-            {
-                "task": self.task,
-                "model": self.args.model,
-                "pipeline_class": type(pipeline).__name__,
-                "outputs": saved,
-                "pushed": pushed,
-                "output_key": self.args.output_key,
-            },
-        )
-
-    def _load_modular(self):
-        from diffusers import ModularPipeline
-
-        dtype = _common.resolve_dtype(self.args.dtype)
-        device = _common.resolve_device(self.args.device)
-
-        from_pretrained_kwargs: dict[str, Any] = {
-            "trust_remote_code": self.args.trust_remote_code,
-        }
-        if dtype != "auto":
-            from_pretrained_kwargs["torch_dtype"] = dtype
-        if self.args.revision:
-            from_pretrained_kwargs["revision"] = self.args.revision
-        if self.args.token:
-            from_pretrained_kwargs["token"] = self.args.token
-
-        pipeline = ModularPipeline.from_pretrained(self.args.model, **from_pretrained_kwargs)
-        if not hasattr(pipeline, "to"):
-            return pipeline
-
-        pipeline = _common.map_to_device(pipeline, self.args, device)
-        if self.args.vae_tiling and hasattr(pipeline, "enable_vae_tiling"):
-            pipeline.enable_vae_tiling()
-        if self.args.vae_slicing and hasattr(pipeline, "enable_vae_slicing"):
-            pipeline.enable_vae_slicing()
-        if self.args.attention_backend != "default":
-            _common._set_attention_backend(pipeline, self.args.attention_backend)
-        if self.args.context_parallel:
-            _common._enable_context_parallel(pipeline)
-        return pipeline
diff --git a/src/diffusers/commands/agentic/tasks.py b/src/diffusers/commands/agentic/tasks.py
deleted file mode 100644
index be2999469783..000000000000
--- a/src/diffusers/commands/agentic/tasks.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""``diffusers-cli tasks`` — list every registered agentic subcommand.
-
-Designed so an agent can discover the surface area without parsing
-``--help`` output.
-"""
-
-from __future__ import annotations
-
-import json
-import sys
-from argparse import ArgumentParser, Namespace, _SubParsersAction
-
-from .. import BaseDiffusersCLICommand
-
-
-AGENTIC_TASK_NAMES: tuple[str, ...] = (
-    "text-to-image",
-    "image-to-image",
-    "inpaint",
-    "text-to-video",
-    "image-to-video",
-    "text-to-audio",
-    "modular",
-)
-
-
-def register(subparsers: _SubParsersAction) -> None:
-    ListTasksCommand.register_subcommand(subparsers, subparsers)
-
-
-def list_agentic_tasks(subparsers: _SubParsersAction) -> list[dict]:
-    """Return ``[{name, description}, ...]`` for every registered agentic task.
-
-    Reads metadata directly from the live argparse subparsers so the list
-    can never drift from the actual commands.
-    """
-    choices = getattr(subparsers, "choices", {}) or {}
-    actions = [a for a in getattr(subparsers, "_choices_actions", [])]
-    descriptions = {a.dest: a.help for a in actions}
-
-    out: list[dict] = []
-    for name in AGENTIC_TASK_NAMES:
-        if name not in choices:
-            continue
-        out.append({"name": name, "description": descriptions.get(name, "")})
-    return out
-
-
-class ListTasksCommand(BaseDiffusersCLICommand):
-    task = "tasks"
-
-    # The live subparsers object is captured at registration time so ``run``
-    # can introspect it without needing access to ``main``'s locals.
-    _root_subparsers: _SubParsersAction | None = None
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction, root_subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "tasks",
-            help="List every registered agentic task with a one-line description.",
-        )
-        parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")
-        parser.set_defaults(func=ListTasksCommand)
-        ListTasksCommand._root_subparsers = root_subparsers
-
-    def __init__(self, args: Namespace):
-        self.args = args
-
-    def run(self) -> None:
-        tasks = list_agentic_tasks(self._root_subparsers) if self._root_subparsers else []
-        if self.args.json:
-            json.dump({"tasks": tasks}, sys.stdout)
-            sys.stdout.write("\n")
-            return
-        width = max((len(t["name"]) for t in tasks), default=0)
-        for entry in tasks:
-            print(f"{entry['name']:<{width}}  {entry['description'] or ''}")
diff --git a/src/diffusers/commands/agentic/video.py b/src/diffusers/commands/agentic/video.py
deleted file mode 100644
index e4dcdc4bb8a2..000000000000
--- a/src/diffusers/commands/agentic/video.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Video-generation subcommands: text-to-video, image-to-video.
-
-There is no AutoPipeline for video, so these commands load via
-``DiffusionPipeline`` and rely on the repo's ``model_index.json`` to pick
-the right pipeline class (CogVideoX, Hunyuan, LTX, Wan, etc.).
-"""
-
-from __future__ import annotations
-
-from argparse import ArgumentParser, Namespace, _SubParsersAction
-
-from diffusers.utils import load_image
-
-from .. import BaseDiffusersCLICommand
-from . import _common
-
-
-def register(subparsers: _SubParsersAction) -> None:
-    Text2VideoCommand.register_subcommand(subparsers)
-    Image2VideoCommand.register_subcommand(subparsers)
-
-
-def _add_video_arguments(parser: ArgumentParser) -> None:
-    parser.add_argument("--num-frames", type=int, default=None, help="Number of frames to generate.")
-    parser.add_argument("--fps", type=int, default=8, help="Frames per second for the output video.")
-
-
-def _build_call_kwargs(args: Namespace, pipeline) -> dict:
-    kwargs: dict = {}
-    if args.prompt is not None:
-        kwargs["prompt"] = args.prompt
-    if args.negative_prompt is not None:
-        kwargs["negative_prompt"] = args.negative_prompt
-    if args.num_inference_steps is not None:
-        kwargs["num_inference_steps"] = args.num_inference_steps
-    if args.guidance_scale is not None:
-        kwargs["guidance_scale"] = args.guidance_scale
-    if args.height is not None:
-        kwargs["height"] = args.height
-    if args.width is not None:
-        kwargs["width"] = args.width
-    if args.num_frames is not None:
-        kwargs["num_frames"] = args.num_frames
-
-    generator = _common.get_generator(args.seed, pipeline.device.type)
-    if generator is not None:
-        kwargs["generator"] = generator
-
-    kwargs.update(_common.parse_pipeline_kwargs(args.pipeline_kwargs))
-    return kwargs
-
-
-def _save_video(frames, args: Namespace, task: str) -> str:
-    from diffusers.utils import export_to_video
-
-    path = _common.default_output_paths(task, 1, args.output, ext="mp4")[0]
-    export_to_video(frames, str(path), fps=args.fps)
-    return str(path)
-
-
-class _BaseVideoCommand(BaseDiffusersCLICommand):
-    task: str = ""
-
-    def __init__(self, args: Namespace):
-        self.args = args
-
-    def run(self) -> None:
-        if _common.maybe_submit_remote(self.args, self.task):
-            return
-        pipeline = _common.load_pipeline(self.args, "DiffusionPipeline")
-        call_kwargs = _build_call_kwargs(self.args, pipeline)
-        self._attach_inputs(call_kwargs)
-
-        result = pipeline(**call_kwargs)
-        frames = result.frames[0] if hasattr(result, "frames") else result[0]
-        out_path = _save_video(frames, self.args, self.task)
-        pushed = _common.push_outputs(self.args, [out_path], self.task)
-
-        _common.format_result(
-            self.args,
-            {
-                "task": self.task,
-                "model": self.args.model,
-                "device": pipeline.device.type,
-                "outputs": [out_path],
-                "pushed": pushed,
-                "fps": self.args.fps,
-                "seed": self.args.seed,
-            },
-        )
-
-    def _attach_inputs(self, call_kwargs: dict) -> None:  # noqa: B027
-        """Hook for subclasses to attach conditioning inputs."""
-
-
-class Text2VideoCommand(_BaseVideoCommand):
-    task = "text-to-video"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "text-to-video",
-            help="Generate a video clip from a text prompt.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _add_video_arguments(parser)
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-        parser.set_defaults(func=Text2VideoCommand)
-
-
-class Image2VideoCommand(_BaseVideoCommand):
-    task = "image-to-video"
-
-    @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "image-to-video",
-            help="Generate a video clip conditioned on an input image.",
-        )
-        _common.add_loading_arguments(parser)
-        _common.add_optimization_arguments(parser)
-        _common.add_generation_arguments(parser)
-        _add_video_arguments(parser)
-        _common.add_remote_arguments(parser)
-        _common.add_output_arguments(parser)
-        parser.add_argument("--image", required=True, help="Path or URL to the conditioning image.")
-        parser.set_defaults(func=Image2VideoCommand)
-
-    def _attach_inputs(self, call_kwargs: dict) -> None:
-        call_kwargs["image"] = load_image(self.args.image)
diff --git a/src/diffusers/commands/custom_blocks.py b/src/diffusers/commands/custom_blocks.py
index 953240c5a2c3..22c38e6256b3 100644
--- a/src/diffusers/commands/custom_blocks.py
+++ b/src/diffusers/commands/custom_blocks.py
@@ -12,94 +12,89 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""
-Usage example:
-    TODO
+"""``diffusers-cli custom_blocks`` — save a custom ``ModularPipelineBlocks`` subclass.
+
+Parses a local ``block.py``, finds a ``ModularPipelineBlocks`` subclass,
+dynamically imports it, and calls ``save_pretrained`` in the current
+working directory so the result can be pushed to the Hub and consumed by
+``diffusers-cli inference``.
 """
 
+from __future__ import annotations
+
 import ast
 import importlib.util
 import os
-from argparse import ArgumentParser, Namespace
+from argparse import ArgumentParser, Namespace, _SubParsersAction
 from pathlib import Path
 
 from ..utils import logging
 from . import BaseDiffusersCLICommand
 
 
-EXPECTED_PARENT_CLASSES = ["ModularPipelineBlocks"]
-CONFIG = "config.json"
-
-
-def conversion_command_factory(args: Namespace):
-    return CustomBlocksCommand(args.block_module_name, args.block_class_name)
+_EXPECTED_BASE_CLASSES = ("ModularPipelineBlocks",)
 
 
 class CustomBlocksCommand(BaseDiffusersCLICommand):
+    task = "custom_blocks"
+
     @staticmethod
-    def register_subcommand(parser: ArgumentParser):
-        conversion_parser = parser.add_parser("custom_blocks")
-        conversion_parser.add_argument(
-            "--block_module_name",
-            type=str,
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "custom_blocks",
+            help="Save a custom ModularPipelineBlocks subclass via save_pretrained.",
+        )
+        parser.add_argument(
+            "--block-module-name",
             default="block.py",
-            help="Module filename in which the custom block will be implemented.",
+            help="Module filename in which the custom block is implemented (default: block.py).",
         )
-        conversion_parser.add_argument(
-            "--block_class_name",
-            type=str,
+        parser.add_argument(
+            "--block-class-name",
             default=None,
-            help="Name of the custom block. If provided None, we will try to infer it.",
+            help="Name of the custom block class. If None, the first ModularPipelineBlocks subclass found is used.",
         )
-        conversion_parser.set_defaults(func=conversion_command_factory)
+        parser.set_defaults(func=CustomBlocksCommand)
 
-    def __init__(self, block_module_name: str = "block.py", block_class_name: str = None):
+    def __init__(self, args: Namespace):
         self.logger = logging.get_logger("diffusers-cli/custom_blocks")
-        self.block_module_name = Path(block_module_name)
-        self.block_class_name = block_class_name
+        self.block_module_name = Path(args.block_module_name)
+        self.block_class_name = args.block_class_name
+
+    def run(self) -> None:
+        candidates = self._get_class_names(self.block_module_name)
+        classes_found = list(dict.fromkeys(cls for cls, _ in candidates))  # de-dupe, keep file order
 
-    def run(self):
-        # determine the block to be saved.
-        out = self._get_class_names(self.block_module_name)
-        classes_found = list({cls for cls, _ in out})
+        if not candidates:
+            raise ValueError(
+                f"No ModularPipelineBlocks subclass found in {self.block_module_name}. "
+                "Ensure your block class inherits from `ModularPipelineBlocks` directly."
+            )
 
         if self.block_class_name is not None:
-            child_class, parent_class = self._choose_block(out, self.block_class_name)
-            if child_class is None and parent_class is None:
+            child_class = next((cls for cls, _ in candidates if cls == self.block_class_name), None)
+            if child_class is None:
                 raise ValueError(
-                    "`block_class_name` could not be retrieved. Available classes from "
-                    f"{self.block_module_name}:\n{classes_found}"
+                    f"--block-class-name {self.block_class_name!r} not found in "
+                    f"{self.block_module_name}. Available: {classes_found}"
                 )
         else:
             self.logger.info(
-                f"Found classes: {classes_found} will be using {classes_found[0]}. "
-                "If this needs to be changed, re-run the command specifying `block_class_name`."
+                f"Found classes: {classes_found} — using {classes_found[0]}. "
+                "Re-run with --block-class-name to override."
             )
-            child_class, parent_class = out[0][0], out[0][1]
+            child_class, _ = candidates[0]
 
-        # dynamically get the custom block and initialize it to call `save_pretrained` in the current directory.
-        # the user is responsible for running it, so I guess that is safe?
         module_name = f"__dynamic__{self.block_module_name.stem}"
         spec = importlib.util.spec_from_file_location(module_name, str(self.block_module_name))
         module = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(module)
         getattr(module, child_class)().save_pretrained(os.getcwd())
 
-        # or, we could create it manually.
-        # automap = self._create_automap(parent_class=parent_class, child_class=child_class)
-        # with open(CONFIG, "w") as f:
-        #     json.dump(automap, f)
-
-    def _choose_block(self, candidates, chosen=None):
-        for cls, base in candidates:
-            if cls == chosen:
-                return cls, base
-        return None, None
-
-    def _get_class_names(self, file_path):
+    def _get_class_names(self, file_path: Path) -> list[tuple[str, str]]:
         source = file_path.read_text(encoding="utf-8")
         try:
-            tree = ast.parse(source, filename=file_path)
+            tree = ast.parse(source, filename=str(file_path))
         except SyntaxError as e:
             raise ValueError(f"Could not parse {file_path!r}: {e}") from e
 
@@ -107,26 +102,17 @@ def _get_class_names(self, file_path):
         for node in tree.body:
             if not isinstance(node, ast.ClassDef):
                 continue
-
-            # extract all base names for this class
             base_names = [bname for b in node.bases if (bname := self._get_base_name(b)) is not None]
-
-            # for each allowed base that appears in the class's bases, emit a tuple
-            for allowed in EXPECTED_PARENT_CLASSES:
+            for allowed in _EXPECTED_BASE_CLASSES:
                 if allowed in base_names:
                     results.append((node.name, allowed))
-
         return results
 
-    def _get_base_name(self, node: ast.expr):
+    @staticmethod
+    def _get_base_name(node: ast.expr) -> str | None:
         if isinstance(node, ast.Name):
             return node.id
-        elif isinstance(node, ast.Attribute):
-            val = self._get_base_name(node.value)
+        if isinstance(node, ast.Attribute):
+            val = CustomBlocksCommand._get_base_name(node.value)
             return f"{val}.{node.attr}" if val else node.attr
         return None
-
-    def _create_automap(self, parent_class, child_class):
-        module = str(self.block_module_name).replace(".py", "").rsplit(".", 1)[-1]
-        auto_map = {f"{parent_class}": f"{module}.{child_class}"}
-        return {"auto_map": auto_map}
diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index 95b8dd5f3938..ceb806af1a3e 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -15,10 +15,10 @@
 
 from argparse import ArgumentParser
 
-from .agentic import register_agentic_commands
 from .custom_blocks import CustomBlocksCommand
 from .env import EnvironmentCommand
 from .fp16_safetensors import FP16SafetensorsCommand
+from .inference import InferenceCommand
 
 
 def main():
@@ -29,7 +29,7 @@ def main():
     EnvironmentCommand.register_subcommand(commands_parser)
     FP16SafetensorsCommand.register_subcommand(commands_parser)
     CustomBlocksCommand.register_subcommand(commands_parser)
-    register_agentic_commands(commands_parser)
+    InferenceCommand.register_subcommand(commands_parser)
 
     # Let's go
     args = parser.parse_args()
diff --git a/src/diffusers/commands/inference.py b/src/diffusers/commands/inference.py
new file mode 100644
index 000000000000..614994c07016
--- /dev/null
+++ b/src/diffusers/commands/inference.py
@@ -0,0 +1,846 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""``diffusers-cli inference`` — single agentic entry point.
+
+Runs any diffusers pipeline (standard or modular) by forwarding
+``--pipeline-kwargs`` verbatim, saves the output by sniffing its runtime
+type, and can submit the same call to HF Jobs via ``--remote`` (with the
+model repo volume-mounted and the results downloaded back).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+from pathlib import Path
+from typing import Any, Optional
+
+from diffusers.utils import load_image
+
+from . import BaseDiffusersCLICommand
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+DEFAULT_OUTPUT_DIR = "outputs"
+DTYPE_CHOICES = ("auto", "float16", "fp16", "bfloat16", "bf16", "float32", "fp32")
+CPU_OFFLOAD_CHOICES = ("model", "group")
+ATTENTION_BACKEND_CHOICES = (
+    "default",
+    "flash_hub",
+    "flash_varlen_hub",
+    "flash_4_hub",
+    "sage_hub",
+)
+
+_MODULAR_INDEX = "modular_model_index.json"
+
+# Keys whose string value should be resolved via ``diffusers.utils.load_image``
+# before being passed to the pipeline call.
+_IMAGE_INPUT_KEYS = (
+    "image",
+    "mask_image",
+    "control_image",
+    "ip_adapter_image",
+    "image_2",
+)
+
+# Source for the diffusers install used by --remote jobs. While iterating on a
+# feature branch, point at the branch URL; once merged, switch back to a release
+# pin. ``--dependencies "diffusers @ git+..."`` on the local command appends
+# additional dependencies but does not replace this default install.
+DIFFUSERS_SOURCE = "diffusers @ git+https://github.com/huggingface/diffusers@diffuser-cli-for-agent"
+_DEFAULT_REMOTE_DEPS = (
+    DIFFUSERS_SOURCE,
+    "accelerate",
+    "transformers",
+    "safetensors",
+    "torch==2.10.*",
+    "torchvision",
+)
+
+# Entry point for ``uv run`` inside the container. ``uv run`` accepts a file
+# path, URL, or command; passing the installed console script name makes UV
+# install the deps above (which register the entry point) and exec the CLI.
+_UV_RUNNER_SCRIPT = "diffusers-cli"
+
+RUN_ID_ENV = "DIFFUSERS_CLI_RUN_ID"
+
+# Namespace keys that control *how* a remote job runs locally, not what runs
+# inside the container. They are stripped when forwarding argv to the container.
+HF_JOBS_KEYS = frozenset(
+    {"remote", "flavor", "timeout", "dependencies", "namespace", "no_wait", "poll_interval", "func"}
+)
+
+
+# ---------------------------------------------------------------------------
+# Argparse helpers
+# ---------------------------------------------------------------------------
+
+
+def _add_loading_arguments(parser: ArgumentParser) -> None:
+    parser.add_argument("--model", "-m", required=True, help="Model id on the Hugging Face Hub or local path.")
+    parser.add_argument("--device", default=None, help="Device to run on (e.g. cpu, cuda, cuda:0, mps).")
+    parser.add_argument("--dtype", default="auto", choices=DTYPE_CHOICES, help="Torch dtype for pipeline weights.")
+    parser.add_argument("--variant", default=None, help='Optional weight variant (e.g. "fp16").')
+    parser.add_argument("--revision", default=None, help="Model revision (branch, tag, or commit SHA).")
+    parser.add_argument("--token", default=None, help="Hugging Face token for gated/private models.")
+    parser.add_argument("--trust-remote-code", action="store_true", help="Allow custom code from the Hub.")
+
+
+def _add_optimization_arguments(parser: ArgumentParser) -> None:
+    parser.add_argument(
+        "--cpu-offload",
+        choices=CPU_OFFLOAD_CHOICES,
+        default=None,
+        help=(
+            "Offload pipeline components to CPU during inference. "
+            "'model' uses enable_model_cpu_offload, "
+            "'group' uses pipeline.enable_group_offload(leaf_level, use_stream=True)."
+        ),
+    )
+    parser.add_argument(
+        "--attention-backend",
+        choices=ATTENTION_BACKEND_CHOICES,
+        default="default",
+        help=(
+            "Override the attention backend on the transformer/UNet. "
+            "Only Hub-hosted kernels are exposed — they auto-download on first use."
+        ),
+    )
+    parser.add_argument("--vae-tiling", action="store_true", help="Enable VAE tiling (lower peak VRAM).")
+    parser.add_argument("--vae-slicing", action="store_true", help="Enable VAE slicing (lower peak VRAM).")
+    parser.add_argument(
+        "--context-parallel",
+        action="store_true",
+        help=(
+            "Enable Ulysses-style context parallelism (ulysses_anything mode). "
+            "Requires launching the CLI under torchrun with ≥2 GPUs."
+        ),
+    )
+
+
+def _add_output_arguments(parser: ArgumentParser) -> None:
+    parser.add_argument(
+        "--output",
+        "-o",
+        default=None,
+        help="Output file or directory. Defaults to ./outputs/<task>-<index>.<ext>.",
+    )
+    parser.add_argument(
+        "--push-to",
+        default=None,
+        help=(
+            "Upload the generated files to this HF bucket id after saving (created if missing). "
+            "When --remote is set, defaults to <user>/jobs-artifacts."
+        ),
+    )
+    parser.add_argument("--json", action="store_true", help="Emit a machine-readable JSON summary on stdout.")
+
+
+def _add_remote_arguments(parser: ArgumentParser) -> None:
+    parser.add_argument(
+        "--remote",
+        action="store_true",
+        help="Submit this command to Hugging Face Jobs instead of running locally.",
+    )
+    parser.add_argument(
+        "--flavor",
+        default="a10g-small",
+        help="HF Jobs hardware flavor for --remote (e.g. a10g-small, a100-large, cpu-basic).",
+    )
+    parser.add_argument("--timeout", default=None, help="HF Jobs timeout for --remote (e.g. 30m, 2h).")
+    parser.add_argument(
+        "--dependencies",
+        action="append",
+        default=None,
+        help="Extra pip dependencies for the --remote job. Repeat to add multiple.",
+    )
+    parser.add_argument(
+        "--namespace",
+        default=None,
+        help="HF namespace to run the --remote job under (defaults to the current user).",
+    )
+    parser.add_argument(
+        "--no-wait",
+        action="store_true",
+        help="Don't wait for the --remote job to finish — submit and print the job id.",
+    )
+    parser.add_argument(
+        "--poll-interval",
+        type=float,
+        default=5.0,
+        help="Seconds between job-status polls when waiting for --remote completion.",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Pipeline loading + optimization
+# ---------------------------------------------------------------------------
+
+
+def _resolve_dtype(name: Optional[str]):
+    if name in (None, "auto"):
+        return "auto"
+    import torch
+    # DTYPE_CHOICES accepts both spellings ("fp16" and "float16"); normalize to the torch attribute name.
+    canonical = {"fp32": "float32", "fp16": "float16", "bf16": "bfloat16"}.get(name, name)
+    if canonical not in ("float32", "float16", "bfloat16"):
+        raise ValueError(f"Unknown dtype: {name}")
+    return getattr(torch, canonical)
+
+
+def _resolve_device(name: Optional[str]) -> str:
+    if name:
+        return name
+    import torch
+
+    if torch.cuda.is_available():
+        return "cuda"
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
+
+
+def _map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
+    """Move the pipeline to ``device``, or hand off to the chosen CPU-offload helper."""
+    if args.cpu_offload is None:
+        return pipeline.to(device)
+    if args.cpu_offload == "model":
+        pipeline.enable_model_cpu_offload(device=device)
+    elif args.cpu_offload == "group":
+        import torch
+
+        pipeline.enable_group_offload(
+            onload_device=torch.device(device),
+            offload_type="leaf_level",
+            use_stream=device.startswith("cuda"),
+        )
+    return pipeline
+
+
+def _set_attention_backend(pipeline: Any, backend: str) -> None:
+    for attr in ("transformer", "unet"):
+        module = getattr(pipeline, attr, None)
+        if module is not None and hasattr(module, "set_attention_backend"):
+            try:
+                module.set_attention_backend(backend)
+            except (ValueError, ImportError, RuntimeError) as e:
+                print(f"warning: could not set attention backend {backend!r} on {attr}: {e}", file=sys.stderr)
+            return  # best-effort: only the first component exposing set_attention_backend is tried
+
+
+def _enable_context_parallel(pipeline: Any) -> None:
+    import torch
+
+    if not torch.distributed.is_available() or not torch.distributed.is_initialized():
+        raise SystemExit(
+            "--context-parallel requires torch.distributed to be initialized. "
+            "Launch the CLI under torchrun, e.g.: "
+            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli inference ...`."
+        )
+
+    from diffusers import ContextParallelConfig
+
+    cfg = ContextParallelConfig(
+        ulysses_degree=torch.distributed.get_world_size(),
+        ring_degree=1,
+        ulysses_anything=True,
+    )
+    for attr in ("transformer", "unet"):
+        module = getattr(pipeline, attr, None)
+        if module is not None and hasattr(module, "enable_parallelism"):
+            module.enable_parallelism(config=cfg)
+            return
+
+
+def _apply_optimizations(pipeline: Any, args: Namespace) -> None:
+    """Apply VAE tiling/slicing, attention backend, and context-parallel toggles."""
+    if args.vae_tiling and hasattr(pipeline, "enable_vae_tiling"):
+        pipeline.enable_vae_tiling()
+    if args.vae_slicing and hasattr(pipeline, "enable_vae_slicing"):
+        pipeline.enable_vae_slicing()
+    if args.attention_backend != "default":
+        _set_attention_backend(pipeline, args.attention_backend)
+    if args.context_parallel:
+        _enable_context_parallel(pipeline)
+
+
+def _from_pretrained_kwargs(args: Namespace) -> dict[str, Any]:
+    dtype = _resolve_dtype(args.dtype)
+    kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
+    if dtype != "auto":
+        kwargs["torch_dtype"] = dtype
+    if args.variant:
+        kwargs["variant"] = args.variant
+    if args.revision:
+        kwargs["revision"] = args.revision
+    if args.token:
+        kwargs["token"] = args.token
+    return kwargs
+
+
+def _load_pipeline(args: Namespace, modular: bool) -> Any:
+    import diffusers
+
+    pipeline_cls = diffusers.ModularPipeline if modular else diffusers.DiffusionPipeline
+    pipeline = pipeline_cls.from_pretrained(args.model, **_from_pretrained_kwargs(args))
+    if not hasattr(pipeline, "to"):
+        return pipeline
+    pipeline = _map_to_device(pipeline, args, _resolve_device(args.device))
+    _apply_optimizations(pipeline, args)
+    return pipeline
+
+
+# ---------------------------------------------------------------------------
+# Modular pipeline detection + introspection
+# ---------------------------------------------------------------------------
+
+
+def _is_modular_repo(args: Namespace) -> bool:
+    local = Path(args.model)
+    if local.exists():
+        return (local / _MODULAR_INDEX).exists()
+
+    from huggingface_hub import HfApi
+    from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError
+
+    try:
+        files = set(HfApi(token=args.token).list_repo_files(args.model, revision=args.revision))
+    except (RepositoryNotFoundError, HfHubHTTPError):
+        return False
+    return _MODULAR_INDEX in files
+
+
+def _describe_modular(args: Namespace) -> None:
+    """Load just the block definitions and print the input schema."""
+    from diffusers import ModularPipelineBlocks
+
+    kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
+    if args.revision:
+        kwargs["revision"] = args.revision
+    if args.token:
+        kwargs["token"] = args.token
+
+    blocks = ModularPipelineBlocks.from_pretrained(args.model, **kwargs)
+    schema = [
+        {
+            "name": p.name,
+            "type_hint": str(p.type_hint) if p.type_hint is not None else None,
+            "default": p.default,
+            "required": p.required,
+            "description": p.description,
+        }
+        for p in blocks.inputs
+    ]
+    payload = {
+        "task": "inference-describe",
+        "model": args.model,
+        "blocks_class": type(blocks).__name__,
+        "inputs": schema,
+    }
+
+    if args.json:
+        json.dump(payload, sys.stdout, default=str)
+        sys.stdout.write("\n")
+        return
+
+    print(f"{type(blocks).__name__} ({args.model}) inputs:")
+    for entry in schema:
+        tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
+        print(f"  {entry['name']}  ({tag})")
+        if entry["type_hint"]:
+            print(f"    type: {entry['type_hint']}")
+        if entry["description"]:
+            print(f"    desc: {entry['description']}")
+
+
+# ---------------------------------------------------------------------------
+# Pipeline call helpers
+# ---------------------------------------------------------------------------
+
+
+def _parse_pipeline_kwargs(raw: Optional[str]) -> dict[str, Any]:
+    if not raw:
+        return {}
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError as e:
+        raise SystemExit(f"--pipeline-kwargs must be valid JSON: {e}") from e
+    if not isinstance(parsed, dict):
+        raise SystemExit("--pipeline-kwargs must decode to a JSON object.")
+    return parsed
+
+
+def _resolve_image_inputs(call_kwargs: dict[str, Any]) -> None:
+    """Replace string paths/URLs at known image-input keys with PIL images."""
+    for key in _IMAGE_INPUT_KEYS:
+        value = call_kwargs.get(key)
+        if isinstance(value, str):
+            call_kwargs[key] = load_image(value)
+
+
+def _get_generator(seed: Optional[int], device: str):
+    if seed is None:
+        return None
+    import torch
+
+    generator_device = "cpu" if device == "mps" else device
+    return torch.Generator(device=generator_device).manual_seed(seed)
+
+
+def _result_to_savable(result: Any) -> Any:
+    """Unwrap a pipeline-output object into the raw payload the saver can sniff."""
+    if hasattr(result, "images"):
+        return result.images
+    if hasattr(result, "frames"):
+        frames = result.frames
+        return frames[0] if isinstance(frames, (list, tuple)) and frames else frames
+    if hasattr(result, "audios"):
+        return result.audios
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Output saving (auto-sniff by type)
+# ---------------------------------------------------------------------------
+
+
+def _default_output_paths(task: str, num: int, explicit: Optional[str], ext: str) -> list[Path]:
+    """Return ``num`` output paths for ``task``; a suffix-less file target gets ``.{ext}`` appended."""
+    if explicit is None:
+        base = Path(DEFAULT_OUTPUT_DIR)
+        base.mkdir(parents=True, exist_ok=True)
+        return [base / f"{task}-{i}.{ext}" for i in range(num)]
+    p = Path(explicit)
+    # Accept "/" as a directory marker on every platform, not only where it equals ``os.sep``.
+    if explicit.endswith(("/", os.sep)) or p.is_dir():
+        p.mkdir(parents=True, exist_ok=True)
+        return [p / f"{task}-{i}.{ext}" for i in range(num)]
+    p.parent.mkdir(parents=True, exist_ok=True)
+    stem, suffix = p.stem, p.suffix or f".{ext}"
+    if num == 1:
+        return [p.with_name(f"{stem}{suffix}")]
+    return [p.with_name(f"{stem}-{i}{suffix}") for i in range(num)]
+
+
+def _as_pil_list(value: Any):
+    try:
+        from PIL.Image import Image as PILImage
+    except ImportError:
+        return None
+    if isinstance(value, PILImage):
+        return [value]
+    if isinstance(value, (list, tuple)) and value and all(isinstance(v, PILImage) for v in value):
+        return list(value)
+    return None
+
+
+def _as_frame_sequence(value: Any):
+    try:
+        from PIL.Image import Image as PILImage
+    except ImportError:
+        PILImage = None  # type: ignore[assignment]
+
+    if isinstance(value, (list, tuple)) and len(value) >= 2:
+        first = value[0]
+        if PILImage is not None and isinstance(first, PILImage):
+            return list(value)
+        try:
+            import numpy as np
+
+            if isinstance(first, np.ndarray):
+                return list(value)
+        except ImportError:
+            pass
+    return None
+
+
+def _as_audio_arrays(value: Any):
+    try:
+        import numpy as np
+    except ImportError:
+        return None
+    if isinstance(value, np.ndarray) and value.ndim <= 2:
+        return [value]
+    if isinstance(value, (list, tuple)) and value and all(isinstance(v, np.ndarray) for v in value):
+        return list(value)
+    return None
+
+
+def _save_audio_arrays(audios, sampling_rate: int, args: Namespace, task: str) -> list[str]:
+    """Write each numpy audio array to a 16-bit PCM WAV at ``sampling_rate`` Hz."""
+    import numpy as np
+    from scipy.io.wavfile import write as wavfile_write
+
+    paths = _default_output_paths(task, len(audios), args.output, ext="wav")
+    saved: list[str] = []
+    for audio, path in zip(audios, paths):
+        data = np.asarray(audio)
+        if data.dtype.kind == "f":
+            data = (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
+        if data.ndim > 1 and data.shape[0] < data.shape[-1]:
+            data = data.T  # (channels, samples) → (samples, channels) for scipy.
+        wavfile_write(str(path), sampling_rate, data)
+        saved.append(str(path))
+    return saved
+
+
+def _save_auto(value: Any, args: Namespace, task: str) -> list[str]:
+    """Save ``value`` by sniffing its runtime type."""
+    pil_images = _as_pil_list(value)
+    if pil_images is not None:
+        paths = _default_output_paths(task, len(pil_images), args.output, ext="png")
+        for img, path in zip(pil_images, paths):
+            img.save(path)
+        return [str(p) for p in paths]
+
+    frames = _as_frame_sequence(value)
+    if frames is not None:
+        from diffusers.utils import export_to_video
+
+        path = _default_output_paths(task, 1, args.output, ext="mp4")[0]
+        export_to_video(frames, str(path), fps=getattr(args, "fps", 8))
+        return [str(path)]
+
+    audios = _as_audio_arrays(value)
+    if audios is not None:
+        return _save_audio_arrays(audios, getattr(args, "sampling_rate", None) or 16000, args, task)
+
+    path = _default_output_paths(task, 1, args.output, ext="json")[0]
+    Path(path).write_text(json.dumps(value, default=str, indent=2))
+    return [str(path)]
+
+
+# ---------------------------------------------------------------------------
+# Hub bucket upload (--push-to)
+# ---------------------------------------------------------------------------
+
+
+def _push_outputs(args: Namespace, saved_paths: list[str], task: str) -> Optional[dict[str, Any]]:
+    """Upload ``saved_paths`` to the ``--push-to`` bucket. Returns a summary or None."""
+    if not args.push_to:
+        return None
+
+    from huggingface_hub import HfApi
+
+    api = HfApi(token=args.token)
+    api.create_bucket(args.push_to, exist_ok=True)
+
+    prefix = os.environ.get(RUN_ID_ENV) or task
+    add = [(local, f"{prefix}/{Path(local).name}") for local in saved_paths]
+    api.batch_bucket_files(args.push_to, add=add)
+
+    uploaded = [f"hf://buckets/{args.push_to}/{dest}" for _, dest in add]
+    return {"bucket_id": args.push_to, "uploaded": uploaded}
+
+
+# ---------------------------------------------------------------------------
+# Remote submission (HF Jobs)
+# ---------------------------------------------------------------------------
+
+
+def _build_task_kwargs(args: Namespace) -> dict[str, Any]:
+    """Pick out the kwargs the container should invoke the task with."""
+    out: dict[str, Any] = {}
+    for key, value in vars(args).items():
+        if key in HF_JOBS_KEYS or value is None or value is False:
+            continue
+        out[key] = value
+    return out
+
+
+def _kwargs_to_argv(task: str, task_kwargs: dict[str, Any]) -> list[str]:
+    """Render ``task_kwargs`` as the argv list the container's argparse will see."""
+    argv: list[str] = [task]
+    for key, value in task_kwargs.items():
+        flag = "--" + key.replace("_", "-")
+        if value is True:
+            argv.append(flag)
+        elif isinstance(value, list):
+            for item in value:
+                argv.extend([flag, str(item)])
+        else:
+            argv.extend([flag, str(value)])
+    return argv
+
+
+def _maybe_submit_remote(args: Namespace, task: str) -> bool:
+    """If ``--remote`` was set, submit this invocation to HF Jobs and return True."""
+    if not args.remote:
+        return False
+
+    import uuid
+
+    from huggingface_hub import HfApi, get_token, run_uv_job
+
+    try:
+        from huggingface_hub import Volume
+    except ImportError:
+        Volume = None
+
+    hf_token = args.token or get_token()
+    api = HfApi(token=hf_token)
+
+    if not args.push_to:
+        args.push_to = f"{api.whoami()['name']}/jobs-artifacts"
+
+    run_id = uuid.uuid4().hex[:12]
+
+    task_kwargs = _build_task_kwargs(args)
+    dependencies = list(_DEFAULT_REMOTE_DEPS)
+    if args.dependencies:
+        dependencies.extend(args.dependencies)
+
+    secrets = {"HF_TOKEN": hf_token} if hf_token else None
+    env = {
+        RUN_ID_ENV: run_id,
+        "HF_ENABLE_PARALLEL_LOADING": "1",  # thread-pool the safetensors load step
+    }
+
+    # Mount the model repo into the job's filesystem so the container reads it
+    # from local disk instead of downloading. Requires huggingface_hub >= 1.16.
+    volumes = None
+    if Volume is not None and not Path(args.model).exists():
+        mount_path = "/model"
+        volumes = [Volume(type="model", source=args.model, mount_path=mount_path)]
+        task_kwargs["model"] = mount_path
+
+    run_uv_job_kwargs: dict[str, Any] = {
+        "script": _UV_RUNNER_SCRIPT,
+        "script_args": _kwargs_to_argv(task, task_kwargs),
+        "dependencies": dependencies,
+        "flavor": args.flavor,
+        "timeout": args.timeout,
+        "namespace": args.namespace,
+        "secrets": secrets,
+        "env": env,
+        "token": hf_token,
+    }
+    if volumes is not None:
+        run_uv_job_kwargs["volumes"] = volumes
+
+    job = run_uv_job(**run_uv_job_kwargs)
+
+    payload: dict[str, Any] = {
+        "task": "remote-submit",
+        "job_id": getattr(job, "id", None),
+        "job_status": str(getattr(job, "status", "")),
+        "flavor": args.flavor,
+        "push_to": args.push_to,
+        "run_id": run_id,
+    }
+
+    if args.no_wait:
+        _format_result(args, payload)
+        return True
+
+    print(
+        f"[diffusers-cli] submitted job {job.id} (run_id={run_id}); "
+        f"watch at {getattr(job, 'url', 'https://huggingface.co/jobs')}",
+        file=sys.stderr,
+        flush=True,
+    )
+    final_status = _wait_for_job(api, job.id, args.namespace, args.poll_interval)
+    payload["job_status"] = final_status
+    payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
+    _format_result(args, payload)
+    return True
+
+
+def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
+    """Stream container logs to stderr until the job terminates; return the final stage."""
+    fetch = getattr(api, "fetch_job_logs", None)
+    if fetch is not None:
+        try:
+            for line in fetch(job_id=job_id, namespace=namespace, follow=True):
+                print(line, file=sys.stderr, flush=True)
+        except TypeError:
+            return _poll_for_job(api, job_id, namespace, poll_interval)
+        info = api.inspect_job(job_id=job_id, namespace=namespace)
+        return str(info.status.stage) if info.status else "UNKNOWN"
+    return _poll_for_job(api, job_id, namespace, poll_interval)
+
+
+def _poll_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
+    """Heartbeat-style fallback when ``fetch_job_logs`` isn't available."""
+    import time
+
+    terminal = {"COMPLETED", "CANCELED", "ERROR", "DELETED"}
+    last_stage: Optional[str] = None
+    while True:
+        info = api.inspect_job(job_id=job_id, namespace=namespace)
+        stage = str(info.status.stage) if info.status else "UNKNOWN"
+        if stage != last_stage:
+            if last_stage is not None:
+                print("", file=sys.stderr, flush=True)
+            print(f"[diffusers-cli] job {job_id}: {stage}", file=sys.stderr, flush=True)
+            last_stage = stage
+        else:
+            print(".", end="", file=sys.stderr, flush=True)
+        if stage in terminal:
+            print("", file=sys.stderr, flush=True)
+            return stage
+        time.sleep(poll_interval)
+
+
+def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optional[str]) -> list[str]:
+    """Download every file under ``<run_id>/`` from ``bucket_id`` into a local directory."""
+    from huggingface_hub import BucketFile
+
+    local_dir = Path(output) if output else Path(DEFAULT_OUTPUT_DIR)
+    local_dir.mkdir(parents=True, exist_ok=True)
+
+    pairs: list[tuple[Any, Path]] = []
+    for entry in api.list_bucket_tree(bucket_id, prefix=f"{run_id}/", recursive=True):
+        if not isinstance(entry, BucketFile):
+            continue
+        pairs.append((entry, local_dir / Path(entry.path).name))
+
+    if not pairs:
+        return []
+    api.download_bucket_files(bucket_id, files=pairs)
+    return [str(local) for _, local in pairs]
+
+
+# ---------------------------------------------------------------------------
+# Result formatting
+# ---------------------------------------------------------------------------
+
+
+def _format_result(args: Namespace, payload: dict[str, Any]) -> None:
+    """Print either a human-friendly summary or JSON, depending on --json."""
+    if args.json:
+        json.dump(payload, sys.stdout, default=str)
+        sys.stdout.write("\n")
+        return
+
+    outputs = payload.get("outputs", [])
+    if outputs:
+        for path in outputs:
+            print(path)
+    else:
+        print(payload)
+
+
+# ---------------------------------------------------------------------------
+# The one and only agentic subcommand
+# ---------------------------------------------------------------------------
+
+
+class InferenceCommand(BaseDiffusersCLICommand):
+    task = "inference"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "inference",
+            help="Run any diffusers pipeline (standard or modular) by forwarding --pipeline-kwargs verbatim.",
+        )
+        _add_loading_arguments(parser)
+        _add_optimization_arguments(parser)
+        parser.add_argument(
+            "--pipeline-kwargs",
+            default=None,
+            help=(
+                "JSON object of kwargs passed to the pipeline call. String values at known "
+                f"image-input keys ({', '.join(_IMAGE_INPUT_KEYS)}) are auto-loaded as PIL images."
+            ),
+        )
+        parser.add_argument(
+            "--output-key",
+            default=None,
+            help="For modular pipelines: name of the intermediate to extract (passed as `output=` to the call).",
+        )
+        parser.add_argument(
+            "--describe",
+            action="store_true",
+            help=(
+                "For modular pipelines: print the input schema from block definitions and exit. "
+                "Weights are NOT downloaded. Errors on standard (non-modular) pipelines."
+            ),
+        )
+        parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
+        parser.add_argument(
+            "--fps",
+            type=int,
+            default=8,
+            help="FPS used when the output happens to be a frame sequence.",
+        )
+        parser.add_argument(
+            "--sampling-rate",
+            type=int,
+            default=None,
+            help="Sample rate used when the output happens to be an audio array.",
+        )
+        _add_remote_arguments(parser)
+        _add_output_arguments(parser)
+        parser.set_defaults(func=InferenceCommand)
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        is_modular = _is_modular_repo(self.args)
+
+        if self.args.describe:
+            if not is_modular:
+                raise SystemExit(
+                    "--describe only works for modular pipeline repos "
+                    "(those that ship modular_model_index.json)."
+                )
+            _describe_modular(self.args)
+            return
+
+        if _maybe_submit_remote(self.args, self.task):
+            return
+
+        pipeline = _load_pipeline(self.args, modular=is_modular)
+
+        call_kwargs = _parse_pipeline_kwargs(self.args.pipeline_kwargs)
+        _resolve_image_inputs(call_kwargs)
+
+        if self.args.output_key is not None:
+            call_kwargs["output"] = self.args.output_key
+
+        generator = _get_generator(self.args.seed, getattr(pipeline, "device", None) and pipeline.device.type or "cpu")
+        if generator is not None:
+            call_kwargs["generator"] = generator
+
+        result = pipeline(**call_kwargs)
+        savable = result if is_modular else _result_to_savable(result)
+        saved = _save_auto(savable, self.args, self.task)
+        pushed = _push_outputs(self.args, saved, self.task)
+
+        _format_result(
+            self.args,
+            {
+                "task": self.task,
+                "model": self.args.model,
+                "device": pipeline.device.type if hasattr(pipeline, "device") else None,
+                "pipeline_class": type(pipeline).__name__,
+                "modular": is_modular,
+                "outputs": saved,
+                "pushed": pushed,
+                "seed": self.args.seed,
+                "output_key": self.args.output_key,
+            },
+        )
+
+

From accfa06e69ed05e229c198b402281a83769ee7dd Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Wed, 3 Jun 2026 22:39:23 +0530
Subject: [PATCH 06/30] inference: bump copyright year and tidy formatting

---
 src/diffusers/commands/inference.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/commands/inference.py b/src/diffusers/commands/inference.py
index 614994c07016..178c6e52115e 100644
--- a/src/diffusers/commands/inference.py
+++ b/src/diffusers/commands/inference.py
@@ -1,4 +1,4 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
+# Copyright 2026 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -802,8 +802,7 @@ def run(self) -> None:
         if self.args.describe:
             if not is_modular:
                 raise SystemExit(
-                    "--describe only works for modular pipeline repos "
-                    "(those that ship modular_model_index.json)."
+                    "--describe only works for modular pipeline repos " "(those that ship modular_model_index.json)."
                 )
             _describe_modular(self.args)
             return
@@ -842,5 +841,3 @@ def run(self) -> None:
                 "output_key": self.args.output_key,
             },
         )
-
-

From 4d4d9e8ecda362ff4ebe1a3fcb4388714783cfe9 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Wed, 3 Jun 2026 22:44:20 +0530
Subject: [PATCH 07/30] custom_blocks: register via factory, use underscore flags

---
 src/diffusers/commands/custom_blocks.py | 114 +++++++++++++-----------
 1 file changed, 64 insertions(+), 50 deletions(-)

diff --git a/src/diffusers/commands/custom_blocks.py b/src/diffusers/commands/custom_blocks.py
index 22c38e6256b3..953240c5a2c3 100644
--- a/src/diffusers/commands/custom_blocks.py
+++ b/src/diffusers/commands/custom_blocks.py
@@ -12,89 +12,94 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""``diffusers-cli custom_blocks`` — save a custom ``ModularPipelineBlocks`` subclass.
-
-Parses a local ``block.py``, finds a ``ModularPipelineBlocks`` subclass,
-dynamically imports it, and calls ``save_pretrained`` in the current
-working directory so the result can be pushed to the Hub and consumed by
-``diffusers-cli inference``.
 """
-
-from __future__ import annotations
+Usage example:
+    diffusers-cli custom_blocks --block_module_name block.py --block_class_name MyCustomBlock
+"""
 
 import ast
 import importlib.util
 import os
-from argparse import ArgumentParser, Namespace, _SubParsersAction
+from argparse import ArgumentParser, Namespace
 from pathlib import Path
 
 from ..utils import logging
 from . import BaseDiffusersCLICommand
 
 
-_EXPECTED_BASE_CLASSES = ("ModularPipelineBlocks",)
+EXPECTED_PARENT_CLASSES = ["ModularPipelineBlocks"]
+CONFIG = "config.json"
 
 
-class CustomBlocksCommand(BaseDiffusersCLICommand):
-    task = "custom_blocks"
+def conversion_command_factory(args: Namespace):
+    return CustomBlocksCommand(args.block_module_name, args.block_class_name)
 
+
+class CustomBlocksCommand(BaseDiffusersCLICommand):
     @staticmethod
-    def register_subcommand(subparsers: _SubParsersAction) -> None:
-        parser: ArgumentParser = subparsers.add_parser(
-            "custom_blocks",
-            help="Save a custom ModularPipelineBlocks subclass via save_pretrained.",
-        )
-        parser.add_argument(
-            "--block-module-name",
+    def register_subcommand(parser: ArgumentParser):
+        conversion_parser = parser.add_parser("custom_blocks")
+        conversion_parser.add_argument(
+            "--block_module_name",
+            type=str,
             default="block.py",
-            help="Module filename in which the custom block is implemented (default: block.py).",
+            help="Module filename in which the custom block will be implemented.",
         )
-        parser.add_argument(
-            "--block-class-name",
+        conversion_parser.add_argument(
+            "--block_class_name",
+            type=str,
             default=None,
-            help="Name of the custom block class. If None, the first ModularPipelineBlocks subclass found is used.",
+            help="Name of the custom block. If provided None, we will try to infer it.",
         )
-        parser.set_defaults(func=CustomBlocksCommand)
+        conversion_parser.set_defaults(func=conversion_command_factory)
 
-    def __init__(self, args: Namespace):
+    def __init__(self, block_module_name: str = "block.py", block_class_name: str = None):
         self.logger = logging.get_logger("diffusers-cli/custom_blocks")
-        self.block_module_name = Path(args.block_module_name)
-        self.block_class_name = args.block_class_name
-
-    def run(self) -> None:
-        candidates = self._get_class_names(self.block_module_name)
-        classes_found = list({cls for cls, _ in candidates})
+        self.block_module_name = Path(block_module_name)
+        self.block_class_name = block_class_name
 
-        if not candidates:
-            raise ValueError(
-                f"No ModularPipelineBlocks subclass found in {self.block_module_name}. "
-                "Ensure your block class inherits from `ModularPipelineBlocks` directly."
-            )
+    def run(self):
+        # determine the block to be saved.
+        out = self._get_class_names(self.block_module_name)
+        classes_found = list({cls for cls, _ in out})
 
         if self.block_class_name is not None:
-            child_class = next((cls for cls, _ in candidates if cls == self.block_class_name), None)
-            if child_class is None:
+            child_class, parent_class = self._choose_block(out, self.block_class_name)
+            if child_class is None and parent_class is None:
                 raise ValueError(
-                    f"--block-class-name {self.block_class_name!r} not found in "
-                    f"{self.block_module_name}. Available: {classes_found}"
+                    "`block_class_name` could not be retrieved. Available classes from "
+                    f"{self.block_module_name}:\n{classes_found}"
                 )
         else:
             self.logger.info(
-                f"Found classes: {classes_found} — using {classes_found[0]}. "
-                "Re-run with --block-class-name to override."
+                f"Found classes: {classes_found} will be using {classes_found[0]}. "
+                "If this needs to be changed, re-run the command specifying `block_class_name`."
             )
-            child_class, _ = candidates[0]
+            child_class, parent_class = out[0][0], out[0][1]
 
+        # dynamically get the custom block and initialize it to call `save_pretrained` in the current directory.
+        # the user is responsible for running it, so I guess that is safe?
         module_name = f"__dynamic__{self.block_module_name.stem}"
         spec = importlib.util.spec_from_file_location(module_name, str(self.block_module_name))
         module = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(module)
         getattr(module, child_class)().save_pretrained(os.getcwd())
 
-    def _get_class_names(self, file_path: Path) -> list[tuple[str, str]]:
+        # or, we could create it manually.
+        # automap = self._create_automap(parent_class=parent_class, child_class=child_class)
+        # with open(CONFIG, "w") as f:
+        #     json.dump(automap, f)
+
+    def _choose_block(self, candidates, chosen=None):
+        for cls, base in candidates:
+            if cls == chosen:
+                return cls, base
+        return None, None
+
+    def _get_class_names(self, file_path):
         source = file_path.read_text(encoding="utf-8")
         try:
-            tree = ast.parse(source, filename=str(file_path))
+            tree = ast.parse(source, filename=file_path)
         except SyntaxError as e:
             raise ValueError(f"Could not parse {file_path!r}: {e}") from e
 
@@ -102,17 +107,26 @@ def _get_class_names(self, file_path: Path) -> list[tuple[str, str]]:
         for node in tree.body:
             if not isinstance(node, ast.ClassDef):
                 continue
+
+            # extract all base names for this class
             base_names = [bname for b in node.bases if (bname := self._get_base_name(b)) is not None]
-            for allowed in _EXPECTED_BASE_CLASSES:
+
+            # for each allowed base that appears in the class's bases, emit a tuple
+            for allowed in EXPECTED_PARENT_CLASSES:
                 if allowed in base_names:
                     results.append((node.name, allowed))
+
         return results
 
-    @staticmethod
-    def _get_base_name(node: ast.expr) -> str | None:
+    def _get_base_name(self, node: ast.expr):
         if isinstance(node, ast.Name):
             return node.id
-        if isinstance(node, ast.Attribute):
-            val = CustomBlocksCommand._get_base_name(node.value)
+        elif isinstance(node, ast.Attribute):
+            val = self._get_base_name(node.value)
             return f"{val}.{node.attr}" if val else node.attr
         return None
+
+    def _create_automap(self, parent_class, child_class):
+        module = str(self.block_module_name).replace(".py", "").rsplit(".", 1)[-1]
+        auto_map = {f"{parent_class}": f"{module}.{child_class}"}
+        return {"auto_map": auto_map}

From f97aef8f517a0c417e5ed72abc7929b2d39665f0 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Wed, 3 Jun 2026 23:29:13 +0530
Subject: [PATCH 08/30] update

---
 src/diffusers/commands/diffusers_cli.py |   2 +
 src/diffusers/commands/inference.py     | 212 +++++++++++++++++-------
 2 files changed, 156 insertions(+), 58 deletions(-)

diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index ceb806af1a3e..80f449426c54 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -16,6 +16,7 @@
 from argparse import ArgumentParser
 
 from .custom_blocks import CustomBlocksCommand
+from .describe import DescribeCommand
 from .env import EnvironmentCommand
 from .fp16_safetensors import FP16SafetensorsCommand
 from .inference import InferenceCommand
@@ -30,6 +31,7 @@ def main():
     FP16SafetensorsCommand.register_subcommand(commands_parser)
     CustomBlocksCommand.register_subcommand(commands_parser)
     InferenceCommand.register_subcommand(commands_parser)
+    DescribeCommand.register_subcommand(commands_parser)
 
     # Let's go
     args = parser.parse_args()
diff --git a/src/diffusers/commands/inference.py b/src/diffusers/commands/inference.py
index 178c6e52115e..b92f3fcbedd0 100644
--- a/src/diffusers/commands/inference.py
+++ b/src/diffusers/commands/inference.py
@@ -14,10 +14,9 @@
 
 """``diffusers-cli inference`` — single agentic entry point.
 
-Runs any diffusers pipeline (standard or modular) by forwarding
-``--pipeline-kwargs`` verbatim, saves the output by sniffing its runtime
-type, and can submit the same call to HF Jobs via ``--remote`` (with the
-model repo volume-mounted and the results downloaded back).
+Runs any diffusers pipeline (standard or modular) by forwarding ``--pipeline-kwargs`` verbatim, saves the output by
+sniffing its runtime type, and can submit the same call to HF Jobs via ``--remote`` (with the model repo volume-mounted
+and the results downloaded back).
 """
 
 from __future__ import annotations
@@ -49,8 +48,6 @@
     "sage_hub",
 )
 
-_MODULAR_INDEX = "modular_model_index.json"
-
 # Keys whose string value should be resolved via ``diffusers.utils.load_image``
 # before being passed to the pipeline call.
 _IMAGE_INPUT_KEYS = (
@@ -313,55 +310,170 @@ def _load_pipeline(args: Namespace, modular: bool) -> Any:
 # ---------------------------------------------------------------------------
 
 
-def _is_modular_repo(args: Namespace) -> bool:
+def _try_fetch_config(args: Namespace, filename: str) -> Optional[str]:
+    """Try to resolve ``filename`` for ``args.model`` (local path or Hub repo). None if absent."""
     local = Path(args.model)
     if local.exists():
-        return (local / _MODULAR_INDEX).exists()
+        candidate = local / filename
+        return str(candidate) if candidate.exists() else None
 
-    from huggingface_hub import HfApi
-    from huggingface_hub.utils import HfHubHTTPError, RepositoryNotFoundError
+    from huggingface_hub import hf_hub_download
+    from huggingface_hub.utils import EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError
 
     try:
-        files = set(HfApi(token=args.token).list_repo_files(args.model, revision=args.revision))
-    except (RepositoryNotFoundError, HfHubHTTPError):
-        return False
-    return _MODULAR_INDEX in files
+        return hf_hub_download(args.model, filename, revision=args.revision, token=args.token)
+    except (EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError):
+        return None
 
 
-def _describe_modular(args: Namespace) -> None:
-    """Load just the block definitions and print the input schema."""
-    from diffusers import ModularPipelineBlocks
+def _is_modular_repo(args: Namespace) -> bool:
+    """Detect by trying ``DiffusionPipeline.config_name`` first; modular iff that's absent."""
+    from diffusers import DiffusionPipeline
 
-    kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
-    if args.revision:
-        kwargs["revision"] = args.revision
-    if args.token:
-        kwargs["token"] = args.token
+    return _try_fetch_config(args, DiffusionPipeline.config_name) is None
 
-    blocks = ModularPipelineBlocks.from_pretrained(args.model, **kwargs)
-    schema = [
-        {
-            "name": p.name,
-            "type_hint": str(p.type_hint) if p.type_hint is not None else None,
-            "default": p.default,
-            "required": p.required,
-            "description": p.description,
-        }
-        for p in blocks.inputs
-    ]
-    payload = {
-        "task": "inference-describe",
-        "model": args.model,
-        "blocks_class": type(blocks).__name__,
-        "inputs": schema,
-    }
+
+def _describe(args: Namespace) -> None:
+    """Print the pipeline's input schema.
+
+    Tries ``DiffusionPipeline.config_name`` (= ``model_index.json``) first; if present, introspects the declared
+    pipeline class's ``__call__`` signature. Otherwise falls back to ``ModularPipelineBlocks.from_pretrained`` and
+    reads the block-declared ``inputs``. No weights downloaded either way.
+    """
+    import inspect
+
+    import diffusers
+
+    standard_index = _try_fetch_config(args, diffusers.DiffusionPipeline.config_name)
+    if standard_index is not None:
+        with open(standard_index, encoding="utf-8") as f:
+            index = json.load(f)
+        class_name = index.get("_class_name")
+        # A missing/non-string ``_class_name`` must hit the SystemExit below, not a TypeError from getattr().
+        pipeline_cls = getattr(diffusers, class_name, None) if isinstance(class_name, str) else None
+        if pipeline_cls is None:
+            raise SystemExit(
+                f"Pipeline class {class_name!r} declared in {diffusers.DiffusionPipeline.config_name} "
+                "is not exported by the installed diffusers."
+            )
+        sig = inspect.signature(pipeline_cls.__call__)
+        descriptions = _parse_docstring_args(pipeline_cls.__call__.__doc__) if getattr(args, "verbose", False) else {}
+        schema: list[dict[str, Any]] = []
+        for name, param in sig.parameters.items():
+            if name == "self":
+                continue
+            if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
+                continue
+            has_default = param.default is not inspect.Parameter.empty
+            schema.append(
+                {
+                    "name": name,
+                    "type_hint": str(param.annotation) if param.annotation is not inspect.Parameter.empty else None,
+                    "default": param.default if has_default else None,
+                    "required": not has_default,
+                    "description": descriptions.get(name, ""),
+                }
+            )
+    else:
+        kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
+        if args.revision:
+            kwargs["revision"] = args.revision
+        if args.token:
+            kwargs["token"] = args.token
+        try:
+            blocks = diffusers.ModularPipelineBlocks.from_pretrained(args.model, **kwargs)
+        except Exception as e:
+            raise SystemExit(
+                f"Could not describe {args.model!r}: no {diffusers.DiffusionPipeline.config_name} and "
+                f"loading as a modular pipeline failed ({type(e).__name__}: {e}). "
+                "Is this a diffusers pipeline repo? Pass --trust-remote-code if it ships custom block code."
+            ) from e
+        class_name = type(blocks).__name__
+        schema = [
+            {
+                "name": p.name,
+                "type_hint": str(p.type_hint) if p.type_hint is not None else None,
+                "default": p.default,
+                "required": p.required,
+                "description": p.description,
+            }
+            for p in blocks.inputs
+        ]
 
     if args.json:
+        payload = {
+            "task": "inference-describe",
+            "model": args.model,
+            "pipeline_class": class_name,
+            "inputs": schema,
+        }
         json.dump(payload, sys.stdout, default=str)
         sys.stdout.write("\n")
         return
 
-    print(f"{type(blocks).__name__} ({args.model}) inputs:")
+    _print_schema(class_name, args.model, schema)
+
+
+def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
+    """Extract per-argument descriptions from a Google-style ``Args:`` block.
+
+    Best-effort: returns ``{name: description}``; unrecognised formats yield an empty dict rather than raising.
+    """
+    if not docstring:
+        return {}
+
+    import re
+
+    lines = docstring.expandtabs().splitlines()
+    start = None
+    section_indent = 0
+    for i, line in enumerate(lines):
+        if line.strip() in ("Args:", "Arguments:", "Parameters:"):
+            start = i + 1
+            section_indent = len(line) - len(line.lstrip())
+            break
+    if start is None:
+        return {}
+
+    descriptions: dict[str, str] = {}
+    current_name: Optional[str] = None
+    current_lines: list[str] = []
+    arg_indent: Optional[int] = None
+    # The type group tolerates one level of nested parens, e.g. ``defaults to (1024, 1024)):``.
+    name_pattern = re.compile(r"^(\w+)\s*(?:\((?:[^()]|\([^()]*\))*\))?\s*:?\s*(.*)$")
+
+    def _flush() -> None:
+        if current_name and current_lines:
+            descriptions[current_name] = " ".join(s.strip() for s in current_lines).strip()
+
+    for line in lines[start:]:
+        if not line.strip():
+            continue
+        indent = len(line) - len(line.lstrip())
+        # A new top-level section ends the Args block.
+        if indent <= section_indent and line.strip().endswith(":"):
+            break
+        if arg_indent is None:
+            arg_indent = indent
+        if indent == arg_indent:
+            _flush()
+            current_lines = []
+            match = name_pattern.match(line.strip())
+            if match:
+                current_name = match.group(1)
+                tail = match.group(2).strip()
+                if tail:
+                    current_lines.append(tail)
+            else:
+                current_name = None
+        elif current_name is not None and indent > arg_indent:
+            current_lines.append(line.strip())
+    _flush()
+    return descriptions
+
+
+def _print_schema(class_name: str, model: str, schema: list[dict[str, Any]]) -> None:
+    print(f"{class_name} ({model}) inputs:")
     for entry in schema:
         tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
         print(f"  {entry['name']}  ({tag})")
@@ -502,7 +614,7 @@ def _save_audio_arrays(audios, sampling_rate: int, args: Namespace, task: str) -
     return saved
 
 
-def _save_auto(value: Any, args: Namespace, task: str) -> list[str]:
+def _save_output(value: Any, args: Namespace, task: str) -> list[str]:
     """Save ``value`` by sniffing its runtime type."""
     pil_images = _as_pil_list(value)
     if pil_images is not None:
@@ -768,14 +880,6 @@ def register_subcommand(subparsers: _SubParsersAction) -> None:
             default=None,
             help="For modular pipelines: name of the intermediate to extract (passed as `output=` to the call).",
         )
-        parser.add_argument(
-            "--describe",
-            action="store_true",
-            help=(
-                "For modular pipelines: print the input schema from block definitions and exit. "
-                "Weights are NOT downloaded. Errors on standard (non-modular) pipelines."
-            ),
-        )
         parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility.")
         parser.add_argument(
             "--fps",
@@ -799,14 +903,6 @@ def __init__(self, args: Namespace):
     def run(self) -> None:
         is_modular = _is_modular_repo(self.args)
 
-        if self.args.describe:
-            if not is_modular:
-                raise SystemExit(
-                    "--describe only works for modular pipeline repos " "(those that ship modular_model_index.json)."
-                )
-            _describe_modular(self.args)
-            return
-
         if _maybe_submit_remote(self.args, self.task):
             return
 
@@ -824,7 +920,7 @@ def run(self) -> None:
 
         result = pipeline(**call_kwargs)
         savable = result if is_modular else _result_to_savable(result)
-        saved = _save_auto(savable, self.args, self.task)
+        saved = _save_output(savable, self.args, self.task)
         pushed = _push_outputs(self.args, saved, self.task)
 
         _format_result(

From 3774951818d3c2cc72cc3fd8ed6838da0ffe6cd0 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Wed, 3 Jun 2026 23:40:20 +0530
Subject: [PATCH 09/30] update

---
 src/diffusers/commands/describe.py  | 81 +++++++++++++++++++++++++++++
 src/diffusers/commands/inference.py |  6 +++
 2 files changed, 87 insertions(+)
 create mode 100644 src/diffusers/commands/describe.py

diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
new file mode 100644
index 000000000000..7eae367a8df7
--- /dev/null
+++ b/src/diffusers/commands/describe.py
@@ -0,0 +1,81 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""``diffusers-cli describe`` — print the input schema for any pipeline repo.
+
+Tries ``DiffusionPipeline.config_name`` first (so standard repos get their ``__call__`` signature introspected); falls
+back to ``ModularPipelineBlocks.from_pretrained`` for modular repos. No weights are downloaded — only the small index
+file (and any custom block code if ``--trust-remote-code`` is set).
+"""
+
+from __future__ import annotations
+
+from argparse import ArgumentParser, Namespace, _SubParsersAction
+
+from . import BaseDiffusersCLICommand
+from .inference import _describe
+
+
+class DescribeCommand(BaseDiffusersCLICommand):
+    task = "describe"
+
+    @staticmethod
+    def register_subcommand(subparsers: _SubParsersAction) -> None:
+        parser: ArgumentParser = subparsers.add_parser(
+            "describe",
+            help="Print the input schema for a diffusers pipeline repo. No weights downloaded.",
+        )
+        parser.add_argument(
+            "--model",
+            "-m",
+            required=True,
+            help="Model id on the Hugging Face Hub or local path.",
+        )
+        parser.add_argument(
+            "--revision",
+            default=None,
+            help="Model revision (branch, tag, or commit SHA).",
+        )
+        parser.add_argument(
+            "--token",
+            default=None,
+            help="Hugging Face token for gated/private models.",
+        )
+        parser.add_argument(
+            "--trust-remote-code",
+            action="store_true",
+            help="Allow custom code from the Hub (required for modular pipelines that ship block code).",
+        )
+        parser.add_argument(
+            "--verbose",
+            "-v",
+            action="store_true",
+            help=(
+                "Also include per-argument descriptions from the pipeline's __call__ docstring. "
+                "Modular pipelines always include block-declared descriptions; --verbose populates "
+                "the equivalent field for standard pipelines by parsing the Google-style Args: block."
+            ),
+        )
+        parser.add_argument(
+            "--json",
+            action="store_true",
+            help="Emit a machine-readable JSON summary on stdout.",
+        )
+        parser.set_defaults(func=DescribeCommand)
+
+    def __init__(self, args: Namespace):
+        self.args = args
+
+    def run(self) -> None:
+        _describe(self.args)
diff --git a/src/diffusers/commands/inference.py b/src/diffusers/commands/inference.py
index b92f3fcbedd0..539b47177dff 100644
--- a/src/diffusers/commands/inference.py
+++ b/src/diffusers/commands/inference.py
@@ -698,6 +698,12 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     if not args.remote:
         return False
 
+    print(
+        f"[diffusers-cli] preparing remote {task!r} job on flavor={args.flavor!r}...",
+        file=sys.stderr,
+        flush=True,
+    )
+
     import uuid
 
     from huggingface_hub import HfApi, get_token, run_uv_job

From add747b6d82cffcba1b9d68db4b360a234c87662 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Fri, 12 Jun 2026 14:54:19 +0530
Subject: [PATCH 10/30] update

---
 src/diffusers/commands/custom_blocks.py       | 27 +++---
 src/diffusers/commands/describe.py            |  4 +-
 src/diffusers/commands/diffusers_cli.py       | 12 ++-
 src/diffusers/commands/env.py                 |  7 +-
 src/diffusers/commands/fp16_safetensors.py    | 14 ++-
 .../commands/{inference.py => generate.py}    | 90 +++++++++----------
 6 files changed, 84 insertions(+), 70 deletions(-)
 rename src/diffusers/commands/{inference.py => generate.py} (93%)

diff --git a/src/diffusers/commands/custom_blocks.py b/src/diffusers/commands/custom_blocks.py
index 953240c5a2c3..bc0889376a95 100644
--- a/src/diffusers/commands/custom_blocks.py
+++ b/src/diffusers/commands/custom_blocks.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""
-Usage example:
-    TODO
+"""``diffusers-cli custom_blocks`` — package a local ``ModularPipelineBlocks`` subclass for the Hub.
+
+Parses ``block.py`` (or ``--block_module_name``), instantiates the chosen block, and calls ``save_pretrained`` in the
+current working directory.
 """
 
 import ast
@@ -28,7 +29,6 @@
 
 
 EXPECTED_PARENT_CLASSES = ["ModularPipelineBlocks"]
-CONFIG = "config.json"
 
 
 def conversion_command_factory(args: Namespace):
@@ -38,7 +38,12 @@ def conversion_command_factory(args: Namespace):
 class CustomBlocksCommand(BaseDiffusersCLICommand):
     @staticmethod
     def register_subcommand(parser: ArgumentParser):
-        conversion_parser = parser.add_parser("custom_blocks")
+        conversion_parser = parser.add_parser(
+            "custom_blocks",
+            help="Package a local ModularPipelineBlocks subclass for the Hub.",
+            usage="\n  diffusers-cli custom_blocks [options]",
+        )
+        conversion_parser._optionals.title = "Options"
         conversion_parser.add_argument(
             "--block_module_name",
             type=str,
@@ -77,19 +82,12 @@ def run(self):
             )
             child_class, parent_class = out[0][0], out[0][1]
 
-        # dynamically get the custom block and initialize it to call `save_pretrained` in the current directory.
-        # the user is responsible for running it, so I guess that is safe?
         module_name = f"__dynamic__{self.block_module_name.stem}"
         spec = importlib.util.spec_from_file_location(module_name, str(self.block_module_name))
         module = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(module)
         getattr(module, child_class)().save_pretrained(os.getcwd())
 
-        # or, we could create it manually.
-        # automap = self._create_automap(parent_class=parent_class, child_class=child_class)
-        # with open(CONFIG, "w") as f:
-        #     json.dump(automap, f)
-
     def _choose_block(self, candidates, chosen=None):
         for cls, base in candidates:
             if cls == chosen:
@@ -125,8 +123,3 @@ def _get_base_name(self, node: ast.expr):
             val = self._get_base_name(node.value)
             return f"{val}.{node.attr}" if val else node.attr
         return None
-
-    def _create_automap(self, parent_class, child_class):
-        module = str(self.block_module_name).replace(".py", "").rsplit(".", 1)[-1]
-        auto_map = {f"{parent_class}": f"{module}.{child_class}"}
-        return {"auto_map": auto_map}
diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
index 7eae367a8df7..b5c617c10319 100644
--- a/src/diffusers/commands/describe.py
+++ b/src/diffusers/commands/describe.py
@@ -24,7 +24,7 @@
 from argparse import ArgumentParser, Namespace, _SubParsersAction
 
 from . import BaseDiffusersCLICommand
-from .inference import _describe
+from .generate import _describe
 
 
 class DescribeCommand(BaseDiffusersCLICommand):
@@ -35,7 +35,9 @@ def register_subcommand(subparsers: _SubParsersAction) -> None:
         parser: ArgumentParser = subparsers.add_parser(
             "describe",
             help="Print the input schema for a diffusers pipeline repo. No weights downloaded.",
+            usage="\n  diffusers-cli describe [options]",
         )
+        parser._optionals.title = "Options"
         parser.add_argument(
             "--model",
             "-m",
diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index 80f449426c54..09a3a8ab03b7 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -19,18 +19,22 @@
 from .describe import DescribeCommand
 from .env import EnvironmentCommand
 from .fp16_safetensors import FP16SafetensorsCommand
-from .inference import InferenceCommand
+from .generate import GenerateCommand
 
 
 def main():
-    parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli <command> [<args>]")
-    commands_parser = parser.add_subparsers(help="diffusers-cli command helpers")
+    parser = ArgumentParser(
+        prog="diffusers-cli",
+        usage="\n  diffusers-cli <command> [options]",
+    )
+    parser._optionals.title = "General Options"
+    commands_parser = parser.add_subparsers(title="Commands", metavar="<command>")
 
     # Register commands
     EnvironmentCommand.register_subcommand(commands_parser)
     FP16SafetensorsCommand.register_subcommand(commands_parser)
     CustomBlocksCommand.register_subcommand(commands_parser)
-    InferenceCommand.register_subcommand(commands_parser)
+    GenerateCommand.register_subcommand(commands_parser)
     DescribeCommand.register_subcommand(commands_parser)
 
     # Let's go
diff --git a/src/diffusers/commands/env.py b/src/diffusers/commands/env.py
index 58f31d478bf3..cab163fcdd63 100644
--- a/src/diffusers/commands/env.py
+++ b/src/diffusers/commands/env.py
@@ -40,7 +40,12 @@ def info_command_factory(_):
 class EnvironmentCommand(BaseDiffusersCLICommand):
     @staticmethod
     def register_subcommand(parser: ArgumentParser) -> None:
-        download_parser = parser.add_parser("env")
+        download_parser = parser.add_parser(
+            "env",
+            help="Print versions of diffusers and its dependencies (for bug reports).",
+            usage="\n  diffusers-cli env",
+        )
+        download_parser._optionals.title = "Options"
         download_parser.set_defaults(func=info_command_factory)
 
     def run(self) -> dict:
diff --git a/src/diffusers/commands/fp16_safetensors.py b/src/diffusers/commands/fp16_safetensors.py
index 382d6c39bd19..44e374b5707d 100644
--- a/src/diffusers/commands/fp16_safetensors.py
+++ b/src/diffusers/commands/fp16_safetensors.py
@@ -33,6 +33,13 @@
 
 
 def conversion_command_factory(args: Namespace):
+    warnings.warn(
+        "`diffusers-cli fp16_safetensors` is deprecated and will be removed in a future version. "
+        "Convert weights to fp16 safetensors directly with `safetensors.torch.save_file` or via "
+        "`pipeline.save_pretrained(..., safe_serialization=True, variant='fp16')`.",
+        FutureWarning,
+        stacklevel=2,
+    )
     if args.use_auth_token:
         warnings.warn(
             "The `--use_auth_token` flag is deprecated and will be removed in a future version."
@@ -44,7 +51,12 @@ def conversion_command_factory(args: Namespace):
 class FP16SafetensorsCommand(BaseDiffusersCLICommand):
     @staticmethod
     def register_subcommand(parser: ArgumentParser):
-        conversion_parser = parser.add_parser("fp16_safetensors")
+        conversion_parser = parser.add_parser(
+            "fp16_safetensors",
+            help="[DEPRECATED] Convert a Hub checkpoint's weights to fp16 safetensors and push back as a PR.",
+            usage="\n  diffusers-cli fp16_safetensors [options]",
+        )
+        conversion_parser._optionals.title = "Options"
         conversion_parser.add_argument(
             "--ckpt_id",
             type=str,
diff --git a/src/diffusers/commands/inference.py b/src/diffusers/commands/generate.py
similarity index 93%
rename from src/diffusers/commands/inference.py
rename to src/diffusers/commands/generate.py
index 539b47177dff..e8f92083e1c8 100644
--- a/src/diffusers/commands/inference.py
+++ b/src/diffusers/commands/generate.py
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""``diffusers-cli inference`` — single agentic entry point.
+"""``diffusers-cli generate`` — single agentic entry point.
 
 Runs any diffusers pipeline (standard or modular) by forwarding ``--pipeline-kwargs`` verbatim, saves the output by
-sniffing its runtime type, and can submit the same call to HF Jobs via ``--remote`` (with the model repo volume-mounted
-and the results downloaded back).
+sniffing its runtime type, and can submit the same call to HF Jobs via ``--remote``.
 """
 
 from __future__ import annotations
@@ -59,10 +58,13 @@
 )
 
 # Source for the diffusers install used by --remote jobs. While iterating on a
-# feature branch, point at the branch URL; once merged, switch back to a release
-# pin. ``--dependencies "diffusers @ git+..."`` on the local command appends
-# additional dependencies but does not replace this default install.
-DIFFUSERS_SOURCE = "diffusers @ git+https://github.com/huggingface/diffusers@diffuser-cli-for-agent"
+# feature branch, point at the GitHub tarball URL — uv installs it over plain
+# HTTP and the container doesn't need ``git``. Once merged, switch back to a
+# PyPI release pin. ``--dependencies "diffusers @ ..."`` on the local command
+# appends additional dependencies but does not replace this default install.
+DIFFUSERS_SOURCE = (
+    "diffusers @ https://github.com/huggingface/diffusers/archive/refs/heads/diffuser-cli-for-agent.tar.gz"
+)
 _DEFAULT_REMOTE_DEPS = (
     DIFFUSERS_SOURCE,
     "accelerate",
@@ -250,7 +252,7 @@ def _enable_context_parallel(pipeline: Any) -> None:
         raise SystemExit(
             "--context-parallel requires torch.distributed to be initialized. "
             "Launch the CLI under torchrun, e.g.: "
-            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli inference ...`."
+            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
         )
 
     from diffusers import ContextParallelConfig
@@ -281,7 +283,9 @@ def _apply_optimizations(pipeline: Any, args: Namespace) -> None:
 
 def _from_pretrained_kwargs(args: Namespace) -> dict[str, Any]:
     dtype = _resolve_dtype(args.dtype)
-    kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
+    # disable_mmap: mmap-faults over a network-mounted volume trigger one round-trip per page;
+    # a sequential read is dramatically faster than the random-access mmap pattern.
+    kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code, "disable_mmap": True}
     if dtype != "auto":
         kwargs["torch_dtype"] = dtype
     if args.variant:
@@ -357,7 +361,7 @@ def _describe(args: Namespace) -> None:
                 "is not exported by the installed diffusers."
             )
         sig = inspect.signature(pipeline_cls.__call__)
-        descriptions = _parse_docstring_args(pipeline_cls.__call__.__doc__) if getattr(args, "verbose", False) else {}
+        descriptions = _parse_docstring_args(pipeline_cls.__call__.__doc__) if args.verbose else {}
         schema: list[dict[str, Any]] = []
         for name, param in sig.parameters.items():
             if name == "self":
@@ -402,7 +406,7 @@ def _describe(args: Namespace) -> None:
 
     if args.json:
         payload = {
-            "task": "inference-describe",
+            "task": "describe",
             "model": args.model,
             "pipeline_class": class_name,
             "inputs": schema,
@@ -628,12 +632,12 @@ def _save_output(value: Any, args: Namespace, task: str) -> list[str]:
         from diffusers.utils import export_to_video
 
         path = _default_output_paths(task, 1, args.output, ext="mp4")[0]
-        export_to_video(frames, str(path), fps=getattr(args, "fps", 8))
+        export_to_video(frames, str(path), fps=args.fps)
         return [str(path)]
 
     audios = _as_audio_arrays(value)
     if audios is not None:
-        return _save_audio_arrays(audios, getattr(args, "sampling_rate", None) or 16000, args, task)
+        return _save_audio_arrays(audios, args.sampling_rate or 16000, args, task)
 
     path = _default_output_paths(task, 1, args.output, ext="json")[0]
     Path(path).write_text(json.dumps(value, default=str, indent=2))
@@ -708,11 +712,6 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
 
     from huggingface_hub import HfApi, get_token, run_uv_job
 
-    try:
-        from huggingface_hub import Volume
-    except ImportError:
-        Volume = None
-
     hf_token = args.token or get_token()
     api = HfApi(token=hf_token)
 
@@ -732,29 +731,25 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
         "HF_ENABLE_PARALLEL_LOADING": "1",  # thread-pool the safetensors load step
     }
 
-    # Mount the model repo into the job's filesystem so the container reads it
-    # from local disk instead of downloading. Requires huggingface_hub >= 1.16.
-    volumes = None
-    if Volume is not None and not Path(args.model).exists():
-        mount_path = "/model"
-        volumes = [Volume(type="model", source=args.model, mount_path=mount_path)]
-        task_kwargs["model"] = mount_path
-
-    run_uv_job_kwargs: dict[str, Any] = {
-        "script": _UV_RUNNER_SCRIPT,
-        "script_args": _kwargs_to_argv(task, task_kwargs),
-        "dependencies": dependencies,
-        "flavor": args.flavor,
-        "timeout": args.timeout,
-        "namespace": args.namespace,
-        "secrets": secrets,
-        "env": env,
-        "token": hf_token,
-    }
-    if volumes is not None:
-        run_uv_job_kwargs["volumes"] = volumes
+    if Path(args.model).exists():
+        print(
+            f"[diffusers-cli] WARNING: --model {args.model!r} is a local path; the container can't see it. "
+            "Pass a Hub repo id so the job can download it.",
+            file=sys.stderr,
+            flush=True,
+        )
 
-    job = run_uv_job(**run_uv_job_kwargs)
+    job = run_uv_job(
+        script=_UV_RUNNER_SCRIPT,
+        script_args=_kwargs_to_argv(task, task_kwargs),
+        dependencies=dependencies,
+        flavor=args.flavor,
+        timeout=args.timeout,
+        namespace=args.namespace,
+        secrets=secrets,
+        env=env,
+        token=hf_token,
+    )
 
     payload: dict[str, Any] = {
         "task": "remote-submit",
@@ -858,19 +853,21 @@ def _format_result(args: Namespace, payload: dict[str, Any]) -> None:
 
 
 # ---------------------------------------------------------------------------
-# The one and only agentic subcommand
+# Subcommand
 # ---------------------------------------------------------------------------
 
 
-class InferenceCommand(BaseDiffusersCLICommand):
-    task = "inference"
+class GenerateCommand(BaseDiffusersCLICommand):
+    task = "generate"
 
     @staticmethod
     def register_subcommand(subparsers: _SubParsersAction) -> None:
         parser: ArgumentParser = subparsers.add_parser(
-            "inference",
+            "generate",
             help="Run any diffusers pipeline (standard or modular) by forwarding --pipeline-kwargs verbatim.",
+            usage="\n  diffusers-cli generate [options]",
         )
+        parser._optionals.title = "Options"
         _add_loading_arguments(parser)
         _add_optimization_arguments(parser)
         parser.add_argument(
@@ -901,7 +898,7 @@ def register_subcommand(subparsers: _SubParsersAction) -> None:
         )
         _add_remote_arguments(parser)
         _add_output_arguments(parser)
-        parser.set_defaults(func=InferenceCommand)
+        parser.set_defaults(func=GenerateCommand)
 
     def __init__(self, args: Namespace):
         self.args = args
@@ -920,7 +917,8 @@ def run(self) -> None:
         if self.args.output_key is not None:
             call_kwargs["output"] = self.args.output_key
 
-        generator = _get_generator(self.args.seed, getattr(pipeline, "device", None) and pipeline.device.type or "cpu")
+        device = pipeline.device.type if hasattr(pipeline, "device") else "cpu"
+        generator = _get_generator(self.args.seed, device)
         if generator is not None:
             call_kwargs["generator"] = generator
 

From 934b5575de12074816e5fb5e1f06fcf1ad52a95f Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Fri, 12 Jun 2026 15:15:15 +0530
Subject: [PATCH 11/30] update

---
 src/diffusers/commands/generate.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index e8f92083e1c8..fae4bf78a565 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -70,10 +70,13 @@
     "accelerate",
     "transformers",
     "safetensors",
-    "torch==2.10.*",
-    "torchvision",
 )
 
+# Base container image — provides torch + CUDA so uv doesn't reinstall the ~3GB nvidia-*
+# wheels per cold start. cuda12.8 is the highest cuda12.x tag below the HF Jobs host
+# driver's CUDA 12.9 max; cuda13.x tags fail with "driver too old".
+_DEFAULT_REMOTE_IMAGE = "pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime"
+
 # Entry point for ``uv run`` inside the container. ``uv run`` accepts a file
 # path, URL, or command; passing the installed console script name makes UV
 # install the deps above (which register the entry point) and exec the CLI.
@@ -743,6 +746,7 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
         script=_UV_RUNNER_SCRIPT,
         script_args=_kwargs_to_argv(task, task_kwargs),
         dependencies=dependencies,
+        image=_DEFAULT_REMOTE_IMAGE,
         flavor=args.flavor,
         timeout=args.timeout,
         namespace=args.namespace,

From 0ae1eb087bdd34d32b55e0233db3595b77705fc5 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Fri, 12 Jun 2026 15:37:40 +0530
Subject: [PATCH 12/30] update

---
 src/diffusers/commands/generate.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index fae4bf78a565..d152582d4cec 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -72,15 +72,13 @@
     "safetensors",
 )
 
-# Base container image — provides torch + CUDA so uv doesn't reinstall the ~3GB nvidia-*
-# wheels per cold start. cuda12.8 is the highest cuda12.x tag below the HF Jobs host
-# driver's CUDA 12.9 max; cuda13.x tags fail with "driver too old".
+# Base container image — provides torch + CUDA so ``uv pip install --system``
+# only has to add the small Python deps. cuda12.8 is the highest cuda12.x tag
+# below the HF Jobs host driver's CUDA 12.9 max.
 _DEFAULT_REMOTE_IMAGE = "pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime"
 
-# Entry point for ``uv run`` inside the container. ``uv run`` accepts a file
-# path, URL, or command; passing the installed console script name makes UV
-# install the deps above (which register the entry point) and exec the CLI.
-_UV_RUNNER_SCRIPT = "diffusers-cli"
+# Installed console-script name invoked inside the container after the deps land.
+_CONTAINER_CLI_BINARY = "diffusers-cli"
 
 RUN_ID_ENV = "DIFFUSERS_CLI_RUN_ID"
 
@@ -711,9 +709,10 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
         flush=True,
     )
 
+    import shlex
     import uuid
 
-    from huggingface_hub import HfApi, get_token, run_uv_job
+    from huggingface_hub import HfApi, get_token, run_job
 
     hf_token = args.token or get_token()
     api = HfApi(token=hf_token)
@@ -742,11 +741,17 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
             flush=True,
         )
 
-    job = run_uv_job(
-        script=_UV_RUNNER_SCRIPT,
-        script_args=_kwargs_to_argv(task, task_kwargs),
-        dependencies=dependencies,
+    # Build the in-container shell command: install the small Python deps into the
+    # image's system Python (where torch + CUDA already live) via ``uv pip install
+    # --system``, then exec the CLI with the same argv. --break-system-packages
+    # bypasses PEP 668; safe here because the container is ephemeral.
+    install_cmd = shlex.join(["uv", "pip", "install", "--system", "--break-system-packages", *dependencies])
+    cli_cmd = shlex.join([_CONTAINER_CLI_BINARY, *_kwargs_to_argv(task, task_kwargs)])
+    container_cmd = ["sh", "-c", f"{install_cmd} && {cli_cmd}"]
+
+    job = run_job(
         image=_DEFAULT_REMOTE_IMAGE,
+        command=container_cmd,
         flavor=args.flavor,
         timeout=args.timeout,
         namespace=args.namespace,

From dcfd09c111141cea26a14eef0626c5ea9e8bb6c4 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Fri, 12 Jun 2026 19:19:15 +0530
Subject: [PATCH 13/30] update

---
 src/diffusers/commands/generate.py | 38 ++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index d152582d4cec..5398d3fa114a 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -200,7 +200,14 @@ def _resolve_dtype(name: Optional[str]):
         return "auto"
     import torch
 
-    mapping = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}
+    mapping = {
+        "fp32": torch.float32,
+        "float32": torch.float32,
+        "fp16": torch.float16,
+        "float16": torch.float16,
+        "bf16": torch.bfloat16,
+        "bfloat16": torch.bfloat16,
+    }
     if name not in mapping:
         raise ValueError(f"Unknown dtype: {name}")
     return mapping[name]
@@ -284,8 +291,6 @@ def _apply_optimizations(pipeline: Any, args: Namespace) -> None:
 
 def _from_pretrained_kwargs(args: Namespace) -> dict[str, Any]:
     dtype = _resolve_dtype(args.dtype)
-    # disable_mmap: mmap-faults over a network-mounted volume trigger one round-trip per page;
-    # a sequential read is dramatically faster than the random-access mmap pattern.
     kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code, "disable_mmap": True}
     if dtype != "auto":
         kwargs["torch_dtype"] = dtype
@@ -355,6 +360,10 @@ def _describe(args: Namespace) -> None:
         with open(standard_index) as f:
             index = json.load(f)
         class_name = index.get("_class_name")
+        if class_name is None:
+            raise SystemExit(
+                f"{diffusers.DiffusionPipeline.config_name} for {args.model!r} has no `_class_name` field."
+            )
         pipeline_cls = getattr(diffusers, class_name, None)
         if pipeline_cls is None:
             raise SystemExit(
@@ -781,11 +790,30 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     )
     final_status = _wait_for_job(api, job.id, args.namespace, args.poll_interval)
     payload["job_status"] = final_status
+    payload["timing"] = _job_timing(api, job.id, args.namespace)
     payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
     _format_result(args, payload)
     return True
 
 
+def _job_timing(api: Any, job_id: str, namespace: Optional[str]) -> dict[str, Optional[float]]:
+    """Return queue/run/total wallclock seconds for ``job_id`` from inspect_job timestamps."""
+    info = api.inspect_job(job_id=job_id, namespace=namespace)
+
+    def _delta(start, end) -> Optional[float]:
+        return (end - start).total_seconds() if (start is not None and end is not None) else None
+
+    timing = {
+        "queued_seconds": _delta(info.created_at, info.started_at),
+        "run_seconds": _delta(info.started_at, info.finished_at),
+        "total_seconds": _delta(info.created_at, info.finished_at),
+    }
+    parts = [f"{k.replace('_seconds', '')}={v:.1f}s" for k, v in timing.items() if v is not None]
+    if parts:
+        print(f"[diffusers-cli] timing: {' '.join(parts)}", file=sys.stderr, flush=True)
+    return timing
+
+
 def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
     """Stream container logs to stderr until the job terminates; return the final stage."""
     fetch = getattr(api, "fetch_job_logs", None)
@@ -873,7 +901,7 @@ class GenerateCommand(BaseDiffusersCLICommand):
     def register_subcommand(subparsers: _SubParsersAction) -> None:
         parser: ArgumentParser = subparsers.add_parser(
             "generate",
-            help="Run any diffusers pipeline (standard or modular) by forwarding --pipeline-kwargs verbatim.",
+            help="Run any diffusers pipeline locally or remotely on HF Jobs.",
             usage="\n  diffusers-cli generate [options]",
         )
         parser._optionals.title = "Options"
@@ -941,7 +969,7 @@ def run(self) -> None:
             {
                 "task": self.task,
                 "model": self.args.model,
-                "device": pipeline.device.type if hasattr(pipeline, "device") else None,
+                "device": device,
                 "pipeline_class": type(pipeline).__name__,
                 "modular": is_modular,
                 "outputs": saved,

From 9515c551da50be398335a5383dec9c7d83afb7d1 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Fri, 12 Jun 2026 19:31:45 +0530
Subject: [PATCH 14/30] update

---
 .ai/skills/diffusers-cli/SKILL.md | 140 ++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 .ai/skills/diffusers-cli/SKILL.md

diff --git a/.ai/skills/diffusers-cli/SKILL.md b/.ai/skills/diffusers-cli/SKILL.md
new file mode 100644
index 000000000000..ae9c34cdc712
--- /dev/null
+++ b/.ai/skills/diffusers-cli/SKILL.md
@@ -0,0 +1,140 @@
+---
+name: diffusers-cli
+description: >
+  Use when the user wants to run a diffusers pipeline from a terminal (one-off
+  generation, batch jobs, smoke-testing a new model), submit jobs to HF Jobs
+  hardware via `--remote`, or introspect an unknown pipeline's input schema
+  before calling it. Prefer this over writing ad-hoc Python scripts for
+  generation tasks.
+---
+
+## Overview
+
+`diffusers-cli` is the shipped CLI in `src/diffusers/commands/`. Three subcommands
+matter for agentic use:
+
+| Command | Purpose |
+| --- | --- |
+| `generate` | Run any `DiffusionPipeline` or `ModularPipeline` by forwarding `--pipeline-kwargs` verbatim. Saves output by sniffing its runtime type. |
+| `describe` | Print the input schema (kwarg names + types + defaults + docstring) for a pipeline repo. **No weights downloaded** — only `model_index.json` (or `modular_model_index.json`) is fetched. |
+| `custom_blocks` | Package a local `ModularPipelineBlocks` subclass for the Hub. |
+
+`env` (system info) and `fp16_safetensors` (deprecated) also exist but aren't
+relevant to inference.
+
+## The describe → generate flow
+
+For any model you haven't called before, run `describe` first to learn its
+input contract, then `generate` with the right `--pipeline-kwargs`:
+
+```bash
+# 1. Discover what kwargs the pipeline takes (no weight download)
+diffusers-cli describe --model black-forest-labs/FLUX.1-dev --json
+
+# 2. Run it
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.1-dev \
+    --pipeline-kwargs '{"prompt": "a cat", "num_inference_steps": 30}' \
+    --dtype bf16
+```
+
+`describe`'s `--json` output is machine-readable: a list of `{name, type_hint,
+default, required, description}` entries. Use `--verbose` to additionally parse
+the `__call__` docstring's `Args:` block for descriptions on standard pipelines.
+
+## Standard vs modular detection
+
+`generate` auto-detects which kind of pipeline it's calling:
+
+1. If `model_index.json` exists on the repo → `DiffusionPipeline.from_pretrained` path
+2. Otherwise → `ModularPipeline.from_pretrained` path
+
+You don't need to tell it which. For modular repos that ship custom block code,
+pass `--trust-remote-code`.
+
+## `--pipeline-kwargs` semantics
+
+A JSON object passed straight through to `pipeline(**kwargs)`. String values at
+known image-input keys (`image`, `mask_image`, `control_image`,
+`ip_adapter_image`, `image_2`) are auto-loaded as PIL images, so you can pass
+URLs or local paths directly:
+
+```bash
+diffusers-cli generate \
+    --model stabilityai/stable-diffusion-xl-refiner-1.0 \
+    --pipeline-kwargs '{
+        "image": "https://example.com/cat.png",
+        "prompt": "a photorealistic cat",
+        "strength": 0.6
+    }'
+```
+
+**Shell-quoting gotcha**: newlines *between* JSON tokens are fine (as above),
+but a literal newline *inside* a JSON string value is a raw control char that
+breaks `json.loads`. Inside single quotes, `\` does not line-continue.
+
+## Output handling
+
+`generate` sniffs the pipeline return type and saves accordingly:
+
+- `PIL.Image` / list of them → `outputs/generate-<i>.png`
+- Frame sequence (≥2 PILs or ndarrays) → `outputs/generate-0.mp4` (uses `--fps`, default 8)
+- Numpy audio array → `outputs/generate-0.wav` (uses `--sampling-rate`)
+- Anything else → JSON dump
+
+Override the destination with `--output <path>` (file or directory).
+
+Use `--push-to <user>/<bucket>` to upload outputs to an HF bucket after saving.
+The bucket is created if it doesn't exist; objects land under
+`<run_id>/<filename>`.
+
+## Remote execution (`--remote`)
+
+Add `--remote` to submit the same call as a Hugging Face Job:
+
+```bash
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.1-dev \
+    --pipeline-kwargs '{"prompt": "a cat"}' \
+    --remote --flavor a100-large
+```
+
+What happens:
+
+1. Token is read from `--token` or `huggingface_hub.get_token()`.
+2. A bucket (`<user>/jobs-artifacts` by default) is auto-created.
+3. Job is submitted to HF Jobs via `run_job` with the pytorch image
+   (`pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime`) so torch + CUDA are
+   preinstalled.
+4. Container runs `uv pip install --system --break-system-packages
+   <small-deps> && diffusers-cli generate ...` — only ~50 MB of deps install
+   because torch already lives in the image's site-packages.
+5. The CLI streams the container's logs to stderr until the job terminates,
+   then downloads any files the job uploaded to the bucket under its `run_id`
+   prefix.
+6. A timing breakdown (`queued_seconds`, `run_seconds`, `total_seconds`) is
+   printed and added to the JSON payload.
+
+Use `--no-wait` to submit and immediately return the job id without streaming
+logs. Use `--namespace` to run under a different account.
+
+## `--json` machine-readable mode
+
+All subcommands accept `--json` to emit a single JSON object on stdout instead
+of human-readable text. Use this when an agent needs to parse the result —
+output paths, timing, pushed-bucket URIs, etc.
+
+## When NOT to use this skill
+
+- Multi-stage workflows where you need intermediate tensor manipulation between
+  pipelines → write Python.
+- Training or fine-tuning → CLI only covers inference.
+- Anything requiring custom `device_map`, `quantization_config`, or other
+  low-level loader knobs not exposed by the CLI flags → write Python.
+
+## Verifying the CLI is installed
+
+The console entry point lives in `pyproject.toml` (`diffusers-cli =
+"diffusers.commands.diffusers_cli:main"`). If `diffusers-cli` is not on PATH
+after `pip install -e .`, reinstall with `pip install -e . --force-reinstall
+--no-deps` and check `which diffusers-cli`.

From 404be8a381f50ac1ff2fc5ec99bbf9bb5478d7e5 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 14:30:16 +0530
Subject: [PATCH 15/30] update

---
 src/diffusers/commands/generate.py | 57 ++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 5398d3fa114a..1b9c98ebe552 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -131,7 +131,7 @@ def _add_optimization_arguments(parser: ArgumentParser) -> None:
         action="store_true",
         help=(
             "Enable Ulysses-style context parallelism (ulysses_anything mode). "
-            "Requires launching the CLI under torchrun with ≥2 GPUs."
+            "Requires a DiT-based pipeline and launching the CLI under torchrun with ≥2 GPUs."
         ),
     )
 
@@ -242,15 +242,23 @@ def _map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
     return pipeline
 
 
-def _set_attention_backend(pipeline: Any, backend: str) -> None:
+def _denoiser(pipeline: Any) -> Optional[Any]:
+    """Return the pipeline's denoiser submodule (transformer or unet) or None."""
     for attr in ("transformer", "unet"):
         module = getattr(pipeline, attr, None)
-        if module is not None and hasattr(module, "set_attention_backend"):
-            try:
-                module.set_attention_backend(backend)
-            except (ValueError, ImportError, RuntimeError):
-                pass
-            return
+        if module is not None:
+            return module
+    return None
+
+
+def _set_attention_backend(pipeline: Any, backend: str) -> None:
+    module = _denoiser(pipeline)
+    if module is None or not hasattr(module, "set_attention_backend"):
+        return
+    try:
+        module.set_attention_backend(backend)
+    except (ValueError, ImportError, RuntimeError):
+        pass
 
 
 def _enable_context_parallel(pipeline: Any) -> None:
@@ -263,18 +271,22 @@ def _enable_context_parallel(pipeline: Any) -> None:
             "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
         )
 
+    transformer = getattr(pipeline, "transformer", None)
+    if transformer is None or not hasattr(transformer, "enable_parallelism"):
+        raise SystemExit(
+            "--context-parallel requires a DiT-based pipeline. "
+            f"{type(pipeline).__name__} does not expose a `transformer` with `enable_parallelism`."
+        )
+
     from diffusers import ContextParallelConfig
 
-    cfg = ContextParallelConfig(
-        ulysses_degree=torch.distributed.get_world_size(),
-        ring_degree=1,
-        ulysses_anything=True,
+    transformer.enable_parallelism(
+        config=ContextParallelConfig(
+            ulysses_degree=torch.distributed.get_world_size(),
+            ring_degree=1,
+            ulysses_anything=True,
+        )
     )
-    for attr in ("transformer", "unet"):
-        module = getattr(pipeline, attr, None)
-        if module is not None and hasattr(module, "enable_parallelism"):
-            module.enable_parallelism(config=cfg)
-            return
 
 
 def _apply_optimizations(pipeline: Any, args: Namespace) -> None:
@@ -754,8 +766,15 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     # image's system Python (where torch + CUDA already live) via ``uv pip install
     # --system``, then exec the CLI with the same argv. --break-system-packages
     # bypasses PEP 668; safe here because the container is ephemeral.
+    # For --context-parallel, wrap with torchrun so torch.distributed initializes
+    # across every visible GPU before our generate command runs.
     install_cmd = shlex.join(["uv", "pip", "install", "--system", "--break-system-packages", *dependencies])
-    cli_cmd = shlex.join([_CONTAINER_CLI_BINARY, *_kwargs_to_argv(task, task_kwargs)])
+    cli_argv = _kwargs_to_argv(task, task_kwargs)
+    if args.context_parallel:
+        cli_argv = ["torchrun", "--nproc-per-node=gpu", "-m", "diffusers.commands.diffusers_cli", *cli_argv]
+    else:
+        cli_argv = [_CONTAINER_CLI_BINARY, *cli_argv]
+    cli_cmd = shlex.join(cli_argv)
     container_cmd = ["sh", "-c", f"{install_cmd} && {cli_cmd}"]
 
     job = run_job(
@@ -901,7 +920,7 @@ class GenerateCommand(BaseDiffusersCLICommand):
     def register_subcommand(subparsers: _SubParsersAction) -> None:
         parser: ArgumentParser = subparsers.add_parser(
             "generate",
-            help="Run any diffusers pipeline locally or remotely on HF Jobs.",
+            help="Run any diffusers pipeline locally or remotely with HF Jobs.",
             usage="\n  diffusers-cli generate [options]",
         )
         parser._optionals.title = "Options"

From f3fa589f53f2ca8e5f22195561ede8fd970c90b2 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 14:49:08 +0530
Subject: [PATCH 16/30] update

---
 src/diffusers/commands/generate.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 1b9c98ebe552..20f081a008c3 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -219,6 +219,13 @@ def _resolve_device(name: Optional[str]) -> str:
     import torch
 
     if torch.cuda.is_available():
+        # Under torchrun, LOCAL_RANK identifies this process's assigned GPU.
+        # Without this pin every rank falls back to cuda:0 and OOMs because the
+        # whole pipeline gets replicated onto a single device.
+        local_rank = os.environ.get("LOCAL_RANK")
+        if local_rank is not None:
+            torch.cuda.set_device(int(local_rank))
+            return f"cuda:{local_rank}"
         return "cuda"
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return "mps"

From 633461dc76af2c6ad1924b854f96a58d44ec6c60 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 14:57:37 +0530
Subject: [PATCH 17/30] update

---
 src/diffusers/commands/generate.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 20f081a008c3..b8ee139fa17b 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -271,12 +271,20 @@ def _set_attention_backend(pipeline: Any, backend: str) -> None:
 def _enable_context_parallel(pipeline: Any) -> None:
     import torch
 
-    if not torch.distributed.is_available() or not torch.distributed.is_initialized():
-        raise SystemExit(
-            "--context-parallel requires torch.distributed to be initialized. "
-            "Launch the CLI under torchrun, e.g.: "
-            "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
-        )
+    if not torch.distributed.is_available():
+        raise SystemExit("--context-parallel requires a torch build with distributed support.")
+
+    if not torch.distributed.is_initialized():
+        # torchrun sets RANK/WORLD_SIZE/LOCAL_RANK/MASTER_* env vars but does not call
+        # init_process_group on our behalf — do it here. If those env vars are absent the
+        # process wasn't launched under torchrun, so point the user at the right command.
+        if "LOCAL_RANK" not in os.environ:
+            raise SystemExit(
+                "--context-parallel requires torch.distributed to be initialized. "
+                "Launch the CLI under torchrun, e.g.: "
+                "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
+            )
+        torch.distributed.init_process_group(backend="nccl")
 
     transformer = getattr(pipeline, "transformer", None)
     if transformer is None or not hasattr(transformer, "enable_parallelism"):

From 268bae965aa7f878cc776976e773f00da9accecd Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 16:20:53 +0530
Subject: [PATCH 18/30] update

---
 src/diffusers/commands/generate.py | 66 ++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 21 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index b8ee139fa17b..6501984fe2ff 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -284,7 +284,9 @@ def _enable_context_parallel(pipeline: Any) -> None:
                 "Launch the CLI under torchrun, e.g.: "
                 "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
             )
-        torch.distributed.init_process_group(backend="nccl")
+        # Hybrid backend: ulysses_anything's per-rank size coordination wants Gloo on CPU
+        # (avoids H2D/D2H for a tiny int tensor); the main attention all-to-all stays on NCCL.
+        torch.distributed.init_process_group(backend="cpu:gloo,cuda:nccl")
 
     transformer = getattr(pipeline, "transformer", None)
     if transformer is None or not hasattr(transformer, "enable_parallelism"):
@@ -831,8 +833,19 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
 
 
 def _job_timing(api: Any, job_id: str, namespace: Optional[str]) -> dict[str, Optional[float]]:
-    """Return queue/run/total wallclock seconds for ``job_id`` from inspect_job timestamps."""
+    """Return queue/run/total wallclock seconds for ``job_id`` from inspect_job timestamps.
+
+    inspect_job sometimes returns finished_at=None for a few seconds after the container exits
+    while HF Jobs propagates the terminal state; retry briefly so we don't miss run/total.
+    """
+    import time
+
     info = api.inspect_job(job_id=job_id, namespace=namespace)
+    for _ in range(5):
+        if info.finished_at is not None:
+            break
+        time.sleep(1.0)
+        info = api.inspect_job(job_id=job_id, namespace=namespace)
 
     def _delta(start, end) -> Optional[float]:
         return (end - start).total_seconds() if (start is not None and end is not None) else None
@@ -993,22 +1006,33 @@ def run(self) -> None:
         if generator is not None:
             call_kwargs["generator"] = generator
 
-        result = pipeline(**call_kwargs)
-        savable = result if is_modular else _result_to_savable(result)
-        saved = _save_output(savable, self.args, self.task)
-        pushed = _push_outputs(self.args, saved, self.task)
-
-        _format_result(
-            self.args,
-            {
-                "task": self.task,
-                "model": self.args.model,
-                "device": device,
-                "pipeline_class": type(pipeline).__name__,
-                "modular": is_modular,
-                "outputs": saved,
-                "pushed": pushed,
-                "seed": self.args.seed,
-                "output_key": self.args.output_key,
-            },
-        )
+        try:
+            result = pipeline(**call_kwargs)
+
+            # Under torchrun, ranks > 0 produce identical output to rank 0 (CP shards the
+            # transformer compute but ranks reduce to the same final tensors). Save/push/print
+            # from rank 0 only to avoid clobbering bucket files 4x and printing 4x.
+            if os.environ.get("RANK", "0") == "0":
+                savable = result if is_modular else _result_to_savable(result)
+                saved = _save_output(savable, self.args, self.task)
+                pushed = _push_outputs(self.args, saved, self.task)
+
+                _format_result(
+                    self.args,
+                    {
+                        "task": self.task,
+                        "model": self.args.model,
+                        "device": device,
+                        "pipeline_class": type(pipeline).__name__,
+                        "modular": is_modular,
+                        "outputs": saved,
+                        "pushed": pushed,
+                        "seed": self.args.seed,
+                        "output_key": self.args.output_key,
+                    },
+                )
+        finally:
+            import torch
+
+            if torch.distributed.is_available() and torch.distributed.is_initialized():
+                torch.distributed.destroy_process_group()

From fa7a0a23eabc052f9984898f62919d2c143499d8 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 17:11:41 +0530
Subject: [PATCH 19/30] update

---
 src/diffusers/commands/describe.py      | 160 +++++++++++++++-
 src/diffusers/commands/diffusers_cli.py |  15 +-
 src/diffusers/commands/generate.py      | 242 +++++-------------------
 3 files changed, 219 insertions(+), 198 deletions(-)

diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
index b5c617c10319..8dd240014540 100644
--- a/src/diffusers/commands/describe.py
+++ b/src/diffusers/commands/describe.py
@@ -21,10 +21,168 @@
 
 from __future__ import annotations
 
+import json
 from argparse import ArgumentParser, Namespace, _SubParsersAction
+from typing import Any, Optional
 
 from . import BaseDiffusersCLICommand
-from .generate import _describe
+from ._common import try_fetch_config
+from ._output import OutputFormat, out
+
+
+def _describe(args: Namespace) -> None:
+    """Print the pipeline's input schema.
+
+    Tries ``DiffusionPipeline.config_name`` (= ``model_index.json``) first; if present, introspects the declared
+    pipeline class's ``__call__`` signature. Otherwise falls back to ``ModularPipelineBlocks.from_pretrained`` and
+    reads the block-declared ``inputs``. No weights downloaded either way.
+    """
+    import inspect
+
+    import diffusers
+
+    model_index = try_fetch_config(args, diffusers.DiffusionPipeline.config_name)
+    if model_index is not None:
+        with open(model_index) as f:
+            index = json.load(f)
+        class_name = index.get("_class_name")
+        if class_name is None:
+            raise SystemExit(
+                f"{diffusers.DiffusionPipeline.config_name} for {args.model!r} has no `_class_name` field."
+            )
+        pipeline_cls = getattr(diffusers, class_name, None)
+        if pipeline_cls is None:
+            raise SystemExit(
+                f"Pipeline class {class_name!r} declared in {diffusers.DiffusionPipeline.config_name} "
+                "is not exported by the installed diffusers."
+            )
+
+        sig = inspect.signature(pipeline_cls.__call__)
+        descriptions = _parse_docstring_args(pipeline_cls.__call__.__doc__) if args.verbose else {}
+        schema: list[dict[str, Any]] = []
+        for name, param in sig.parameters.items():
+            if name == "self":
+                continue
+            if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
+                continue
+            has_default = param.default is not inspect.Parameter.empty
+            schema.append(
+                {
+                    "name": name,
+                    "type_hint": str(param.annotation) if param.annotation is not inspect.Parameter.empty else None,
+                    "default": param.default if has_default else None,
+                    "required": not has_default,
+                    "description": descriptions.get(name, ""),
+                }
+            )
+    else:
+        kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
+        if args.revision:
+            kwargs["revision"] = args.revision
+        if args.token:
+            kwargs["token"] = args.token
+        try:
+            blocks = diffusers.ModularPipelineBlocks.from_pretrained(args.model, **kwargs)
+        except Exception as e:
+            raise SystemExit(
+                f"Could not describe {args.model!r}: no {diffusers.DiffusionPipeline.config_name} and "
+                f"loading as a modular pipeline failed ({type(e).__name__}: {e}). "
+                "Is this a diffusers pipeline repo? Pass --trust-remote-code if it ships custom block code."
+            ) from e
+
+        class_name = type(blocks).__name__
+        schema = [
+            {
+                "name": p.name,
+                "type_hint": str(p.type_hint) if p.type_hint is not None else None,
+                "default": p.default,
+                "required": p.required,
+                "description": p.description,
+            }
+            for p in blocks.inputs
+        ]
+
+    if args.json:
+        out.set_mode(OutputFormat.JSON)
+
+    if out.mode in (OutputFormat.JSON, OutputFormat.AGENT):
+        # Agents get the structured schema (full payload for JSON, the inputs table for AGENT).
+        if out.mode == OutputFormat.JSON:
+            out.dict({"task": "describe", "model": args.model, "pipeline_class": class_name, "inputs": schema})
+        else:
+            out.table(schema, headers=["name", "required", "type_hint", "default", "description"])
+        return
+
+    _print_schema(class_name, args.model, schema)
+
+
+def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
+    """Extract per-argument descriptions from a Google-style ``Args:`` block.
+
+    Returns a ``{name: description}`` mapping. Best-effort — unrecognised formats just yield an empty dict rather than
+    raising.
+    """
+    if not docstring:
+        return {}
+
+    import re
+
+    lines = docstring.expandtabs().splitlines()
+    start = None
+    section_indent = 0
+    for i, line in enumerate(lines):
+        if line.strip() in ("Args:", "Arguments:", "Parameters:"):
+            start = i + 1
+            section_indent = len(line) - len(line.lstrip())
+            break
+    if start is None:
+        return {}
+
+    descriptions: dict[str, str] = {}
+    current_name: Optional[str] = None
+    current_lines: list[str] = []
+    arg_indent: Optional[int] = None
+    name_pattern = re.compile(r"^(\w+)\s*(?:\([^)]*\))?\s*:?\s*(.*)$")
+
+    def _flush() -> None:
+        if current_name and current_lines:
+            descriptions[current_name] = " ".join(s.strip() for s in current_lines).strip()
+
+    for line in lines[start:]:
+        if not line.strip():
+            continue
+        indent = len(line) - len(line.lstrip())
+        # A new top-level section ends the Args block.
+        if indent <= section_indent and line.strip().endswith(":"):
+            break
+        if arg_indent is None:
+            arg_indent = indent
+        if indent == arg_indent:
+            _flush()
+            current_lines = []
+            match = name_pattern.match(line.strip())
+            if match:
+                current_name = match.group(1)
+                tail = match.group(2).strip()
+                if tail:
+                    current_lines.append(tail)
+            else:
+                current_name = None
+        elif current_name is not None and indent > arg_indent:
+            current_lines.append(line.strip())
+    _flush()
+    return descriptions
+
+
+def _print_schema(class_name: str, model: str, schema: list[dict[str, Any]]) -> None:
+    print(f"{class_name} ({model}) inputs:")
+    for entry in schema:
+        tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
+        print(f"  {entry['name']}  ({tag})")
+        if entry["type_hint"]:
+            print(f"    type: {entry['type_hint']}")
+        if entry["description"]:
+            print(f"    desc: {entry['description']}")
 
 
 class DescribeCommand(BaseDiffusersCLICommand):
diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index 09a3a8ab03b7..deca219d90f4 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -15,6 +15,7 @@
 
 from argparse import ArgumentParser
 
+from ._output import OutputFormat, out
 from .custom_blocks import CustomBlocksCommand
 from .describe import DescribeCommand
 from .env import EnvironmentCommand
@@ -25,9 +26,19 @@
 def main():
     parser = ArgumentParser(
         prog="diffusers-cli",
-        usage="\n  diffusers-cli <command> [options]",
+        usage="\n  diffusers-cli [--format <fmt>] <command> [options]",
     )
     parser._optionals.title = "General Options"
+    parser.add_argument(
+        "--format",
+        choices=[m.value for m in OutputFormat],
+        default=OutputFormat.AUTO.value,
+        help=(
+            "Output format. 'auto' (default) picks 'agent' when an AI coding agent is detected "
+            "(via CLAUDECODE/CURSOR_AI/AIDER_AI_CONTEXT/... env vars) and 'human' otherwise. "
+            "Must appear before the subcommand."
+        ),
+    )
     commands_parser = parser.add_subparsers(title="Commands", metavar="<command>")
 
     # Register commands
@@ -40,6 +51,8 @@ def main():
     # Let's go
     args = parser.parse_args()
 
+    out.set_mode(OutputFormat(args.format))
+
     if not hasattr(args, "func"):
         parser.print_help()
         exit(1)
diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 6501984fe2ff..3421f72d3920 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -30,6 +30,8 @@
 from diffusers.utils import load_image
 
 from . import BaseDiffusersCLICommand
+from ._common import try_fetch_config
+from ._output import OutputFormat, out
 
 
 # ---------------------------------------------------------------------------
@@ -102,6 +104,15 @@ def _add_loading_arguments(parser: ArgumentParser) -> None:
     parser.add_argument("--revision", default=None, help="Model revision (branch, tag, or commit SHA).")
     parser.add_argument("--token", default=None, help="Hugging Face token for gated/private models.")
     parser.add_argument("--trust-remote-code", action="store_true", help="Allow custom code from the Hub.")
+    parser.add_argument(
+        "--lora",
+        default=None,
+        help=(
+            "JSON object describing a LoRA adapter to attach after the pipeline loads. "
+            'Shape: {"lora_id": "<hub-id-or-path>", "lora_scale": <float>}. '
+            'Example: \'{"lora_id": "alvdansen/littletinies", "lora_scale": 0.8}\'.'
+        ),
+    )
 
 
 def _add_optimization_arguments(parser: ArgumentParser) -> None:
@@ -244,7 +255,7 @@ def _map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
         pipeline.enable_group_offload(
             onload_device=torch.device(device),
             offload_type="leaf_level",
-            use_stream=device.startswith("cuda"),
+            use_stream=True,
         )
     return pipeline
 
@@ -264,8 +275,11 @@ def _set_attention_backend(pipeline: Any, backend: str) -> None:
         return
     try:
         module.set_attention_backend(backend)
-    except (ValueError, ImportError, RuntimeError):
-        pass
+    except (ValueError, ImportError, RuntimeError) as e:
+        raise SystemExit(
+            f"Failed to set attention backend {backend!r}: {type(e).__name__}: {e}. "
+            f"Allowed backends: {', '.join(ATTENTION_BACKEND_CHOICES)}."
+        ) from e
 
 
 def _enable_context_parallel(pipeline: Any) -> None:
@@ -275,15 +289,6 @@ def _enable_context_parallel(pipeline: Any) -> None:
         raise SystemExit("--context-parallel requires a torch build with distributed support.")
 
     if not torch.distributed.is_initialized():
-        # torchrun sets RANK/WORLD_SIZE/LOCAL_RANK/MASTER_* env vars but does not call
-        # init_process_group on our behalf — do it here. If those env vars are absent the
-        # process wasn't launched under torchrun, so point the user at the right command.
-        if "LOCAL_RANK" not in os.environ:
-            raise SystemExit(
-                "--context-parallel requires torch.distributed to be initialized. "
-                "Launch the CLI under torchrun, e.g.: "
-                "`torchrun --nproc-per-node=N -m diffusers.commands.diffusers_cli generate ...`."
-            )
         # Hybrid backend: ulysses_anything's per-rank size coordination wants Gloo on CPU
         # (avoids H2D/D2H for a tiny int tensor); the main attention all-to-all stays on NCCL.
         torch.distributed.init_process_group(backend="cpu:gloo,cuda:nccl")
@@ -341,189 +346,42 @@ def _load_pipeline(args: Namespace, modular: bool) -> Any:
         return pipeline
     pipeline = _map_to_device(pipeline, args, _resolve_device(args.device))
     _apply_optimizations(pipeline, args)
+    _load_lora(pipeline, args)
     return pipeline
 
 
-# ---------------------------------------------------------------------------
-# Modular pipeline detection + introspection
-# ---------------------------------------------------------------------------
-
+def _load_lora(pipeline: Any, args: Namespace) -> None:
+    """Attach a LoRA adapter from a JSON spec like ``{"lora_id": "...", "lora_scale": 0.8}``."""
+    if not args.lora:
+        return
+    try:
+        spec = json.loads(args.lora)
+    except json.JSONDecodeError as e:
+        raise SystemExit(f"--lora must be valid JSON: {e}") from e
+    if not isinstance(spec, dict):
+        raise SystemExit("--lora must decode to a JSON object.")
+    lora_id = spec.get("lora_id")
+    if not lora_id:
+        raise SystemExit("--lora must include a 'lora_id' field.")
+    if not hasattr(pipeline, "load_lora_weights"):
+        raise SystemExit(f"{type(pipeline).__name__} does not support LoRA loading.")
 
-def _try_fetch_config(args: Namespace, filename: str) -> Optional[str]:
-    """Try to resolve ``filename`` for ``args.model`` (local path or Hub repo). None if absent."""
-    local = Path(args.model)
-    if local.exists():
-        candidate = local / filename
-        return str(candidate) if candidate.exists() else None
+    pipeline.load_lora_weights(lora_id, adapter_name="default")
+    scale = spec.get("lora_scale")
+    if scale is not None and hasattr(pipeline, "set_adapters"):
+        pipeline.set_adapters(["default"], adapter_weights=[float(scale)])
 
-    from huggingface_hub import hf_hub_download
-    from huggingface_hub.utils import EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError
 
-    try:
-        return hf_hub_download(args.model, filename, revision=args.revision, token=args.token)
-    except (EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError):
-        return None
+# ---------------------------------------------------------------------------
+# Modular pipeline detection + introspection
+# ---------------------------------------------------------------------------
 
 
 def _is_modular_repo(args: Namespace) -> bool:
     """Detect by trying ``DiffusionPipeline.config_name`` first; modular iff that's absent."""
     from diffusers import DiffusionPipeline
 
-    return _try_fetch_config(args, DiffusionPipeline.config_name) is None
-
-
-def _describe(args: Namespace) -> None:
-    """Print the pipeline's input schema.
-
-    Tries ``DiffusionPipeline.config_name`` (= ``model_index.json``) first; if present, introspects the declared
-    pipeline class's ``__call__`` signature. Otherwise falls back to ``ModularPipelineBlocks.from_pretrained`` and
-    reads the block-declared ``inputs``. No weights downloaded either way.
-    """
-    import inspect
-
-    import diffusers
-
-    standard_index = _try_fetch_config(args, diffusers.DiffusionPipeline.config_name)
-
-    if standard_index is not None:
-        with open(standard_index) as f:
-            index = json.load(f)
-        class_name = index.get("_class_name")
-        if class_name is None:
-            raise SystemExit(
-                f"{diffusers.DiffusionPipeline.config_name} for {args.model!r} has no `_class_name` field."
-            )
-        pipeline_cls = getattr(diffusers, class_name, None)
-        if pipeline_cls is None:
-            raise SystemExit(
-                f"Pipeline class {class_name!r} declared in {diffusers.DiffusionPipeline.config_name} "
-                "is not exported by the installed diffusers."
-            )
-        sig = inspect.signature(pipeline_cls.__call__)
-        descriptions = _parse_docstring_args(pipeline_cls.__call__.__doc__) if args.verbose else {}
-        schema: list[dict[str, Any]] = []
-        for name, param in sig.parameters.items():
-            if name == "self":
-                continue
-            if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
-                continue
-            has_default = param.default is not inspect.Parameter.empty
-            schema.append(
-                {
-                    "name": name,
-                    "type_hint": str(param.annotation) if param.annotation is not inspect.Parameter.empty else None,
-                    "default": param.default if has_default else None,
-                    "required": not has_default,
-                    "description": descriptions.get(name, ""),
-                }
-            )
-    else:
-        kwargs: dict[str, Any] = {"trust_remote_code": args.trust_remote_code}
-        if args.revision:
-            kwargs["revision"] = args.revision
-        if args.token:
-            kwargs["token"] = args.token
-        try:
-            blocks = diffusers.ModularPipelineBlocks.from_pretrained(args.model, **kwargs)
-        except Exception as e:
-            raise SystemExit(
-                f"Could not describe {args.model!r}: no {diffusers.DiffusionPipeline.config_name} and "
-                f"loading as a modular pipeline failed ({type(e).__name__}: {e}). "
-                "Is this a diffusers pipeline repo? Pass --trust-remote-code if it ships custom block code."
-            ) from e
-        class_name = type(blocks).__name__
-        schema = [
-            {
-                "name": p.name,
-                "type_hint": str(p.type_hint) if p.type_hint is not None else None,
-                "default": p.default,
-                "required": p.required,
-                "description": p.description,
-            }
-            for p in blocks.inputs
-        ]
-
-    if args.json:
-        payload = {
-            "task": "describe",
-            "model": args.model,
-            "pipeline_class": class_name,
-            "inputs": schema,
-        }
-        json.dump(payload, sys.stdout, default=str)
-        sys.stdout.write("\n")
-        return
-
-    _print_schema(class_name, args.model, schema)
-
-
-def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
-    """Extract per-argument descriptions from a Google-style ``Args:`` block.
-
-    Returns a ``{name: description}`` mapping. Best-effort — unrecognised formats just yield an empty dict rather than
-    raising.
-    """
-    if not docstring:
-        return {}
-
-    import re
-
-    lines = docstring.expandtabs().splitlines()
-    start = None
-    section_indent = 0
-    for i, line in enumerate(lines):
-        if line.strip() in ("Args:", "Arguments:", "Parameters:"):
-            start = i + 1
-            section_indent = len(line) - len(line.lstrip())
-            break
-    if start is None:
-        return {}
-
-    descriptions: dict[str, str] = {}
-    current_name: Optional[str] = None
-    current_lines: list[str] = []
-    arg_indent: Optional[int] = None
-    name_pattern = re.compile(r"^(\w+)\s*(?:\([^)]*\))?\s*:?\s*(.*)$")
-
-    def _flush() -> None:
-        if current_name and current_lines:
-            descriptions[current_name] = " ".join(s.strip() for s in current_lines).strip()
-
-    for line in lines[start:]:
-        if not line.strip():
-            continue
-        indent = len(line) - len(line.lstrip())
-        # A new top-level section ends the Args block.
-        if indent <= section_indent and line.strip().endswith(":"):
-            break
-        if arg_indent is None:
-            arg_indent = indent
-        if indent == arg_indent:
-            _flush()
-            current_lines = []
-            match = name_pattern.match(line.strip())
-            if match:
-                current_name = match.group(1)
-                tail = match.group(2).strip()
-                if tail:
-                    current_lines.append(tail)
-            else:
-                current_name = None
-        elif current_name is not None and indent > arg_indent:
-            current_lines.append(line.strip())
-    _flush()
-    return descriptions
-
-
-def _print_schema(class_name: str, model: str, schema: list[dict[str, Any]]) -> None:
-    print(f"{class_name} ({model}) inputs:")
-    for entry in schema:
-        tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
-        print(f"  {entry['name']}  ({tag})")
-        if entry["type_hint"]:
-            print(f"    type: {entry['type_hint']}")
-        if entry["description"]:
-            print(f"    desc: {entry['description']}")
+    return try_fetch_config(args, DiffusionPipeline.config_name) is None
 
 
 # ---------------------------------------------------------------------------
@@ -835,8 +693,8 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
 def _job_timing(api: Any, job_id: str, namespace: Optional[str]) -> dict[str, Optional[float]]:
     """Return queue/run/total wallclock seconds for ``job_id`` from inspect_job timestamps.
 
-    inspect_job sometimes returns finished_at=None for a few seconds after the container exits
-    while HF Jobs propagates the terminal state; retry briefly so we don't miss run/total.
+    inspect_job sometimes returns finished_at=None for a few seconds after the container exits while HF Jobs propagates
+    the terminal state; retry briefly so we don't miss run/total.
     """
     import time
 
@@ -922,18 +780,10 @@ def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optio
 
 
 def _format_result(args: Namespace, payload: dict[str, Any]) -> None:
-    """Print either a human-friendly summary or JSON, depending on --json."""
+    """Route the result payload through ``out``. ``--json`` escalates the mode regardless of --format."""
     if args.json:
-        json.dump(payload, sys.stdout, default=str)
-        sys.stdout.write("\n")
-        return
-
-    outputs = payload.get("outputs", [])
-    if outputs:
-        for path in outputs:
-            print(path)
-    else:
-        print(payload)
+        out.set_mode(OutputFormat.JSON)
+    out.result(payload.get("task", "done"), **payload)
 
 
 # ---------------------------------------------------------------------------

From 55e1c1433cd781814b40a894ba930ac0289499a6 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 17:13:48 +0530
Subject: [PATCH 20/30] update

---
 src/diffusers/commands/_common.py |  44 ++++++++
 src/diffusers/commands/_output.py | 182 ++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 src/diffusers/commands/_common.py
 create mode 100644 src/diffusers/commands/_output.py

diff --git a/src/diffusers/commands/_common.py b/src/diffusers/commands/_common.py
new file mode 100644
index 000000000000..df242628841d
--- /dev/null
+++ b/src/diffusers/commands/_common.py
@@ -0,0 +1,44 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Shared helpers used by multiple ``diffusers-cli`` subcommands.
+
+Anything imported by more than one command file lives here so command modules stay standalone — no cross-command
+imports between e.g. ``describe`` and ``generate``.
+"""
+
+from __future__ import annotations
+
+from argparse import Namespace
+from pathlib import Path
+from typing import Optional
+
+
+def try_fetch_config(args: Namespace, filename: str) -> Optional[str]:
+    """Resolve ``filename`` for ``args.model`` (local path or Hub repo). Return None if absent.
+
+    Used by ``generate`` (to detect modular vs standard pipelines) and ``describe`` (to read the pipeline class for
+    schema introspection) — no weights are downloaded, only the small index file.
+    """
+    local = Path(args.model)
+    if local.exists():
+        candidate = local / filename
+        return str(candidate) if candidate.exists() else None
+
+    from huggingface_hub import hf_hub_download
+    from huggingface_hub.utils import EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError
+
+    try:
+        return hf_hub_download(args.model, filename, revision=args.revision, token=args.token)
+    except (EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError):
+        return None
diff --git a/src/diffusers/commands/_output.py b/src/diffusers/commands/_output.py
new file mode 100644
index 000000000000..1200737ec696
--- /dev/null
+++ b/src/diffusers/commands/_output.py
@@ -0,0 +1,182 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Dual-audience output sink for ``diffusers-cli``.
+
+Every subcommand routes user-visible output through the singleton ``out``. The mode is one of ``human`` (default for
+terminals), ``agent`` (auto-selected when an AI coding agent is detected), ``json`` (machine-parseable), or ``quiet``
+(first value per record). The set of methods on ``out`` covers the shapes our commands actually produce — free-form
+text, key/value results, structured dicts, and tabular schemas — so leaf commands never branch on ``args.json``
+themselves.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from enum import Enum
+from typing import Any, Optional, Sequence
+
+
+# Environment variables set by known AI coding agents. Any one set to a non-empty value triggers
+# AGENT mode under `--format auto`. Subset of the huggingface_hub harness registry — extend as needed.
+_AGENT_ENV_VARS = (
+    "CLAUDECODE",  # Claude Code
+    "CLAUDE_CODE",  # alt spelling
+    "CURSOR_AI",  # Cursor
+    "AIDER_AI_CONTEXT",  # Aider
+    "GH_COPILOT_AGENT",  # GitHub Copilot Agent
+)
+
+
+def is_agent() -> bool:
+    """Return True if the process appears to be invoked by an AI coding agent."""
+    return any(os.environ.get(v) for v in _AGENT_ENV_VARS)
+
+
+class OutputFormat(str, Enum):
+    AUTO = "auto"
+    HUMAN = "human"
+    AGENT = "agent"
+    JSON = "json"
+    QUIET = "quiet"
+
+
+class Output:
+    """Singleton output sink. Resolve mode once at startup, then call ``out.<method>``."""
+
+    mode: OutputFormat
+
+    def __init__(self) -> None:
+        self.set_mode(OutputFormat.AUTO)
+
+    def set_mode(self, mode: OutputFormat) -> None:
+        """Set the active output mode. AUTO resolves to AGENT or HUMAN via ``is_agent()``."""
+        if mode == OutputFormat.AUTO:
+            mode = OutputFormat.AGENT if is_agent() else OutputFormat.HUMAN
+        self.mode = mode
+
+    # ------------------------------------------------------------------ stdout
+
+    def text(self, msg: str) -> None:
+        """Free-form line. Suppressed in QUIET; printed plain in every other mode."""
+        if self.mode == OutputFormat.QUIET:
+            return
+        print(msg)
+
+    def dict(self, data: dict[str, Any]) -> None:
+        """Structured object — suppressed in QUIET, otherwise JSON (indented for HUMAN, compact for AGENT/JSON).
+
+        Use for payloads that don't decompose cleanly into key/value pairs (e.g. describe schemas).
+        """
+        if self.mode == OutputFormat.QUIET:
+            return
+        indent = 2 if self.mode == OutputFormat.HUMAN else None
+        print(json.dumps(data, indent=indent, default=str))
+
+    def result(self, message: str, **data: Any) -> None:
+        """Success summary.
+
+        - HUMAN: ``message`` followed by indented ``key: value`` lines, one per non-None value.
+        - AGENT: space-separated ``key=value`` pairs on one line; falls back to ``message`` when no value is set.
+        - JSON: compact JSON of ``data``.
+        - QUIET: first non-None value.
+        """
+        if self.mode == OutputFormat.HUMAN:
+            print(message)
+            for k, v in data.items():
+                if v is not None:
+                    print(f"  {k}: {v}")
+        elif self.mode == OutputFormat.AGENT:
+            parts = [f"{k}={v}" for k, v in data.items() if v is not None]
+            print(" ".join(parts) if parts else message)
+        elif self.mode == OutputFormat.JSON:
+            print(json.dumps(data, default=str))
+        elif self.mode == OutputFormat.QUIET:
+            for v in data.values():
+                if v is not None:
+                    print(v)
+                    return
+
+    def table(
+        self,
+        items: Sequence[dict[str, Any]],
+        *,
+        headers: Optional[list[str]] = None,
+        id_key: Optional[str] = None,
+    ) -> None:
+        """Tabular data — HUMAN gets padded columns, AGENT gets TSV, JSON gets the list, QUIET gets id_key.
+
+        Headers default to the keys of the first item; ``id_key`` defaults to the first header.
+        """
+        if not items:
+            if self.mode in (OutputFormat.HUMAN, OutputFormat.AGENT):
+                print("No results.")
+            elif self.mode == OutputFormat.JSON:
+                print("[]")
+            return
+
+        if headers is None:
+            headers = list(items[0].keys())
+
+        if self.mode == OutputFormat.JSON:
+            print(json.dumps(list(items), default=str))
+            return
+
+        if self.mode == OutputFormat.QUIET:
+            key = id_key or headers[0]
+            for item in items:
+                value = item.get(key)
+                if value is not None:
+                    print(value)
+            return
+
+        rows = [[_cell(item.get(h)) for h in headers] for item in items]
+        if self.mode == OutputFormat.AGENT:
+            print("\t".join(headers))
+            for row in rows:
+                print("\t".join(row))
+            return
+
+        # HUMAN: pad each column to its widest cell for readable alignment.
+        widths = [max(len(h), *(len(r[i]) for r in rows)) for i, h in enumerate(headers)]
+        print("  ".join(h.ljust(widths[i]) for i, h in enumerate(headers)))
+        for row in rows:
+            print("  ".join(c.ljust(widths[i]) for i, c in enumerate(row)))
+
+    # ------------------------------------------------------------------ stderr
+
+    def hint(self, message: str) -> None:
+        """Next-step suggestion. Suppressed in QUIET; otherwise written to stderr to keep stdout parseable."""
+        if self.mode == OutputFormat.QUIET:
+            return
+        print(f"Hint: {message}", file=sys.stderr)
+
+    def warning(self, message: str) -> None:
+        """Non-fatal warning — stderr, every mode."""
+        print(f"Warning: {message}", file=sys.stderr)
+
+    def error(self, message: str) -> None:
+        """Error — stderr, every mode."""
+        print(f"Error: {message}", file=sys.stderr)
+
+
+def _cell(value: Any) -> str:
+    if value is None:
+        return ""
+    return str(value)
+
+
+# Module-level singleton imported by every subcommand.
+out = Output()

From 6ba7a3fd50223385fb32ce6e58e39651f4395cd5 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 17:17:19 +0530
Subject: [PATCH 21/30] update

---
 src/diffusers/commands/generate.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 3421f72d3920..523e3a55e6c8 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -87,7 +87,17 @@
 # Namespace keys that control *how* a remote job runs locally, not what runs
 # inside the container. They are stripped when forwarding argv to the container.
 HF_JOBS_KEYS = frozenset(
-    {"remote", "flavor", "timeout", "dependencies", "namespace", "no_wait", "poll_interval", "func"}
+    {
+        "remote",
+        "flavor",
+        "timeout",
+        "dependencies",
+        "namespace",
+        "no_wait",
+        "poll_interval",
+        "func",
+        "format",  # top-level --format is a local rendering flag; never forward to the container
+    }
 )
 
 

From 6f02aedc823af55980b6df0ca979924e68f69c11 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 17:23:44 +0530
Subject: [PATCH 22/30] update

---
 src/diffusers/commands/generate.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 523e3a55e6c8..e60e088da79f 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -186,7 +186,11 @@ def _add_remote_arguments(parser: ArgumentParser) -> None:
         default="a10g-small",
         help="HF Jobs hardware flavor for --remote (e.g. a10g-small, a100-large, cpu-basic).",
     )
-    parser.add_argument("--timeout", default=None, help="HF Jobs timeout for --remote (e.g. 30m, 2h).")
+    parser.add_argument(
+        "--timeout",
+        default="10m",
+        help="HF Jobs timeout for --remote (e.g. 30m, 2h). Defaults to 10m.",
+    )
     parser.add_argument(
         "--dependencies",
         action="append",
@@ -247,9 +251,12 @@ def _resolve_device(name: Optional[str]) -> str:
         if local_rank is not None:
             torch.cuda.set_device(int(local_rank))
             return f"cuda:{local_rank}"
+
         return "cuda"
+
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return "mps"
+
     return "cpu"
 
 

From 889f6460314d80eb9e060302b49c8d8d054cf4ff Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 15 Jun 2026 17:55:55 +0530
Subject: [PATCH 23/30] update

---
 .ai/skills/diffusers-cli/SKILL.md | 149 +++++++-----------------------
 1 file changed, 34 insertions(+), 115 deletions(-)

diff --git a/.ai/skills/diffusers-cli/SKILL.md b/.ai/skills/diffusers-cli/SKILL.md
index ae9c34cdc712..b2aedd8fca16 100644
--- a/.ai/skills/diffusers-cli/SKILL.md
+++ b/.ai/skills/diffusers-cli/SKILL.md
@@ -3,138 +3,57 @@ name: diffusers-cli
 description: >
   Use when the user wants to run a diffusers pipeline from a terminal (one-off
   generation, batch jobs, smoke-testing a new model), submit jobs to HF Jobs
-  hardware via `--remote`, or introspect an unknown pipeline's input schema
-  before calling it. Prefer this over writing ad-hoc Python scripts for
-  generation tasks.
+  hardware via `--remote`, introspect a pipeline's input schema before
+  calling it, or attach a LoRA at inference time. Prefer this over writing
+  ad-hoc Python scripts for generation tasks.
 ---
 
 ## Overview
 
-`diffusers-cli` is the shipped CLI in `src/diffusers/commands/`. Three subcommands
-matter for agentic use:
+`diffusers-cli` is the shipped CLI in `src/diffusers/commands/`. Subcommands relevant to agentic use:
 
-| Command | Purpose |
-| --- | --- |
-| `generate` | Run any `DiffusionPipeline` or `ModularPipeline` by forwarding `--pipeline-kwargs` verbatim. Saves output by sniffing its runtime type. |
-| `describe` | Print the input schema (kwarg names + types + defaults + docstring) for a pipeline repo. **No weights downloaded** — only `model_index.json` (or `modular_model_index.json`) is fetched. |
-| `custom_blocks` | Package a local `ModularPipelineBlocks` subclass for the Hub. |
+| Command         | Purpose                                                                                                                                                                                       |
+| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `generate`      | Run any `DiffusionPipeline` or `ModularPipeline`. Forwards `--pipeline-kwargs` verbatim, saves output by sniffing its runtime type, optionally runs on HF Jobs via `--remote`.                |
+| `describe`      | Print the input schema for a pipeline repo (kwarg names, types, defaults, descriptions). **No weights downloaded** — only the small index file.                                               |
+| `custom_blocks` | Package a local `ModularPipelineBlocks` subclass for the Hub.                                                                                                                                 |
+| `env`           | Print versions of diffusers + torch + transformers + accelerate + safetensors + CUDA + GPU info. Use when investigating environment issues, dtype/precision support, or building bug reports. |
 
-`env` (system info) and `fp16_safetensors` (deprecated) also exist but aren't
-relevant to inference.
+`fp16_safetensors` is also shipped but deprecated and not relevant to inference.
 
-## The describe → generate flow
+## When to read which file
 
-For any model you haven't called before, run `describe` first to learn its
-input contract, then `generate` with the right `--pipeline-kwargs`:
+Most agentic work goes through `generate`. Read the matching reference file before constructing a command:
 
-```bash
-# 1. Discover what kwargs the pipeline takes (no weight download)
-diffusers-cli describe --model black-forest-labs/FLUX.1-dev --json
-
-# 2. Run it
-diffusers-cli generate \
-    --model black-forest-labs/FLUX.1-dev \
-    --pipeline-kwargs '{"prompt": "a cat", "num_inference_steps": 30}' \
-    --dtype bf16
-```
-
-`describe`'s `--json` output is machine-readable: a list of `{name, type_hint,
-default, required, description}` entries. Use `--verbose` to additionally parse
-the `__call__` docstring's `Args:` block for descriptions on standard pipelines.
-
-## Standard vs modular detection
-
-`generate` auto-detects which kind of pipeline it's calling:
-
-1. If `model_index.json` exists on the repo → `DiffusionPipeline.from_pretrained` path
-2. Otherwise → `ModularPipeline.from_pretrained` path
+- **[`generate.md`](generate.md)** — full reference for `diffusers-cli generate`. Covers `--pipeline-kwargs`
+  semantics and the shell-quoting gotcha, LoRA via `--lora`, optimization flags (`--dtype`, `--cpu-offload`,
+  `--attention-backend`, `--vae-tiling/slicing`), output handling and `--push-to` bucket uploads, the full
+  `--remote` HF Jobs flow (image, container command, log streaming, timing payload, artifact download), and
+  context parallel (`--context-parallel`) for both local-torchrun and `--remote` paths.
 
-You don't need to tell it which. Modular repos must pass `--trust-remote-code`
-if they ship custom block code.
-
-## `--pipeline-kwargs` semantics
-
-A JSON object passed straight through to `pipeline(**kwargs)`. String values at
-known image-input keys (`image`, `mask_image`, `control_image`,
-`ip_adapter_image`, `image_2`) are auto-loaded as PIL images, so you can pass
-URLs or local paths directly:
+The other commands are small enough that `diffusers-cli <command> --help` is the canonical reference:
 
 ```bash
-diffusers-cli generate \
-    --model stabilityai/stable-diffusion-xl-refiner-1.0 \
-    --pipeline-kwargs '{
-        "image": "https://example.com/cat.png",
-        "prompt": "a photorealistic cat",
-        "strength": 0.6
-    }'
+diffusers-cli describe --help
+diffusers-cli custom_blocks --help
+diffusers-cli env --help
 ```
 
-**Shell-quoting gotcha**: the JSON must be on one line (or use `\` to
-line-continue). A literal newline inside the single-quoted argument lands as a
-raw control char inside the string and breaks `json.loads`.
-
-## Output handling
-
-`generate` sniffs the pipeline return type and saves accordingly:
-
-- `PIL.Image` / list of them → `outputs/generate-<i>.png`
-- Frame sequence (≥2 PILs or ndarrays) → `outputs/generate-0.mp4` (uses `--fps`, default 8)
-- Numpy audio array → `outputs/generate-0.wav` (uses `--sampling-rate`)
-- Anything else → JSON dump
-
-Override the destination with `--output <path>` (file or directory).
-
-Use `--push-to <user>/<bucket>` to upload outputs to an HF bucket after saving.
-The bucket is created if it doesn't exist; objects land under
-`<run_id>/<filename>`.
-
-## Remote execution (`--remote`)
-
-Adds `--remote` to submit the same call as a Hugging Face Job:
-
-```bash
-diffusers-cli generate \
-    --model black-forest-labs/FLUX.1-dev \
-    --pipeline-kwargs '{"prompt": "a cat"}' \
-    --remote --flavor a100-large
-```
-
-What happens:
-
-1. Token is read from `args.token` or `huggingface_hub.get_token()`.
-2. A bucket (`<user>/jobs-artifacts` by default) is auto-created.
-3. Job is submitted to HF Jobs via `run_job` with the pytorch image
-   (`pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime`) so torch + CUDA are
-   preinstalled.
-4. Container runs `uv pip install --system --break-system-packages
-   <small-deps> && diffusers-cli generate ...` — only ~50 MB of deps install
-   because torch already lives in the image's site-packages.
-5. The CLI streams the container's logs to stderr until the job terminates,
-   then downloads any files the job uploaded to the bucket under its `run_id`
-   prefix.
-6. A timing breakdown (`queued_seconds`, `run_seconds`, `total_seconds`) is
-   printed and added to the JSON payload.
-
-Use `--no-wait` to submit and immediately return the job id without streaming
-logs. Use `--namespace` to run under a different account.
-
-## `--json` machine-readable mode
-
-All subcommands accept `--json` to emit a single JSON object on stdout instead
-of human-readable text. Use this when an agent needs to parse the result —
-output paths, timing, pushed-bucket URIs, etc.
-
 ## When NOT to use this skill
 
-- Multi-stage workflows where you need intermediate tensor manipulation between
-  pipelines → write Python.
+- Multi-stage workflows where you need intermediate tensor manipulation between pipelines → write Python.
 - Training or fine-tuning → CLI only covers inference.
-- Anything requiring custom `device_map`, `quantization_config`, or other
-  low-level loader knobs not exposed by the CLI flags → write Python.
+- Anything requiring custom `device_map`, `quantization_config`, or other low-level loader knobs not exposed by
+  the CLI flags → write Python.
 
 ## Verifying the CLI is installed
 
-The console entry point lives in `pyproject.toml` (`diffusers-cli =
-"diffusers.commands.diffusers_cli:main"`). If `diffusers-cli` is not on PATH
-after `pip install -e .`, reinstall with `pip install -e . --force-reinstall
---no-deps` and check `which diffusers-cli`.
+The console entry point is registered in `pyproject.toml` (`diffusers-cli =
+"diffusers.commands.diffusers_cli:main"`). If `diffusers-cli` is not on PATH after `pip install -e .`, reinstall
+with `pip install -e . --force-reinstall --no-deps` and check `which diffusers-cli`. If the installed binary is
+missing recent features (e.g. you see `unrecognized arguments: --lora`), reinstall.
+
+## Full user guide
+
+For a non-agent overview with troubleshooting and tips, see [`DIFFUSERS_CLI.md`](../../../DIFFUSERS_CLI.md) at
+the repo root.

From ab70d692713eb728898d91117dafbd4e20fbee23 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 16 Jun 2026 12:46:10 +0530
Subject: [PATCH 24/30] update

---
 .ai/skills/diffusers-cli/generate.md | 182 +++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 .ai/skills/diffusers-cli/generate.md

diff --git a/.ai/skills/diffusers-cli/generate.md b/.ai/skills/diffusers-cli/generate.md
new file mode 100644
index 000000000000..e71ba56f5590
--- /dev/null
+++ b/.ai/skills/diffusers-cli/generate.md
@@ -0,0 +1,182 @@
+# `diffusers-cli generate` — reference
+
+Full surface for `diffusers-cli generate`. Use this file as the source of truth when constructing a `generate`
+invocation. The top-level [`SKILL.md`](SKILL.md) covers when to use the CLI; this file covers how.
+
+## The describe → generate flow
+
+For any model you haven't called before, run `describe` first to learn its input contract, then `generate` with
+the right `--pipeline-kwargs`:
+
+```bash
+# 1. Discover what kwargs the pipeline takes (no weight download)
+diffusers-cli --format json describe --model black-forest-labs/FLUX.2-klein-9B
+
+# 2. Run it
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"prompt": "Make the cats fur grey", "image": "https://blobcdn.same.energy/a/d0/58/d058b51c2329b0ea4057e9f12cd9a1da36347e34"}' \
+    --dtype bf16
+```
+
+`describe --format json` emits a `{task, model, pipeline_class, inputs[]}` payload where each input is
+`{name, type_hint, default, required, description}`.
+
+## Standard vs modular detection
+
+`generate` auto-detects which kind of pipeline it's calling:
+
+1. If `model_index.json` exists on the repo → `DiffusionPipeline.from_pretrained` path.
+2. Otherwise → `ModularPipeline.from_pretrained` path.
+
+You don't need to tell it which. Modular repos must pass `--trust-remote-code` if they ship custom block code.
+
+## `--pipeline-kwargs` semantics
+
+A JSON object passed straight through to `pipeline(**kwargs)`. String values at known image-input keys (`image`,
+`mask_image`, `control_image`, `ip_adapter_image`, `image_2`) are auto-loaded as PIL images, so you can pass URLs
+or local paths directly:
+
+```bash
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"image": "https://example.com/cat.png", "prompt": "make the fur grey", "strength": 0.6}'
+```
+
+**Shell-quoting gotcha**: the JSON must be on one line (or use `\` to line-continue). A literal newline inside the
+single-quoted argument lands as a raw control char inside the string and breaks `json.loads`.
+
+## LoRA adapters (`--lora`)
+
+Attach a LoRA after the pipeline loads via a JSON spec:
+
+```bash
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"prompt": "a tiny grey cat"}' \
+    --lora '{"lora_id": "alvdansen/littletinies", "lora_scale": 0.8}'
+```
+
+Calls `pipeline.load_lora_weights(<lora_id>, adapter_name="default")` and, if `lora_scale` is present,
+`pipeline.set_adapters(["default"], adapter_weights=[<scale>])`. Errors clearly if the pipeline doesn't support
+LoRA or `lora_id` is missing.
+
+## Optimization flags
+
+- `--dtype {auto, bf16, fp16, fp32, …}` — pipeline weight dtype. `bf16` is the right default for modern DiTs on
+  A100/H100.
+- `--cpu-offload {model, group}` — `model` uses `enable_model_cpu_offload`, `group` uses
+  `enable_group_offload(offload_type="leaf_level", use_stream=True)`. Use `group` to fit a 9B+ model on a single A100.
+- `--attention-backend {default, flash_hub, flash_varlen_hub, flash_4_hub, sage_hub}` — hub-hosted kernels,
+  auto-downloaded on first use. Failures (kernel not available, CUDA arch mismatch, network) raise a clear
+  `SystemExit` listing the alternatives instead of silently reverting to the default.
+- `--vae-tiling` / `--vae-slicing` — lower peak VAE decode VRAM.
+- `--context-parallel` — Ulysses-style context parallelism on a DiT. See [Context parallel](#context-parallel) below.
+
+`disable_mmap=True` is always passed to `from_pretrained` — sequential reads are faster than mmap page-faults on
+most filesystems.
+
+## Output handling
+
+`generate` sniffs the pipeline return type and saves accordingly:
+
+- `PIL.Image` / list of them → `outputs/generate-<i>.png`
+- Frame sequence (≥2 PILs or ndarrays) → `outputs/generate-0.mp4` (uses `--fps`, default 8)
+- Numpy audio array → `outputs/generate-0.wav` (uses `--sampling-rate`)
+- Anything else → JSON dump
+
+Override the destination with `--output <path>` (file or directory).
+
+Use `--push-to <user>/<bucket>` to upload outputs to an HF bucket after saving. The bucket is created if it
+doesn't exist; objects land under `<run_id>/<filename>`.
+
+## Remote execution (`--remote`)
+
+Add `--remote` to submit the same call as a Hugging Face Job:
+
+```bash
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"prompt": "Make the cats fur grey", "image": "https://blobcdn.same.energy/a/d0/58/d058b51c2329b0ea4057e9f12cd9a1da36347e34"}' \
+    --remote --flavor a100-large \
+    --dtype bf16 \
+    --cpu-offload group
+```
+
+What happens:
+
+1. Token is read from `args.token` or `huggingface_hub.get_token()`.
+2. A bucket (`<user>/jobs-artifacts` by default) is auto-created.
+3. Job is submitted via `run_job` (not `run_uv_job`, which would not honor the custom image) with image
+   `pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime` (torch 2.10 + CUDA 12.8, which stays within the HF Jobs
+   host driver's maximum supported CUDA version of 12.9).
+4. Container runs:
+   ```
+   sh -c "uv pip install --system --break-system-packages <small-deps> && diffusers-cli generate ..."
+   ```
+   Only `diffusers`-tarball + `accelerate` + `transformers` + `safetensors` are installed inline (~50 MB instead
+   of ~3 GB) because torch+CUDA come from the image. `--break-system-packages` bypasses PEP 668 in the image's
+   system Python.
+5. Container logs stream to stderr; on completion the CLI downloads any files the job uploaded to the bucket
+   under its `run_id` prefix into `./outputs/`.
+6. A timing breakdown (`queued_seconds`, `run_seconds`, `total_seconds`) is printed and included in the JSON
+   payload.
+
+Flags:
+
+- `--flavor <name>` — HF Jobs hardware (e.g. `a10g-small`, `a100-large`, `4xa100-large`).
+- `--timeout <duration>` — max wallclock (e.g. `30m`, `2h`). Defaults to `10m`.
+- `--dependencies <pkg>` — extra pip deps (repeatable).
+- `--namespace <name>` — run under a different account.
+- `--no-wait` — submit, return job id, don't stream logs.
+- `--push-to <bucket>` — override the artifact bucket id.
+
+## Context parallel
+
+`--context-parallel` enables Ulysses CP on a DiT-based pipeline. **Locally** the user must launch via torchrun:
+
+```bash
+torchrun --nproc-per-node=2 -m diffusers.commands.diffusers_cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"prompt": "Make the cats fur grey"}' \
+    --dtype bf16 \
+    --context-parallel
+```
+
+**Remotely** the CLI handles the torchrun wrapping — just pass `--context-parallel` to a `--remote` invocation on
+a multi-GPU flavor:
+
+```bash
+diffusers-cli generate \
+    --model black-forest-labs/FLUX.2-klein-9B \
+    --pipeline-kwargs '{"prompt": "Make the cats fur grey", "image": "https://blobcdn.same.energy/a/d0/58/d058b51c2329b0ea4057e9f12cd9a1da36347e34"}' \
+    --remote --flavor 4xa100-large \
+    --dtype bf16 \
+    --context-parallel
+```
+
+Inside the container, CP swaps the entrypoint to `torchrun --nproc-per-node=gpu -m
+diffusers.commands.diffusers_cli`, initializes a hybrid process group (`cpu:gloo,cuda:nccl` — NCCL for the
+attention all-to-all, Gloo for `ulysses_anything`'s per-rank size coordination), pins each rank to
+`cuda:{LOCAL_RANK}`, and gates output saving/printing to rank 0 only.
+
+**Memory note**: CP shards the sequence, **not the weights**. Every rank still holds the full transformer. Wins
+are wall-clock attention speedup and headroom for very long sequences, not "fit a model that doesn't fit." For
+weight sharding you'd want TP or FSDP — not exposed in the CLI yet.
+
+CP is DiT-only. UNet pipelines raise a clear error directing you to a DiT pipeline (FLUX, SD3, HunyuanDiT,
+AuraFlow, …).
+
+## Output mode (`--format`)
+
+The CLI auto-detects when running under an AI coding agent (Claude Code, Cursor, Aider, GH Copilot Agent — via
+`CLAUDECODE`, `CLAUDE_CODE`, `CURSOR_AI`, `AIDER_AI_CONTEXT`, `GH_COPILOT_AGENT`) and switches output to **agent
+mode** automatically — TSV tables, `key=value` results, compact JSON dicts, no progress bars.
+
+Override explicitly with `--format {auto, human, agent, json, quiet}` placed **before** the subcommand:
+
+```bash
+diffusers-cli --format json generate --model <id> --pipeline-kwargs '...'
+```
+
+The legacy `--json` flag on `generate` still works as a shortcut for `--format json`.

From af8cbf40c1fca385cea1a66b9473f440cc67b547 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 16 Jun 2026 16:33:40 +0530
Subject: [PATCH 25/30] update

---
 .ai/skills/diffusers-cli/SKILL.md       | 19 ++++--
 .ai/skills/diffusers-cli/generate.md    |  2 +-
 src/diffusers/commands/_output.py       | 36 +++--------
 src/diffusers/commands/custom_blocks.py |  2 +
 src/diffusers/commands/describe.py      | 41 ++++--------
 src/diffusers/commands/diffusers_cli.py |  2 +-
 src/diffusers/commands/generate.py      | 83 +++++++++++++++----------
 7 files changed, 88 insertions(+), 97 deletions(-)

diff --git a/.ai/skills/diffusers-cli/SKILL.md b/.ai/skills/diffusers-cli/SKILL.md
index b2aedd8fca16..cf70620044d7 100644
--- a/.ai/skills/diffusers-cli/SKILL.md
+++ b/.ai/skills/diffusers-cli/SKILL.md
@@ -19,8 +19,6 @@ description: >
 | `custom_blocks` | Package a local `ModularPipelineBlocks` subclass for the Hub.                                                                                                                                 |
 | `env`           | Print versions of diffusers + torch + transformers + accelerate + safetensors + CUDA + GPU info. Use when investigating environment issues, dtype/precision support, or building bug reports. |
 
-`fp16_safetensors` is also shipped but deprecated and not relevant to inference.
-
 ## When to read which file
 
 Most agentic work goes through `generate`. Read the matching reference file before constructing a command:
@@ -53,7 +51,18 @@ The console entry point is registered in `pyproject.toml` (`diffusers-cli =
 with `pip install -e . --force-reinstall --no-deps` and check `which diffusers-cli`. If the installed binary is
 missing recent features (e.g. you see `unrecognized arguments: --lora`), reinstall.
 
-## Full user guide
+## Output formats
+
+`--format {auto, human, agent, json}` (top-level flag, must appear before the subcommand):
+
+- **`human`** — plain-text indented output for terminals (default when not running under an agent harness). No ANSI color.
+- **`agent`** — TSV tables and `key=value` lines. Auto-selected when an agent env var is present
+  (`CLAUDECODE`, `CLAUDE_CODE`, `CODEX_SANDBOX`, `CURSOR_AI`, `AIDER_AI_CONTEXT`, `GH_COPILOT_AGENT`,
+  `AI_AGENT`). Token-cheap for LLM agents to read.
+- **`json`** — compact JSON. Use for programmatic parsing (scripts, services) where type fidelity and nested
+  structures matter.
+
+`stdout` carries data; `stderr` carries hints/warnings/progress — parseable output is never polluted.
 
-For a non-agent overview with troubleshooting and tips, see [`DIFFUSERS_CLI.md`](../../../DIFFUSERS_CLI.md) at
-the repo root.
+Rule of thumb: `--format json` for scripts that will `json.loads()` the output, otherwise leave it on
+auto-detect (`agent` for LLMs, `human` for terminals).
diff --git a/.ai/skills/diffusers-cli/generate.md b/.ai/skills/diffusers-cli/generate.md
index e71ba56f5590..ba64cae017c9 100644
--- a/.ai/skills/diffusers-cli/generate.md
+++ b/.ai/skills/diffusers-cli/generate.md
@@ -173,7 +173,7 @@ The CLI auto-detects when running under an AI coding agent (Claude Code, Cursor,
 `CLAUDECODE`, `CLAUDE_CODE`, `CURSOR_AI`, `AIDER_AI_CONTEXT`, `GH_COPILOT_AGENT`) and switches output to **agent
 mode** automatically — TSV tables, `key=value` results, compact JSON dicts, no progress bars.
 
-Override explicitly with `--format {auto, human, agent, json, quiet}` placed **before** the subcommand:
+Override explicitly with `--format {auto, human, agent, json}` placed **before** the subcommand:
 
 ```bash
 diffusers-cli --format json generate --model <id> --pipeline-kwargs '...'
diff --git a/src/diffusers/commands/_output.py b/src/diffusers/commands/_output.py
index 1200737ec696..5c4d91e1d86c 100644
--- a/src/diffusers/commands/_output.py
+++ b/src/diffusers/commands/_output.py
@@ -14,10 +14,9 @@
 """Dual-audience output sink for ``diffusers-cli``.
 
 Every subcommand routes user-visible output through the singleton ``out``. The mode is one of ``human`` (default for
-terminals), ``agent`` (auto-selected when an AI coding agent is detected), ``json`` (machine-parseable), or ``quiet``
-(first value per record). The set of methods on ``out`` covers the shapes our commands actually produce — free-form
-text, key/value results, structured dicts, and tabular schemas — so leaf commands never branch on ``args.json``
-themselves.
+terminals), ``agent`` (auto-selected when an AI coding agent is detected), or ``json`` (machine-parseable). The set of
+methods on ``out`` covers the shapes our commands actually produce — free-form text, key/value results, structured
+dicts, and tabular schemas — so leaf commands never branch on ``args.json`` themselves.
 """
 
 from __future__ import annotations
@@ -30,10 +29,11 @@
 
 
 # Environment variables set by known AI coding agents. Presence of any one triggers AGENT mode
-# under `--format auto`. Subset of the huggingface_hub harness registry — extend as needed.
+# under `--format auto`.
 _AGENT_ENV_VARS = (
     "CLAUDECODE",  # Claude Code
     "CLAUDE_CODE",  # alt spelling
+    "CODEX_SANDBOX",  # Codex
     "CURSOR_AI",  # Cursor
     "AIDER_AI_CONTEXT",  # Aider
     "GH_COPILOT_AGENT",  # GitHub Copilot Agent
@@ -50,7 +50,6 @@ class OutputFormat(str, Enum):
     HUMAN = "human"
     AGENT = "agent"
     JSON = "json"
-    QUIET = "quiet"
 
 
 class Output:
@@ -70,9 +69,7 @@ def set_mode(self, mode: OutputFormat) -> None:
     # ------------------------------------------------------------------ stdout
 
     def text(self, msg: str) -> None:
-        """Free-form line. Suppressed in QUIET; printed plain in every other mode."""
-        if self.mode == OutputFormat.QUIET:
-            return
+        """Free-form line. Printed plain in every mode."""
         print(msg)
 
     def dict(self, data: dict[str, Any]) -> None:
@@ -80,8 +77,6 @@ def dict(self, data: dict[str, Any]) -> None:
 
         Use for payloads that don't decompose cleanly into key/value pairs (e.g. describe schemas).
         """
-        if self.mode == OutputFormat.QUIET:
-            return
         indent = 2 if self.mode == OutputFormat.HUMAN else None
         print(json.dumps(data, indent=indent, default=str))
 
@@ -91,7 +86,6 @@ def result(self, message: str, **data: Any) -> None:
         - HUMAN: ``message`` followed by `` key: value`` lines.
         - AGENT: ``key=value`` pairs space-separated on one line (TSV-ish, parser-friendly).
         - JSON: compact JSON of ``data``.
-        - QUIET: first non-None value.
         """
         if self.mode == OutputFormat.HUMAN:
             print(message)
@@ -103,20 +97,14 @@ def result(self, message: str, **data: Any) -> None:
             print(" ".join(parts) if parts else message)
         elif self.mode == OutputFormat.JSON:
             print(json.dumps(data, default=str))
-        elif self.mode == OutputFormat.QUIET:
-            for v in data.values():
-                if v is not None:
-                    print(v)
-                    return
 
     def table(
         self,
         items: Sequence[dict[str, Any]],
         *,
         headers: Optional[list[str]] = None,
-        id_key: Optional[str] = None,
     ) -> None:
-        """Tabular data — HUMAN gets padded columns, AGENT gets TSV, JSON gets the list, QUIET gets id_key.
+        """Tabular data — HUMAN gets padded columns, AGENT gets TSV, JSON gets the list.
 
         Headers default to the keys of the first item.
         """
@@ -134,14 +122,6 @@ def table(
             print(json.dumps(list(items), default=str))
             return
 
-        if self.mode == OutputFormat.QUIET:
-            key = id_key or headers[0]
-            for item in items:
-                value = item.get(key)
-                if value is not None:
-                    print(value)
-            return
-
         rows = [[_cell(item.get(h)) for h in headers] for item in items]
         if self.mode == OutputFormat.AGENT:
             print("\t".join(headers))
@@ -159,8 +139,6 @@ def table(
 
     def hint(self, message: str) -> None:
         """Next-step suggestion. Always goes to stderr so it never pollutes parseable stdout."""
-        if self.mode == OutputFormat.QUIET:
-            return
         print(f"Hint: {message}", file=sys.stderr)
 
     def warning(self, message: str) -> None:
diff --git a/src/diffusers/commands/custom_blocks.py b/src/diffusers/commands/custom_blocks.py
index bc0889376a95..10a17ee60b8c 100644
--- a/src/diffusers/commands/custom_blocks.py
+++ b/src/diffusers/commands/custom_blocks.py
@@ -82,6 +82,8 @@ def run(self):
             )
             child_class, parent_class = out[0][0], out[0][1]
 
+        # Dynamically load the custom block and instantiate it to call `save_pretrained` in the current directory.
+        # NOTE: this loads code from the user's own local file; treated as safe because the user invokes the command.
         module_name = f"__dynamic__{self.block_module_name.stem}"
         spec = importlib.util.spec_from_file_location(module_name, str(self.block_module_name))
         module = importlib.util.module_from_spec(spec)
diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
index 8dd240014540..95dbf13459af 100644
--- a/src/diffusers/commands/describe.py
+++ b/src/diffusers/commands/describe.py
@@ -102,18 +102,19 @@ def _describe(args: Namespace) -> None:
             for p in blocks.inputs
         ]
 
-    if args.json:
-        out.set_mode(OutputFormat.JSON)
-
-    if out.mode in (OutputFormat.JSON, OutputFormat.AGENT):
-        # Agents get the structured schema (full payload for JSON, the inputs table for AGENT).
-        if out.mode == OutputFormat.JSON:
-            out.dict({"task": "describe", "model": args.model, "pipeline_class": class_name, "inputs": schema})
-        else:
-            out.table(schema, headers=["name", "required", "type_hint", "default", "description"])
-        return
-
-    _print_schema(class_name, args.model, schema)
+    if out.mode == OutputFormat.JSON:
+        out.dict({"task": "describe", "model": args.model, "pipeline_class": class_name, "inputs": schema})
+    elif out.mode == OutputFormat.AGENT:
+        out.table(schema, headers=["name", "required", "type_hint", "default", "description"])
+    else:
+        out.text(f"{class_name} ({args.model}) inputs:")
+        for entry in schema:
+            tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
+            out.text(f"  {entry['name']}  ({tag})")
+            if entry["type_hint"]:
+                out.text(f"    type: {entry['type_hint']}")
+            if entry["description"]:
+                out.text(f"    desc: {entry['description']}")
 
 
 def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
@@ -174,17 +175,6 @@ def _flush() -> None:
     return descriptions
 
 
-def _print_schema(class_name: str, model: str, schema: list[dict[str, Any]]) -> None:
-    print(f"{class_name} ({model}) inputs:")
-    for entry in schema:
-        tag = "required" if entry["required"] else f"optional, default={entry['default']!r}"
-        print(f"  {entry['name']}  ({tag})")
-        if entry["type_hint"]:
-            print(f"    type: {entry['type_hint']}")
-        if entry["description"]:
-            print(f"    desc: {entry['description']}")
-
-
 class DescribeCommand(BaseDiffusersCLICommand):
     task = "describe"
 
@@ -227,11 +217,6 @@ def register_subcommand(subparsers: _SubParsersAction) -> None:
                 "the equivalent field for standard pipelines by parsing the Google-style Args: block."
             ),
         )
-        parser.add_argument(
-            "--json",
-            action="store_true",
-            help="Emit a machine-readable JSON summary on stdout.",
-        )
         parser.set_defaults(func=DescribeCommand)
 
     def __init__(self, args: Namespace):
diff --git a/src/diffusers/commands/diffusers_cli.py b/src/diffusers/commands/diffusers_cli.py
index deca219d90f4..8deb98c9916b 100644
--- a/src/diffusers/commands/diffusers_cli.py
+++ b/src/diffusers/commands/diffusers_cli.py
@@ -28,7 +28,7 @@ def main():
         prog="diffusers-cli",
         usage="\n  diffusers-cli [--format <fmt>] <command> [options]",
     )
-    parser._optionals.title = "General Options"
+    parser._optionals.title = "Options"
     parser.add_argument(
         "--format",
         choices=[m.value for m in OutputFormat],
diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index e60e088da79f..2591dae200a1 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -31,7 +31,7 @@
 
 from . import BaseDiffusersCLICommand
 from ._common import try_fetch_config
-from ._output import OutputFormat, out
+from ._output import out
 
 
 # ---------------------------------------------------------------------------
@@ -41,13 +41,19 @@
 DEFAULT_OUTPUT_DIR = "outputs"
 DTYPE_CHOICES = ("auto", "float16", "fp16", "bfloat16", "bf16", "float32", "fp32")
 CPU_OFFLOAD_CHOICES = ("model", "group")
-ATTENTION_BACKEND_CHOICES = (
-    "default",
-    "flash_hub",
-    "flash_varlen_hub",
-    "flash_4_hub",
-    "sage_hub",
-)
+
+
+def _hub_attention_backends() -> tuple[str, ...]:
+    """Hub-hosted attention backends sourced from ``_HUB_KERNELS_REGISTRY``.
+
+    Single source of truth: if the registry grows or shrinks, the CLI choices follow.
+    """
+    from diffusers.models.attention_dispatch import _HUB_KERNELS_REGISTRY
+
+    return tuple(sorted(backend.value for backend in _HUB_KERNELS_REGISTRY))
+
+
+ATTENTION_BACKEND_CHOICES = ("default", *_hub_attention_backends())
 
 # Keys whose string value should be resolved via ``diffusers.utils.load_image``
 # before being passed to the pipeline call.
@@ -72,6 +78,8 @@
     "accelerate",
     "transformers",
     "safetensors",
+    "sentencepiece",  # required by several text-encoder tokenizers (T5, LLaMA, …)
+    "ftfy",  # required by older CLIP text-encoder paths
 )
 
 # Base container image — provides torch + CUDA so ``uv pip install --system``
@@ -172,7 +180,6 @@ def _add_output_arguments(parser: ArgumentParser) -> None:
             "When --remote is set, defaults to <user>/jobs-artifacts."
         ),
     )
-    parser.add_argument("--json", action="store_true", help="Emit a machine-readable JSON summary on stdout.")
 
 
 def _add_remote_arguments(parser: ArgumentParser) -> None:
@@ -241,23 +248,20 @@ def _resolve_dtype(name: Optional[str]):
 def _resolve_device(name: Optional[str]) -> str:
     if name:
         return name
-    import torch
 
-    if torch.cuda.is_available():
-        # Under torchrun, LOCAL_RANK identifies this process's assigned GPU.
-        # Without this pin every rank falls back to cuda:0 and OOMs because the
-        # whole pipeline gets replicated onto a single device.
+    from diffusers.utils.torch_utils import torch_device
+
+    # Under torchrun, LOCAL_RANK identifies this process's assigned GPU. Without this
+    # pin every rank falls back to cuda:0 and OOMs as the pipeline replicates onto a
+    # single device. Only applies to cuda — torch_device already handles npu/xpu/mps/etc.
+    if torch_device == "cuda":
         local_rank = os.environ.get("LOCAL_RANK")
         if local_rank is not None:
+            import torch
+
             torch.cuda.set_device(int(local_rank))
             return f"cuda:{local_rank}"
-
-        return "cuda"
-
-    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-        return "mps"
-
-    return "cpu"
+    return torch_device
 
 
 def _map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
@@ -515,9 +519,13 @@ def _as_audio_arrays(value: Any):
 
 
 def _save_audio_arrays(audios, sampling_rate: int, args: Namespace, task: str) -> list[str]:
-    """Write each numpy audio array to a 16-bit PCM WAV at ``sampling_rate`` Hz."""
+    """Write each numpy audio array to a 16-bit PCM WAV at ``sampling_rate`` Hz.
+
+    Uses the stdlib ``wave`` module so no scipy dependency is required.
+    """
+    import wave
+
     import numpy as np
-    from scipy.io.wavfile import write as wavfile_write
 
     paths = _default_output_paths(task, len(audios), args.output, ext="wav")
     saved: list[str] = []
@@ -525,9 +533,21 @@ def _save_audio_arrays(audios, sampling_rate: int, args: Namespace, task: str) -
         data = np.asarray(audio)
         if data.dtype.kind == "f":
             data = (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
-        if data.ndim > 1 and data.shape[0] < data.shape[-1]:
-            data = data.T  # (channels, samples) → (samples, channels) for scipy.
-        wavfile_write(str(path), sampling_rate, data)
+        else:
+            data = data.astype(np.int16)
+        if data.ndim == 1:
+            n_channels = 1
+        else:
+            # Heuristic: shorter axis is channels (interleaved layout for `wave` is
+            # samples × channels, so transpose if needed).
+            if data.shape[0] < data.shape[-1]:
+                data = data.T
+            n_channels = data.shape[1]
+        with wave.open(str(path), "wb") as w:
+            w.setnchannels(n_channels)
+            w.setsampwidth(2)  # 16-bit PCM
+            w.setframerate(sampling_rate)
+            w.writeframes(data.tobytes())
         saved.append(str(path))
     return saved
 
@@ -690,7 +710,7 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     }
 
     if args.no_wait:
-        _format_result(args, payload)
+        _format_result(payload)
         return True
 
     print(
@@ -703,7 +723,7 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     payload["job_status"] = final_status
     payload["timing"] = _job_timing(api, job.id, args.namespace)
     payload["outputs"] = _download_job_artifacts(api, args.push_to, run_id, args.output)
-    _format_result(args, payload)
+    _format_result(payload)
     return True
 
 
@@ -796,10 +816,8 @@ def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optio
 # ---------------------------------------------------------------------------
 
 
-def _format_result(args: Namespace, payload: dict[str, Any]) -> None:
-    """Route the result payload through ``out``. ``--json`` escalates the mode regardless of --format."""
-    if args.json:
-        out.set_mode(OutputFormat.JSON)
+def _format_result(payload: dict[str, Any]) -> None:
+    """Route the result payload through the output sink."""
     out.result(payload.get("task", "done"), **payload)
 
 
@@ -885,7 +903,6 @@ def run(self) -> None:
                 pushed = _push_outputs(self.args, saved, self.task)
 
                 _format_result(
-                    self.args,
                     {
                         "task": self.task,
                         "model": self.args.model,

From b50dae1ac2b32bc958d96a7ab2c9dd6c918ad501 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 16 Jun 2026 17:07:38 +0530
Subject: [PATCH 26/30] Use PEP 604 unions in CLI commands; add kernels dep; warn that --device is ignored with --remote

---
 src/diffusers/commands/_common.py  |  3 +--
 src/diffusers/commands/_output.py  |  4 ++--
 src/diffusers/commands/describe.py | 14 ++++++-------
 src/diffusers/commands/generate.py | 33 +++++++++++++++++-------------
 4 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/diffusers/commands/_common.py b/src/diffusers/commands/_common.py
index df242628841d..bd95b3f88969 100644
--- a/src/diffusers/commands/_common.py
+++ b/src/diffusers/commands/_common.py
@@ -21,10 +21,9 @@
 
 from argparse import Namespace
 from pathlib import Path
-from typing import Optional
 
 
-def try_fetch_config(args: Namespace, filename: str) -> Optional[str]:
+def try_fetch_config(args: Namespace, filename: str) -> str | None:
     """Resolve ``filename`` for ``args.model`` (local path or Hub repo). Return None if absent.
 
     Used by ``generate`` (to detect modular vs standard pipelines) and ``describe`` (to read the pipeline class for
diff --git a/src/diffusers/commands/_output.py b/src/diffusers/commands/_output.py
index 5c4d91e1d86c..c155d789990f 100644
--- a/src/diffusers/commands/_output.py
+++ b/src/diffusers/commands/_output.py
@@ -25,7 +25,7 @@
 import os
 import sys
 from enum import Enum
-from typing import Any, Optional, Sequence
+from typing import Any, Sequence
 
 
 # Environment variables set by known AI coding agents. Presence of any one triggers AGENT mode
@@ -102,7 +102,7 @@ def table(
         self,
         items: Sequence[dict[str, Any]],
         *,
-        headers: Optional[list[str]] = None,
+        headers: list[str] | None = None,
     ) -> None:
         """Tabular data — HUMAN gets padded columns, AGENT gets TSV, JSON gets the list.
 
diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
index 95dbf13459af..5a9514961211 100644
--- a/src/diffusers/commands/describe.py
+++ b/src/diffusers/commands/describe.py
@@ -21,9 +21,11 @@
 
 from __future__ import annotations
 
+import inspect
 import json
+import re
 from argparse import ArgumentParser, Namespace, _SubParsersAction
-from typing import Any, Optional
+from typing import Any
 
 from . import BaseDiffusersCLICommand
 from ._common import try_fetch_config
@@ -37,8 +39,6 @@ def _describe(args: Namespace) -> None:
     pipeline class's ``__call__`` signature. Otherwise falls back to ``ModularPipelineBlocks.from_pretrained`` and
     reads the block-declared ``inputs``. No weights downloaded either way.
     """
-    import inspect
-
     import diffusers
 
     model_index = try_fetch_config(args, diffusers.DiffusionPipeline.config_name)
@@ -117,7 +117,7 @@ def _describe(args: Namespace) -> None:
                 out.text(f"    desc: {entry['description']}")
 
 
-def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
+def _parse_docstring_args(docstring: str | None) -> dict[str, str]:
     """Extract per-argument descriptions from a Google-style ``Args:`` block.
 
     Returns a ``{name: description}`` mapping. Best-effort — unrecognised formats just yield an empty dict rather than
@@ -126,8 +126,6 @@ def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
     if not docstring:
         return {}
 
-    import re
-
     lines = docstring.expandtabs().splitlines()
     start = None
     section_indent = 0
@@ -140,9 +138,9 @@ def _parse_docstring_args(docstring: Optional[str]) -> dict[str, str]:
         return {}
 
     descriptions: dict[str, str] = {}
-    current_name: Optional[str] = None
+    current_name: str | None = None
     current_lines: list[str] = []
-    arg_indent: Optional[int] = None
+    arg_indent: int | None = None
     name_pattern = re.compile(r"^(\w+)\s*(?:\([^)]*\))?\s*:?\s*(.*)$")
 
     def _flush() -> None:
diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 2591dae200a1..c86cd28df730 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -25,7 +25,7 @@
 import sys
 from argparse import ArgumentParser, Namespace, _SubParsersAction
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 
 from diffusers.utils import load_image
 
@@ -80,6 +80,7 @@ def _hub_attention_backends() -> tuple[str, ...]:
     "safetensors",
     "sentencepiece",  # required by several text-encoder tokenizers (T5, LLaMA, …)
     "ftfy",  # required by older CLIP text-encoder paths
+    "kernels",  # required by hub-hosted attention backends (flash_hub, sage_hub, …)
 )
 
 # Base container image — provides torch + CUDA so ``uv pip install --system``
@@ -105,6 +106,7 @@ def _hub_attention_backends() -> tuple[str, ...]:
         "poll_interval",
         "func",
         "format",  # top-level --format is a local rendering flag; never forward to the container
+        "device",  # local device pin; container auto-detects its own (cuda:0 or LOCAL_RANK)
     }
 )
 
@@ -227,7 +229,7 @@ def _add_remote_arguments(parser: ArgumentParser) -> None:
 # ---------------------------------------------------------------------------
 
 
-def _resolve_dtype(name: Optional[str]):
+def _resolve_dtype(name: str | None):
     if name in (None, "auto"):
         return "auto"
     import torch
@@ -245,7 +247,7 @@ def _resolve_dtype(name: Optional[str]):
     return mapping[name]
 
 
-def _resolve_device(name: Optional[str]) -> str:
+def _resolve_device(name: str | None) -> str:
     if name:
         return name
 
@@ -281,7 +283,7 @@ def _map_to_device(pipeline: Any, args: Namespace, device: str) -> Any:
     return pipeline
 
 
-def _denoiser(pipeline: Any) -> Optional[Any]:
+def _denoiser(pipeline: Any) -> Any | None:
     """Return the pipeline's denoiser submodule (transformer or unet) or None."""
     for attr in ("transformer", "unet"):
         module = getattr(pipeline, attr, None)
@@ -410,7 +412,7 @@ def _is_modular_repo(args: Namespace) -> bool:
 # ---------------------------------------------------------------------------
 
 
-def _parse_pipeline_kwargs(raw: Optional[str]) -> dict[str, Any]:
+def _parse_pipeline_kwargs(raw: str | None) -> dict[str, Any]:
     if not raw:
         return {}
     try:
@@ -430,7 +432,7 @@ def _resolve_image_inputs(call_kwargs: dict[str, Any]) -> None:
             call_kwargs[key] = load_image(value)
 
 
-def _get_generator(seed: Optional[int], device: str):
+def _get_generator(seed: int | None, device: str):
     if seed is None:
         return None
     import torch
@@ -456,7 +458,7 @@ def _result_to_savable(result: Any) -> Any:
 # ---------------------------------------------------------------------------
 
 
-def _default_output_paths(task: str, num: int, explicit: Optional[str], ext: str) -> list[Path]:
+def _default_output_paths(task: str, num: int, explicit: str | None, ext: str) -> list[Path]:
     if explicit is None:
         base = Path(DEFAULT_OUTPUT_DIR)
         base.mkdir(parents=True, exist_ok=True)
@@ -583,7 +585,7 @@ def _save_output(value: Any, args: Namespace, task: str) -> list[str]:
 # ---------------------------------------------------------------------------
 
 
-def _push_outputs(args: Namespace, saved_paths: list[str], task: str) -> Optional[dict[str, Any]]:
+def _push_outputs(args: Namespace, saved_paths: list[str], task: str) -> dict[str, Any] | None:
     """Upload ``saved_paths`` to the ``--push-to`` bucket. Returns a summary or None."""
     if not args.push_to:
         return None
@@ -636,6 +638,9 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     if not args.remote:
         return False
 
+    if args.device is not None:
+        out.warning(f"--device {args.device!r} is ignored with --remote; the container auto-detects its GPU.")
+
     print(
         f"[diffusers-cli] preparing remote {task!r} job on flavor={args.flavor!r}...",
         file=sys.stderr,
@@ -727,7 +732,7 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     return True
 
 
-def _job_timing(api: Any, job_id: str, namespace: Optional[str]) -> dict[str, Optional[float]]:
+def _job_timing(api: Any, job_id: str, namespace: str | None) -> dict[str, float | None]:
     """Return queue/run/total wallclock seconds for ``job_id`` from inspect_job timestamps.
 
     inspect_job sometimes returns finished_at=None for a few seconds after the container exits while HF Jobs propagates
@@ -742,7 +747,7 @@ def _job_timing(api: Any, job_id: str, namespace: Optional[str]) -> dict[str, Op
         time.sleep(1.0)
         info = api.inspect_job(job_id=job_id, namespace=namespace)
 
-    def _delta(start, end) -> Optional[float]:
+    def _delta(start, end) -> float | None:
         return (end - start).total_seconds() if (start is not None and end is not None) else None
 
     timing = {
@@ -756,7 +761,7 @@ def _delta(start, end) -> Optional[float]:
     return timing
 
 
-def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
+def _wait_for_job(api: Any, job_id: str, namespace: str | None, poll_interval: float) -> str:
     """Stream container logs to stderr until the job terminates; return the final stage."""
     fetch = getattr(api, "fetch_job_logs", None)
     if fetch is not None:
@@ -770,12 +775,12 @@ def _wait_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval
     return _poll_for_job(api, job_id, namespace, poll_interval)
 
 
-def _poll_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval: float) -> str:
+def _poll_for_job(api: Any, job_id: str, namespace: str | None, poll_interval: float) -> str:
     """Heartbeat-style fallback when ``fetch_job_logs`` isn't available."""
     import time
 
     terminal = {"COMPLETED", "CANCELED", "ERROR", "DELETED"}
-    last_stage: Optional[str] = None
+    last_stage: str | None = None
     while True:
         info = api.inspect_job(job_id=job_id, namespace=namespace)
         stage = str(info.status.stage) if info.status else "UNKNOWN"
@@ -792,7 +797,7 @@ def _poll_for_job(api: Any, job_id: str, namespace: Optional[str], poll_interval
         time.sleep(poll_interval)
 
 
-def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: Optional[str]) -> list[str]:
+def _download_job_artifacts(api: Any, bucket_id: str, run_id: str, output: str | None) -> list[str]:
     """Download every file under ``<run_id>/`` from ``bucket_id`` into a local directory."""
     from huggingface_hub import BucketFile
 

From 1d6f5b315e58ce6ddb821934aec8db2a17ffeb65 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Tue, 16 Jun 2026 17:45:35 +0530
Subject: [PATCH 27/30] Add help epilogs to subcommands; drop Output stderr helpers; stop excluding --device from remote jobs

---
 .ai/skills/diffusers-cli/generate.md    | 23 ++++------
 src/diffusers/commands/_output.py       | 60 +++++++++----------------
 src/diffusers/commands/custom_blocks.py | 15 +++++++
 src/diffusers/commands/describe.py      | 15 +++++++
 src/diffusers/commands/generate.py      | 27 +++++++++--
 5 files changed, 83 insertions(+), 57 deletions(-)

diff --git a/.ai/skills/diffusers-cli/generate.md b/.ai/skills/diffusers-cli/generate.md
index ba64cae017c9..4ba9738ba94e 100644
--- a/.ai/skills/diffusers-cli/generate.md
+++ b/.ai/skills/diffusers-cli/generate.md
@@ -105,21 +105,14 @@ diffusers-cli generate \
 
 What happens:
 
-1. Token is read from `args.token` or `huggingface_hub.get_token()`.
-2. A bucket (`<user>/jobs-artifacts` by default) is auto-created.
-3. Job is submitted via `run_job` (not `run_uv_job` — needed to honor the image) with image
-   `pytorch/pytorch:2.10.0-cuda12.8-cudnn9-runtime` (torch 2.10 + CUDA 12.8, matches HF Jobs host driver max of
-   CUDA 12.9).
-4. Container runs:
-   ```
-   sh -c "uv pip install --system --break-system-packages <small-deps> && diffusers-cli generate ..."
-   ```
-   Only `diffusers`-tarball + `accelerate` + `transformers` + `safetensors` are installed inline (~50 MB instead
-   of ~3 GB) because torch+CUDA come from the image. `--break-system-packages` bypasses PEP 668 in the image's
-   system Python.
-5. Container logs stream to stderr; on completion the CLI downloads any files the job uploaded to the bucket
-   under its `run_id` prefix into `./outputs/`.
-6. A timing breakdown (`queued_seconds`, `run_seconds`, `total_seconds`) is printed and included in the JSON
+1. Your HF token is picked up (from `--token` or your login).
+2. A bucket (`<user>/jobs-artifacts` by default) is created if it doesn't exist.
+3. The job runs in a pytorch container that already has torch + CUDA preinstalled. Only the small Python
+   deps (`diffusers`, `accelerate`, `transformers`, `safetensors`, `sentencepiece`, `ftfy`, `kernels`) are
+   installed at container start — about 50 MB instead of 3 GB.
+4. Container logs stream to your terminal. When the job finishes, the CLI downloads every file the job
+   uploaded to the bucket under its `run_id` prefix into `./outputs/`.
+5. A timing breakdown (`queued_seconds`, `run_seconds`, `total_seconds`) is printed and included in the JSON
    payload.
 
 Flags:
diff --git a/src/diffusers/commands/_output.py b/src/diffusers/commands/_output.py
index c155d789990f..5b08e2c909e4 100644
--- a/src/diffusers/commands/_output.py
+++ b/src/diffusers/commands/_output.py
@@ -11,25 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Dual-audience output sink for ``diffusers-cli``.
+"""Output formatting for ``diffusers-cli``.
 
-Every subcommand routes user-visible output through the singleton ``out``. The mode is one of ``human`` (default for
-terminals), ``agent`` (auto-selected when an AI coding agent is detected), or ``json`` (machine-parseable). The set of
-methods on ``out`` covers the shapes our commands actually produce — free-form text, key/value results, structured
-dicts, and tabular schemas — so leaf commands never branch on ``args.json`` themselves.
+Commands print through the singleton ``out`` instead of calling ``print`` directly. ``out`` picks the right format
+(human, agent, or json) based on the top-level ``--format`` flag, so commands don't have to check the mode themselves.
 """
 
 from __future__ import annotations
 
 import json
 import os
-import sys
 from enum import Enum
 from typing import Any, Sequence
 
 
-# Environment variables set by known AI coding agents. Presence of any one triggers AGENT mode
-# under `--format auto`.
+# Environment variables set by known AI coding agents. If any of these is set, `--format auto`
+# picks AGENT mode instead of HUMAN.
 _AGENT_ENV_VARS = (
     "CLAUDECODE",  # Claude Code
     "CLAUDE_CODE",  # alt spelling
@@ -41,7 +38,7 @@
 
 
 def is_agent() -> bool:
-    """Return True if the process appears to be invoked by an AI coding agent."""
+    """Return True if the CLI is being run by an AI coding agent."""
     return any(os.environ.get(v) for v in _AGENT_ENV_VARS)
 
 
@@ -53,7 +50,7 @@ class OutputFormat(str, Enum):
 
 
 class Output:
-    """Singleton output sink. Resolve mode once at startup, then call ``out.<method>``."""
+    """Picks the print format for each method based on the active mode (human / agent / json)."""
 
     mode: OutputFormat
 
@@ -61,7 +58,7 @@ def __init__(self) -> None:
         self.set_mode(OutputFormat.AUTO)
 
     def set_mode(self, mode: OutputFormat) -> None:
-        """Set the active output mode. AUTO resolves to AGENT or HUMAN via ``is_agent()``."""
+        """Set the active output mode. AUTO becomes AGENT or HUMAN based on is_agent()."""
         if mode == OutputFormat.AUTO:
             mode = OutputFormat.AGENT if is_agent() else OutputFormat.HUMAN
         self.mode = mode
@@ -69,23 +66,20 @@ def set_mode(self, mode: OutputFormat) -> None:
     # ------------------------------------------------------------------ stdout
 
     def text(self, msg: str) -> None:
-        """Free-form line. Printed plain in every mode."""
+        """Print a line of text. Same in every mode."""
         print(msg)
 
     def dict(self, data: dict[str, Any]) -> None:
-        """Structured object — JSON in every mode (indented for HUMAN, compact otherwise).
-
-        Use for payloads that don't decompose cleanly into key/value pairs (e.g. describe schemas).
-        """
+        """Print a dict as JSON. Indented for HUMAN, compact for AGENT and JSON."""
         indent = 2 if self.mode == OutputFormat.HUMAN else None
         print(json.dumps(data, indent=indent, default=str))
 
     def result(self, message: str, **data: Any) -> None:
-        """Success summary.
+        """Print a result summary.
 
-        - HUMAN: ``message`` followed by `` key: value`` lines.
-        - AGENT: ``key=value`` pairs space-separated on one line (TSV-ish, parser-friendly).
-        - JSON: compact JSON of ``data``.
+        - HUMAN: the message line followed by `` key: value`` lines.
+        - AGENT: ``key=value`` pairs separated by spaces on one line.
+        - JSON: compact JSON of the data dict.
         """
         if self.mode == OutputFormat.HUMAN:
             print(message)
@@ -104,9 +98,13 @@ def table(
         *,
         headers: list[str] | None = None,
     ) -> None:
-        """Tabular data — HUMAN gets padded columns, AGENT gets TSV, JSON gets the list.
+        """Print a list of dicts as a table.
 
-        Headers default to the keys of the first item.
+        - HUMAN: columns padded so each column lines up.
+        - AGENT: tab-separated values, one row per line.
+        - JSON: the list itself as a JSON array.
+
+        ``headers`` defaults to the keys of the first item.
         """
         if not items:
             if self.mode in (OutputFormat.HUMAN, OutputFormat.AGENT):
@@ -129,26 +127,12 @@ def table(
                 print("\t".join(row))
             return
 
-        # HUMAN: pad each column to its widest cell for readable alignment.
+        # HUMAN: pad each column to its widest cell so they line up.
         widths = [max(len(h), *(len(r[i]) for r in rows)) for i, h in enumerate(headers)]
         print("  ".join(h.ljust(widths[i]) for i, h in enumerate(headers)))
         for row in rows:
             print("  ".join(c.ljust(widths[i]) for i, c in enumerate(row)))
 
-    # ------------------------------------------------------------------ stderr
-
-    def hint(self, message: str) -> None:
-        """Next-step suggestion. Always goes to stderr so it never pollutes parseable stdout."""
-        print(f"Hint: {message}", file=sys.stderr)
-
-    def warning(self, message: str) -> None:
-        """Non-fatal warning — stderr, every mode."""
-        print(f"Warning: {message}", file=sys.stderr)
-
-    def error(self, message: str) -> None:
-        """Error — stderr, every mode."""
-        print(f"Error: {message}", file=sys.stderr)
-
 
 def _cell(value: Any) -> str:
     if value is None:
@@ -156,5 +140,5 @@ def _cell(value: Any) -> str:
     return str(value)
 
 
-# Module-level singleton imported by every subcommand.
+# Shared instance imported by every subcommand.
 out = Output()
diff --git a/src/diffusers/commands/custom_blocks.py b/src/diffusers/commands/custom_blocks.py
index 10a17ee60b8c..324978c83d3a 100644
--- a/src/diffusers/commands/custom_blocks.py
+++ b/src/diffusers/commands/custom_blocks.py
@@ -38,10 +38,25 @@ def conversion_command_factory(args: Namespace):
 class CustomBlocksCommand(BaseDiffusersCLICommand):
     @staticmethod
     def register_subcommand(parser: ArgumentParser):
+        from argparse import RawDescriptionHelpFormatter
+
+        epilog = (
+            "Examples\n"
+            "  $ diffusers-cli custom_blocks\n"
+            "  $ diffusers-cli custom_blocks --block_module_name my_block.py\n"
+            "  $ diffusers-cli custom_blocks --block_module_name my_block.py --block_class_name MyDenoiseBlock\n"
+            "\n"
+            "Learn more\n"
+            "  Use `diffusers-cli <command> --help` for more information about a command.\n"
+            "  Read the documentation at https://huggingface.co/docs/diffusers\n"
+        )
+
         conversion_parser = parser.add_parser(
             "custom_blocks",
             help="Package a local ModularPipelineBlocks subclass for the Hub.",
             usage="\n  diffusers-cli custom_blocks [options]",
+            epilog=epilog,
+            formatter_class=RawDescriptionHelpFormatter,
         )
         conversion_parser._optionals.title = "Options"
         conversion_parser.add_argument(
diff --git a/src/diffusers/commands/describe.py b/src/diffusers/commands/describe.py
index 5a9514961211..12b894dfe9ce 100644
--- a/src/diffusers/commands/describe.py
+++ b/src/diffusers/commands/describe.py
@@ -178,10 +178,25 @@ class DescribeCommand(BaseDiffusersCLICommand):
 
     @staticmethod
     def register_subcommand(subparsers: _SubParsersAction) -> None:
+        from argparse import RawDescriptionHelpFormatter
+
+        epilog = (
+            "Examples\n"
+            "  $ diffusers-cli describe -m stabilityai/stable-diffusion-xl-base-1.0\n"
+            "  $ diffusers-cli describe -m black-forest-labs/FLUX.1-dev --verbose\n"
+            "  $ diffusers-cli --format json describe -m stabilityai/stable-diffusion-xl-base-1.0\n"
+            "\n"
+            "Learn more\n"
+            "  Use `diffusers-cli <command> --help` for more information about a command.\n"
+            "  Read the documentation at https://huggingface.co/docs/diffusers\n"
+        )
+
         parser: ArgumentParser = subparsers.add_parser(
             "describe",
             help="Print the input schema for a diffusers pipeline repo. No weights downloaded.",
             usage="\n  diffusers-cli describe [options]",
+            epilog=epilog,
+            formatter_class=RawDescriptionHelpFormatter,
         )
         parser._optionals.title = "Options"
         parser.add_argument(
diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index c86cd28df730..8f5d0d5c824b 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -106,7 +106,6 @@ def _hub_attention_backends() -> tuple[str, ...]:
         "poll_interval",
         "func",
         "format",  # top-level --format is a local rendering flag; never forward to the container
-        "device",  # local device pin; container auto-detects its own (cuda:0 or LOCAL_RANK)
     }
 )
 
@@ -638,9 +637,6 @@ def _maybe_submit_remote(args: Namespace, task: str) -> bool:
     if not args.remote:
         return False
 
-    if args.device is not None:
-        out.warning(f"--device {args.device!r} is ignored with --remote; the container auto-detects its GPU.")
-
     print(
         f"[diffusers-cli] preparing remote {task!r} job on flavor={args.flavor!r}...",
         file=sys.stderr,
@@ -836,10 +832,33 @@ class GenerateCommand(BaseDiffusersCLICommand):
 
     @staticmethod
     def register_subcommand(subparsers: _SubParsersAction) -> None:
+        from argparse import RawDescriptionHelpFormatter
+
+        epilog = (
+            "Examples\n"
+            "  $ diffusers-cli generate -m black-forest-labs/FLUX.1-dev --dtype bf16 \\\n"
+            '      --pipeline-kwargs \'{"prompt": "a cat on the moon"}\'\n'
+            "  $ diffusers-cli generate -m black-forest-labs/FLUX.1-dev --dtype bf16 \\\n"
+            '      --pipeline-kwargs \'{"prompt": "make the fur grey", "image": "https://example.com/cat.png"}\'\n'
+            "  $ diffusers-cli generate -m black-forest-labs/FLUX.1-dev --dtype bf16 \\\n"
+            '      --pipeline-kwargs \'{"prompt": "a tiny cat"}\' \\\n'
+            '      --lora \'{"lora_id": "alvdansen/littletinies", "lora_scale": 0.8}\'\n'
+            "  $ diffusers-cli generate -m black-forest-labs/FLUX.1-dev --dtype bf16 \\\n"
+            '      --pipeline-kwargs \'{"prompt": "a cat"}\' --remote --flavor a100-large\n'
+            "  $ diffusers-cli generate -m black-forest-labs/FLUX.1-dev --dtype bf16 --context-parallel \\\n"
+            '      --pipeline-kwargs \'{"prompt": "a cat"}\' --remote --flavor 4xa100-large\n'
+            "\n"
+            "Learn more\n"
+            "  Use `diffusers-cli <command> --help` for more information about a command.\n"
+            "  Read the documentation at https://huggingface.co/docs/diffusers\n"
+        )
+
         parser: ArgumentParser = subparsers.add_parser(
             "generate",
             help="Run any diffusers pipeline locally or remotely with HF Jobs.",
             usage="\n  diffusers-cli generate [options]",
+            epilog=epilog,
+            formatter_class=RawDescriptionHelpFormatter,
         )
         parser._optionals.title = "Options"
         _add_loading_arguments(parser)

From 043919219956a60cf7f749bcddcf5f65435a0440 Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Thu, 18 Jun 2026 15:02:53 +0530
Subject: [PATCH 28/30] Add --compile option to generate; toggle VAE tiling/slicing on pipeline.vae

---
 src/diffusers/commands/generate.py | 61 +++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index 8f5d0d5c824b..e20a9c21d9d8 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -164,6 +164,20 @@ def _add_optimization_arguments(parser: ArgumentParser) -> None:
             "Requires a DiT-based pipeline and launching the CLI under torchrun with ≥2 GPUs."
         ),
     )
+    parser.add_argument(
+        "--compile",
+        nargs="?",
+        const="{}",
+        default=None,
+        metavar="JSON",
+        help=(
+            "torch.compile every denoiser submodule on the pipeline. Accepts an optional JSON "
+            'object of kwargs forwarded to ``torch.compile``, e.g. \'{"mode": "max-autotune", '
+            '"fullgraph": true}\'. Bare ``--compile`` uses torch defaults. Adds a one-time compilation '
+            "cost on the first step but speeds up every subsequent step — worth it for multi-step "
+            "generation (50+ steps)."
+        ),
+    )
 
 
 def _add_output_arguments(parser: ArgumentParser) -> None:
@@ -334,15 +348,52 @@ def _enable_context_parallel(pipeline: Any) -> None:
 
 
 def _apply_optimizations(pipeline: Any, args: Namespace) -> None:
-    """Apply VAE tiling/slicing, attention backend, and context-parallel toggles."""
-    if args.vae_tiling and hasattr(pipeline, "enable_vae_tiling"):
-        pipeline.enable_vae_tiling()
-    if args.vae_slicing and hasattr(pipeline, "enable_vae_slicing"):
-        pipeline.enable_vae_slicing()
+    """Apply CLI toggles: VAE tiling/slicing (if supported), attention backend, context parallel, torch.compile."""
+    vae = getattr(pipeline, "vae", None)
+    if args.vae_tiling and vae is not None and hasattr(vae, "enable_tiling"):
+        vae.enable_tiling()
+    if args.vae_slicing and vae is not None and hasattr(vae, "enable_slicing"):
+        vae.enable_slicing()
     if args.attention_backend != "default":
         _set_attention_backend(pipeline, args.attention_backend)
     if args.context_parallel:
         _enable_context_parallel(pipeline)
+    if args.compile is not None:
+        _compile_denoiser(pipeline, args.compile)
+
+
+def _compile_denoiser(pipeline: Any, compile_spec: str) -> None:
+    """Apply ``torch.compile`` to each denoiser (``transformer*`` / ``unet*``) module held by ``pipeline``.
+
+    Args:
+        pipeline: Object whose attributes are scanned for ``torch.nn.Module`` denoisers.
+        compile_spec: Raw JSON text from ``--compile``; must decode to an object, which is passed on as kwargs.
+
+    Modules with a truthy ``_repeated_blocks`` are compiled regionally through ``compile_repeated_blocks``;
+    every other match is swapped on the pipeline for its ``torch.compile`` wrapper.
+
+    Raises:
+        SystemExit: ``compile_spec`` is not valid JSON or does not decode to a JSON object.
+    """
+    import torch
+
+    try:
+        options = json.loads(compile_spec)
+    except json.JSONDecodeError as e:
+        raise SystemExit(f"--compile must be valid JSON: {e}") from e
+    if not isinstance(options, dict):
+        raise SystemExit("--compile must decode to a JSON object.")
+
+    denoiser_names = [name for name in dir(pipeline) if name.startswith(("transformer", "unet"))]
+    for name in denoiser_names:
+        candidate = getattr(pipeline, name, None)
+        if not isinstance(candidate, torch.nn.Module):
+            continue
+        if not getattr(candidate, "_repeated_blocks", None):
+            # Nothing declared for regional compilation, so wrap the entire module instead.
+            setattr(pipeline, name, torch.compile(candidate, **options))
+            continue
+        candidate.compile_repeated_blocks(**options)
 
 
 def _from_pretrained_kwargs(args: Namespace) -> dict[str, Any]:

From bf8fe64b16e789bb69a2c36c807844cd003c34fd Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Thu, 18 Jun 2026 15:06:16 +0530
Subject: [PATCH 29/30] update

---
 src/diffusers/commands/generate.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index e20a9c21d9d8..da9eadbafe84 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -80,7 +80,6 @@ def _hub_attention_backends() -> tuple[str, ...]:
     "safetensors",
     "sentencepiece",  # required by several text-encoder tokenizers (T5, LLaMA, …)
     "ftfy",  # required by older CLIP text-encoder paths
-    "kernels",  # required by hub-hosted attention backends (flash_hub, sage_hub, …)
 )
 
 # Base container image — provides torch + CUDA so ``uv pip install --system``

From 8354f6e09c5e7e1ebc10770a60fa76c8f36fde0a Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Thu, 18 Jun 2026 17:46:45 +0530
Subject: [PATCH 30/30] update

---
 src/diffusers/commands/generate.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/commands/generate.py b/src/diffusers/commands/generate.py
index da9eadbafe84..432cf1576679 100644
--- a/src/diffusers/commands/generate.py
+++ b/src/diffusers/commands/generate.py
@@ -166,15 +166,16 @@ def _add_optimization_arguments(parser: ArgumentParser) -> None:
     parser.add_argument(
         "--compile",
         nargs="?",
-        const="{}",
+        const='{"mode": "max-autotune-no-cudagraphs"}',
         default=None,
         metavar="JSON",
         help=(
             "torch.compile every denoiser submodule on the pipeline. Accepts an optional JSON "
             'object of kwargs forwarded to ``torch.compile``, e.g. \'{"mode": "max-autotune", '
-            '"fullgraph": true}\'. Bare ``--compile`` uses torch defaults. Adds a one-time compilation '
-            "cost on the first step but speeds up every subsequent step — worth it for multi-step "
-            "generation (50+ steps)."
+            '"fullgraph": true}\'. Bare ``--compile`` uses ``mode=max-autotune-no-cudagraphs`` — '
+            "CUDA Graphs break with regional/repeated-block compile because sequential blocks "
+            "overwrite each other's output buffers. Adds a one-time compilation cost on the first "
+            "step but speeds up every subsequent step — worth it for multi-step generation (50+ steps)."
         ),
     )