From c092633cd7c442dd0a3f8329c3bfcf2a4a2340a2 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 6 Aug 2025 10:36:53 +0200
Subject: [PATCH] feat(models): add support to qwen-image (#5975)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/python/diffusers/backend.py | 29 +++++++++++++++++++++++++++++
 gallery/index.yaml                  | 13 +++++++++++++
 gallery/qwen-image.yaml             | 19 +++++++++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 gallery/qwen-image.yaml

diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py
index 1a5f1785a..17a71694a 100755
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -65,6 +65,19 @@ from diffusers.schedulers import (
     UniPCMultistepScheduler,
 )
 
+def is_float(s):  # True when float() can parse s; only ValueError counts as "not a float"
+    try:
+        float(s)
+    except ValueError:
+        return False
+    return True
+
+def is_int(s):  # True when int() can parse s; only ValueError counts as "not an int"
+    try:
+        int(s)
+    except ValueError:
+        return False
+    return True
 
 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
 # Credits to https://github.com/neggles
@@ -169,8 +182,24 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 if ":" not in opt:
                     continue
                 key, value = opt.split(":")
+                # Convert numeric values; int is tried first because float() also parses every integer string
+                if is_int(value):
+                    value = int(value)
+                elif is_float(value):
+                    value = float(value)
                 self.options[key] = value
 
+            # Optional "torch_dtype" option: "fp16", "bf16" or "fp32" selects the torch dtype assigned to torchType
+            if "torch_dtype" in self.options:
+                if self.options["torch_dtype"] == "fp16":
+                    torchType = torch.float16
+                elif self.options["torch_dtype"] == "bf16":
+                    torchType = torch.bfloat16
+                elif self.options["torch_dtype"] == "fp32":
+                    torchType = torch.float32
+                # Any other value leaves torchType unassigned here; the key is deleted from self.options either way
+                del self.options["torch_dtype"]
+
             print(f"Options: {self.options}", file=sys.stderr)
 
             local = False
diff --git a/gallery/index.yaml b/gallery/index.yaml
index 3cc71b999..936f8ff52 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,17 @@
 ---
+- &qwenimage
+  name: "qwen-image"
+  url: "github:mudler/LocalAI/gallery/qwen-image.yaml@master"
+  urls:
+    - https://huggingface.co/Qwen/Qwen-Image
+  icon: https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_logo.png
+  license: apache-2.0
+  tags:
+    - qwen-image
+    - gpu
+    - text-to-image
+  description: |
+    We are thrilled to release Qwen-Image, an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering and precise image editing. Experiments show strong general capabilities in both image generation and editing, with exceptional performance in text rendering, especially for Chinese.
 - &gptoss
   name: "gpt-oss-20b"
   url: "github:mudler/LocalAI/gallery/harmony.yaml@master"
diff --git a/gallery/qwen-image.yaml b/gallery/qwen-image.yaml
new file mode 100644
index 000000000..4786138dd
--- /dev/null
+++ b/gallery/qwen-image.yaml
@@ -0,0 +1,19 @@
+---
+name: "qwen-image"  # matches gallery/qwen-image.yaml and the model name used in config_file below
+
+config_file: |
+    backend: diffusers
+    cfg_scale: 0
+    diffusers:
+      cuda: true
+      enable_parameters: num_inference_steps
+      pipeline_type: DiffusionPipeline
+    f16: true
+    low_vram: true
+    name: qwen-image
+    parameters:
+      model: Qwen/Qwen-Image
+    step: 50
+    options:
+      - true_cfg_scale:4.0
+      - torch_dtype:bf16