Qwen-Image-Edit-Fusion

Running on Zero

App Files Files Community

linoyts HF Staff committed about 1 month ago

Commit

f51afdc

verified ·

1 Parent(s): 770de96

Update app.py (#1)

Browse files

- Update app.py (97e7d5a4f67a9beb3ab9e36fa45a5ae3a61afc56)
- Update app.py (0dca5489babfbe5182f80515e2c2e9647249b05c)

Files changed (1) hide show

app.py +102 -94

app.py CHANGED Viewed

@@ -3,31 +3,19 @@ import numpy as np
 import random
 import torch
 import spaces
 from PIL import Image
-from diffusers import FlowMatchEulerDiscreteScheduler
-from optimization import optimize_pipeline_
-from diffusers import QwenImageEditPlusPipeline, QwenImageTransformer2DModel
-# from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
-# from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
-# from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 import math
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
-from PIL import Image
 import os
-import gradio as gr
-from gradio_client import Client, handle_file
 import tempfile
 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Scheduler configuration for Lightning
 scheduler_config = {
     "base_image_seq_len": 256,
     "base_shift": math.log(3),
@@ -45,40 +33,70 @@ scheduler_config = {
     "use_karras_sigmas": False,
 }
-# Initialize scheduler with Lightning config
 scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
-pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509",
-                                                 scheduler=scheduler,
-                                                 torch_dtype=dtype).to(device)
 pipe.load_lora_weights(
-        "lightx2v/Qwen-Image-Lightning",
-        weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
-    )
 pipe.load_lora_weights(
-       "dx8152/Qwen-Image-Edit-2509-Fusion",
-        weight_name="溶图.safetensors", adapter_name="fusion"
-    )
-pipe.set_adapters(["fast", "fusion"], adapter_weights=[1.,1.])
 pipe.fuse_lora(adapter_names=["fast"])
 pipe.fuse_lora(adapter_names=["fusion"])
 pipe.unload_lora_weights()
-# pipe.transformer.__class__ = QwenImageTransformer2DModel
-# pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-# optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
 MAX_SEED = np.iinfo(np.int32).max
 @spaces.GPU
 def infer(
-    image_subject,
     prompt="",
-    image_background=None,
     seed=42,
     randomize_seed=True,
     true_guidance_scale=1,
@@ -87,90 +105,80 @@ def infer(
     width=None,
     progress=gr.Progress(track_tqdm=True)
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
     result = pipe(
-        image=image_subject,
         prompt=prompt,
-        # height=height,
-        # width=width,
         num_inference_steps=num_inference_steps,
         generator=generator,
         true_cfg_scale=true_guidance_scale,
         num_images_per_prompt=1,
     ).images[0]
-    return [image_subject,result], seed
 # --- UI ---
-css = '''#col-container { max-width: 800px; margin: 0 auto; }
 .dark .progress-text{color: white !important}
-#examples{max-width: 800px; margin: 0 auto; }'''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("## Qwen Image Edit — Fusion")
-        gr.Markdown("""
-            Qwen Image Edit 2509 ✨
-            Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and [lightx2v Qwen-Image-Lightning LoRA]() for 4-step inference 💨
-            """
-        )
         with gr.Row():
             with gr.Column():
-                with gr.Row():
-                    image_subject = gr.Image(label="input image", type="pil")
-                    image_background = gr.Image(label="background Image", type="pil", visible=False)
-                prompt =  gr.Textbox(label="prompt")
-                run_button = gr.Button("Fuse", variant="primary")
-                with gr.Accordion("Advanced Settings", open=False):
-                        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                        true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
-                        num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
-                        height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
-                        width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
             with gr.Column():
                 result = gr.ImageSlider(label="Output Image", interactive=False)
-                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False, visible=False)
-        gr.Examples(
-        examples=[
-            ["fusion_car.png", ""],["fusion_shoes.png", ""],["wednesday_product.png", "put the product in her hand"]
-        ],
-        inputs=[image_subject, prompt],
-        outputs=[result,seed],
-        fn=infer,
-        cache_examples="lazy",
-        elem_id="examples"
-    )
-    inputs = [
-        image_subject,image_background, prompt,
-        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
-    ]
-    outputs = [result, seed]
-    run_event = run_button.click(
-        fn=infer,
-        inputs=inputs,
-        outputs=outputs
-    )
-demo.launch(share=True)

 import random
 import torch
 import spaces
 from PIL import Image
 import math
+from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
+from briarmbg import BriaRMBG
 import os
 import tempfile
 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 scheduler_config = {
     "base_image_seq_len": 256,
     "base_shift": math.log(3),
     "use_karras_sigmas": False,
 }
 scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
+pipe = QwenImageEditPlusPipeline.from_pretrained(
+    "Qwen/Qwen-Image-Edit-2509",
+    scheduler=scheduler,
+    torch_dtype=dtype
+).to(device)
 pipe.load_lora_weights(
+    "lightx2v/Qwen-Image-Lightning",
+    weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
+)
 pipe.load_lora_weights(
+    "dx8152/Qwen-Image-Edit-2509-Fusion",
+    weight_name="溶图.safetensors", adapter_name="fusion"
+)
+pipe.set_adapters(["fast", "fusion"], adapter_weights=[1., 1.])
 pipe.fuse_lora(adapter_names=["fast"])
 pipe.fuse_lora(adapter_names=["fusion"])
 pipe.unload_lora_weights()
+# ✅ Load background remover
+rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device, dtype=torch.float32)
 MAX_SEED = np.iinfo(np.int32).max
+# --- Background Removal Helpers ---
+@torch.inference_mode()
+def numpy2pytorch(imgs):
+    """Stack a sequence of arrays into one float tensor scaled as ``x / 127.0 - 1.0``.
+
+    The arrays are stacked along a new leading axis and the last axis is then
+    moved to position 1 (for the H x W x C arrays passed by ``run_rmbg`` this
+    yields a channels-first batch).
+    """
+    stacked = np.stack(imgs, axis=0)
+    batch = torch.from_numpy(stacked).float()
+    batch = batch / 127.0 - 1.0
+    return batch.movedim(-1, 1)
+@torch.inference_mode()
+def run_rmbg(img: np.ndarray):
+    """Estimate a foreground matte for ``img`` with ``rmbg`` and grey out the background.
+
+    Args:
+        img: image array whose shape unpacks as ``(H, W, C)``.
+
+    Returns:
+        A ``(composite, alpha)`` pair. ``composite`` is a uint8 array in which
+        ``img`` is blended toward mid-grey (127) wherever ``alpha`` is low.
+        ``alpha`` is the float matte at the input resolution, clipped to
+        [0, 1], with a trailing singleton axis.
+    """
+    H, W, C = img.shape
+    # Scale so the network input has an area of about 256 * 64 * 64 pixels,
+    # with each side a multiple of 64.
+    k = (256.0 / float(H * W)) ** 0.5
+    # Very elongated images can round to 0 on the short side; keep at least
+    # one 64-pixel tile so the resize target is never empty.
+    target_w = 64 * max(1, int(round(W * k)))
+    target_h = 64 * max(1, int(round(H * k)))
+    resized = Image.fromarray(img).resize((target_w, target_h), Image.LANCZOS)
+    # Use the same module-level `device` the model was moved to instead of a
+    # hard-coded "cuda", so the CPU fallback works too.
+    feed = numpy2pytorch([np.array(resized)]).to(device, dtype=torch.float32)
+    alpha = rmbg(feed)[0][0]
+    # Bring the matte back to the original resolution.
+    alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear")
+    alpha = alpha.movedim(1, -1)[0].detach().float().cpu().numpy().clip(0, 1)
+    result = 127 + (img.astype(np.float32) - 127) * alpha
+    return result.clip(0, 255).astype(np.uint8), alpha
+def remove_background(image: Image.Image) -> Image.Image:
+    """Return ``image`` as an RGBA picture whose alpha channel is the ``run_rmbg`` matte.
+
+    Args:
+        image: a PIL image, or an ``(image, caption)`` pair as produced by a
+            gallery component.
+
+    Returns:
+        An RGBA image: the grey-composited RGB from ``run_rmbg`` plus the
+        matte scaled to 0-255 as the alpha channel.
+    """
+    # Gallery items arrive as (image, caption) pairs; unwrap them here so a
+    # caller that forwards the raw item still works.
+    if isinstance(image, (tuple, list)):
+        image = image[0]
+    # run_rmbg unpacks a three-axis shape, so normalise greyscale / palette /
+    # RGBA inputs to RGB first.
+    # NOTE(review): assumes the matting model takes 3-channel input - confirm.
+    img_array = np.array(image.convert("RGB"))
+    result_array, alpha_mask = run_rmbg(img_array)
+    result_image = Image.fromarray(result_array).convert("RGBA")
+    # The matte comes back with a trailing singleton axis; PIL only builds an
+    # "L" image from a 2-D array, so drop that axis before converting.
+    alpha_2d = np.asarray(alpha_mask).reshape(np.shape(alpha_mask)[:2])
+    alpha = (alpha_2d * 255).astype(np.uint8)
+    result_image.putalpha(Image.fromarray(alpha))
+    return result_image
+# --- Inference ---
 @spaces.GPU
 def infer(
+    gallery_images,
+    image_background,
     prompt="",
     seed=42,
     randomize_seed=True,
     true_guidance_scale=1,
     width=None,
     progress=gr.Progress(track_tqdm=True)
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
+    processed_subjects = []
+    if gallery_images:
+        for gimg in gallery_images:
+            pil_img = gimg[0] if isinstance(gimg, list) else gimg
+            processed_subjects.append(remove_background(pil_img))
+    all_inputs = processed_subjects
+    if image_background is not None:
+        all_inputs.append(image_background)
+    if not all_inputs:
+        raise gr.Error("Please upload at least one image or a background image.")
     result = pipe(
+        image=all_inputs,
         prompt=prompt,
         num_inference_steps=num_inference_steps,
         generator=generator,
         true_cfg_scale=true_guidance_scale,
         num_images_per_prompt=1,
     ).images[0]
+    return [image_background, result], seed
 # --- UI ---
+css = '''#col-container { max-width: 900px; margin: 0 auto; }
 .dark .progress-text{color: white !important}
+#examples{max-width: 900px; margin: 0 auto; }'''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("## Qwen Image Edit — Fusion")
+        gr.Markdown(""" Qwen Image Edit 2509 ✨ Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and [lightx2v Qwen-Image-Lightning LoRA]() for 4-step inference 💨 """ )
         with gr.Row():
             with gr.Column():
+                gallery = gr.Gallery(
+                    label="Upload subject images (background auto removed)",
+                    columns=3, rows=2, height="auto", type="pil"
+                )
+                image_background = gr.Image(label="Background Image", type="pil", visible=True)
+                prompt = gr.Textbox(label="Prompt")
+                run_button = gr.Button("Fuse Images", variant="primary")
+                with gr.Accordion("Advanced Settings", open=False):
+                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
+                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
+                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
+                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
             with gr.Column():
                 result = gr.ImageSlider(label="Output Image", interactive=False)
+        # gr.Examples(
+        #     examples=[
+        #         [["fusion_car.png", "fusion_shoes.png"], "fusion_bg.png", "put the car and shoes in the background"],
+        #         [["wednesday_product.png"], "simple_room.png", "put the product in her hand"]
+        #     ],
+        #     inputs=[gallery, image_background, prompt],
+        #     outputs=[result, seed],
+        #     fn=infer,
+        #     cache_examples="lazy",
+        #     elem_id="examples"
+        # )
+        inputs = [gallery, image_background, prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width]
+        outputs = [result, seed]
+        run_button.click(fn=infer, inputs=inputs, outputs=outputs)
+demo.launch(share=True)