import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
import math
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from briarmbg import BriaRMBG  
import os
import tempfile

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

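# Flow-matching scheduler with exponential dynamic time-shifting; base_shift and
# max_shift are both pinned to log(3), the setup used here with the 4-step Lightning LoRA.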
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}

scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

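# Load the lightx2v 4-step Lightning LoRA, fuse it into the model weights, and drop the
# adapter bookkeeping; the dx8152 Fusion LoRA load below is left commented out.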
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
)
# pipe.load_lora_weights(
#     "dx8152/Qwen-Image-Edit-2509-Fusion",
#     weight_name="溶图.safetensors", adapter_name="fusion"
# )
pipe.set_adapters(["fast"], adapter_weights=[1.])
pipe.fuse_lora(adapter_names=["fast"])
# pipe.fuse_lora(adapter_names=["fusion"])
pipe.unload_lora_weights()

# ✅ Load background remover
rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device, dtype=torch.float32)

MAX_SEED = np.iinfo(np.int32).max


# --- Background Removal Helpers ---
def remove_alpha_channel(image: Image.Image) -> Image.Image:
    """
    Remove alpha channel from PIL Image if it exists.
    
    Args:
        image (Image.Image): Input PIL image
        
    Returns:
        Image.Image: Image with alpha channel removed (RGB format)
    """
    if image.mode in ('RGBA', 'LA'):
        # Create a white background
        background = Image.new('RGB', image.size, (255, 255, 255))
        # Paste the image onto the white background using alpha channel as mask
        if image.mode == 'RGBA':
            background.paste(image, mask=image.split()[-1])  # Use alpha channel as mask
        else:  # LA mode
            background.paste(image.convert('RGB'), mask=image.split()[-1])
        return background
    elif image.mode == 'P':
        # Convert palette mode to RGB (some palette images have transparency)
        if 'transparency' in image.info:
            image = image.convert('RGBA')
            background = Image.new('RGB', image.size, (255, 255, 255))
            background.paste(image, mask=image.split()[-1])
            return background
        else:
            return image.convert('RGB')
    elif image.mode != 'RGB':
        # Convert any other mode to RGB
        return image.convert('RGB')
    else:
        # Already RGB, return as is
        return image

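# Helpers for moving images between HxWxC uint8 numpy arrays and NxCxHxW float tensors
# (values roughly in [-1, 1]), used to feed the RMBG background remover.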
# @torch.inference_mode()
def numpy2pytorch(imgs):
    h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0  # scale so that pixel value 127 maps exactly to 0.0
    h = h.movedim(-1, 1)
    return h


# @torch.inference_mode()
def pytorch2numpy(imgs, quant=True):
    results = []
    for x in imgs:
        y = x.movedim(0, -1)

        if quant:
            y = y * 127.5 + 127.5
            y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8)
        else:
            y = y * 0.5 + 0.5
            y = y.detach().float().cpu().numpy().clip(0, 1).astype(np.float32)

        results.append(y)
    return results


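# Stretch-resize to an exact target size (no cropping, aspect ratio not preserved).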
def resize_without_crop(image, target_width, target_height):
    pil_image = Image.fromarray(image)
    resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
    return np.array(resized_image)


@spaces.GPU()
def run_rmbg(img, sigma=0.0):
    """
    Remove background from image using BriaRMBG model.
    
    Args:
        img (np.ndarray): Input image as numpy array (H, W, C)
        sigma (float): Noise parameter for blending
        
    Returns:
        tuple: (result_image, alpha_mask) where result_image is the image with background removed
    """
    H, W, C = img.shape
    assert C == 3
    k = (256.0 / float(H * W)) ** 0.5
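    # Rescale so width * height is roughly 256 * 64^2 (~1 megapixel), with both sides divisible by 64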
    feed = resize_without_crop(img, int(64 * round(W * k)), int(64 * round(H * k)))
    feed = numpy2pytorch([feed]).to(device=device, dtype=torch.float32)
    alpha = rmbg(feed)[0][0]
    alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear")
    alpha = alpha.movedim(1, -1)[0]
    alpha = alpha.detach().float().cpu().numpy().clip(0, 1)
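    # Keep foreground pixels via the predicted alpha; everything else is pulled toward
    # neutral gray (127). sigma is an optional offset added to the foreground before blending.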
    result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
    return result.clip(0, 255).astype(np.uint8), alpha

def remove_background_from_image(image: Image.Image) -> Image.Image:
    """
    Remove background from PIL Image using RMBG model.
    
    Args:
        image (Image.Image): Input PIL image
        
    Returns:
        Image.Image: Image with background removed (transparent background)
    """
    # Convert PIL to numpy array
    img_array = np.array(image)
    
    # Remove background using RMBG
    result_array, alpha_mask = run_rmbg(img_array)
    
    # Convert back to PIL with alpha channel
    result_image = Image.fromarray(result_array)
    
    # Create RGBA image with alpha mask
    if result_image.mode != 'RGBA':
        result_image = result_image.convert('RGBA')
    
    # Handle alpha mask dimensions and convert to PIL
    # The alpha_mask might have extra dimensions, so squeeze and ensure 2D
    alpha_mask_2d = np.squeeze(alpha_mask)
    if alpha_mask_2d.ndim > 2:
        # If still more than 2D, take the first channel
        alpha_mask_2d = alpha_mask_2d[:, :, 0]
    
    # Convert to uint8 and build a single-channel ('L') alpha image
    alpha_array = (alpha_mask_2d * 255).astype(np.uint8)
    alpha_pil = Image.fromarray(alpha_array, 'L')
    result_image.putalpha(alpha_pil)
    
    return result_image

# --- Inference ---
@spaces.GPU
def infer(
    gallery_images,  
    image_background,
    prompt="",
    seed=42,
    randomize_seed=True,
    true_guidance_scale=1,
    num_inference_steps=4,
    height=None,
    width=None,
    progress=gr.Progress(track_tqdm=True)
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    processed_subjects = []
    if gallery_images:
        for img in gallery_images:
            image = img[0]  # Extract PIL image from gallery format
            
            image = remove_background_from_image(image)
            
            # Always remove alpha channels to ensure RGB format
            image = remove_alpha_channel(image)
            processed_subjects.append(image)

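    # Subject images (with backgrounds removed) go to the pipeline first; the optional
    # background image is appended last.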
    all_inputs = processed_subjects
    if image_background is not None:
        all_inputs.append(image_background) 

    if not all_inputs:
        raise gr.Error("Please upload at least one image or a background image.")

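    # true_cfg_scale of 1.0 effectively skips the extra classifier-free guidance pass,
    # matching the 4-step distilled Lightning setup; the height/width sliders are
    # accepted above but not forwarded to the pipeline call.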
    result = pipe(
        image=all_inputs,
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return [image_background, result], seed


# --- UI ---
css = '''#col-container { max-width: 900px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 900px; margin: 0 auto; }'''

with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## Qwen Image Edit — Fusion")
        gr.Markdown(""" Qwen Image Edit 2509 ✨ Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and the [lightx2v Qwen-Image-Lightning LoRA](https://huggingface.co/lightx2v/Qwen-Image-Lightning) for 4-step inference 💨 """ )
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    gallery = gr.Gallery(
                        label="Upload subject images (background auto removed)",
                        columns=3, rows=2, height="auto", type="pil"
                    )
                    image_background = gr.Image(label="Background Image", type="pil", visible=True)
                prompt = gr.Textbox(label="Prompt")
                run_button = gr.Button("Fuse Images", variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)

            with gr.Column():
                result = gr.ImageSlider(label="Output Image", interactive=False)

        # gr.Examples(
        #     examples=[
        #         [["fusion_car.png", "fusion_shoes.png"], "fusion_bg.png", "put the car and shoes in the background"],
        #         [["wednesday_product.png"], "simple_room.png", "put the product in her hand"]
        #     ],
        #     inputs=[gallery, image_background, prompt],
        #     outputs=[result, seed],
        #     fn=infer,
        #     cache_examples="lazy",
        #     elem_id="examples"
        # )

        inputs = [gallery, image_background, prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width]
        outputs = [result, seed]

        run_button.click(fn=infer, inputs=inputs, outputs=outputs)

demo.launch(share=True)