import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
import math
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from briarmbg import BriaRMBG  
import os
import tempfile

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
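# This config enables exponential dynamic time shifting with base/max shift = log(3),
# which appears intended to match the 4-step Lightning LoRA loaded below.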

scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)
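
# Load the 4-step Lightning LoRA and the dx8152 Fusion LoRA, fuse both into the
# base weights, then unload the adapter modules so inference runs on plain weights.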

pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
)
pipe.load_lora_weights(
    "dx8152/Qwen-Image-Edit-2509-Fusion",
    weight_name="溶图.safetensors", adapter_name="fusion"
)
pipe.set_adapters(["fast"], adapter_weights=[1.])
pipe.fuse_lora(adapter_names=["fast"])
pipe.fuse_lora(adapter_names=["fusion"])
pipe.unload_lora_weights()

# ✅ Load background remover
rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device, dtype=torch.float32)

MAX_SEED = np.iinfo(np.int32).max


# --- Background Removal Helpers ---
def remove_alpha_channel(image: Image.Image) -> Image.Image:
    """
    Remove alpha channel from PIL Image if it exists.
    
    Args:
        image (Image.Image): Input PIL image
        
    Returns:
        Image.Image: Image with alpha channel removed (RGB format)
    """
    if image.mode in ('RGBA', 'LA'):
        # Create a white background
        background = Image.new('RGB', image.size, (255, 255, 255))
        # Paste the image onto the white background using alpha channel as mask
        if image.mode == 'RGBA':
            background.paste(image, mask=image.split()[-1])  # Use alpha channel as mask
        else:  # LA mode
            background.paste(image.convert('RGB'), mask=image.split()[-1])
        return background
    elif image.mode == 'P':
        # Convert palette mode to RGB (some palette images have transparency)
        if 'transparency' in image.info:
            image = image.convert('RGBA')
            background = Image.new('RGB', image.size, (255, 255, 255))
            background.paste(image, mask=image.split()[-1])
            return background
        else:
            return image.convert('RGB')
    elif image.mode != 'RGB':
        # Convert any other mode to RGB
        return image.convert('RGB')
    else:
        # Already RGB, return as is
        return image

# @torch.inference_mode()
def numpy2pytorch(imgs):
    h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0  # pixel value 127 maps exactly to 0.0
    h = h.movedim(-1, 1)
    return h


# @torch.inference_mode()
def pytorch2numpy(imgs, quant=True):
    results = []
    for x in imgs:
        y = x.movedim(0, -1)

        if quant:
            y = y * 127.5 + 127.5
            y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8)
        else:
            y = y * 0.5 + 0.5
            y = y.detach().float().cpu().numpy().clip(0, 1).astype(np.float32)

        results.append(y)
    return results


def resize_without_crop(image, target_width, target_height):
    pil_image = Image.fromarray(image)
    resized_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
    return np.array(resized_image)


@spaces.GPU()
def run_rmbg(img, sigma=0.0):
    """
    Remove background from image using BriaRMBG model.
    
    Args:
        img (np.ndarray): Input image as numpy array (H, W, C)
        sigma (float): Noise parameter for blending
        
    Returns:
        tuple: (result_image, alpha_mask) where result_image is the image with background removed
    """
    H, W, C = img.shape
    assert C == 3
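    # Resize so the RMBG input covers roughly 1024x1024 pixels, with each side a multiple of 64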
    k = (256.0 / float(H * W)) ** 0.5
    feed = resize_without_crop(img, int(64 * round(W * k)), int(64 * round(H * k)))
    feed = numpy2pytorch([feed]).to(device=device, dtype=torch.float32)  # keep the feed on the same device as the RMBG model
    alpha = rmbg(feed)[0][0]
    alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear")
    alpha = alpha.movedim(1, -1)[0]
    alpha = alpha.detach().float().cpu().numpy().clip(0, 1)
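    # Composite the image over mid-gray (127), weighted by the predicted alpha matte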
    result = 127 + (img.astype(np.float32) - 127 + sigma) * alpha
    return result.clip(0, 255).astype(np.uint8), alpha

def remove_background_from_image(image: Image.Image) -> Image.Image:
    """
    Remove background from PIL Image using RMBG model.
    
    Args:
        image (Image.Image): Input PIL image
        
    Returns:
        Image.Image: Image with background removed (transparent background)
    """
    # Convert PIL to numpy array
    img_array = np.array(image)
    
    # Remove background using RMBG
    result_array, alpha_mask = run_rmbg(img_array)
    
    # Convert back to PIL with alpha channel
    result_image = Image.fromarray(result_array)
    
    # Create RGBA image with alpha mask
    if result_image.mode != 'RGBA':
        result_image = result_image.convert('RGBA')
    
    # Handle alpha mask dimensions and convert to PIL
    # The alpha_mask might have extra dimensions, so squeeze and ensure 2D
    alpha_mask_2d = np.squeeze(alpha_mask)
    if alpha_mask_2d.ndim > 2:
        # If still more than 2D, take the first channel
        alpha_mask_2d = alpha_mask_2d[:, :, 0]
    
    # Convert to uint8 and build a grayscale ('L') PIL image for the alpha channel
    alpha_array = (alpha_mask_2d * 255).astype(np.uint8)
    alpha_pil = Image.fromarray(alpha_array, 'L')
    result_image.putalpha(alpha_pil)
    
    return result_image


def calculate_dimensions(image):
    """Calculate output dimensions based on background image, keeping largest side at 1024."""
    if image is None:
        return 1024, 1024
    
    original_width, original_height = image.size
    
    if original_width > original_height:
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)
    
    # Ensure dimensions are multiples of 8
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8
    
    return new_width, new_height


# --- Inference ---
@spaces.GPU
def infer(
    product_image,  
    image_background,
    prompt="",
    seed=42,
    randomize_seed=True,
    true_guidance_scale=1,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True)
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    processed_subjects = []
    if product_image:
        image = remove_background_from_image(product_image)
            
        # Always remove alpha channels to ensure RGB format
        image = remove_alpha_channel(image)
        processed_subjects.append(image)

    all_inputs = processed_subjects
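    # Keep the product first and the background last so they line up with
    # "Image 1" / "Image 2" in the fusion prompt below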
    if image_background is not None:
        all_inputs.append(image_background) 

    width, height = calculate_dimensions(image_background)
    
    if not all_inputs:
        raise gr.Error("Please upload at least one image or a background image.")

    prompt = prompt +". Integrate the product from Image 1 onto Image 2 as the background, ensuring seamless blending with appropriate lighting and shadows" if len(all_inputs) > 1 else prompt
    result = pipe(
        image=all_inputs,
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return [image_background, result], seed


# --- UI ---
css = '''#col-container { max-width: 1100px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 1100px; margin: 0 auto; }'''

with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## Qwen Image Edit — Product Fusion")
        gr.Markdown(""" Seamlessy blend products onto backgrounds with Qwen Image Edit 2509 ✨ Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and [lightx2v Qwen-Image-Lightning LoRA]() for 4-step inference 💨 """ )
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    product_image = gr.Image(
                        label="Product image (background auto removed)", type="pil"
                    )
                    image_background = gr.Image(label="Background Image", type="pil", visible=True)
                prompt = gr.Textbox(label="Prompt", placeholder="put the [product] on the [background]")
                run_button = gr.Button("Fuse Images", variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)

            with gr.Column():
                result = gr.ImageSlider(label="Output Image", interactive=False)

        gr.Examples(
            examples=[
                ["product.png", "wednesday.png", "put the product in her hand"],
                [None, "fusion_car.png", ""],
                ["product_2.png", "background_2.png", "put the product on the chair"],
                [None, "fusion_milkshake.png", ""],
                [None, "fusion_shoes.png", "put the shoes on the feet"],
                ["product_3.png", "background_3.jpg", "put the product on the background"],
            ],
            inputs=[product_image, image_background, prompt],
            outputs=[result, seed],
            fn=infer,
            cache_examples="lazy",
            elem_id="examples"
        )

        inputs = [product_image, image_background, prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps]
        outputs = [result, seed]

        run_button.click(fn=infer, inputs=inputs, outputs=outputs)

demo.launch(share=True)