linoyts HF Staff committed on
Commit
f51afdc
·
verified ·
1 Parent(s): 770de96
Files changed (1) hide show
  1. app.py +102 -94
app.py CHANGED
@@ -3,31 +3,19 @@ import numpy as np
3
  import random
4
  import torch
5
  import spaces
6
-
7
  from PIL import Image
8
- from diffusers import FlowMatchEulerDiscreteScheduler
9
- from optimization import optimize_pipeline_
10
- from diffusers import QwenImageEditPlusPipeline, QwenImageTransformer2DModel
11
- # from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
12
- # from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
13
- # from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
14
-
15
  import math
 
16
  from huggingface_hub import hf_hub_download
17
  from safetensors.torch import load_file
18
-
19
- from PIL import Image
20
  import os
21
- import gradio as gr
22
- from gradio_client import Client, handle_file
23
  import tempfile
24
 
25
-
26
  # --- Model Loading ---
27
  dtype = torch.bfloat16
28
  device = "cuda" if torch.cuda.is_available() else "cpu"
29
 
30
- # Scheduler configuration for Lightning
31
  scheduler_config = {
32
  "base_image_seq_len": 256,
33
  "base_shift": math.log(3),
@@ -45,40 +33,70 @@ scheduler_config = {
45
  "use_karras_sigmas": False,
46
  }
47
 
48
- # Initialize scheduler with Lightning config
49
  scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
50
 
51
- pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509",
52
- scheduler=scheduler,
53
- torch_dtype=dtype).to(device)
 
 
 
54
  pipe.load_lora_weights(
55
- "lightx2v/Qwen-Image-Lightning",
56
- weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
57
- )
58
  pipe.load_lora_weights(
59
- "dx8152/Qwen-Image-Edit-2509-Fusion",
60
- weight_name="溶图.safetensors", adapter_name="fusion"
61
- )
62
- pipe.set_adapters(["fast", "fusion"], adapter_weights=[1.,1.])
63
-
64
  pipe.fuse_lora(adapter_names=["fast"])
65
  pipe.fuse_lora(adapter_names=["fusion"])
66
  pipe.unload_lora_weights()
67
 
68
- # pipe.transformer.__class__ = QwenImageTransformer2DModel
69
- # pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
70
-
71
- # optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
72
-
73
 
74
  MAX_SEED = np.iinfo(np.int32).max
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  @spaces.GPU
78
  def infer(
79
- image_subject,
 
80
  prompt="",
81
- image_background=None,
82
  seed=42,
83
  randomize_seed=True,
84
  true_guidance_scale=1,
@@ -87,90 +105,80 @@ def infer(
87
  width=None,
88
  progress=gr.Progress(track_tqdm=True)
89
  ):
90
-
91
  if randomize_seed:
92
  seed = random.randint(0, MAX_SEED)
93
  generator = torch.Generator(device=device).manual_seed(seed)
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  result = pipe(
96
- image=image_subject,
97
  prompt=prompt,
98
- # height=height,
99
- # width=width,
100
  num_inference_steps=num_inference_steps,
101
  generator=generator,
102
  true_cfg_scale=true_guidance_scale,
103
  num_images_per_prompt=1,
104
  ).images[0]
105
 
106
- return [image_subject,result], seed
107
-
108
-
109
 
110
 
111
  # --- UI ---
112
- css = '''#col-container { max-width: 800px; margin: 0 auto; }
113
  .dark .progress-text{color: white !important}
114
- #examples{max-width: 800px; margin: 0 auto; }'''
115
-
116
 
117
  with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
118
  with gr.Column(elem_id="col-container"):
119
  gr.Markdown("## Qwen Image Edit — Fusion")
120
- gr.Markdown("""
121
- Qwen Image Edit 2509 ✨
122
- Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and [lightx2v Qwen-Image-Lightning LoRA]() for 4-step inference 💨
123
- """
124
- )
125
-
126
  with gr.Row():
127
  with gr.Column():
128
- with gr.Row():
129
- image_subject = gr.Image(label="input image", type="pil")
130
- image_background = gr.Image(label="background Image", type="pil", visible=False)
131
- prompt = gr.Textbox(label="prompt")
132
- run_button = gr.Button("Fuse", variant="primary")
133
- with gr.Accordion("Advanced Settings", open=False):
134
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
135
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
136
- true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
137
- num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
138
- height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
139
- width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
140
 
 
 
 
 
 
 
 
141
 
142
-
143
  with gr.Column():
144
  result = gr.ImageSlider(label="Output Image", interactive=False)
145
- prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False, visible=False)
146
-
147
- gr.Examples(
148
- examples=[
149
- ["fusion_car.png", ""],["fusion_shoes.png", ""],["wednesday_product.png", "put the product in her hand"]
150
-
151
- ],
152
- inputs=[image_subject, prompt],
153
- outputs=[result,seed],
154
- fn=infer,
155
- cache_examples="lazy",
156
- elem_id="examples"
157
- )
158
-
159
- inputs = [
160
- image_subject,image_background, prompt,
161
- seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
162
- ]
163
- outputs = [result, seed]
164
-
165
-
166
-
167
- run_event = run_button.click(
168
- fn=infer,
169
- inputs=inputs,
170
- outputs=outputs
171
- )
172
-
173
-
174
-
175
-
176
- demo.launch(share=True)
 
3
  import random
4
  import torch
5
  import spaces
 
6
  from PIL import Image
 
 
 
 
 
 
 
7
  import math
8
+ from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
9
  from huggingface_hub import hf_hub_download
10
  from safetensors.torch import load_file
11
+ from briarmbg import BriaRMBG
 
12
  import os
 
 
13
  import tempfile
14
 
 
15
  # --- Model Loading ---
16
  dtype = torch.bfloat16
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
 
19
  scheduler_config = {
20
  "base_image_seq_len": 256,
21
  "base_shift": math.log(3),
 
33
  "use_karras_sigmas": False,
34
  }
35
 
 
36
  scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
37
 
38
+ pipe = QwenImageEditPlusPipeline.from_pretrained(
39
+ "Qwen/Qwen-Image-Edit-2509",
40
+ scheduler=scheduler,
41
+ torch_dtype=dtype
42
+ ).to(device)
43
+
44
  pipe.load_lora_weights(
45
+ "lightx2v/Qwen-Image-Lightning",
46
+ weight_name="Qwen-Image-Lightning-4steps-V2.0.safetensors", adapter_name="fast"
47
+ )
48
  pipe.load_lora_weights(
49
+ "dx8152/Qwen-Image-Edit-2509-Fusion",
50
+ weight_name="溶图.safetensors", adapter_name="fusion"
51
+ )
52
+ pipe.set_adapters(["fast", "fusion"], adapter_weights=[1., 1.])
 
53
  pipe.fuse_lora(adapter_names=["fast"])
54
  pipe.fuse_lora(adapter_names=["fusion"])
55
  pipe.unload_lora_weights()
56
 
57
+ # Load background remover
58
+ rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device, dtype=torch.float32)
 
 
 
59
 
60
  MAX_SEED = np.iinfo(np.int32).max
61
 
62
 
63
+ # --- Background Removal Helpers ---
64
@torch.inference_mode()
def numpy2pytorch(imgs):
    """Convert a sequence of channel-last image arrays into one float tensor.

    The arrays are stacked along a new leading axis, cast to float, rescaled
    with ``x / 127.0 - 1.0`` (so 0 maps to -1.0 and 127 maps to 0.0), and the
    last axis is moved to position 1 (channel-last -> channel-first).

    Args:
        imgs: Sequence of equally-shaped NumPy arrays.

    Returns:
        A ``torch.Tensor`` holding the stacked, rescaled batch.
    """
    stacked = np.stack(imgs, axis=0)
    batch = torch.from_numpy(stacked).float()
    batch = batch / 127.0 - 1.0
    return batch.movedim(-1, 1)
69
+
70
@torch.inference_mode()
def run_rmbg(img: np.ndarray):
    """Estimate a foreground matte for ``img`` and fade its background to grey.

    The image is resized so that both sides are multiples of 64 and the total
    area is roughly 1024x1024 pixels, passed through the module-level ``rmbg``
    model, and the predicted matte is resized back to the input resolution.

    Args:
        img: Image array indexed as (height, width, channels).
            NOTE(review): the model presumably expects 3-channel uint8 RGB
            input — confirm against the caller.

    Returns:
        A ``(composited, alpha)`` tuple. ``composited`` is a uint8 array in
        which each pixel is blended towards the value 127 according to the
        matte. ``alpha`` is the float matte clipped to [0, 1]; it keeps a
        trailing singleton channel axis, assuming the model output is
        (1, 1, h, w) — TODO confirm.
    """
    H, W = img.shape[:2]

    # k scales the image so that (W*k) * (H*k) == 256 grid cells of 64 px each.
    k = (256.0 / float(H * W)) ** 0.5
    # Clamp to one 64 px cell so extreme aspect ratios never round to size 0.
    target_w = max(64, int(64 * round(W * k)))
    target_h = max(64, int(64 * round(H * k)))
    resized = Image.fromarray(img).resize((target_w, target_h), Image.LANCZOS)

    # Use the same device the model was moved to instead of a hard-coded
    # "cuda", so the helper also works when the app falls back to CPU.
    feed = numpy2pytorch([np.array(resized)]).to(device, dtype=torch.float32)

    alpha = rmbg(feed)[0][0]
    alpha = torch.nn.functional.interpolate(alpha, size=(H, W), mode="bilinear")
    alpha = alpha.movedim(1, -1)[0].detach().float().cpu().numpy().clip(0, 1)

    # Blend towards mid-grey (127): alpha == 1 keeps the pixel, alpha == 0
    # replaces it with grey.
    result = 127 + (img.astype(np.float32) - 127) * alpha
    return result.clip(0, 255).astype(np.uint8), alpha
81
+
82
def remove_background(image: Image.Image) -> Image.Image:
    """Return ``image`` as RGBA with the predicted matte as its alpha channel.

    Args:
        image: Input PIL image in any mode. It is converted to RGB first so
            that RGBA, palette, or greyscale uploads all reach ``run_rmbg``
            as a 3-channel array.

    Returns:
        An RGBA PIL image: the colour channels are the grey-composited output
        of ``run_rmbg`` and the alpha channel is the matte scaled to 0-255.
    """
    rgb_array = np.array(image.convert("RGB"))
    result_array, alpha_mask = run_rmbg(rgb_array)

    result_image = Image.fromarray(result_array).convert("RGBA")

    alpha = (np.asarray(alpha_mask) * 255).astype(np.uint8)
    # run_rmbg returns the matte with a trailing singleton channel axis;
    # Image.fromarray only accepts 2-D arrays for single-channel images.
    alpha = alpha.reshape(alpha.shape[:2])
    result_image.putalpha(Image.fromarray(alpha))
    return result_image
92
+
93
+
94
+ # --- Inference ---
95
  @spaces.GPU
96
  def infer(
97
+ gallery_images,
98
+ image_background,
99
  prompt="",
 
100
  seed=42,
101
  randomize_seed=True,
102
  true_guidance_scale=1,
 
105
  width=None,
106
  progress=gr.Progress(track_tqdm=True)
107
  ):
 
108
  if randomize_seed:
109
  seed = random.randint(0, MAX_SEED)
110
  generator = torch.Generator(device=device).manual_seed(seed)
111
 
112
# Gallery items arrive as (media, caption) pairs — tuples, or lists in some
# code paths — so unwrap either form before removing the background.
processed_subjects = [
    remove_background(item[0] if isinstance(item, (list, tuple)) else item)
    for item in (gallery_images or [])
]

# Copy rather than alias, so appending the background does not also change
# the list of processed subjects.
all_inputs = list(processed_subjects)
if image_background is not None:
    all_inputs.append(image_background)

if not all_inputs:
    raise gr.Error("Please upload at least one image or a background image.")
124
+
125
  result = pipe(
126
+ image=all_inputs,
127
  prompt=prompt,
 
 
128
  num_inference_steps=num_inference_steps,
129
  generator=generator,
130
  true_cfg_scale=true_guidance_scale,
131
  num_images_per_prompt=1,
132
  ).images[0]
133
 
134
+ return [image_background, result], seed
 
 
135
 
136
 
137
  # --- UI ---
138
+ css = '''#col-container { max-width: 900px; margin: 0 auto; }
139
  .dark .progress-text{color: white !important}
140
+ #examples{max-width: 900px; margin: 0 auto; }'''
 
141
 
142
  with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
143
  with gr.Column(elem_id="col-container"):
144
  gr.Markdown("## Qwen Image Edit — Fusion")
145
+ gr.Markdown(""" Qwen Image Edit 2509 ✨ Using [dx8152's Qwen-Image-Edit-2509 Fusion LoRA](https://huggingface.co/dx8152/Qwen-Image-Edit-2509-Fusion) and [lightx2v Qwen-Image-Lightning LoRA]() for 4-step inference 💨 """ )
 
 
 
 
 
146
  with gr.Row():
147
  with gr.Column():
148
+ gallery = gr.Gallery(
149
+ label="Upload subject images (background auto removed)",
150
+ columns=3, rows=2, height="auto", type="pil"
151
+ )
152
+ image_background = gr.Image(label="Background Image", type="pil", visible=True)
153
+ prompt = gr.Textbox(label="Prompt")
154
+ run_button = gr.Button("Fuse Images", variant="primary")
 
 
 
 
 
155
 
156
+ with gr.Accordion("Advanced Settings", open=False):
157
+ seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
158
+ randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
159
+ true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
160
+ num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
161
+ height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
162
+ width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
163
 
 
164
  with gr.Column():
165
  result = gr.ImageSlider(label="Output Image", interactive=False)
166
+
167
+ # gr.Examples(
168
+ # examples=[
169
+ # [["fusion_car.png", "fusion_shoes.png"], "fusion_bg.png", "put the car and shoes in the background"],
170
+ # [["wednesday_product.png"], "simple_room.png", "put the product in her hand"]
171
+ # ],
172
+ # inputs=[gallery, image_background, prompt],
173
+ # outputs=[result, seed],
174
+ # fn=infer,
175
+ # cache_examples="lazy",
176
+ # elem_id="examples"
177
+ # )
178
+
179
+ inputs = [gallery, image_background, prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width]
180
+ outputs = [result, seed]
181
+
182
+ run_button.click(fn=infer, inputs=inputs, outputs=outputs)
183
+
184
+ demo.launch(share=True)