lulavc commited on
Commit
91c8de4
·
verified ·
1 Parent(s): 5013a18

Major UI overhaul: dark mode, accessibility, responsive layout, improved UX

Browse files
Files changed (1) hide show
  1. app.py +279 -749
app.py CHANGED
@@ -1,4 +1,4 @@
1
- """Z-Image-Turbo v1.1 - AI Vision Assistant (GLM-4.6V)"""
2
 
3
  import os
4
  import re
@@ -45,11 +45,11 @@ def polish_prompt(original_prompt: str, mode: str = "generate") -> str:
45
  if mode == "transform":
46
  return "high quality, enhanced details, professional finish"
47
  return "Ultra HD, 4K, cinematic composition, highly detailed"
48
-
49
  client = get_deepseek_client()
50
  if not client:
51
  return original_prompt
52
-
53
  if mode == "transform":
54
  system_prompt = """ROLE: Expert prompt engineer for AI image-to-image transformation.
55
 
@@ -75,7 +75,7 @@ STRICT RULES:
75
  - Include artistic style references when appropriate
76
 
77
  OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count."""
78
-
79
  try:
80
  response = client.chat.completions.create(
81
  model="deepseek-reasoner",
@@ -85,28 +85,23 @@ OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no pream
85
  {"role": "user", "content": original_prompt}
86
  ],
87
  )
88
-
89
  msg = response.choices[0].message
90
  content = msg.content if msg.content else ""
91
-
92
  # If content is empty, try to extract final answer from reasoning_content
93
  if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
94
  text = msg.reasoning_content.strip()
95
- # Get the last paragraph as the final answer
96
  paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
97
  if paragraphs:
98
  content = paragraphs[-1]
99
-
100
  if content:
101
- # Clean up and limit length
102
  content = content.strip().replace("\n", " ")
103
- # Remove any thinking markers if present
104
  if "<think>" in content:
105
  content = content.split("</think>")[-1].strip()
106
- # Remove quotes if wrapped
107
  if content.startswith('"') and content.endswith('"'):
108
  content = content[1:-1]
109
- # Truncate based on mode
110
  max_words = 80 if mode == "transform" else 100
111
  words = content.split()
112
  if len(words) > max_words:
@@ -141,18 +136,118 @@ def encode_image_base64(image: Optional[Image.Image]) -> Optional[str]:
141
  buf.seek(0)
142
  return base64.b64encode(buf.getvalue()).decode('utf-8')
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def analyze_image_with_glm(image: Optional[Image.Image]) -> str:
145
  """Analyze image using GLM-4V and return description."""
146
  if image is None:
147
  return "Please upload an image first."
148
-
149
  client = get_glm_client()
150
  if not client:
151
  return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
152
-
153
  try:
154
  base64_image = encode_image_base64(image)
155
-
156
  response = client.chat.completions.create(
157
  model="glm-4.6v-flash",
158
  messages=[
@@ -165,195 +260,126 @@ def analyze_image_with_glm(image: Optional[Image.Image]) -> str:
165
  },
166
  {
167
  "type": "text",
168
- "text": """Write a detailed image description in 300-400 tokens.
169
 
170
- FORMAT: One single paragraph. Start with the main subject. No analysis, no commentary, no "Compositionally", no meta-language.
171
 
172
- DESCRIBE IN ORDER:
173
- - Main subject (person/object): appearance, clothing, pose, expression
174
- - Setting: environment, architecture, objects
175
- - Colors: specific hues, palette
176
- - Lighting: source, quality, shadows
177
- - Textures: materials like silk, marble, wood
178
- - Mood: atmosphere, emotion
179
- - Small details: background elements, decorations
180
 
181
- EXAMPLE START: "A woman in a deep burgundy Victorian gown stands elegantly in a grand ballroom, her auburn hair pinned up with pearl accessories, delicate lace gloves adorning her hands..."
182
-
183
- DO NOT START WITH: "This image", "The image shows", "Compositionally", "In this", "We see"
184
-
185
- Write the description now:"""
186
  }
187
  ]
188
  }
189
  ],
190
- max_tokens=1200,
191
  )
192
-
193
  msg = response.choices[0].message
194
- content = msg.content if msg.content else ""
195
-
196
- # GLM-4.6v returns thinking in reasoning_content and final answer in content
197
- # If content is empty or too short, use reasoning_content
198
- if len(content) < 200 and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
199
- # Use the full reasoning_content if content is insufficient
200
- reasoning = msg.reasoning_content.strip()
201
- # Try to find the actual description (longest coherent paragraph)
202
- paragraphs = [p.strip() for p in reasoning.split('\n\n') if p.strip() and len(p) > 100]
203
- if paragraphs:
204
- # Find the paragraph that looks like a description (starts with subject, not analysis)
205
- for p in paragraphs:
206
- if not p.lower().startswith(('compositionally', 'the image', 'this image', 'in this', 'we see', 'i can see')):
207
- content = p
208
- break
209
- if not content or len(content) < 200:
210
- content = max(paragraphs, key=len) # Use longest paragraph as fallback
211
 
 
212
  if content:
213
- # Remove GLM special tokens
214
- content = content.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
215
- # Clean up any remaining artifacts
216
- content = content.strip()
217
- return content
218
- return "Could not extract description from response."
219
- except Exception:
220
- return "Error analyzing image. Please try again."
221
-
222
- def extract_prompt_from_glm_response(text: str) -> str:
223
- """Extract the actual prompt from GLM response, filtering out thinking/meta content."""
224
- if not text:
225
- return ""
226
-
227
- text = text.strip()
228
-
229
- # Clean GLM special tokens first
230
- text = text.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
231
-
232
- # Split into paragraphs and filter
233
- paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
234
-
235
- # Filter out thinking/meta paragraphs - these patterns indicate GLM is "thinking out loud"
236
- thinking_patterns = (
237
- 'let me', 'i need', 'i should', 'first,', 'got it', 'okay,', 'alright,',
238
- 'the user', 'the request', 'the original', 'based on', 'following the',
239
- 'i\'ll', 'i will', 'now i', 'here\'s', 'here is', 'my prompt',
240
- 'the prompt', 'this prompt', 'for this', 'to create', 'considering',
241
- 'compositionally', 'the image', 'this image', 'in this', 'we see', 'i can see'
242
- )
243
-
244
- # Find paragraphs that look like actual prompts (descriptive, starts with A/An or descriptive word)
245
- good_paragraphs = []
246
- for p in paragraphs:
247
- p_lower = p.lower()
248
- # Skip if it's thinking/meta content
249
- if any(p_lower.startswith(pat) for pat in thinking_patterns):
250
- continue
251
- # Skip very short paragraphs
252
- if len(p) < 50:
253
- continue
254
- good_paragraphs.append(p)
255
-
256
- # Return the longest good paragraph (most likely to be the actual prompt)
257
- if good_paragraphs:
258
- return max(good_paragraphs, key=len)
259
-
260
- # Fallback: if all paragraphs were filtered, take the longest one that's at least 50 chars
261
- valid_paragraphs = [p for p in paragraphs if len(p) >= 50]
262
- if valid_paragraphs:
263
- return max(valid_paragraphs, key=len)
264
-
265
- return ""
266
 
 
 
 
 
 
267
 
268
  def generate_prompt_with_glm(image_description: str, user_request: str, style: str) -> str:
269
  """Generate transformation prompt using GLM based on image description and user request."""
270
- if not image_description or image_description.startswith("Please upload") or image_description.startswith("Error") or image_description.startswith("GLM API"):
271
  return "Please analyze the image first."
272
-
273
  if not user_request or not user_request.strip():
274
  return "Please describe what changes you want."
275
-
276
  client = get_glm_client()
277
  if not client:
278
  return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
279
-
280
- style_hint = f" Apply {style} style." if style and style != "None" else ""
281
-
282
- # Truncate description if too long to leave room for prompt generation
283
- desc = image_description[:1500] if len(image_description) > 1500 else image_description
284
-
285
  try:
286
  response = client.chat.completions.create(
287
  model="glm-4.6v-flash",
288
  messages=[
289
  {
290
  "role": "user",
291
- "content": f"""Write an image prompt (100-150 words) describing this transformed scene.
292
 
293
- ORIGINAL: {desc}
294
 
295
- CHANGE: {user_request}{style_hint}
296
 
297
- Write the NEW scene description now. Start with the main subject. Describe lighting, colors, atmosphere, details. One paragraph only:"""
 
 
 
 
 
 
298
  }
299
  ],
300
- max_tokens=600,
301
  )
302
-
303
  msg = response.choices[0].message
304
- content = msg.content if msg.content else ""
305
 
306
- # GLM-4.6v returns thinking in reasoning_content, final answer in content
307
- # First try content, then extract from reasoning_content if needed
 
 
 
 
 
 
308
  if content:
309
- content = content.strip()
310
- # Clean GLM tokens
311
- content = content.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
312
- content = content.strip().strip('"\'""')
313
 
314
- # If content is empty or looks like thinking, try reasoning_content
315
- if not content or len(content) < 50 or content.lower().startswith(('let me', 'i need', 'first', 'got it', 'okay')):
316
- if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
317
- extracted = extract_prompt_from_glm_response(msg.reasoning_content)
318
- if extracted and len(extracted) >= 50:
319
- content = extracted
320
 
321
- # Also check if we got a better result from reasoning even if content exists
322
- if content and len(content) < 100 and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
323
- extracted = extract_prompt_from_glm_response(msg.reasoning_content)
324
- if extracted and len(extracted) > len(content):
325
- content = extracted
 
 
326
 
327
- if content:
328
- # Final cleanup
329
- content = content.strip().strip('"\'""')
330
- # Remove any remaining thinking markers
331
- if content.lower().startswith(('let me', 'i need', 'first,', 'got it', 'okay,')):
332
- # Try to find the actual prompt after the thinking
333
- lines = content.split('\n')
334
- for i, line in enumerate(lines):
335
- line = line.strip()
336
- if line and len(line) > 50 and not line.lower().startswith(('let me', 'i need', 'first,', 'got it', 'okay,')):
337
- content = '\n'.join(lines[i:]).strip()
338
- break
339
-
340
- # Validate: at least 50 chars and doesn't look like meta content
341
- if len(content) >= 50:
342
- return content
343
 
344
- return "Could not generate a valid prompt. Please try again with a different request."
345
- except Exception:
346
- return "Error generating prompt. Please try again."
347
 
348
  print("Loading Z-Image-Turbo pipeline...")
349
 
350
- # Load text-to-image pipeline
351
  pipe_t2i = DiffusionPipeline.from_pretrained(
352
  "Tongyi-MAI/Z-Image-Turbo",
353
  )
354
  pipe_t2i.to("cuda", torch.bfloat16)
355
 
356
- # Create img2img pipeline sharing components (no duplicate loading)
357
  pipe_i2i = ZImageImg2ImgPipeline(
358
  transformer=pipe_t2i.transformer,
359
  vae=pipe_t2i.vae,
@@ -412,7 +438,6 @@ EXAMPLES_GENERATE = [
412
  ["Cozy witch cottage interior on a stormy autumn night, iron cauldrons bubbling with green smoke, wooden shelves packed with glowing potion bottles and ancient spell books, a sleepy black cat curled by the stone fireplace, bundles of dried herbs and garlic hanging from dark oak ceiling beams, warm amber candlelight flickering throughout the room", "Digital Art", "1:1 MAX (2048x2048)", 9, 42, True],
413
  ]
414
 
415
- # Transform examples: [prompt, style, strength, steps, seed, randomize]
416
  EXAMPLES_TRANSFORM = [
417
  ["Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7, 9, 42, True],
418
  ["Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65, 9, 42, True],
@@ -444,25 +469,22 @@ def upload_to_hf_cdn(image: Optional[Image.Image]) -> str:
444
  return f"Upload failed: {response.status_code}"
445
  except requests.Timeout:
446
  return "Upload timed out. Please try again."
447
- except Exception as e:
448
- return f"Upload error. Please try again."
449
 
450
  def do_polish_prompt(prompt: str, style: str, do_polish: bool, mode: str = "generate") -> Tuple[str, str]:
451
  """Polish prompt before generation (runs on CPU, before GPU allocation)."""
452
  if not prompt or not prompt.strip():
453
  return "", ""
454
-
455
  base_prompt = prompt.strip()
456
-
457
- # Polish if enabled
458
  if do_polish:
459
  polished = polish_prompt(base_prompt, mode=mode)
460
  else:
461
  polished = base_prompt
462
-
463
- # Add style suffix for final prompt
464
  final_prompt = polished + STYLE_SUFFIXES.get(style, "")
465
-
466
  return final_prompt, polished
467
 
468
  def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tuple[str, str]:
@@ -471,7 +493,7 @@ def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tupl
471
  base = prompt.strip() if prompt else "high quality image"
472
  final = base + STYLE_SUFFIXES.get(style, "")
473
  return final, ""
474
-
475
  return do_polish_prompt(prompt, style, True, mode="transform")
476
 
477
  @spaces.GPU
@@ -479,12 +501,12 @@ def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, se
479
  if randomize:
480
  seed = torch.randint(0, 2**32 - 1, (1,)).item()
481
  seed = int(seed)
482
-
483
  if not full_prompt or not full_prompt.strip():
484
  return None, seed
485
-
486
  w, h = RATIO_DIMS.get(ratio, (1024, 1024))
487
-
488
  generator = torch.Generator("cuda").manual_seed(seed)
489
  image = pipe_t2i(
490
  prompt=full_prompt,
@@ -494,36 +516,32 @@ def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, se
494
  guidance_scale=0.0,
495
  generator=generator,
496
  ).images[0]
497
-
498
  return image, seed
499
 
500
  @spaces.GPU
501
  def transform(input_image: Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
502
  if input_image is None:
503
  return None, 0
504
-
505
  if randomize:
506
  seed = torch.randint(0, 2**32 - 1, (1,)).item()
507
  seed = int(seed)
508
-
509
  if not full_prompt or not full_prompt.strip():
510
  full_prompt = "high quality image, enhanced details"
511
-
512
- # Resize to supported dimensions
513
  input_image = input_image.convert("RGB")
514
  w, h = input_image.size
515
- # Round to nearest multiple of 16
516
  w = (w // 16) * 16
517
  h = (h // 16) * 16
518
  w = max(512, min(2048, w))
519
  h = max(512, min(2048, h))
520
  input_image = input_image.resize((w, h), Image.LANCZOS)
521
-
522
- # Adjust steps to compensate for strength (actual_steps = internal_steps * strength)
523
- # So we need internal_steps = desired_steps / strength
524
  strength = float(strength)
525
  effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)
526
-
527
  generator = torch.Generator("cuda").manual_seed(seed)
528
  image = pipe_i2i(
529
  prompt=full_prompt,
@@ -533,66 +551,47 @@ def transform(input_image: Optional[Image.Image], full_prompt: str, polished_dis
533
  guidance_scale=0.0,
534
  generator=generator,
535
  ).images[0]
536
-
537
  return image, seed
538
 
539
  css = r"""
540
  :root {
541
- /* Dark theme - Modern AI aesthetic */
542
  --bg-primary: #0c0c0e;
543
  --bg-secondary: #141416;
544
  --bg-tertiary: #1c1c20;
545
  --surface: #232328;
546
  --surface-hover: #2a2a30;
547
-
548
- /* Accent colors - Purple/Indigo gradient */
549
  --accent-primary: #818cf8;
550
  --accent-secondary: #a78bfa;
551
  --accent-hover: #6366f1;
552
  --accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
553
  --accent-glow: rgba(99, 102, 241, 0.4);
554
-
555
- /* Text hierarchy */
556
  --text-primary: #f4f4f5;
557
  --text-secondary: #a1a1aa;
558
  --text-muted: #71717a;
559
-
560
- /* Borders */
561
  --border-subtle: rgba(255, 255, 255, 0.08);
562
  --border-default: rgba(255, 255, 255, 0.12);
563
-
564
- /* Status colors */
565
  --success: #10b981;
566
  --warning: #f59e0b;
567
  --error: #ef4444;
568
-
569
- /* Shadows */
570
  --shadow-sm: 0 1px 2px rgba(0,0,0,0.3);
571
  --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.4);
572
  --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.5);
573
  --shadow-glow: 0 0 20px var(--accent-glow);
574
-
575
- /* Spacing & Radius */
576
  --radius-sm: 8px;
577
  --radius-md: 12px;
578
  --radius-lg: 16px;
579
  --transition: 0.2s ease;
580
  }
581
 
582
- /* Base container */
583
  .gradio-container {
584
  background: var(--bg-primary) !important;
585
  min-height: 100vh;
586
  color: var(--text-primary);
587
  }
588
 
589
- /* Tab container */
590
- .tabs {
591
- background: transparent !important;
592
- padding: 8px 0;
593
- }
594
 
595
- /* Tab navigation - Pill style */
596
  .tab-nav {
597
  background: var(--bg-secondary) !important;
598
  border: 1px solid var(--border-subtle) !important;
@@ -622,11 +621,6 @@ css = r"""
622
  color: var(--text-primary) !important;
623
  }
624
 
625
- .tab-nav > button:focus-visible {
626
- outline: 2px solid var(--accent-primary);
627
- outline-offset: 2px;
628
- }
629
-
630
  .tab-nav > button.selected,
631
  .tab-nav > button[aria-selected="true"],
632
  [role="tab"][aria-selected="true"] {
@@ -636,7 +630,6 @@ css = r"""
636
  box-shadow: var(--shadow-glow);
637
  }
638
 
639
- /* Primary button */
640
  button.primary, .primary {
641
  background: var(--accent-gradient) !important;
642
  border: none !important;
@@ -654,16 +647,6 @@ button.primary:hover, .primary:hover {
654
  filter: brightness(1.1);
655
  }
656
 
657
- button.primary:focus-visible, .primary:focus-visible {
658
- outline: 2px solid var(--accent-secondary);
659
- outline-offset: 2px;
660
- }
661
-
662
- button.primary:active, .primary:active {
663
- transform: scale(0.98);
664
- }
665
-
666
- /* Secondary button */
667
  button.secondary, .secondary {
668
  background: var(--surface) !important;
669
  color: var(--text-primary) !important;
@@ -678,12 +661,6 @@ button.secondary:hover, .secondary:hover {
678
  border-color: var(--accent-primary) !important;
679
  }
680
 
681
- button.secondary:focus-visible, .secondary:focus-visible {
682
- outline: 2px solid var(--accent-primary);
683
- outline-offset: 2px;
684
- }
685
-
686
- /* Content blocks */
687
  .block {
688
  background: var(--bg-secondary) !important;
689
  border: 1px solid var(--border-subtle) !important;
@@ -694,17 +671,8 @@ button.secondary:focus-visible, .secondary:focus-visible {
694
  transition: all var(--transition);
695
  }
696
 
697
- .block:hover {
698
- border-color: var(--border-default) !important;
699
- }
700
-
701
- /* Tab content area */
702
- .tabitem {
703
- background: transparent !important;
704
- padding: 16px 0;
705
- }
706
 
707
- /* Input fields */
708
  input, textarea, .gr-input, .gr-textbox textarea {
709
  background: var(--bg-tertiary) !important;
710
  border: 1px solid var(--border-default) !important;
@@ -713,13 +681,12 @@ input, textarea, .gr-input, .gr-textbox textarea {
713
  transition: all var(--transition);
714
  }
715
 
716
- input:focus, textarea:focus, .gr-input:focus, .gr-textbox textarea:focus {
717
  border-color: var(--accent-primary) !important;
718
  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
719
  outline: none !important;
720
  }
721
 
722
- /* Dropdown */
723
  .gr-dropdown, select {
724
  background: var(--bg-tertiary) !important;
725
  border: 1px solid var(--border-default) !important;
@@ -727,23 +694,11 @@ input:focus, textarea:focus, .gr-input:focus, .gr-textbox textarea:focus {
727
  color: var(--text-primary) !important;
728
  }
729
 
730
- /* Slider */
731
- .gr-slider input[type="range"] {
732
- accent-color: var(--accent-primary);
733
- }
734
 
735
- /* Checkbox */
736
- .gr-checkbox input[type="checkbox"] {
737
- accent-color: var(--accent-primary);
738
- }
739
-
740
- /* Labels */
741
- label, .gr-label {
742
- color: var(--text-secondary) !important;
743
- font-weight: 500;
744
- }
745
 
746
- /* Image container */
747
  .gr-image, .image-container {
748
  background: var(--bg-tertiary) !important;
749
  border: 2px dashed var(--border-default) !important;
@@ -751,77 +706,26 @@ label, .gr-label {
751
  transition: all var(--transition);
752
  }
753
 
754
- .gr-image:hover, .image-container:hover {
755
- border-color: var(--accent-primary) !important;
756
- }
757
-
758
- /* Generated image output */
759
- .gr-image img {
760
- border-radius: var(--radius-md);
761
- }
762
-
763
- /* Examples table */
764
- [class*="examples"] td:first-child {
765
- text-align: left;
766
- }
767
-
768
- [class*="examples"] table {
769
- background: var(--bg-secondary) !important;
770
- border-radius: var(--radius-md);
771
- }
772
-
773
- [class*="examples"] tr {
774
- background: transparent !important;
775
- border-bottom: 1px solid var(--border-subtle) !important;
776
- }
777
-
778
- [class*="examples"] tr:hover {
779
- background: var(--bg-tertiary) !important;
780
- }
781
-
782
- [class*="examples"] td {
783
- color: var(--text-secondary) !important;
784
- padding: 12px !important;
785
- }
786
 
787
- /* Examples header row */
788
- [class*="examples"] thead,
789
- [class*="examples"] thead tr,
790
- [class*="examples"] thead th {
 
 
791
  background: var(--surface) !important;
792
  color: var(--text-primary) !important;
793
  border-bottom: 1px solid var(--border-default) !important;
794
  }
795
 
796
- [class*="examples"] thead th {
797
- padding: 12px !important;
798
- font-weight: 600;
799
- }
800
-
801
- /* Headers */
802
- h1, h2, h3, h4 {
803
- color: var(--text-primary) !important;
804
- }
805
 
806
- h1 {
807
- font-size: clamp(1.5rem, 4vw, 2.2rem);
808
- font-weight: 700;
809
- }
810
 
811
- /* Markdown */
812
- .markdown-text, .gr-markdown {
813
- color: var(--text-secondary) !important;
814
- }
815
-
816
- .gr-markdown a {
817
- color: var(--accent-primary) !important;
818
- }
819
-
820
- .gr-markdown a:hover {
821
- color: var(--accent-secondary) !important;
822
- }
823
-
824
- /* Group styling */
825
  .gr-group {
826
  background: var(--surface) !important;
827
  border: 1px solid var(--border-subtle) !important;
@@ -829,20 +733,13 @@ h1 {
829
  padding: 16px !important;
830
  }
831
 
832
- /* Accordion */
833
  .gr-accordion {
834
  background: var(--bg-secondary) !important;
835
  border: 1px solid var(--border-subtle) !important;
836
  border-radius: var(--radius-md) !important;
837
  }
838
 
839
- /* Footer */
840
- .footer-no-box {
841
- background: transparent !important;
842
- border: none !important;
843
- box-shadow: none !important;
844
- padding: 0;
845
- }
846
 
847
  .gradio-container > footer {
848
  background: var(--bg-secondary) !important;
@@ -850,447 +747,84 @@ h1 {
850
  padding: 12px 20px;
851
  }
852
 
853
- .gradio-container > footer span,
854
- .gradio-container > footer p {
855
- color: var(--text-muted) !important;
856
- }
857
-
858
- .gradio-container > footer a {
859
- color: var(--accent-primary) !important;
860
- text-decoration: none;
861
- transition: color var(--transition);
862
- }
863
-
864
- .gradio-container > footer a:hover {
865
- color: var(--accent-secondary) !important;
866
- }
867
-
868
- /* Progress bar */
869
- .progress-bar {
870
- background: var(--bg-tertiary) !important;
871
- border-radius: 4px;
872
- }
873
 
874
- .progress-bar > div {
875
- background: var(--accent-gradient) !important;
876
- border-radius: 4px;
877
- }
878
 
879
- /* Accessibility: Reduced motion */
880
  @media (prefers-reduced-motion: reduce) {
881
- *, *::before, *::after {
882
- animation-duration: 0.01ms !important;
883
- animation-iteration-count: 1 !important;
884
- transition-duration: 0.01ms !important;
885
- }
886
  }
887
 
888
- /* Mobile responsiveness */
889
  @media (max-width: 768px) {
890
- .tab-nav {
891
- padding: 4px;
892
- gap: 4px;
893
- }
894
-
895
- .tab-nav > button {
896
- padding: 10px 16px;
897
- font-size: 0.85rem;
898
- }
899
-
900
- .block {
901
- padding: 12px;
902
- margin: 6px 0;
903
- }
904
-
905
- button.primary, .primary {
906
- padding: 10px 16px;
907
- width: 100%;
908
- }
909
-
910
- h1 {
911
- font-size: 1.4rem !important;
912
- }
913
- }
914
-
915
- @media (max-width: 480px) {
916
- .tab-nav > button {
917
- padding: 8px 12px;
918
- font-size: 0.8rem;
919
- }
920
-
921
- .block {
922
- padding: 10px;
923
- border-radius: var(--radius-md) !important;
924
- }
925
- }
926
-
927
- /* Custom scrollbar */
928
- ::-webkit-scrollbar {
929
- width: 8px;
930
- height: 8px;
931
- }
932
-
933
- ::-webkit-scrollbar-track {
934
- background: var(--bg-secondary);
935
- }
936
-
937
- ::-webkit-scrollbar-thumb {
938
- background: var(--bg-tertiary);
939
- border-radius: 4px;
940
- }
941
-
942
- ::-webkit-scrollbar-thumb:hover {
943
- background: var(--surface);
944
- }
945
-
946
- /* ===== TEXT VISIBILITY FIXES ===== */
947
- /* Ensure all text elements are readable on dark background */
948
-
949
- /* Tab navigation buttons - CRITICAL */
950
- .tab-nav button,
951
- .tab-nav > button,
952
- .tabs .tab-nav button,
953
- div[role="tablist"] button,
954
- button[role="tab"] {
955
- color: #e5e5e5 !important;
956
- font-weight: 500 !important;
957
- }
958
-
959
- .tab-nav button:hover,
960
- .tab-nav > button:hover,
961
- button[role="tab"]:hover {
962
- color: #ffffff !important;
963
- }
964
-
965
- .tab-nav button.selected,
966
- .tab-nav > button.selected,
967
- .tab-nav button[aria-selected="true"],
968
- button[role="tab"][aria-selected="true"] {
969
- color: #ffffff !important;
970
- }
971
-
972
- /* All labels - make visible */
973
- label,
974
- .gr-label,
975
- .label-wrap,
976
- .label-wrap span,
977
- span.svelte-1gfkn6j,
978
- .gr-input-label,
979
- .gr-box label,
980
- .gr-form label,
981
- .wrap label,
982
- .container label {
983
- color: #d4d4d8 !important;
984
- }
985
-
986
- /* Specific Gradio label classes */
987
- label span,
988
- .gr-label span,
989
- .label-wrap > span,
990
- span[data-testid="block-label"],
991
- .block-label span {
992
- color: #d4d4d8 !important;
993
- }
994
-
995
- /* Span elements that might be labels */
996
- span.block-label,
997
- span.svelte-1gfkn6j,
998
- .gr-block-label,
999
- .svelte-1gfkn6j {
1000
- color: #d4d4d8 !important;
1001
- }
1002
-
1003
- /* Input and textbox labels */
1004
- .gr-textbox label,
1005
- .gr-dropdown label,
1006
- .gr-slider label,
1007
- .gr-checkbox label,
1008
- .gr-number label,
1009
- .gr-image label {
1010
- color: #d4d4d8 !important;
1011
- }
1012
-
1013
- /* Accordion headers */
1014
- .gr-accordion summary,
1015
- .gr-accordion .label-wrap,
1016
- .gr-accordion button,
1017
- .accordion summary,
1018
- summary span,
1019
- details summary,
1020
- details summary span,
1021
- .gr-accordion > div:first-child,
1022
- button[aria-expanded] span {
1023
- color: #e5e5e5 !important;
1024
- }
1025
-
1026
- /* Examples table - headers and cells */
1027
- .gr-examples th,
1028
- .gr-examples thead th,
1029
- .examples th,
1030
- .examples thead th,
1031
- table th,
1032
- [class*="examples"] th,
1033
- [class*="examples"] thead th,
1034
- .gr-samples-table th,
1035
- .gr-samples th {
1036
- color: #f4f4f5 !important;
1037
- background: var(--surface) !important;
1038
- font-weight: 600 !important;
1039
- }
1040
-
1041
- /* Examples table cells */
1042
- .gr-examples td,
1043
- .examples td,
1044
- table td,
1045
- [class*="examples"] td,
1046
- .gr-samples-table td,
1047
- .gr-samples td {
1048
- color: #d4d4d8 !important;
1049
- }
1050
-
1051
- /* Examples table row text */
1052
- .gr-examples tr,
1053
- .examples tr,
1054
- [class*="examples"] tr {
1055
- color: #d4d4d8 !important;
1056
- }
1057
-
1058
- /* Markdown text */
1059
- .gr-markdown,
1060
- .gr-markdown p,
1061
- .gr-markdown span,
1062
- .markdown-text,
1063
- .prose,
1064
- .prose p {
1065
- color: #a1a1aa !important;
1066
- }
1067
-
1068
- /* Checkbox labels */
1069
- .gr-checkbox span,
1070
- .gr-checkbox label span,
1071
- input[type="checkbox"] + span,
1072
- .checkbox-wrap span,
1073
- .gr-check-radio label span {
1074
- color: #d4d4d8 !important;
1075
- }
1076
-
1077
- /* Dropdown text */
1078
- .gr-dropdown select,
1079
- .gr-dropdown option,
1080
- select option,
1081
- .dropdown-container span {
1082
- color: #e5e5e5 !important;
1083
- }
1084
-
1085
- /* Number input labels */
1086
- .gr-number span,
1087
- .gr-number label span {
1088
- color: #d4d4d8 !important;
1089
- }
1090
-
1091
- /* Slider labels */
1092
- .gr-slider span,
1093
- .gr-slider label span,
1094
- .slider-wrap span {
1095
- color: #d4d4d8 !important;
1096
- }
1097
-
1098
- /* Image upload labels */
1099
- .gr-image span,
1100
- .gr-image label span,
1101
- .image-container span {
1102
- color: #d4d4d8 !important;
1103
- }
1104
-
1105
- /* File upload text */
1106
- .gr-file span,
1107
- .gr-file label,
1108
- .upload-text {
1109
- color: #d4d4d8 !important;
1110
- }
1111
-
1112
- /* Button text (ensure readable) */
1113
- button span,
1114
- .gr-button span {
1115
- color: inherit !important;
1116
- }
1117
-
1118
- /* Secondary button text */
1119
- button.secondary span,
1120
- .secondary span {
1121
- color: #e5e5e5 !important;
1122
- }
1123
-
1124
- /* Form group labels */
1125
- .gr-group label,
1126
- .gr-group span,
1127
- .gr-box label,
1128
- .gr-box span.label {
1129
- color: #d4d4d8 !important;
1130
- }
1131
-
1132
- /* Info text and hints */
1133
- .info-text,
1134
- .gr-info,
1135
- .hint,
1136
- small {
1137
- color: #a1a1aa !important;
1138
- }
1139
-
1140
- /* Error and warning text */
1141
- .error-text,
1142
- .gr-error {
1143
- color: #ef4444 !important;
1144
- }
1145
-
1146
- .warning-text,
1147
- .gr-warning {
1148
- color: #f59e0b !important;
1149
- }
1150
-
1151
- /* Textbox placeholder */
1152
- input::placeholder,
1153
- textarea::placeholder {
1154
- color: #71717a !important;
1155
- }
1156
-
1157
- /* Selected/active states */
1158
- .selected span,
1159
- .active span,
1160
- [aria-selected="true"] span {
1161
- color: #ffffff !important;
1162
- }
1163
-
1164
- /* Modal and popup text */
1165
- .modal span,
1166
- .popup span,
1167
- .dialog span {
1168
- color: #e5e5e5 !important;
1169
- }
1170
-
1171
- /* Generic fix for any remaining dark text on dark background */
1172
- .dark span:not([style*="color"]),
1173
- .dark label:not([style*="color"]) {
1174
- color: #d4d4d8 !important;
1175
- }
1176
-
1177
- /* ========== TEXT VISIBILITY MASTER FIX ========== */
1178
- /* Force all text to be readable on dark background */
1179
-
1180
- /* Global text color reset */
1181
- .gradio-container, .gradio-container * {
1182
- color: #e5e5e5;
1183
- }
1184
-
1185
- /* Tab buttons - MUST be visible */
1186
- .tab-nav button,
1187
- .tab-nav > button,
1188
- button[role="tab"],
1189
- .tabs button {
1190
- color: #e5e5e5 !important;
1191
- }
1192
-
1193
- /* All labels */
1194
- label, .gr-label, .label-wrap, .label-wrap span,
1195
- .gr-box label, .gr-form label, .gr-group label,
1196
- [class*="label"], [class*="Label"] {
1197
- color: #d4d4d8 !important;
1198
- }
1199
-
1200
- /* All spans in form context */
1201
- .gr-block span, .gr-box span, .gr-form span,
1202
- .gr-group span, .block span {
1203
- color: #d4d4d8 !important;
1204
- }
1205
-
1206
- /* Input labels specifically */
1207
- .gr-textbox label, .gr-dropdown label, .gr-slider label,
1208
- .gr-checkbox label, .gr-number label, .gr-image label {
1209
- color: #d4d4d8 !important;
1210
- }
1211
-
1212
- /* Examples table */
1213
- table thead, table thead tr, table thead th,
1214
- [class*="examples"] thead th,
1215
- .examples-table th, .example-table th {
1216
- background: #2a2a30 !important;
1217
- color: #f4f4f5 !important;
1218
- }
1219
-
1220
- table tbody td, [class*="examples"] td {
1221
- color: #d4d4d8 !important;
1222
- }
1223
-
1224
- /* Accordion headers */
1225
- .gr-accordion summary, .gr-accordion button,
1226
- details summary, summary span,
1227
- [class*="accordion"] summary {
1228
- color: #e5e5e5 !important;
1229
- }
1230
-
1231
- /* Markdown text */
1232
- .gr-markdown, .gr-markdown p, .gr-markdown li,
1233
- .markdown-text, .prose {
1234
- color: #d4d4d8 !important;
1235
- }
1236
-
1237
- /* Placeholder text */
1238
- input::placeholder, textarea::placeholder {
1239
- color: #71717a !important;
1240
- }
1241
-
1242
- /* Secondary button text */
1243
- button.secondary, .secondary {
1244
- color: #e5e5e5 !important;
1245
- }
1246
  """
1247
 
1248
  with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
1249
  gr.HTML("""
1250
  <div style="text-align: center; padding: 24px 16px 16px 16px;">
1251
  <h1 style="background: linear-gradient(135deg, #818cf8 0%, #a78bfa 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-size: clamp(1.5rem, 4vw, 2.2rem); margin-bottom: 8px; font-weight: 700;">
1252
- Z Image Turbo + GLM-4.6V
1253
  </h1>
1254
  <p style="color: #a1a1aa; font-size: 1rem; margin: 0;">
1255
  AI Image Generation & Transformation powered by DeepSeek Reasoning
1256
  </p>
1257
  <p style="color: #f472b6; font-size: 0.9rem; margin-top: 12px;">
1258
- If you liked it, please ❤️ it. Thank you!
1259
  </p>
1260
  </div>
1261
  """)
1262
 
1263
  with gr.Tabs():
1264
  # TAB 1: Generate Image
1265
- with gr.Tab("🎨 Generate"):
1266
  with gr.Row():
1267
  with gr.Column(scale=2):
1268
- gen_prompt = gr.Textbox(label="✏️ Prompt", placeholder="Describe your image in detail...", lines=4)
1269
- gen_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
1270
  with gr.Row():
1271
- gen_style = gr.Dropdown(choices=STYLES, value="None", label="🎨 Style")
1272
- gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="📐 Aspect Ratio")
1273
- with gr.Accordion("⚙️ Advanced Settings", open=False):
1274
  gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
1275
  with gr.Row():
1276
  gen_seed = gr.Number(label="Seed", value=42, precision=0)
1277
  gen_randomize = gr.Checkbox(label="Random Seed", value=True)
1278
- gen_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
1279
 
1280
  with gr.Column(scale=3):
1281
  gen_output = gr.Image(label="Generated Image", type="pil", format="png", interactive=False, height=512)
1282
- gen_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
1283
  gen_seed_out = gr.Number(label="Seed Used", interactive=False)
1284
  with gr.Row():
1285
- gen_share_btn = gr.Button("📤 Share", variant="secondary")
1286
  gen_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
1287
 
1288
- # Hidden state to pass polished prompt to generate
1289
  gen_full_prompt = gr.State("")
1290
-
1291
  gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize])
1292
 
1293
- # Chain: First polish prompt (CPU), then generate (GPU)
1294
  gen_btn.click(
1295
  fn=do_polish_prompt,
1296
  inputs=[gen_prompt, gen_style, gen_polish],
@@ -1312,23 +846,23 @@ with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
1312
  gen_share_btn.click(fn=upload_to_hf_cdn, inputs=[gen_output], outputs=[gen_share_link])
1313
 
1314
  # TAB 2: AI Vision Assistant
1315
- with gr.Tab("🤖 AI Assistant"):
1316
  gr.Markdown("**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.")
1317
 
1318
  with gr.Row():
1319
  with gr.Column(scale=1):
1320
- ai_image = gr.Image(label="📷 Upload Image", type="pil", height=300)
1321
- ai_analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
1322
- ai_description = gr.Textbox(label="📝 Image Description", lines=5, interactive=False)
1323
 
1324
  with gr.Column(scale=1):
1325
- ai_request = gr.Textbox(label="✏️ What changes do you want?", placeholder="e.g., 'watercolor style' or 'dramatic sunset lighting'", lines=2)
1326
- ai_style = gr.Dropdown(choices=STYLES, value="None", label="🎨 Target Style")
1327
- ai_generate_btn = gr.Button("Generate Prompt", variant="primary")
1328
- ai_generated_prompt = gr.Textbox(label="🎯 Generated Prompt", lines=6, interactive=False)
1329
- ai_send_btn = gr.Button("🚀 Send to Transform Tab", variant="primary")
1330
 
1331
- with gr.Accordion("💡 How to Use", open=False):
1332
  gr.Markdown("""
1333
  1. **Upload** an image and click "Analyze Image"
1334
  2. **Describe** the changes you want
@@ -1336,7 +870,6 @@ with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
1336
  4. **Send** to Transform tab to apply changes
1337
  """)
1338
 
1339
- # Event handlers for AI Assistant
1340
  ai_analyze_btn.click(
1341
  fn=analyze_image_with_glm,
1342
  inputs=[ai_image],
@@ -1349,39 +882,37 @@ with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
1349
  )
1350
 
1351
  # TAB 3: Transform Image
1352
- with gr.Tab("Transform"):
1353
  gr.Markdown("**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.")
1354
 
1355
  with gr.Row():
1356
  with gr.Column(scale=2):
1357
- trans_input = gr.Image(label="📷 Upload Image", type="pil", height=300)
1358
- trans_prompt = gr.Textbox(label="✏️ Transformation Prompt", placeholder="e.g., 'oil painting style, vibrant colors'", lines=3)
1359
- trans_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
1360
  with gr.Row():
1361
- trans_style = gr.Dropdown(choices=STYLES, value="None", label="🎨 Style")
1362
- trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="💪 Strength")
1363
- with gr.Accordion("⚙️ Advanced Settings", open=False):
1364
  trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
1365
  with gr.Row():
1366
  trans_seed = gr.Number(label="Seed", value=42, precision=0)
1367
  trans_randomize = gr.Checkbox(label="Random Seed", value=True)
1368
- trans_btn = gr.Button("🚀 Transform", variant="primary", size="lg")
1369
 
1370
  with gr.Column(scale=3):
1371
  trans_output = gr.Image(label="Transformed Image", type="pil", format="png", interactive=False, height=512)
1372
- trans_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
1373
  trans_seed_out = gr.Number(label="Seed Used", interactive=False)
1374
  with gr.Row():
1375
- trans_share_btn = gr.Button("📤 Share", variant="secondary")
1376
  trans_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
1377
 
1378
- # Hidden state to pass polished prompt to transform
1379
  trans_full_prompt = gr.State("")
1380
 
1381
- with gr.Accordion("💡 Example Prompts", open=False):
1382
  gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize])
1383
 
1384
- # Chain: First polish prompt (CPU), then transform (GPU)
1385
  trans_btn.click(
1386
  fn=do_polish_transform_prompt,
1387
  inputs=[trans_prompt, trans_style, trans_polish],
@@ -1402,20 +933,19 @@ with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
1402
  )
1403
  trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link])
1404
 
1405
- # Cross-tab handler: Send from AI Assistant to Transform
1406
  ai_send_btn.click(
1407
  fn=lambda prompt, img: (prompt, img),
1408
  inputs=[ai_generated_prompt, ai_image],
1409
  outputs=[trans_prompt, trans_input]
1410
  )
1411
 
1412
- # Footer
1413
  gr.HTML(
1414
  """
1415
  <div style="text-align: center; width: 100%; font-size: 0.9rem; padding: 1rem; margin-top: 1.5rem; background: #141416; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; color: #71717a;">
1416
  <strong style="color: #a1a1aa;">Models:</strong>
1417
- <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank" style="color: #818cf8; font-weight: 500;">Z-Image-Turbo</a>
1418
- <a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank" style="color: #818cf8; font-weight: 500;">GLM-4.6V</a>
1419
  <strong style="color: #a1a1aa;">by</strong>
1420
  <a href="https://huggingface.co/lulavc" target="_blank" style="color: #a78bfa; font-weight: 600;">@lulavc</a>
1421
  </div>
 
1
+ """Z-Image-Turbo v1.2 - Fixed GLM Reasoning Filter"""
2
 
3
  import os
4
  import re
 
45
  if mode == "transform":
46
  return "high quality, enhanced details, professional finish"
47
  return "Ultra HD, 4K, cinematic composition, highly detailed"
48
+
49
  client = get_deepseek_client()
50
  if not client:
51
  return original_prompt
52
+
53
  if mode == "transform":
54
  system_prompt = """ROLE: Expert prompt engineer for AI image-to-image transformation.
55
 
 
75
  - Include artistic style references when appropriate
76
 
77
  OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count."""
78
+
79
  try:
80
  response = client.chat.completions.create(
81
  model="deepseek-reasoner",
 
85
  {"role": "user", "content": original_prompt}
86
  ],
87
  )
88
+
89
  msg = response.choices[0].message
90
  content = msg.content if msg.content else ""
91
+
92
  # If content is empty, try to extract final answer from reasoning_content
93
  if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
94
  text = msg.reasoning_content.strip()
 
95
  paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
96
  if paragraphs:
97
  content = paragraphs[-1]
98
+
99
  if content:
 
100
  content = content.strip().replace("\n", " ")
 
101
  if "<think>" in content:
102
  content = content.split("</think>")[-1].strip()
 
103
  if content.startswith('"') and content.endswith('"'):
104
  content = content[1:-1]
 
105
  max_words = 80 if mode == "transform" else 100
106
  words = content.split()
107
  if len(words) > max_words:
 
136
  buf.seek(0)
137
  return base64.b64encode(buf.getvalue()).decode('utf-8')
138
 
139
def clean_glm_response(text: str) -> str:
    """Strip GLM box-delimiter special tokens and surrounding whitespace.

    Returns an empty string for falsy input so callers can chain safely
    without None checks.
    """
    if not text:
        return ""
    # GLM wraps boxed answers in these sentinel tokens; drop both.
    for token in ('<|begin_of_box|>', '<|end_of_box|>'):
        text = text.replace(token, '')
    return text.strip()
146
+
147
def is_thinking_text(text: str) -> bool:
    """Heuristic: does this text read like GLM reasoning/meta chatter
    rather than a usable description or prompt?

    Empty input counts as thinking (i.e. unusable). Matching is
    case-insensitive on a stripped copy of the text.
    """
    if not text:
        return True

    lowered = text.lower().strip()

    # Openers that signal meta/reasoning output rather than real content.
    meta_openers = (
        'let me', 'i need', 'i should', 'first,', 'first ', 'got it', 'okay,', 'okay ',
        'alright,', 'alright ', 'the user', 'the request', 'the original', 'based on',
        'following the', "i'll", 'i will', 'now i', "here's", 'here is', 'my prompt',
        'the prompt', 'this prompt', 'for this', 'to create', 'considering',
        'compositionally', 'the image shows', 'this image shows', 'in this image',
        'we see', 'i can see', 'looking at', 'analyzing', 'i notice', 'it appears',
        'so,', 'so ', 'well,', 'well ', 'hmm', 'let\'s', 'now,', 'now ', 'sure,',
        'certainly', 'of course', 'understood', 'i understand', 'right,', 'right ',
        '**', '##',  # Markdown headers often indicate meta content
    )

    # str.startswith accepts a tuple of prefixes — one C-level check.
    if lowered.startswith(meta_openers):
        return True

    # For short texts, reasoning markers anywhere are disqualifying.
    if len(text) < 200:
        markers = ('let me', 'i need to', 'i should', 'i\'ll ', 'i will ')
        if any(marker in lowered for marker in markers):
            return True

    return False
178
+
179
def extract_clean_prompt(text: str) -> str:
    """Extract a clean prompt from a GLM response, aggressively filtering
    thinking/meta content.

    Strategy, in order:
      1. Strip GLM special tokens (clean_glm_response).
      2. If a known delimiter ("---", "Prompt:", "Final:", ...) is present,
         take the text after the LAST occurrence when it is >= 50 chars and
         not reasoning-like.
      3. Otherwise split into paragraphs, drop short (< 40 chars) and
         thinking-like ones, and return the LONGEST survivor.
      4. Fallbacks: last >= 50-char paragraph, then the whole cleaned text.

    Returns "" when nothing usable survives the filters.

    NOTE(review): the strip('"\'""') charset appears several times below —
    it looks like it was meant to include typographic quotes; confirm the
    exact characters against the deployed file.
    """
    if not text:
        return ""

    text = clean_glm_response(text)

    # Try to find content after common delimiters
    delimiters = [
        '\n\n---\n\n', '\n---\n', '---',
        '\n\nPrompt:', '\nPrompt:', 'Prompt:',
        '\n\nFinal:', '\nFinal:', 'Final:',
        '\n\nResult:', '\nResult:', 'Result:',
        '\n\nDescription:', '\nDescription:',
    ]

    for delim in delimiters:
        if delim in text:
            parts = text.split(delim)
            if len(parts) > 1:
                # parts[-1]: the text AFTER the last delimiter is most
                # likely the model's final answer.
                candidate = parts[-1].strip()
                if len(candidate) >= 50 and not is_thinking_text(candidate):
                    return candidate.strip('"\'""')

    # Split into paragraphs
    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]

    # Also split by single newlines if we didn't get good paragraphs
    if len(paragraphs) <= 1:
        paragraphs = [p.strip() for p in text.split('\n') if p.strip() and len(p.strip()) > 30]

    # Filter out thinking paragraphs
    good_paragraphs = []
    for p in paragraphs:
        if len(p) < 40:
            continue
        if is_thinking_text(p):
            continue
        # Remove quotes
        p = p.strip('"\'""')
        good_paragraphs.append(p)

    # Return the longest good paragraph
    if good_paragraphs:
        result = max(good_paragraphs, key=len)
        return result.strip('"\'""')

    # Fallback: try to get ANY paragraph that's long enough
    valid = [p.strip('"\'""') for p in paragraphs if len(p) >= 50]
    if valid:
        # Prefer later paragraphs (more likely to be final answer)
        return valid[-1]

    # Last resort: return cleaned original if it's long enough.
    # Only the first 100 chars are checked for thinking markers here,
    # so a long answer with a reasoning-like tail still passes.
    cleaned = text.strip('"\'""')
    if len(cleaned) >= 50 and not is_thinking_text(cleaned[:100]):
        return cleaned

    return ""
238
+
239
  def analyze_image_with_glm(image: Optional[Image.Image]) -> str:
240
  """Analyze image using GLM-4V and return description."""
241
  if image is None:
242
  return "Please upload an image first."
243
+
244
  client = get_glm_client()
245
  if not client:
246
  return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
247
+
248
  try:
249
  base64_image = encode_image_base64(image)
250
+
251
  response = client.chat.completions.create(
252
  model="glm-4.6v-flash",
253
  messages=[
 
260
  },
261
  {
262
  "type": "text",
263
+ "text": """Describe this image in ONE paragraph, 80-120 words.
264
 
265
+ START DIRECTLY with the main subject. NO meta-language.
266
 
267
+ WRONG: "This image shows...", "I can see...", "The image depicts..."
268
+ RIGHT: "A woman in red dress...", "Golden sunset over mountains...", "Vintage car parked..."
 
 
 
 
 
 
269
 
270
+ Include: subject, setting, colors, lighting, mood, key details.
271
+ Output ONLY the description paragraph."""
 
 
 
272
  }
273
  ]
274
  }
275
  ],
276
+ max_tokens=600,
277
  )
278
+
279
  msg = response.choices[0].message
280
+ content = clean_glm_response(msg.content) if msg.content else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
+ # Try to extract clean content
283
  if content:
284
+ extracted = extract_clean_prompt(content)
285
+ if extracted and len(extracted) >= 50:
286
+ content = extracted
287
+
288
+ # If content is bad, try reasoning_content
289
+ if not content or len(content) < 50 or is_thinking_text(content):
290
+ if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
291
+ extracted = extract_clean_prompt(msg.reasoning_content)
292
+ if extracted and len(extracted) >= 50:
293
+ content = extracted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
+ if content and len(content) >= 50:
296
+ return content
297
+ return "Could not extract description. Please try again."
298
+ except Exception as e:
299
+ return f"Error analyzing image: {str(e)}"
300
 
301
def generate_prompt_with_glm(image_description: str, user_request: str, style: str) -> str:
    """Generate a transformation prompt using GLM-4.6V-flash from an image
    description plus the user's requested change.

    Returns either a usable prompt (>= 50 chars, reasoning filtered out) or
    a human-readable status/error string — callers distinguish the two only
    by the well-known prefixes checked below, so those literals must stay
    in sync with analyze_image_with_glm's return values.
    """
    # Guard: reject the sentinel/status strings analyze_image_with_glm can
    # return instead of a real description.
    if not image_description or image_description.startswith("Please") or image_description.startswith("Error") or image_description.startswith("GLM API") or image_description.startswith("Could not"):
        return "Please analyze the image first."

    if not user_request or not user_request.strip():
        return "Please describe what changes you want."

    client = get_glm_client()
    if not client:
        return "GLM API key not configured. Please add GLM_API_KEY to space secrets."

    style_hint = f" Style: {style}." if style and style != "None" else ""
    # Cap the description to keep the request prompt small.
    desc = image_description[:1200] if len(image_description) > 1200 else image_description

    try:
        response = client.chat.completions.create(
            model="glm-4.6v-flash",
            messages=[
                {
                    "role": "user",
                    "content": f"""Write an image generation prompt (80-120 words) for this transformed scene.

ORIGINAL IMAGE: {desc}

REQUESTED CHANGE: {user_request}{style_hint}

RULES:
- Describe the FINAL result, not the transformation process
- Start with main subject, then setting, colors, lighting, mood
- ONE paragraph only, no thinking, no explanation
- Output ONLY the prompt text

Write the prompt now:"""
                }
            ],
            max_tokens=500,
        )

        msg = response.choices[0].message

        # Get both content and reasoning_content
        # (GLM reasoning models sometimes put the answer in reasoning_content
        # and leave content empty — presumably; verify against API docs).
        content = clean_glm_response(msg.content) if msg.content else ""
        reasoning = ""
        if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
            reasoning = msg.reasoning_content

        # Try to extract from content first
        result = ""
        if content:
            extracted = extract_clean_prompt(content)
            if extracted and len(extracted) >= 50 and not is_thinking_text(extracted):
                result = extracted

        # If content extraction failed, try reasoning_content
        if not result and reasoning:
            extracted = extract_clean_prompt(reasoning)
            if extracted and len(extracted) >= 50 and not is_thinking_text(extracted):
                result = extracted

        # Compare both and pick the better one
        if result and reasoning and not is_thinking_text(result):
            reasoning_extracted = extract_clean_prompt(reasoning)
            # Prefer the one that's cleaner (doesn't start with thinking)
            # and longer — length is used as a proxy for completeness.
            if reasoning_extracted and len(reasoning_extracted) > len(result):
                if not is_thinking_text(reasoning_extracted):
                    result = reasoning_extracted

        if result and len(result) >= 50:
            return result.strip('"\'""')

        return "Could not generate prompt. Please try again with different input."
    except Exception as e:
        # Broad catch is deliberate: any API/network failure becomes a
        # user-visible message instead of crashing the Gradio handler.
        return f"Error: {str(e)}"
375
 
376
  print("Loading Z-Image-Turbo pipeline...")
377
 
 
378
  pipe_t2i = DiffusionPipeline.from_pretrained(
379
  "Tongyi-MAI/Z-Image-Turbo",
380
  )
381
  pipe_t2i.to("cuda", torch.bfloat16)
382
 
 
383
  pipe_i2i = ZImageImg2ImgPipeline(
384
  transformer=pipe_t2i.transformer,
385
  vae=pipe_t2i.vae,
 
438
  ["Cozy witch cottage interior on a stormy autumn night, iron cauldrons bubbling with green smoke, wooden shelves packed with glowing potion bottles and ancient spell books, a sleepy black cat curled by the stone fireplace, bundles of dried herbs and garlic hanging from dark oak ceiling beams, warm amber candlelight flickering throughout the room", "Digital Art", "1:1 MAX (2048x2048)", 9, 42, True],
439
  ]
440
 
 
441
  EXAMPLES_TRANSFORM = [
442
  ["Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7, 9, 42, True],
443
  ["Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65, 9, 42, True],
 
469
  return f"Upload failed: {response.status_code}"
470
  except requests.Timeout:
471
  return "Upload timed out. Please try again."
472
+ except Exception:
473
+ return "Upload error. Please try again."
474
 
475
def do_polish_prompt(prompt: str, style: str, do_polish: bool, mode: str = "generate") -> Tuple[str, str]:
    """Polish prompt before generation (runs on CPU, before GPU allocation).

    Returns (final_prompt_with_style_suffix, polished_prompt). Both are
    empty strings when the input prompt is blank.
    """
    if not prompt or not prompt.strip():
        return "", ""

    cleaned = prompt.strip()
    # Optionally run the DeepSeek polishing pass; otherwise keep as-is.
    polished = polish_prompt(cleaned, mode=mode) if do_polish else cleaned
    # Unknown styles contribute an empty suffix.
    return polished + STYLE_SUFFIXES.get(style, ""), polished
489
 
490
  def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tuple[str, str]:
 
493
  base = prompt.strip() if prompt else "high quality image"
494
  final = base + STYLE_SUFFIXES.get(style, "")
495
  return final, ""
496
+
497
  return do_polish_prompt(prompt, style, True, mode="transform")
498
 
499
  @spaces.GPU
 
501
  if randomize:
502
  seed = torch.randint(0, 2**32 - 1, (1,)).item()
503
  seed = int(seed)
504
+
505
  if not full_prompt or not full_prompt.strip():
506
  return None, seed
507
+
508
  w, h = RATIO_DIMS.get(ratio, (1024, 1024))
509
+
510
  generator = torch.Generator("cuda").manual_seed(seed)
511
  image = pipe_t2i(
512
  prompt=full_prompt,
 
516
  guidance_scale=0.0,
517
  generator=generator,
518
  ).images[0]
519
+
520
  return image, seed
521
 
522
  @spaces.GPU
523
  def transform(input_image: Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
524
  if input_image is None:
525
  return None, 0
526
+
527
  if randomize:
528
  seed = torch.randint(0, 2**32 - 1, (1,)).item()
529
  seed = int(seed)
530
+
531
  if not full_prompt or not full_prompt.strip():
532
  full_prompt = "high quality image, enhanced details"
533
+
 
534
  input_image = input_image.convert("RGB")
535
  w, h = input_image.size
 
536
  w = (w // 16) * 16
537
  h = (h // 16) * 16
538
  w = max(512, min(2048, w))
539
  h = max(512, min(2048, h))
540
  input_image = input_image.resize((w, h), Image.LANCZOS)
541
+
 
 
542
  strength = float(strength)
543
  effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)
544
+
545
  generator = torch.Generator("cuda").manual_seed(seed)
546
  image = pipe_i2i(
547
  prompt=full_prompt,
 
551
  guidance_scale=0.0,
552
  generator=generator,
553
  ).images[0]
554
+
555
  return image, seed
556
 
557
  css = r"""
558
  :root {
 
559
  --bg-primary: #0c0c0e;
560
  --bg-secondary: #141416;
561
  --bg-tertiary: #1c1c20;
562
  --surface: #232328;
563
  --surface-hover: #2a2a30;
 
 
564
  --accent-primary: #818cf8;
565
  --accent-secondary: #a78bfa;
566
  --accent-hover: #6366f1;
567
  --accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
568
  --accent-glow: rgba(99, 102, 241, 0.4);
 
 
569
  --text-primary: #f4f4f5;
570
  --text-secondary: #a1a1aa;
571
  --text-muted: #71717a;
 
 
572
  --border-subtle: rgba(255, 255, 255, 0.08);
573
  --border-default: rgba(255, 255, 255, 0.12);
 
 
574
  --success: #10b981;
575
  --warning: #f59e0b;
576
  --error: #ef4444;
 
 
577
  --shadow-sm: 0 1px 2px rgba(0,0,0,0.3);
578
  --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.4);
579
  --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.5);
580
  --shadow-glow: 0 0 20px var(--accent-glow);
 
 
581
  --radius-sm: 8px;
582
  --radius-md: 12px;
583
  --radius-lg: 16px;
584
  --transition: 0.2s ease;
585
  }
586
 
 
587
  .gradio-container {
588
  background: var(--bg-primary) !important;
589
  min-height: 100vh;
590
  color: var(--text-primary);
591
  }
592
 
593
+ .tabs { background: transparent !important; padding: 8px 0; }
 
 
 
 
594
 
 
595
  .tab-nav {
596
  background: var(--bg-secondary) !important;
597
  border: 1px solid var(--border-subtle) !important;
 
621
  color: var(--text-primary) !important;
622
  }
623
 
 
 
 
 
 
624
  .tab-nav > button.selected,
625
  .tab-nav > button[aria-selected="true"],
626
  [role="tab"][aria-selected="true"] {
 
630
  box-shadow: var(--shadow-glow);
631
  }
632
 
 
633
  button.primary, .primary {
634
  background: var(--accent-gradient) !important;
635
  border: none !important;
 
647
  filter: brightness(1.1);
648
  }
649
 
 
 
 
 
 
 
 
 
 
 
650
  button.secondary, .secondary {
651
  background: var(--surface) !important;
652
  color: var(--text-primary) !important;
 
661
  border-color: var(--accent-primary) !important;
662
  }
663
 
 
 
 
 
 
 
664
  .block {
665
  background: var(--bg-secondary) !important;
666
  border: 1px solid var(--border-subtle) !important;
 
671
  transition: all var(--transition);
672
  }
673
 
674
+ .tabitem { background: transparent !important; padding: 16px 0; }
 
 
 
 
 
 
 
 
675
 
 
676
  input, textarea, .gr-input, .gr-textbox textarea {
677
  background: var(--bg-tertiary) !important;
678
  border: 1px solid var(--border-default) !important;
 
681
  transition: all var(--transition);
682
  }
683
 
684
+ input:focus, textarea:focus {
685
  border-color: var(--accent-primary) !important;
686
  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
687
  outline: none !important;
688
  }
689
 
 
690
  .gr-dropdown, select {
691
  background: var(--bg-tertiary) !important;
692
  border: 1px solid var(--border-default) !important;
 
694
  color: var(--text-primary) !important;
695
  }
696
 
697
+ .gr-slider input[type="range"] { accent-color: var(--accent-primary); }
698
+ .gr-checkbox input[type="checkbox"] { accent-color: var(--accent-primary); }
 
 
699
 
700
+ label, .gr-label { color: var(--text-secondary) !important; font-weight: 500; }
 
 
 
 
 
 
 
 
 
701
 
 
702
  .gr-image, .image-container {
703
  background: var(--bg-tertiary) !important;
704
  border: 2px dashed var(--border-default) !important;
 
706
  transition: all var(--transition);
707
  }
708
 
709
+ .gr-image:hover { border-color: var(--accent-primary) !important; }
710
+ .gr-image img { border-radius: var(--radius-md); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
+ [class*="examples"] td:first-child { text-align: left; }
713
+ [class*="examples"] table { background: var(--bg-secondary) !important; border-radius: var(--radius-md); }
714
+ [class*="examples"] tr { background: transparent !important; border-bottom: 1px solid var(--border-subtle) !important; }
715
+ [class*="examples"] tr:hover { background: var(--bg-tertiary) !important; }
716
+ [class*="examples"] td { color: var(--text-secondary) !important; padding: 12px !important; }
717
+ [class*="examples"] thead, [class*="examples"] thead tr, [class*="examples"] thead th {
718
  background: var(--surface) !important;
719
  color: var(--text-primary) !important;
720
  border-bottom: 1px solid var(--border-default) !important;
721
  }
722
 
723
+ h1, h2, h3, h4 { color: var(--text-primary) !important; }
724
+ h1 { font-size: clamp(1.5rem, 4vw, 2.2rem); font-weight: 700; }
 
 
 
 
 
 
 
725
 
726
+ .markdown-text, .gr-markdown { color: var(--text-secondary) !important; }
727
+ .gr-markdown a { color: var(--accent-primary) !important; }
 
 
728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  .gr-group {
730
  background: var(--surface) !important;
731
  border: 1px solid var(--border-subtle) !important;
 
733
  padding: 16px !important;
734
  }
735
 
 
736
  .gr-accordion {
737
  background: var(--bg-secondary) !important;
738
  border: 1px solid var(--border-subtle) !important;
739
  border-radius: var(--radius-md) !important;
740
  }
741
 
742
+ .footer-no-box { background: transparent !important; border: none !important; box-shadow: none !important; padding: 0; }
 
 
 
 
 
 
743
 
744
  .gradio-container > footer {
745
  background: var(--bg-secondary) !important;
 
747
  padding: 12px 20px;
748
  }
749
 
750
+ .gradio-container > footer span, .gradio-container > footer p { color: var(--text-muted) !important; }
751
+ .gradio-container > footer a { color: var(--accent-primary) !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752
 
753
+ .progress-bar { background: var(--bg-tertiary) !important; border-radius: 4px; }
754
+ .progress-bar > div { background: var(--accent-gradient) !important; border-radius: 4px; }
 
 
755
 
 
756
  @media (prefers-reduced-motion: reduce) {
757
+ *, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; }
 
 
 
 
758
  }
759
 
 
760
  @media (max-width: 768px) {
761
+ .tab-nav { padding: 4px; gap: 4px; }
762
+ .tab-nav > button { padding: 10px 16px; font-size: 0.85rem; }
763
+ .block { padding: 12px; margin: 6px 0; }
764
+ button.primary { padding: 10px 16px; width: 100%; }
765
+ h1 { font-size: 1.4rem !important; }
766
+ }
767
+
768
+ ::-webkit-scrollbar { width: 8px; height: 8px; }
769
+ ::-webkit-scrollbar-track { background: var(--bg-secondary); }
770
+ ::-webkit-scrollbar-thumb { background: var(--bg-tertiary); border-radius: 4px; }
771
+ ::-webkit-scrollbar-thumb:hover { background: var(--surface); }
772
+
773
+ .gradio-container, .gradio-container * { color: #e5e5e5; }
774
+ .tab-nav button, .tab-nav > button, button[role="tab"], .tabs button { color: #e5e5e5 !important; }
775
+ label, .gr-label, .label-wrap, .label-wrap span, .gr-box label, .gr-form label, .gr-group label { color: #d4d4d8 !important; }
776
+ .gr-block span, .gr-box span, .gr-form span, .gr-group span, .block span { color: #d4d4d8 !important; }
777
+ table thead, table thead tr, table thead th, [class*="examples"] thead th { background: #2a2a30 !important; color: #f4f4f5 !important; }
778
+ table tbody td, [class*="examples"] td { color: #d4d4d8 !important; }
779
+ .gr-accordion summary, .gr-accordion button, details summary, summary span { color: #e5e5e5 !important; }
780
+ .gr-markdown, .gr-markdown p, .gr-markdown li, .markdown-text, .prose { color: #d4d4d8 !important; }
781
+ input::placeholder, textarea::placeholder { color: #71717a !important; }
782
+ button.secondary, .secondary { color: #e5e5e5 !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  """
784
 
785
  with gr.Blocks(title="Z Image Turbo", css=css, theme=gr.themes.Base()) as demo:
786
  gr.HTML("""
787
  <div style="text-align: center; padding: 24px 16px 16px 16px;">
788
  <h1 style="background: linear-gradient(135deg, #818cf8 0%, #a78bfa 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-size: clamp(1.5rem, 4vw, 2.2rem); margin-bottom: 8px; font-weight: 700;">
789
+ Z Image Turbo + GLM-4.6V
790
  </h1>
791
  <p style="color: #a1a1aa; font-size: 1rem; margin: 0;">
792
  AI Image Generation & Transformation powered by DeepSeek Reasoning
793
  </p>
794
  <p style="color: #f472b6; font-size: 0.9rem; margin-top: 12px;">
795
+ If you liked it, please like it. Thank you!
796
  </p>
797
  </div>
798
  """)
799
 
800
  with gr.Tabs():
801
  # TAB 1: Generate Image
802
+ with gr.Tab("Generate"):
803
  with gr.Row():
804
  with gr.Column(scale=2):
805
+ gen_prompt = gr.Textbox(label="Prompt", placeholder="Describe your image in detail...", lines=4)
806
+ gen_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
807
  with gr.Row():
808
+ gen_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
809
+ gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="Aspect Ratio")
810
+ with gr.Accordion("Advanced Settings", open=False):
811
  gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
812
  with gr.Row():
813
  gen_seed = gr.Number(label="Seed", value=42, precision=0)
814
  gen_randomize = gr.Checkbox(label="Random Seed", value=True)
815
+ gen_btn = gr.Button("Generate", variant="primary", size="lg")
816
 
817
  with gr.Column(scale=3):
818
  gen_output = gr.Image(label="Generated Image", type="pil", format="png", interactive=False, height=512)
819
+ gen_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
820
  gen_seed_out = gr.Number(label="Seed Used", interactive=False)
821
  with gr.Row():
822
+ gen_share_btn = gr.Button("Share", variant="secondary")
823
  gen_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
824
 
 
825
  gen_full_prompt = gr.State("")
 
826
  gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize])
827
 
 
828
  gen_btn.click(
829
  fn=do_polish_prompt,
830
  inputs=[gen_prompt, gen_style, gen_polish],
 
846
  gen_share_btn.click(fn=upload_to_hf_cdn, inputs=[gen_output], outputs=[gen_share_link])
847
 
848
  # TAB 2: AI Vision Assistant
849
+ with gr.Tab("AI Assistant"):
850
  gr.Markdown("**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.")
851
 
852
  with gr.Row():
853
  with gr.Column(scale=1):
854
+ ai_image = gr.Image(label="Upload Image", type="pil", height=300)
855
+ ai_analyze_btn = gr.Button("Analyze Image", variant="primary")
856
+ ai_description = gr.Textbox(label="Image Description", lines=5, interactive=False)
857
 
858
  with gr.Column(scale=1):
859
+ ai_request = gr.Textbox(label="What changes do you want?", placeholder="e.g., 'watercolor style' or 'dramatic sunset lighting'", lines=2)
860
+ ai_style = gr.Dropdown(choices=STYLES, value="None", label="Target Style")
861
+ ai_generate_btn = gr.Button("Generate Prompt", variant="primary")
862
+ ai_generated_prompt = gr.Textbox(label="Generated Prompt", lines=6, interactive=False)
863
+ ai_send_btn = gr.Button("Send to Transform Tab", variant="primary")
864
 
865
+ with gr.Accordion("How to Use", open=False):
866
  gr.Markdown("""
867
  1. **Upload** an image and click "Analyze Image"
868
  2. **Describe** the changes you want
 
870
  4. **Send** to Transform tab to apply changes
871
  """)
872
 
 
873
  ai_analyze_btn.click(
874
  fn=analyze_image_with_glm,
875
  inputs=[ai_image],
 
882
  )
883
 
884
  # TAB 3: Transform Image
885
+ with gr.Tab("Transform"):
886
  gr.Markdown("**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.")
887
 
888
  with gr.Row():
889
  with gr.Column(scale=2):
890
+ trans_input = gr.Image(label="Upload Image", type="pil", height=300)
891
+ trans_prompt = gr.Textbox(label="Transformation Prompt", placeholder="e.g., 'oil painting style, vibrant colors'", lines=3)
892
+ trans_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False)
893
  with gr.Row():
894
+ trans_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
895
+ trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="Strength")
896
+ with gr.Accordion("Advanced Settings", open=False):
897
  trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
898
  with gr.Row():
899
  trans_seed = gr.Number(label="Seed", value=42, precision=0)
900
  trans_randomize = gr.Checkbox(label="Random Seed", value=True)
901
+ trans_btn = gr.Button("Transform", variant="primary", size="lg")
902
 
903
  with gr.Column(scale=3):
904
  trans_output = gr.Image(label="Transformed Image", type="pil", format="png", interactive=False, height=512)
905
+ trans_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4)
906
  trans_seed_out = gr.Number(label="Seed Used", interactive=False)
907
  with gr.Row():
908
+ trans_share_btn = gr.Button("Share", variant="secondary")
909
  trans_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False)
910
 
 
911
  trans_full_prompt = gr.State("")
912
 
913
+ with gr.Accordion("Example Prompts", open=False):
914
  gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize])
915
 
 
916
  trans_btn.click(
917
  fn=do_polish_transform_prompt,
918
  inputs=[trans_prompt, trans_style, trans_polish],
 
933
  )
934
  trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link])
935
 
936
+ # Cross-tab handler
937
  ai_send_btn.click(
938
  fn=lambda prompt, img: (prompt, img),
939
  inputs=[ai_generated_prompt, ai_image],
940
  outputs=[trans_prompt, trans_input]
941
  )
942
 
 
943
  gr.HTML(
944
  """
945
  <div style="text-align: center; width: 100%; font-size: 0.9rem; padding: 1rem; margin-top: 1.5rem; background: #141416; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; color: #71717a;">
946
  <strong style="color: #a1a1aa;">Models:</strong>
947
+ <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank" style="color: #818cf8; font-weight: 500;">Z-Image-Turbo</a> |
948
+ <a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank" style="color: #818cf8; font-weight: 500;">GLM-4.6V</a> |
949
  <strong style="color: #a1a1aa;">by</strong>
950
  <a href="https://huggingface.co/lulavc" target="_blank" style="color: #a78bfa; font-weight: 600;">@lulavc</a>
951
  </div>