Spaces:

jgitsolutions
/

AdvancedUpscalerCPU

Sleeping

App Files Files Community

jgitsolutions committed on 15 days ago

Commit

e58a107

verified ·

1 Parent(s): b2759ab

Upload 3 files

Browse files

Files changed (3) hide show

RealESRGAN_x2plus.pth +3 -0
app.py +610 -141
requirements.txt +8 -1

RealESRGAN_x2plus.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49fafd45f8fd7aa8d31ab2a22d14d91b536c34494a5cfe31eb5d89c2fa266abb
+size 67061725

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionUpscalePipeline
 from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution
 import gc
 from PIL import Image
@@ -10,180 +10,649 @@ import io
 import os
 import requests
 from spandrel import ModelLoader
-# Setup logging
-log_capture_string = io.StringIO()
 ch = logging.StreamHandler(log_capture_string)
 ch.setLevel(logging.INFO)
-logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 logger.addHandler(ch)
-def get_logs():
     return log_capture_string.getvalue()
-# Global models cache
-models = {}
-def download_file(url, filename):
-    if not os.path.exists(filename):
-        logger.info(f"Downloading {filename}...")
-        response = requests.get(url, stream=True)
-        with open(filename, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-        logger.info(f"Downloaded {filename}.")
-    return filename
-def load_realesrgan_x2():
-    if "realesrgan_x2" not in models:
-        logger.info("Loading RealESRGAN x2plus model...")
-        url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"
-        model_path = download_file(url, "RealESRGAN_x2plus.pth")
-        model = ModelLoader().load_from_file(model_path)
-        model.eval()
-        # Move to CPU (or CUDA if available, but we focus on CPU here)
-        device = torch.device("cpu")
-        model.to(device)
-        models["realesrgan_x2"] = model
-        logger.info("RealESRGAN x2plus loaded.")
-    return models["realesrgan_x2"]
-def load_swin2sr_x2():
-    if "swin2sr_x2" not in models:
-        logger.info("Loading Swin2SR x2 model...")
-        model_id = "caidas/swin2SR-classical-sr-x2-64"
-        processor = AutoImageProcessor.from_pretrained(model_id)
-        model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
-        models["swin2sr_x2"] = (processor, model)
-        logger.info("Swin2SR x2 loaded.")
-    return models["swin2sr_x2"]
-def load_sd_x4():
-    if "sd_x4" not in models:
-        logger.info("Loading Stable Diffusion x4 model (this might take a while)...")
-        model_id = "stabilityai/stable-diffusion-x4-upscaler"
-        pipe = StableDiffusionUpscalePipeline.from_pretrained(
-            model_id,
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True
-        )
-        pipe.enable_attention_slicing("max")
-        pipe.enable_vae_tiling()
-        models["sd_x4"] = pipe
-        logger.info("Stable Diffusion x4 loaded.")
-    return models["sd_x4"]
-def upscale_realesrgan(input_img):
-    model = load_realesrgan_x2()
-    # Convert PIL to Tensor
-    img_np = np.array(input_img).astype(np.float32) / 255.0
-    img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0)
-    with torch.no_grad():
-        output_tensor = model(img_tensor)
-    # Convert Tensor back to PIL
-    output_np = output_tensor.squeeze(0).permute(1, 2, 0).clamp(0, 1).numpy()
-    output_np = (output_np * 255.0).round().astype(np.uint8)
-    return Image.fromarray(output_np)
-def upscale_swin2sr(input_img, scale=2):
-    processor, model = load_swin2sr_x2()
-    inputs = processor(images=input_img, return_tensors="pt")
-    with torch.no_grad():
-        outputs = model(**inputs)
-    output = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-    output = np.moveaxis(output, source=0, destination=-1)
-    output = (output * 255.0).round().astype(np.uint8)
-    return Image.fromarray(output)
-def upscale_diffusion_cpu(input_img, prompt):
-    pipe = load_sd_x4()
-    # Resize input if too large to prevent OOM
-    max_size = 512
-    if max(input_img.size) > max_size:
-        ratio = max_size / max(input_img.size)
-        new_size = (int(input_img.size[0] * ratio), int(input_img.size[1] * ratio))
-        input_img = input_img.resize(new_size, Image.Resampling.LANCZOS)
-        logger.warning(f"Resized input to {new_size} to prevent OOM")
-    generator = torch.manual_seed(42)
-    output = pipe(
-        prompt=prompt,
-        image=input_img,
-        num_inference_steps=20,
-        guidance_scale=7.0,
-        generator=generator
-    ).images[0]
-    return output
-def process_image(input_img, model_name, prompt):
     if input_img is None:
-        return None, get_logs()
-    logger.info(f"Processing image with {model_name}...")
     try:
-        if model_name == "RealESRGAN x2":
-            output = upscale_realesrgan(input_img)
-        elif model_name == "Swin2SR x2":
-            output = upscale_swin2sr(input_img, scale=2)
-        elif model_name == "Stable Diffusion x4":
-            output = upscale_diffusion_cpu(input_img, prompt)
-        else:
-            output = input_img # Fallback
         gc.collect()
-        logger.info("Processing complete.")
-        return output, get_logs()
     except Exception as e:
-        logger.error(f"Error: {str(e)}")
-        return None, get_logs()
 desc = """
-### Multi-Model Upscaler
-Select a model to upscale your image.
-* **RealESRGAN x2**: Very fast, sharp results. Best for general photos.
-* **Swin2SR x2**: Accurate, good for compressed images. Slower than RealESRGAN.
-* **Stable Diffusion x4**: Slow, creative, high memory usage. Adds details but may hallucinate.
 """
-with gr.Blocks(title="Universal Upscaler") as iface:
     gr.Markdown(desc)
     with gr.Row():
-        with gr.Column():
             input_image = gr.Image(type="pil", label="Input Image")
-            model_selector = gr.Dropdown(
-                choices=["RealESRGAN x2", "Swin2SR x2", "Stable Diffusion x4"],
-                value="RealESRGAN x2",
-                label="Select Model"
-            )
-            prompt_input = gr.Textbox(
-                label="Prompt (for Stable Diffusion only)",
-                value="highly detailed, 4k, sharp"
-            )
-            submit_btn = gr.Button("Upscale")
-        with gr.Column():
-            output_image = gr.Image(type="pil", label="Upscaled Image")
-            logs_output = gr.TextArea(label="Logs", interactive=False)
     submit_btn.click(
         fn=process_image,
         inputs=[input_image, model_selector, prompt_input],
-        outputs=[output_image, logs_output]
     )
 iface.launch()

 import gradio as gr
 import torch
+from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale import StableDiffusionUpscalePipeline
 from transformers import AutoImageProcessor, Swin2SRForImageSuperResolution
 import gc
 from PIL import Image
 import os
 import requests
 from spandrel import ModelLoader
+from abc import ABC, abstractmethod
+from typing import Optional, Tuple, Dict
+import psutil
+import time
+import traceback
+# --- Configuration ---
+class Config:
+    """Static settings for the app: weight locations, model identifiers and limits."""
+    # Directory that downloaded weight files are written to.
+    MODEL_DIR = "weights"
+    REALESRGAN_URL = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"
+    REALESRGAN_FILENAME = "RealESRGAN_x2plus.pth"
+    # Identifiers passed to the respective ``from_pretrained`` calls.
+    SWIN2SR_ID = "caidas/swin2SR-classical-sr-x2-64"
+    SD_ID = "stabilityai/stable-diffusion-x4-upscaler"
+    # SOTA Models (2025)
+    SPAN_URL = "https://huggingface.co/Phips/2xNomosUni_span_multijpg/resolve/main/2xNomosUni_span_multijpg.safetensors"
+    SPAN_FILENAME = "2xNomosUni_span_multijpg.safetensors"
+    HATS_URL = "https://huggingface.co/Phips/4xNomos8kSCHAT-S/resolve/main/4xNomos8kSCHAT-S.safetensors"
+    HATS_FILENAME = "4xNomos8kSCHAT-S.safetensors"
+    MAX_IMAGE_SIZE_SD = 512  # Max dimension for SD input to prevent OOM
+    DEVICE = "cpu" # Force CPU for this demo, can be "cuda" if available
+    @staticmethod
+    def ensure_model_dir():
+        """Create ``Config.MODEL_DIR`` (including parents) if it is missing.
+
+        Raises:
+            FileExistsError: if the path exists but is not a directory.
+        """
+        # exist_ok=True replaces the exists()-then-makedirs() pair, which could
+        # fail if the directory was created between the check and the call.
+        os.makedirs(Config.MODEL_DIR, exist_ok=True)
+# --- Logging Setup ---
+class LogCapture(io.StringIO):
+    """In-memory text buffer for captured log output; adds nothing to ``io.StringIO``."""
+# Single in-memory buffer that receives every formatted log record; get_logs() reads it back.
+# NOTE(review): nothing visible in this file truncates the buffer, so it appears to grow
+# for the lifetime of the process — confirm whether that is acceptable.
+log_capture_string = LogCapture()
 ch = logging.StreamHandler(log_capture_string)
 ch.setLevel(logging.INFO)
+# Prefix each record with its timestamp and level name.
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+# Named application logger; records at INFO and above are passed to the capture handler.
+logger = logging.getLogger("UpscalerApp")
 logger.setLevel(logging.INFO)
 logger.addHandler(ch)
+def get_logs() -> str:
+    """Return everything written to the log capture buffer so far."""
+    captured = log_capture_string.getvalue()
+    return captured
+# --- System Monitoring ---
+def get_system_usage() -> str:
+    """Return a one-line summary of current CPU and RAM utilisation.
+
+    Returns:
+        A string of the form ``"CPU: <pct>% | RAM: <pct>% (<GiB> GB used)"``.
+    """
+    cpu_percent = psutil.cpu_percent()
+    # Take one memory snapshot so the percentage and the used-bytes figure
+    # describe the same instant.
+    memory = psutil.virtual_memory()
+    ram_used_gb = memory.used / (1024 ** 3)
+    return f"CPU: {cpu_percent}% | RAM: {memory.percent}% ({ram_used_gb:.1f} GB used)"
+# --- Abstract Base Class for Models ---
+class UpscalerStrategy(ABC):
+    """Interface shared by all upscaling backends.
+
+    Subclasses implement ``load`` and ``upscale``; ``unload`` is provided here.
+    """
+    def __init__(self):
+        self.name = "Unknown"
+        self.model = None
+    @abstractmethod
+    def load(self) -> None:
+        """Bring the backend's model into memory."""
+    @abstractmethod
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* and return the result."""
+    def unload(self) -> None:
+        """Drop the model reference and run a GC pass; does nothing when no model is set."""
+        if self.model is None:
+            return
+        del self.model
+        self.model = None
+        gc.collect()
+        logger.info(f"Unloaded {self.name}")
+# --- Helper Functions for Optimization ---
+def manual_tile_upscale(model, img_tensor, tile_size=256, tile_pad=10, scale=2):
+    """Upscale *img_tensor* tile by tile to bound peak memory.
+
+    Each tile is extended by up to ``tile_pad`` pixels of surrounding context
+    (clipped at the image border) before being passed to *model*; the upscaled
+    context is cropped away again before the tile is written to the output.
+
+    Args:
+        model: Callable mapping a ``(B, C, h, w)`` tensor to ``(B, C, h*scale, w*scale)``.
+        img_tensor: Input of shape ``(B, C, H, W)``.
+        tile_size: Edge length of a tile in input pixels; must be positive.
+        tile_pad: Context border added around each tile; must be non-negative.
+        scale: Upscaling factor of *model*.
+
+    Returns:
+        Tensor of shape ``(B, C, H*scale, W*scale)`` with the device and dtype
+        of *img_tensor*.
+
+    Raises:
+        ValueError: if ``tile_size`` is not positive or ``tile_pad`` is negative.
+    """
+    # A zero tile size used to surface as ZeroDivisionError and a negative one
+    # produced no tiles at all, i.e. a silently all-black result.
+    if tile_size <= 0:
+        raise ValueError(f"tile_size must be positive, got {tile_size}")
+    if tile_pad < 0:
+        raise ValueError(f"tile_pad must be non-negative, got {tile_pad}")
+    batch, channels, height, width = img_tensor.shape
+    output = torch.zeros(batch, channels, height * scale, width * scale,
+                         device=img_tensor.device, dtype=img_tensor.dtype)
+    with torch.no_grad():
+        for top in range(0, height, tile_size):
+            bottom = min(top + tile_size, height)
+            # Rows of context actually available above/below this tile.
+            pad_top = max(0, top - tile_pad)
+            pad_bottom = min(height, bottom + tile_pad)
+            for left in range(0, width, tile_size):
+                right = min(left + tile_size, width)
+                pad_left = max(0, left - tile_pad)
+                pad_right = min(width, right + tile_pad)
+                tile_out = model(img_tensor[:, :, pad_top:pad_bottom, pad_left:pad_right])
+                # Offset of the un-padded tile inside the upscaled padded tile.
+                crop_top = (top - pad_top) * scale
+                crop_left = (left - pad_left) * scale
+                crop_bottom = crop_top + (bottom - top) * scale
+                crop_right = crop_left + (right - left) * scale
+                output[:, :, top * scale:bottom * scale, left * scale:right * scale] = \
+                    tile_out[:, :, crop_top:crop_bottom, crop_left:crop_right]
+    return output
+def select_tile_config(height, width):
+    """Pick a tile size and padding from the image area.
+
+    The area is measured in units of 1024*1024 pixels; larger images get
+    smaller tiles with a wider padding border.
+    """
+    area = (height * width) / (1024 ** 2)
+    # (exclusive upper bound on area, tile, tile_pad), checked in ascending order.
+    tiers = (
+        (2, 512, 10),
+        (6, 384, 15),
+        (16, 256, 20),
+    )
+    for upper_bound, tile, pad in tiers:
+        if area < upper_bound:
+            return {'tile': tile, 'tile_pad': pad}
+    # Anything at or above the last bound uses the smallest tiles.
+    return {'tile': 128, 'tile_pad': 25}
+# --- Concrete Implementations ---
+class RealESRGANStrategy(UpscalerStrategy):
+    """Real-ESRGAN x2plus upscaler, loaded from a weight file via spandrel's ``ModelLoader``."""
+    def __init__(self):
+        super().__init__()
+        self.name = "RealESRGAN x2"
+        # Set once torch.compile has been attempted (whether or not it succeeded).
+        # NOTE(review): the flag survives unload(), so a model reloaded after a
+        # switch is not compiled again — confirm this is intended.
+        self.compiled = False
+    @staticmethod
+    def _download(url: str, dest: str) -> None:
+        """Stream *url* to *dest*; *dest* only appears once the download is complete."""
+        # load() treats any existing file at *dest* as a valid cached copy, so an
+        # interrupted transfer must never leave a truncated file there.
+        partial = dest + ".part"
+        try:
+            # The timeout bounds the connect and per-read waits, not the whole transfer.
+            with requests.get(url, stream=True, timeout=60) as response:
+                response.raise_for_status()
+                with open(partial, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            os.replace(partial, dest)
+        finally:
+            # After a successful replace the temp file is gone; otherwise remove it.
+            if os.path.exists(partial):
+                os.remove(partial)
+    def _maybe_compile(self, model):
+        """Return *model*, wrapped by torch.compile where that is expected to pay off."""
+        if self.compiled:
+            return model
+        self.compiled = True  # Mark as tried, even if compilation fails below
+        try:
+            # 'reduce-overhead' uses CUDA graphs, so only use it on CUDA
+            if Config.DEVICE == 'cuda':
+                model = torch.compile(model, mode='reduce-overhead')
+                logger.info("✓ torch.compile enabled (reduce-overhead mode)")
+            elif os.name == 'nt' and Config.DEVICE == 'cpu':
+                # Windows requires MSVC for Inductor (default cpu backend)
+                logger.info("ℹ Skipping torch.compile on Windows CPU to avoid MSVC requirement.")
+            elif (psutil.cpu_count(logical=False) or 0) < 4 and Config.DEVICE == 'cpu':
+                # Skip compilation on weak CPUs (e.g. HF Spaces Free Tier) to avoid long startup times
+                logger.info("ℹ Skipping torch.compile on low-core CPU to prevent timeout.")
+            else:
+                model = torch.compile(model)
+                logger.info("✓ torch.compile enabled (default mode)")
+        except Exception as e:
+            logger.warning(f"⚠ torch.compile not available or failed: {e}")
+        return model
+    def load(self) -> None:
+        """Download the weights if needed and load the model onto ``Config.DEVICE``.
+
+        Does nothing when a model is already loaded. Download and load errors
+        are logged and re-raised; ``self.model`` stays ``None`` in that case.
+        """
+        if self.model is not None:
+            return
+        logger.info(f"Loading {self.name}...")
+        Config.ensure_model_dir()
+        model_path = os.path.join(Config.MODEL_DIR, Config.REALESRGAN_FILENAME)
+        if not os.path.exists(model_path):
+            logger.info(f"Downloading {Config.REALESRGAN_FILENAME}...")
+            try:
+                self._download(Config.REALESRGAN_URL, model_path)
+                logger.info("Download complete.")
+            except Exception as e:
+                logger.error(f"Failed to download model: {e}")
+                raise
+        try:
+            model = ModelLoader().load_from_file(model_path)
+            model.eval()
+            model.to(Config.DEVICE)
+        except Exception as e:
+            logger.error(f"Failed to load model architecture: {e}")
+            raise
+        # Optimization: torch.compile (failures are logged, never fatal)
+        self.model = self._maybe_compile(model)
+        logger.info(f"{self.name} loaded successfully.")
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* by 2x using tiled, autocast inference.
+
+        Falls back to a single full-image pass if the tiled/autocast path raises.
+        """
+        if self.model is None:
+            self.load()
+        logger.info(f"Starting inference with {self.name}...")
+        start_time = time.time()
+        # The tensor conversion below assumes an H x W x 3 array.
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        img_np = np.array(image).astype(np.float32) / 255.0
+        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(Config.DEVICE)
+        # Optimization: Dynamic Tiling
+        h, w = img_np.shape[:2]
+        tile_config = select_tile_config(h, w)
+        logger.info(f"Using tile config: {tile_config}")
+        # Optimization: Mixed Precision (AMP) — float16 on CUDA, bfloat16 elsewhere.
+        dtype = torch.float16 if Config.DEVICE == 'cuda' else torch.bfloat16
+        try:
+            with torch.no_grad(), torch.autocast(device_type=Config.DEVICE, dtype=dtype):
+                if tile_config['tile'] > 0:
+                    output_tensor = manual_tile_upscale(
+                        self.model,
+                        img_tensor,
+                        tile_size=tile_config['tile'],
+                        tile_pad=tile_config['tile_pad'],
+                        scale=2
+                    )
+                else:
+                    output_tensor = self.model(img_tensor) # type: ignore
+        except Exception as e:
+            logger.warning(f"AMP/Tiling failed, falling back to standard FP32: {e}")
+            # Inference only: without no_grad an autograd graph would be kept for
+            # the whole image and .numpy() below rejects tensors that require grad.
+            with torch.no_grad():
+                output_tensor = self.model(img_tensor) # type: ignore
+        output_np = output_tensor.squeeze(0).permute(1, 2, 0).clamp(0, 1).float().cpu().numpy()
+        output_np = (output_np * 255.0).round().astype(np.uint8)
+        elapsed = time.time() - start_time
+        logger.info(f"Inference finished in {elapsed:.2f}s")
+        # Throughput is only meaningful (and only computable) for a non-zero duration.
+        if elapsed > 0:
+            output_megapixels = (output_np.shape[0] * output_np.shape[1]) / (1024 ** 2)
+            throughput = output_megapixels / elapsed
+            logger.info(f"Speed: {throughput:.2f} MP/s")
+        return Image.fromarray(output_np)
+class Swin2SRStrategy(UpscalerStrategy):
+    """Swin2SR x2 upscaler loaded from ``Config.SWIN2SR_ID`` via ``transformers``."""
+    # Upscaling factor of the configured checkpoint (its ID names it "x2").
+    SCALE = 2
+    def __init__(self):
+        super().__init__()
+        self.name = "Swin2SR x2"
+        self.processor = None
+    def load(self) -> None:
+        """Load the image processor and the model; errors are logged and re-raised."""
+        if self.model is not None and self.processor is not None:
+            return
+        logger.info(f"Loading {self.name}...")
+        try:
+            self.processor = AutoImageProcessor.from_pretrained(Config.SWIN2SR_ID)
+            model = Swin2SRForImageSuperResolution.from_pretrained(Config.SWIN2SR_ID)
+            self.model = model.to(Config.DEVICE) # type: ignore
+            logger.info(f"{self.name} loaded successfully.")
+        except Exception as e:
+            logger.error(f"Failed to load Swin2SR: {e}")
+            raise
+    def unload(self) -> None:
+        """Release the processor together with the model."""
+        self.processor = None
+        super().unload()
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* by 2x and return it cropped to exactly twice the input size."""
+        if self.model is None or self.processor is None:
+            self.load()
+        logger.info(f"Starting inference with {self.name}...")
+        start_time = time.time()
+        if self.processor is None:
+            raise ValueError("Processor not loaded")
+        width, height = image.size
+        inputs = self.processor(images=image, return_tensors="pt").to(Config.DEVICE)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+        output = outputs.reconstruction.data.squeeze(0).float().cpu().clamp_(0, 1).numpy()
+        output = np.moveaxis(output, source=0, destination=-1)
+        # The image processor may pad the input on the bottom/right before inference
+        # (see its do_pad / pad_size options); drop the upscaled padding so the
+        # result is not larger than SCALE x the input. A no-op when nothing was padded.
+        output = output[: height * self.SCALE, : width * self.SCALE]
+        output = (output * 255.0).round().astype(np.uint8)
+        logger.info(f"Inference finished in {time.time() - start_time:.2f}s")
+        return Image.fromarray(output)
+class StableDiffusionStrategy(UpscalerStrategy):
+    """Stable Diffusion x4 upscaler pipeline loaded from ``Config.SD_ID``."""
+    def __init__(self):
+        super().__init__()
+        self.name = "Stable Diffusion x4"
+    def load(self) -> None:
+        """Load the pipeline and enable its memory-saving options.
+
+        Errors are logged and re-raised; ``self.model`` is only set once the
+        pipeline is fully configured.
+        """
+        if self.model is not None:
+            return
+        logger.info(f"Loading {self.name} (this may take time)...")
+        try:
+            pipe = StableDiffusionUpscalePipeline.from_pretrained(
+                Config.SD_ID,
+                torch_dtype=torch.float32,
+                low_cpu_mem_usage=True
+            )
+            # Same device handling as the other strategies.
+            pipe = pipe.to(Config.DEVICE)
+            # Optimizations for CPU
+            pipe.enable_attention_slicing("max")
+            pipe.enable_vae_tiling()
+            self.model = pipe
+            logger.info(f"{self.name} loaded successfully.")
+        except Exception as e:
+            logger.error(f"Failed to load Stable Diffusion: {e}")
+            raise
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* with the diffusion pipeline.
+
+        Args:
+            image: Input image; downscaled first if its longest side exceeds
+                ``Config.MAX_IMAGE_SIZE_SD``.
+            **kwargs: ``prompt`` (str) guides the generation; a generic default
+                is used when it is absent.
+        """
+        if self.model is None:
+            self.load()
+        prompt = kwargs.get("prompt", "high quality, detailed")
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Pre-check size
+        longest_side = max(image.size)
+        if longest_side > Config.MAX_IMAGE_SIZE_SD:
+            ratio = Config.MAX_IMAGE_SIZE_SD / longest_side
+            # Clamp to 1 px: int() would round a very thin side down to 0,
+            # which resize() rejects.
+            new_size = (
+                max(1, int(image.size[0] * ratio)),
+                max(1, int(image.size[1] * ratio)),
+            )
+            image = image.resize(new_size, Image.Resampling.LANCZOS)
+            logger.warning(f"Resized input to {new_size} to prevent OOM on CPU.")
+        logger.info(f"Starting inference with {self.name}...")
+        start_time = time.time()
+        # Fixed seed so repeated runs on the same input use the same generator state.
+        generator = torch.manual_seed(42)
+        output = self.model(
+            prompt=prompt,
+            image=image,
+            num_inference_steps=20,
+            guidance_scale=7.0,
+            generator=generator
+        ).images[0] # type: ignore
+        logger.info(f"Inference finished in {time.time() - start_time:.2f}s")
+        return output
+class SpanStrategy(UpscalerStrategy):
+    """SPAN (2xNomosUni) x2 upscaler, loaded from a weight file via spandrel's ``ModelLoader``."""
+    def __init__(self):
+        super().__init__()
+        self.name = "SPAN (NomosUni) x2"
+        # Set once torch.compile has been attempted (whether or not it succeeded).
+        self.compiled = False
+    @staticmethod
+    def _download(url: str, dest: str) -> None:
+        """Stream *url* to *dest*; *dest* only appears once the download is complete."""
+        # load() treats any existing file at *dest* as a valid cached copy, so an
+        # interrupted transfer must never leave a truncated file there.
+        partial = dest + ".part"
+        try:
+            # The timeout bounds the connect and per-read waits, not the whole transfer.
+            with requests.get(url, stream=True, timeout=60) as response:
+                response.raise_for_status()
+                with open(partial, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            os.replace(partial, dest)
+        finally:
+            # After a successful replace the temp file is gone; otherwise remove it.
+            if os.path.exists(partial):
+                os.remove(partial)
+    def _maybe_compile(self, model):
+        """Return *model*, wrapped by torch.compile where that is expected to pay off."""
+        if self.compiled:
+            return model
+        self.compiled = True  # Mark as tried, even if compilation fails below
+        try:
+            if Config.DEVICE == 'cuda':
+                model = torch.compile(model, mode='reduce-overhead')
+                logger.info("✓ torch.compile enabled (reduce-overhead mode)")
+            elif os.name == 'nt' and Config.DEVICE == 'cpu':
+                logger.info("ℹ Skipping torch.compile on Windows CPU.")
+            elif (psutil.cpu_count(logical=False) or 0) < 4 and Config.DEVICE == 'cpu':
+                logger.info("ℹ Skipping torch.compile on low-core CPU.")
+            else:
+                model = torch.compile(model)
+                logger.info("✓ torch.compile enabled (default mode)")
+        except Exception as e:
+            logger.warning(f"⚠ torch.compile failed: {e}")
+        return model
+    def load(self) -> None:
+        """Download the weights if needed and load the model onto ``Config.DEVICE``.
+
+        Does nothing when a model is already loaded. Download and load errors
+        are logged and re-raised; ``self.model`` stays ``None`` in that case.
+        """
+        if self.model is not None:
+            return
+        logger.info(f"Loading {self.name}...")
+        Config.ensure_model_dir()
+        model_path = os.path.join(Config.MODEL_DIR, Config.SPAN_FILENAME)
+        if not os.path.exists(model_path):
+            logger.info(f"Downloading {Config.SPAN_FILENAME}...")
+            try:
+                self._download(Config.SPAN_URL, model_path)
+                logger.info("Download complete.")
+            except Exception as e:
+                logger.error(f"Failed to download model: {e}")
+                raise
+        try:
+            model = ModelLoader().load_from_file(model_path)
+            model.eval()
+            model.to(Config.DEVICE)
+        except Exception as e:
+            logger.error(f"Failed to load model architecture: {e}")
+            raise
+        # Optimization: torch.compile (failures are logged, never fatal)
+        self.model = self._maybe_compile(model)
+        logger.info(f"{self.name} loaded successfully.")
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* by 2x using tiled, autocast inference.
+
+        Falls back to a single full-image pass if the tiled/autocast path raises.
+        """
+        if self.model is None:
+            self.load()
+        logger.info(f"Starting inference with {self.name}...")
+        start_time = time.time()
+        # The tensor conversion below assumes an H x W x 3 array.
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        img_np = np.array(image).astype(np.float32) / 255.0
+        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(Config.DEVICE)
+        # SPAN is very efficient, but we still use tiling for safety on huge images
+        h, w = img_np.shape[:2]
+        tile_config = select_tile_config(h, w)
+        dtype = torch.float16 if Config.DEVICE == 'cuda' else torch.bfloat16
+        try:
+            with torch.no_grad(), torch.autocast(device_type=Config.DEVICE, dtype=dtype):
+                if tile_config['tile'] > 0:
+                    output_tensor = manual_tile_upscale(
+                        self.model,
+                        img_tensor,
+                        tile_size=tile_config['tile'],
+                        tile_pad=tile_config['tile_pad'],
+                        scale=2
+                    )
+                else:
+                    output_tensor = self.model(img_tensor) # type: ignore
+        except Exception as e:
+            logger.warning(f"AMP/Tiling failed, falling back: {e}")
+            # Inference only: without no_grad an autograd graph would be kept for
+            # the whole image and .numpy() below rejects tensors that require grad.
+            with torch.no_grad():
+                output_tensor = self.model(img_tensor) # type: ignore
+        output_np = output_tensor.squeeze(0).permute(1, 2, 0).clamp(0, 1).float().cpu().numpy()
+        output_np = (output_np * 255.0).round().astype(np.uint8)
+        elapsed = time.time() - start_time
+        logger.info(f"Inference finished in {elapsed:.2f}s")
+        return Image.fromarray(output_np)
+class HatsStrategy(UpscalerStrategy):
+    """HAT-S (4xNomos8kSC) x4 upscaler, loaded from a weight file via spandrel's ``ModelLoader``."""
+    def __init__(self):
+        super().__init__()
+        self.name = "HAT-S x4"
+        # Set once torch.compile has been attempted (whether or not it succeeded).
+        self.compiled = False
+    @staticmethod
+    def _download(url: str, dest: str) -> None:
+        """Stream *url* to *dest*; *dest* only appears once the download is complete."""
+        # load() treats any existing file at *dest* as a valid cached copy, so an
+        # interrupted transfer must never leave a truncated file there.
+        partial = dest + ".part"
+        try:
+            # The timeout bounds the connect and per-read waits, not the whole transfer.
+            with requests.get(url, stream=True, timeout=60) as response:
+                response.raise_for_status()
+                with open(partial, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            os.replace(partial, dest)
+        finally:
+            # After a successful replace the temp file is gone; otherwise remove it.
+            if os.path.exists(partial):
+                os.remove(partial)
+    def _maybe_compile(self, model):
+        """Return *model*, wrapped by torch.compile where that is expected to pay off."""
+        if self.compiled:
+            return model
+        self.compiled = True  # Mark as tried, even if compilation fails below
+        try:
+            if Config.DEVICE == 'cuda':
+                model = torch.compile(model, mode='reduce-overhead')
+            elif os.name == 'nt' and Config.DEVICE == 'cpu':
+                pass  # skipped on Windows CPU
+            elif (psutil.cpu_count(logical=False) or 0) < 4 and Config.DEVICE == 'cpu':
+                pass  # skipped on CPUs with fewer than 4 physical cores
+            else:
+                model = torch.compile(model)
+        except Exception as e:
+            # Compilation is optional, but a failure should be visible in the logs.
+            logger.warning(f"⚠ torch.compile failed: {e}")
+        return model
+    def load(self) -> None:
+        """Download the weights if needed and load the model onto ``Config.DEVICE``.
+
+        Does nothing when a model is already loaded. Download and load errors
+        are logged and re-raised; ``self.model`` stays ``None`` in that case.
+        """
+        if self.model is not None:
+            return
+        logger.info(f"Loading {self.name}...")
+        Config.ensure_model_dir()
+        model_path = os.path.join(Config.MODEL_DIR, Config.HATS_FILENAME)
+        if not os.path.exists(model_path):
+            logger.info(f"Downloading {Config.HATS_FILENAME}...")
+            try:
+                self._download(Config.HATS_URL, model_path)
+                logger.info("Download complete.")
+            except Exception as e:
+                logger.error(f"Failed to download model: {e}")
+                raise
+        try:
+            model = ModelLoader().load_from_file(model_path)
+            model.eval()
+            model.to(Config.DEVICE)
+        except Exception as e:
+            logger.error(f"Failed to load model architecture: {e}")
+            raise
+        self.model = self._maybe_compile(model)
+        logger.info(f"{self.name} loaded successfully.")
+    def upscale(self, image: Image.Image, **kwargs) -> Image.Image:
+        """Upscale *image* by 4x using tiled, autocast inference.
+
+        Falls back to a single full-image pass if the tiled/autocast path raises.
+        """
+        if self.model is None:
+            self.load()
+        logger.info(f"Starting inference with {self.name}...")
+        start_time = time.time()
+        # The tensor conversion below assumes an H x W x 3 array.
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        img_np = np.array(image).astype(np.float32) / 255.0
+        img_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0).to(Config.DEVICE)
+        h, w = img_np.shape[:2]
+        tile_config = select_tile_config(h, w)
+        dtype = torch.float16 if Config.DEVICE == 'cuda' else torch.bfloat16
+        try:
+            with torch.no_grad(), torch.autocast(device_type=Config.DEVICE, dtype=dtype):
+                if tile_config['tile'] > 0:
+                    output_tensor = manual_tile_upscale(
+                        self.model,
+                        img_tensor,
+                        tile_size=tile_config['tile'],
+                        tile_pad=tile_config['tile_pad'],
+                        scale=4 # HAT-S is x4
+                    )
+                else:
+                    output_tensor = self.model(img_tensor) # type: ignore
+        except Exception as e:
+            logger.warning(f"AMP/Tiling failed, falling back: {e}")
+            # Inference only: without no_grad an autograd graph would be kept for
+            # the whole image and .numpy() below rejects tensors that require grad.
+            with torch.no_grad():
+                output_tensor = self.model(img_tensor) # type: ignore
+        output_np = output_tensor.squeeze(0).permute(1, 2, 0).clamp(0, 1).float().cpu().numpy()
+        output_np = (output_np * 255.0).round().astype(np.uint8)
+        elapsed = time.time() - start_time
+        logger.info(f"Inference finished in {elapsed:.2f}s")
+        return Image.fromarray(output_np)
+# --- Model Manager (Singleton-ish) ---
+class UpscalerManager:
+    """Holds one strategy instance per model name and tracks which one is in use."""
+    def __init__(self):
+        # Insertion order is also the order of the choices shown in the UI dropdown.
+        self.strategies: Dict[str, UpscalerStrategy] = {
+            "SPAN (NomosUni) x2": SpanStrategy(),
+            "RealESRGAN x2": RealESRGANStrategy(),
+            "HAT-S x4": HatsStrategy(),
+            "Swin2SR x2": Swin2SRStrategy(),
+            "Stable Diffusion x4": StableDiffusionStrategy()
+        }
+        # Name of the strategy most recently returned by get_strategy(), if any.
+        self.current_model_name: Optional[str] = None
+    def get_strategy(self, name: str) -> UpscalerStrategy:
+        """Return the strategy registered under *name*, unloading the previous one.
+
+        Raises:
+            ValueError: if *name* is not a registered model.
+        """
+        if name not in self.strategies:
+            raise ValueError(f"Model {name} not found.")
+        # Memory Optimization for Free Tier (16GB RAM limit):
+        # Ensure only one model is loaded at a time.
+        if self.current_model_name != name:
+            if self.current_model_name is not None:
+                logger.info(f"Switching models: Unloading {self.current_model_name}...")
+                self.strategies[self.current_model_name].unload()
+            self.current_model_name = name
+        return self.strategies[name]
+    def unload_all(self):
+        """Unload every strategy's model and forget the current selection."""
+        for strategy in self.strategies.values():
+            strategy.unload()
+        # Nothing is loaded any more; without this reset the next switch would
+        # log and "unload" a model that is already gone.
+        self.current_model_name = None
+        gc.collect()
+        logger.info("All models unloaded.")
+manager = UpscalerManager()
+# --- Gradio Interface Logic ---
+def process_image(input_img: Optional[Image.Image], model_name: str, prompt: str) -> Tuple[Optional[Image.Image], str, str]:
+    """Upscale *input_img* with the selected model for the Gradio click handler.
+
+    Args:
+        input_img: Image from the UI, or ``None`` when nothing was provided.
+        model_name: Key of the strategy to use.
+        prompt: Text prompt forwarded to the strategy as ``prompt=``.
+
+    Returns:
+        ``(image or None, captured logs, system usage summary)``. On failure the
+        image is ``None`` and the error with its traceback is part of the logs.
+    """
     if input_img is None:
+        return None, get_logs(), get_system_usage()
     try:
+        # get_strategy() unloads the previously selected model when the choice changes.
+        strategy = manager.get_strategy(model_name)
+        output = strategy.upscale(input_img, prompt=prompt)
+        # Explicit GC after heavy operations
         gc.collect()
+        return output, get_logs(), get_system_usage()
     except Exception as e:
+        # The logger writes into the capture buffer, so get_logs() already contains
+        # this message; appending it again would show the error twice.
+        logger.error(f"Critical Error: {str(e)}\n{traceback.format_exc()}")
+        return None, get_logs(), get_system_usage()
+def unload_models():
+    """Unload every model, then return the captured logs and a system usage summary."""
+    manager.unload_all()
+    logs = get_logs()
+    usage = get_system_usage()
+    return logs, usage
+# --- UI Construction ---
+# Markdown text rendered at the top of the page.
 desc = """
+### 🚀 Enterprise-Grade Universal Upscaler (SOTA 2025)
+Select a specialized model to upscale your image.
+* **SPAN (NomosUni) x2**: ⚡ **SOTA Speed**. Fastest CPU model. Best for general use.
+* **RealESRGAN x2**: 🛡️ **Robust**. Best for removing JPEG artifacts and noise.
+* **HAT-S x4**: 💎 **SOTA Quality**. Best texture detail (slower).
+* **Swin2SR x2**: 🎯 High fidelity, removes compression artifacts.
+* **Stable Diffusion x4**: 🎨 Generative upscaling. Adds missing details (slow, high RAM).
 """
+# Two-column layout: inputs and controls on the left, result and logs on the right.
+with gr.Blocks(title="Universal Upscaler Pro") as iface:
     gr.Markdown(desc)
     with gr.Row():
+        with gr.Column(scale=1):
             input_image = gr.Image(type="pil", label="Input Image")
+            with gr.Group():
+                # Choices come from the manager's registry, so the names always
+                # match the keys get_strategy() accepts.
+                model_selector = gr.Dropdown(
+                    choices=list(manager.strategies.keys()),
+                    value="SPAN (NomosUni) x2",
+                    label="Select Model Architecture"
+                )
+                prompt_input = gr.Textbox(
+                    label="Prompt (Stable Diffusion Only)",
+                    value="highly detailed, 4k, sharp",
+                    placeholder="Describe the image content..."
+                )
+            with gr.Accordion("Advanced Settings", open=False):
+                gr.Markdown("Memory Management")
+                unload_btn = gr.Button("Unload All Models (Free RAM)", variant="secondary")
+            submit_btn = gr.Button("✨ Upscale Image", variant="primary", size="lg")
+            # Initial value is sampled once while the UI is built; it is refreshed
+            # only by the two click handlers below.
+            system_info = gr.Label(value=get_system_usage(), label="System Status")
+        with gr.Column(scale=1):
+            output_image = gr.Image(type="pil", label="Upscaled Result")
+            logs_output = gr.TextArea(label="Execution Logs", interactive=False, lines=10)
+    # Event Wiring
+    # process_image returns (image, logs, usage), matching the three outputs.
     submit_btn.click(
         fn=process_image,
         inputs=[input_image, model_selector, prompt_input],
+        outputs=[output_image, logs_output, system_info]
     )
+    # unload_models returns (logs, usage), matching the two outputs.
+    unload_btn.click(
+        fn=unload_models,
+        inputs=[],
+        outputs=[logs_output, system_info]
+    )
+    # Auto-refresh system info every 2 seconds (optional, can be heavy on UI)
+    # iface.load(get_system_usage, None, system_info, every=2)
+# Runs at import time: the module starts the server whenever it is executed.
 iface.launch()

requirements.txt CHANGED Viewed

@@ -7,4 +7,11 @@ pillow
 gradio
 opencv-python
 spandrel
-requests

 gradio
 opencv-python
 spandrel
+requests
+psutil
+onnx
+onnxruntime
+basicsr
+realesrgan
+openvino
+optimum