added more sliders

Next step: add nominal values, feedback representations, and link info for each illusion.
inference.py (+325 -59)
@@ -9,10 +9,17 @@ import numpy as np
 import os
 import requests
 import time
+import copy
+from collections import OrderedDict
 from pathlib import Path

-# Check
-
+# Check for available hardware acceleration
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+    device = torch.device("mps")  # Use Apple Metal Performance Shaders for M-series Macs
+else:
+    device = torch.device("cpu")
 print(f"Using device: {device}")

 # Constants

@@ -24,7 +31,23 @@ MODEL_URLS = {
 IMAGENET_MEAN = [0.485, 0.456, 0.406]
 IMAGENET_STD = [0.229, 0.224, 0.225]

-#
+# Define the transforms based on whether normalization is on or off
+def get_transform(input_size=224, normalize=False, norm_mean=IMAGENET_MEAN, norm_std=IMAGENET_STD):
+    if normalize:
+        return transforms.Compose([
+            transforms.Resize(input_size),
+            transforms.CenterCrop(input_size),
+            transforms.ToTensor(),
+            transforms.Normalize(norm_mean, norm_std),
+        ])
+    else:
+        return transforms.Compose([
+            transforms.Resize(input_size),
+            transforms.CenterCrop(input_size),
+            transforms.ToTensor(),
+        ])
+
+# Default transform without normalization
 transform = transforms.Compose([
     transforms.Resize(224),
     transforms.CenterCrop(224),

@@ -33,6 +56,98 @@ transform = transforms.Compose([

 normalize_transform = transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)

+def extract_middle_layers(model, layer_index):
+    """
+    Extract a subset of the model up to a specific layer.
+
+    Args:
+        model: The neural network model
+        layer_index: String 'all' for the full model, or a layer identifier (string or int)
+                     For ResNet: integers 0-8 representing specific layers
+                     For ViT: strings like 'encoder.layers.encoder_layer_3'
+
+    Returns:
+        A modified model that outputs features from the specified layer
+    """
+    if isinstance(layer_index, str) and layer_index == 'all':
+        return model
+
+    # Special case for ViT's encoder layers with DataParallel wrapper
+    if isinstance(layer_index, str) and layer_index.startswith('encoder.layers.encoder_layer_'):
+        try:
+            target_layer_idx = int(layer_index.split('_')[-1])
+
+            # Create a deep copy of the model to avoid modifying the original
+            new_model = copy.deepcopy(model)
+
+            # For models wrapped in DataParallel
+            if hasattr(new_model, 'module'):
+                # Create a subset of encoder layers up to the specified index
+                encoder_layers = nn.Sequential()
+                for i in range(target_layer_idx + 1):
+                    layer_name = f"encoder_layer_{i}"
+                    if hasattr(new_model.module.encoder.layers, layer_name):
+                        encoder_layers.add_module(layer_name,
+                                                  getattr(new_model.module.encoder.layers, layer_name))
+
+                # Replace the encoder layers with our truncated version
+                new_model.module.encoder.layers = encoder_layers
+
+                # Remove the heads since we're stopping at the encoder layer
+                new_model.module.heads = nn.Identity()
+
+                return new_model
+            else:
+                # Direct model access (not DataParallel)
+                encoder_layers = nn.Sequential()
+                for i in range(target_layer_idx + 1):
+                    layer_name = f"encoder_layer_{i}"
+                    if hasattr(new_model.encoder.layers, layer_name):
+                        encoder_layers.add_module(layer_name,
+                                                  getattr(new_model.encoder.layers, layer_name))
+
+                # Replace the encoder layers with our truncated version
+                new_model.encoder.layers = encoder_layers
+
+                # Remove the heads since we're stopping at the encoder layer
+                new_model.heads = nn.Identity()
+
+                return new_model
+
+        except (ValueError, IndexError) as e:
+            raise ValueError(f"Invalid ViT layer specification: {layer_index}. Error: {e}")
+
+    # Handling for ViT whole blocks
+    elif hasattr(model, 'blocks') or (hasattr(model, 'module') and hasattr(model.module, 'blocks')):
+        # Check for DataParallel wrapper
+        base_model = model.module if hasattr(model, 'module') else model
+
+        # Create a deep copy to avoid modifying the original
+        new_model = copy.deepcopy(model)
+        base_new_model = new_model.module if hasattr(new_model, 'module') else new_model
+
+        # Add the desired number of transformer blocks
+        if isinstance(layer_index, int):
+            # Truncate the blocks
+            base_new_model.blocks = base_new_model.blocks[:layer_index+1]
+
+        return new_model
+
+    else:
+        # Original ResNet/VGG handling
+        modules = list(model.named_children())
+        print(f"DEBUG - extract_middle_layers - Looking for '{layer_index}' in {[name for name, _ in modules]}")
+
+        cutoff_idx = next((i for i, (name, _) in enumerate(modules)
+                           if name == str(layer_index)), None)
+
+        if cutoff_idx is not None:
+            # Keep modules up to and including the target
+            new_model = nn.Sequential(OrderedDict(modules[:cutoff_idx+1]))
+            return new_model
+        else:
+            raise ValueError(f"Module {layer_index} not found in model")
+
 # Get ImageNet labels
 def get_imagenet_labels():
     url = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"

@@ -98,21 +213,49 @@ class InferStep:
         scaled_grad = grad / (grad_norm + 1e-10)
         return scaled_grad * self.step_size

-def get_inference_configs(eps=0.5, n_itr=50):
-    """Generate inference configuration with customizable parameters."""
+def get_inference_configs(inference_type='IncreaseConfidence', eps=0.5, n_itr=50, step_size=1.0):
+    """Generate inference configuration with customizable parameters.
+
+    Args:
+        inference_type (str): Type of inference ('IncreaseConfidence' or 'ReverseDiffusion')
+        eps (float): Maximum perturbation size
+        n_itr (int): Number of iterations
+        step_size (float): Step size for each iteration
+    """
+
+    # Base configuration common to all inference types
     config = {
-        'loss_infer':
-        'loss_function': 'CE',  # Loss function: Cross Entropy
+        'loss_infer': inference_type,  # How to guide the optimization
         'n_itr': n_itr,  # Number of iterations
        'eps': eps,  # Maximum perturbation size
-        'step_size':
+        'step_size': step_size,  # Step size for each iteration
        'diffusion_noise_ratio': 0.0,  # No diffusion noise
        'initial_inference_noise_ratio': 0.0,  # No initial noise
        'top_layer': 'all',  # Use all layers of the model
-        'inference_normalization':
-        'recognition_normalization':
-        'iterations_to_show': [1, 5, 10, 20, 30, 40, 50, n_itr]  # Specific iterations to visualize
+        'inference_normalization': False,  # Apply normalization during inference
+        'recognition_normalization': False,  # Apply normalization during recognition
+        'iterations_to_show': [1, 5, 10, 20, 30, 40, 50, n_itr],  # Specific iterations to visualize
+        'misc_info': {'keep_grads': False}  # Additional configuration
    }
+
+    # Customize based on inference type
+    if inference_type == 'IncreaseConfidence':
+        config['loss_function'] = 'CE'  # Cross Entropy
+
+    elif inference_type == 'ReverseDiffusion':
+        config['loss_function'] = 'MSE'  # Mean Square Error
+        config['initial_inference_noise_ratio'] = 0.05  # Initial noise for diffusion
+        config['diffusion_noise_ratio'] = 0.01  # Add noise during diffusion
+
+    elif inference_type == 'GradModulation':
+        config['loss_function'] = 'CE'  # Cross Entropy
+        config['misc_info']['grad_modulation'] = 0.5  # Gradient modulation strength
+
+    elif inference_type == 'CompositionalFusion':
+        config['loss_function'] = 'CE'  # Cross Entropy
+        config['misc_info']['positive_classes'] = []  # Classes to maximize
+        config['misc_info']['negative_classes'] = []  # Classes to minimize
+
    return config

 class GenerativeInferenceModel:

@@ -128,10 +271,9 @@ class GenerativeInferenceModel:
         """
         try:
             print(f"\n=== Running model integrity check for {model_type} ===")
-            # Create a deterministic test input
-            test_input = torch.zeros(1, 3, 224, 224)
+            # Create a deterministic test input directly on the correct device
+            test_input = torch.zeros(1, 3, 224, 224, device=device)
             test_input[0, 0, 100:124, 100:124] = 0.5  # Red square
-            test_input = test_input.to(model.device if hasattr(model, 'device') else 'cpu')

             # Run forward pass
             with torch.no_grad():

@@ -170,13 +312,17 @@ class GenerativeInferenceModel:

         except Exception as e:
             print(f"❌ Model integrity check failed with error: {e}")
-
+            # Rather than failing completely, we'll continue
+            return True

     def load_model(self, model_type):
         """Load model from checkpoint or use pretrained model."""
         if model_type in self.models:
+            print(f"Using cached {model_type} model")
             return self.models[model_type]

+        # Record loading time for performance analysis
+        start_time = time.time()
         model_path = download_model(model_type)

         # Create a sequential model with normalizer and ResNet50

@@ -495,10 +641,16 @@ class GenerativeInferenceModel:

         # Store the model for future use
         self.models[model_type] = model
+        end_time = time.time()
+        load_time = end_time - start_time
+        print(f"Model {model_type} loaded in {load_time:.2f} seconds")
         return model

     def inference(self, image, model_type, config):
         """Run generative inference on the image."""
+        # Time the entire inference process
+        inference_start = time.time()
+
         # Load model if not already loaded
         model = self.load_model(model_type)

@@ -508,10 +660,29 @@ class GenerativeInferenceModel:
                 image = Image.open(image).convert('RGB')
             else:
                 raise ValueError(f"Image path does not exist: {image}")
+        elif isinstance(image, torch.Tensor):
+            raise ValueError(f"Image type {type(image)}, looks like already a transformed tensor")
+
+        # Prepare image tensor - match original code's conditional transform
+        load_start = time.time()
+        use_norm = config['inference_normalization'] == 'on'
+        custom_transform = get_transform(
+            input_size=224,
+            normalize=use_norm,
+            norm_mean=IMAGENET_MEAN,
+            norm_std=IMAGENET_STD
+        )

-        #
-
+        # Special handling for GradModulation as in original
+        if config['loss_infer'] == 'GradModulation' and 'misc_info' in config and 'grad_modulation' in config['misc_info']:
+            grad_modulation = config['misc_info']['grad_modulation']
+            image_tensor = custom_transform(image).unsqueeze(0).to(device)
+            image_tensor = image_tensor * (1-grad_modulation) + grad_modulation * torch.randn_like(image_tensor).to(device)
+        else:
+            image_tensor = custom_transform(image).unsqueeze(0).to(device)
+
         image_tensor.requires_grad = True
+        print(f"Image loaded and processed in {time.time() - load_start:.2f} seconds")

         # Check model structure
         is_sequential = isinstance(model, nn.Sequential)

@@ -521,14 +692,21 @@ class GenerativeInferenceModel:
         # If the model is sequential with a normalizer, skip the normalization step
         if is_sequential and isinstance(model[0], NormalizeByChannelMeanStd):
             print("Model is sequential with normalization")
-            output_original = model(image_tensor)  # Model includes normalization
             # Get the core model part (typically at index 1 in Sequential)
             core_model = model[1]
+            if config['inference_normalization']:
+                output_original = model(image_tensor)  # Model includes normalization
+            else:
+                output_original = core_model(image_tensor)  # Model includes normalization
+
         else:
             print("Model is not sequential with normalization")
             # Use manual normalization for non-sequential models
-
-
+            if config['inference_normalization']:
+                normalized_tensor = normalize_transform(image_tensor)
+                output_original = model(normalized_tensor)
+            else:
+                output_original = model(image_tensor)
             core_model = model

         probs_orig = F.softmax(output_original, dim=1)

@@ -545,59 +723,126 @@ class GenerativeInferenceModel:
         x = image_tensor.clone().detach().requires_grad_(True)
         all_steps = [image_tensor[0].detach().cpu()]

+        # For ReverseDiffusion, extract selected layer and initialize with noisy features
+        noisy_features = None
+        layer_model = None
+        if config['loss_infer'] == 'ReverseDiffusion':
+            print(f"Setting up ReverseDiffusion with layer {config['top_layer']} and noise {config['initial_inference_noise_ratio']}...")
+
+            # Extract model up to the specified layer
+            try:
+                # Start by finding the actual model to use
+                base_model = model
+
+                # Handle DataParallel wrapper if present
+                if hasattr(base_model, 'module'):
+                    base_model = base_model.module
+
+                # Log the initial model structure
+                print(f"DEBUG - Initial model structure: {type(base_model)}")
+
+                # If we have a Sequential model (which is likely our normalizer + model structure)
+                if isinstance(base_model, nn.Sequential):
+                    print(f"DEBUG - Sequential model with {len(list(base_model.children()))} children")
+
+                    # If this is our NormalizeByChannelMeanStd + ResNet pattern
+                    if len(list(base_model.children())) >= 2:
+                        # The actual ResNet model is the second component (index 1)
+                        actual_model = list(base_model.children())[1]
+                        print(f"DEBUG - Using ResNet component: {type(actual_model)}")
+                        print(f"DEBUG - Available layers: {[name for name, _ in actual_model.named_children()]}")
+
+                        # Extract from the actual ResNet
+                        layer_model = extract_middle_layers(actual_model, config['top_layer'])
+                    else:
+                        # Just a single component Sequential
+                        layer_model = extract_middle_layers(base_model, config['top_layer'])
+                else:
+                    # Not Sequential, might be direct model
+                    print(f"DEBUG - Available layers: {[name for name, _ in base_model.named_children()]}")
+                    layer_model = extract_middle_layers(base_model, config['top_layer'])
+
+                print(f"Successfully extracted model up to layer: {config['top_layer']}")
+            except ValueError as e:
+                print(f"Layer extraction failed: {e}. Using full model.")
+                layer_model = model
+
+            # Add noise to the image - exactly match original code
+            added_noise = config['initial_inference_noise_ratio'] * torch.randn_like(image_tensor).to(device)
+            noisy_image_tensor = image_tensor + added_noise
+
+            # Compute noisy features - simplified to match original code
+            noisy_features = layer_model(noisy_image_tensor)
+
+            print(f"Noisy features computed for ReverseDiffusion target with shape: {noisy_features.shape if hasattr(noisy_features, 'shape') else 'unknown'}")
+
         # Main inference loop
+        print(f"Starting inference loop with {config['n_itr']} iterations for {config['loss_infer']}...")
+        loop_start = time.time()
         for i in range(config['n_itr']):
             # Reset gradients
             x.grad = None

-            # Forward pass
-            if
-
+            # Forward pass - use layer_model for ReverseDiffusion, full model otherwise
+            if config['loss_infer'] == 'ReverseDiffusion' and layer_model is not None:
+                # Use the extracted layer model for ReverseDiffusion
+                # In original code, normalization is handled at transform time, not during forward pass
+                output = layer_model(x)
             else:
-                #
-
-                output = model(
+                # Standard forward pass with full model
+                # Simplified to match original code's approach
+                output = model(x)

-            # Calculate loss
+            # Calculate loss and gradients based on inference type
             try:
-
-
-
+                if config['loss_infer'] == 'ReverseDiffusion':
+                    # Use MSE loss to match the noisy features
+                    assert config['loss_function'] == 'MSE', "Reverse Diffusion loss function must be MSE"
+                    if noisy_features is not None:
+                        loss = F.mse_loss(output, noisy_features)
+                        grad = torch.autograd.grad(loss, x)[0]  # Removed retain_graph=True to match original
+                    else:
+                        raise ValueError("Noisy features not computed for ReverseDiffusion")

-                #
-
-
-
-
-
-
-
-
-
-
-
-
-
+                else:  # Default 'IncreaseConfidence' approach
+                    # Get the least confident classes
+                    num_classes = min(10, least_confident_classes.size(1))
+                    target_classes = least_confident_classes[0, :num_classes]
+
+                    # Create targets for least confident classes
+                    targets = torch.tensor([idx.item() for idx in target_classes], device=device)
+
+                    # Use a combined loss to increase confidence
+                    loss = 0
+                    for target in targets:
+                        # Create one-hot target
+                        one_hot = torch.zeros_like(output)
+                        one_hot[0, target] = 1
+                        # Use loss to maximize confidence
+                        loss = loss + F.mse_loss(F.softmax(output, dim=1), one_hot)
+
+                    grad = torch.autograd.grad(loss, x, retain_graph=True)[0]

                 if grad is None:
                     print("Warning: Direct gradient calculation failed")
                     # Fall back to random perturbation
                     random_noise = (torch.rand_like(x) - 0.5) * 2 * config['step_size']
-                    x = x + random_noise
+                    x = infer_step.project(x + random_noise)
                 else:
-                    # Update image with gradient
-
-
-
-
+                    # Update image with gradient - do this exactly as in original code
+                    adjusted_grad = infer_step.step(x, grad)
+
+                    # Add diffusion noise if specified
+                    diffusion_noise = config['diffusion_noise_ratio'] * torch.randn_like(x).to(device)
+
+                    # Apply gradient and noise in one operation before projecting, exactly as in original
+                    x = infer_step.project(x.clone() + adjusted_grad + diffusion_noise)

             except Exception as e:
                 print(f"Error in gradient calculation: {e}")
-                # Fall back to random perturbation
+                # Fall back to random perturbation - match original code
                 random_noise = (torch.rand_like(x) - 0.5) * 2 * config['step_size']
-                x = x + random_noise
-                x = infer_step.project(x)
+                x = infer_step.project(x.clone() + random_noise)

             # Store step if in iterations_to_show
             if i+1 in config['iterations_to_show'] or i+1 == config['n_itr']:

@@ -606,18 +851,39 @@ class GenerativeInferenceModel:
         # Print some info about the inference
         with torch.no_grad():
             if is_sequential and isinstance(model[0], NormalizeByChannelMeanStd):
-
+                if config['inference_normalization']:
+                    final_output = model(x)
+                else:
+                    final_output = core_model(x)
             else:
-
-
+                if config['inference_normalization']:
+                    normalized_x = normalize_transform(x)
+                    final_output = model(normalized_x)
+                else:
+                    final_output = model(x)

             final_probs = F.softmax(final_output, dim=1)
             final_conf, final_classes = torch.max(final_probs, 1)
+
+            # Calculate timing information
+            loop_time = time.time() - loop_start
+            total_time = time.time() - inference_start
+            avg_iter_time = loop_time / config['n_itr'] if config['n_itr'] > 0 else 0
+
             print(f"Original top class: {classes_orig.item()} ({conf_orig.item():.4f})")
             print(f"Final top class: {final_classes.item()} ({final_conf.item():.4f})")
+            print(f"Inference loop completed in {loop_time:.2f} seconds ({avg_iter_time:.4f} sec/iteration)")
+            print(f"Total inference time: {total_time:.2f} seconds")

-        # Return
-        return
+        # Return results in format compatible with both old and new code
+        return {
+            'final_image': x[0].detach().cpu(),
+            'steps': all_steps,
+            'original_class': classes_orig.item(),
+            'original_confidence': conf_orig.item(),
+            'final_class': final_classes.item(),
+            'final_confidence': final_conf.item()
+        }

 # Utility function to show inference steps
 def show_inference_steps(steps, figsize=(15, 10)):
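
For reference, a minimal usage sketch of the API as it stands after this commit. This is a sketch under assumptions, not part of the commit itself: it assumes the file is importable as `inference`, that `GenerativeInferenceModel()` takes no constructor arguments, that `'resnet50'` is a valid key in `MODEL_URLS`, and that `illusion.png` is a placeholder for a local test image.

# Minimal usage sketch. Assumed (not fixed by this diff): module importable as
# `inference`, no-arg constructor, 'resnet50' in MODEL_URLS, local test image.
from inference import GenerativeInferenceModel, get_inference_configs, show_inference_steps

engine = GenerativeInferenceModel()

# Default confidence-increasing inference
config = get_inference_configs(inference_type='IncreaseConfidence', eps=0.5, n_itr=50, step_size=1.0)
result = engine.inference('illusion.png', 'resnet50', config)
print(result['original_class'], result['original_confidence'])
print(result['final_class'], result['final_confidence'])

# ReverseDiffusion: pull the image toward features of a noisy copy at an intermediate layer
config = get_inference_configs(inference_type='ReverseDiffusion', n_itr=50)
config['top_layer'] = 4  # integer layer index per the extract_middle_layers docstring; 'all' keeps the full model
result = engine.inference('illusion.png', 'resnet50', config)

# Visualize the iterations saved at config['iterations_to_show']
show_inference_steps(result['steps'])

The dictionary keys ('final_image', 'steps', 'original_class', 'original_confidence', 'final_class', 'final_confidence') come from the new return block added at the end of `inference()` above.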
|