| """ | |
| Real AI Depth Estimation using Hugging Face Transformers | |
| Uses Depth-Anything V2 directly (no ONNX conversion needed!) | |
| """ | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| from transformers import AutoImageProcessor, AutoModelForDepthEstimation | |
class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers
    Easier than ONNX - works directly with PyTorch models!
    """

    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize depth estimator

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)
        """
        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Set cache directory to project folder
        if cache_dir is None:
            from pathlib import Path
            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_dir)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        # Model repository mapping
        model_map = {
            "small": "depth-anything/Depth-Anything-V2-Small-hf",
            "base": "depth-anything/Depth-Anything-V2-Base-hf",
            "large": "depth-anything/Depth-Anything-V2-Large-hf"
        }

        if model_size not in model_map:
            raise ValueError(f"Invalid model_size. Choose from: {list(model_map.keys())}")

        repo_id = model_map[model_size]

        # Load processor and model with custom cache directory
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )

        # Move model to device
        self.model.to(self.device)
        self.model.eval()

        print("[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")

    def predict(self, image):
        """
        Predict depth map for an image

        Args:
            image: numpy array (H, W, 3) in RGB format

        Returns:
            depth: numpy array (H, W) with depth values in [0, 1]
        """
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # Prepare image
        inputs = self.processor(images=image_pil, return_tensors="pt")

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Interpolate to original size
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image_pil.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        # Convert to numpy and normalize
        depth = prediction.squeeze().cpu().numpy()

        # Normalize to [0, 1]
        depth = (depth - depth.min()) / (depth.max() - depth.min())

        return depth
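

# --- Illustrative helper (not part of the original module) ------------------
# A minimal sketch of one way to turn the normalized depth map returned by
# predict() into an 8-bit color image for display. Assumes OpenCV is
# installed; the colormap choice is arbitrary.
def depth_to_colormap(depth):
    """Convert an (H, W) depth map in [0, 1] to a BGR color image for display."""
    import cv2
    depth_u8 = (np.clip(depth, 0.0, 1.0) * 255).astype(np.uint8)  # scale to 8-bit
    return cv2.applyColorMap(depth_u8, cv2.COLORMAP_INFERNO)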


# Test function
if __name__ == "__main__":
    print("=" * 70)
    print(" Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)

    # Predict depth
    print("[*] Running depth estimation...")
    import time
    start = time.time()
    depth = estimator.predict(test_image)
    elapsed = (time.time() - start) * 1000

    print("[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")

    print("\n" + "=" * 70)
    print(" SUCCESS! Real AI Depth Estimation Working!")
    print("=" * 70)
    print("\nYou can now use real AI depth estimation!")
    print("\nTo use in your app:")
    print("  from backend.utils.transformers_depth import TransformersDepthEstimator")
    print("  estimator = TransformersDepthEstimator('small')")
    print("  depth = estimator.predict(image)")
    print("=" * 70)