"""
Real AI Depth Estimation using Hugging Face Transformers
Uses Depth-Anything V2 directly (no ONNX conversion needed!)
"""
from pathlib import Path

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
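
# Typical usage (a sketch based on the test block at the bottom of this file):
#
#   estimator = TransformersDepthEstimator(model_size="small")
#   depth = estimator.predict(rgb_image)  # (H, W) float array in [0, 1]
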
class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers.

    Loads Depth-Anything V2 as a regular PyTorch model, so no ONNX export
    or separate runtime is needed.
    """
    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize the depth estimator.

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)
        """
        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Default the cache directory to the project folder
        if cache_dir is None:
            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_dir)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        # Model repository mapping
        model_map = {
            "small": "depth-anything/Depth-Anything-V2-Small-hf",
            "base": "depth-anything/Depth-Anything-V2-Base-hf",
            "large": "depth-anything/Depth-Anything-V2-Large-hf",
        }

        if model_size not in model_map:
            raise ValueError(f"Invalid model_size. Choose from: {list(model_map.keys())}")

        repo_id = model_map[model_size]

        # Load processor and model with the custom cache directory
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir,
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir,
        )

        # Move the model to the target device and switch to inference mode
        self.model.to(self.device)
        self.model.eval()

        print("[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")
    def predict(self, image):
        """
        Predict a depth map for an image.

        Args:
            image: numpy array (H, W, 3) in RGB format (a PIL Image is also accepted)

        Returns:
            depth: numpy array (H, W) with depth values normalized to [0, 1]
        """
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # Prepare image
        inputs = self.processor(images=image_pil, return_tensors="pt")

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Interpolate back to the original size (PIL .size is (W, H), so reverse it)
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image_pil.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        # Convert to numpy
        depth = prediction.squeeze().cpu().numpy()

        # Normalize to [0, 1]; the epsilon guards against a constant depth map
        depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        return depth
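

# Illustrative sketch (not part of the original module): a small helper for
# turning the normalized [0, 1] depth map into an 8-bit image that can be
# saved or colorized. The name `depth_to_uint8` is a hypothetical choice here.
def depth_to_uint8(depth):
    """Convert a [0, 1] float depth map to a uint8 image in [0, 255]."""
    return (np.clip(depth, 0.0, 1.0) * 255).astype(np.uint8)
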
# Test function
if __name__ == "__main__":
    import time

    import cv2

    print("=" * 70)
    print(" Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)

    # Predict depth
    print("[*] Running depth estimation...")
    start = time.time()
    depth = estimator.predict(test_image)
    elapsed = (time.time() - start) * 1000

    print("[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")
print("\n" + "=" * 70)
print(" SUCCESS! Real AI Depth Estimation Working!")
print("=" * 70)
print("\nYou can now use real AI depth estimation!")
print("\nTo use in your app:")
print(" from backend.utils.transformers_depth import TransformersDepthEstimator")
print(" estimator = TransformersDepthEstimator('small')")
print(" depth = estimator.predict(image)")
print("=" * 70)