"""
Real AI Depth Estimation using Hugging Face Transformers
Uses Depth-Anything V2 directly (no ONNX conversion needed!)
"""
from pathlib import Path

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
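
# Typical usage (a sketch based on the test block at the bottom of this file):
#
#   estimator = TransformersDepthEstimator(model_size="small")
#   depth = estimator.predict(rgb_image)  # (H, W) float array in [0, 1]
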
class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers.

    Loads Depth-Anything V2 as a regular PyTorch model, so no ONNX export
    or separate runtime is needed.
    """
    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize the depth estimator.

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)
        """
        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Default the cache directory to the project folder
        if cache_dir is None:
            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_dir)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        # Model repository mapping
        model_map = {
            "small": "depth-anything/Depth-Anything-V2-Small-hf",
            "base": "depth-anything/Depth-Anything-V2-Base-hf",
            "large": "depth-anything/Depth-Anything-V2-Large-hf",
        }

        if model_size not in model_map:
            raise ValueError(f"Invalid model_size. Choose from: {list(model_map.keys())}")

        repo_id = model_map[model_size]

        # Load processor and model with the custom cache directory
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir,
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir,
        )

        # Move the model to the target device and switch to inference mode
        self.model.to(self.device)
        self.model.eval()

        print("[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")
    def predict(self, image):
        """
        Predict a depth map for an image.

        Args:
            image: numpy array (H, W, 3) in RGB format (a PIL Image is also accepted)

        Returns:
            depth: numpy array (H, W) with depth values normalized to [0, 1]
        """
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # Prepare image
        inputs = self.processor(images=image_pil, return_tensors="pt")

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Interpolate back to the original size (PIL .size is (W, H), so reverse it)
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image_pil.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        # Convert to numpy
        depth = prediction.squeeze().cpu().numpy()

        # Normalize to [0, 1]; the epsilon guards against a constant depth map
        depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        return depth
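

# Illustrative sketch (not part of the original module): a small helper for
# turning the normalized [0, 1] depth map into an 8-bit image that can be
# saved or colorized. The name `depth_to_uint8` is a hypothetical choice here.
def depth_to_uint8(depth):
    """Convert a [0, 1] float depth map to a uint8 image in [0, 255]."""
    return (np.clip(depth, 0.0, 1.0) * 255).astype(np.uint8)
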
# Test function
if __name__ == "__main__":
    import time

    import cv2

    print("=" * 70)
    print(" Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)

    # Predict depth
    print("[*] Running depth estimation...")
    start = time.time()
    depth = estimator.predict(test_image)
    elapsed = (time.time() - start) * 1000

    print("[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")
print("\n" + "=" * 70)
print(" SUCCESS! Real AI Depth Estimation Working!")
print("=" * 70)
print("\nYou can now use real AI depth estimation!")
print("\nTo use in your app:")
print(" from backend.utils.transformers_depth import TransformersDepthEstimator")
print(" estimator = TransformersDepthEstimator('small')")
print(" depth = estimator.predict(image)")
print("=" * 70)