Spaces:

dimdimz
/

DimensioDepth

Sleeping

App Files Files Community

DimensioDepth / app.py

wwieerrz

🎚️ AWESOME: Add Advanced Video Export Controls!

c17c64c about 1 month ago

raw

history blame contribute delete

19.5 kB

	"""
	DimensioDepth - Add Dimension to Everything
	Advanced AI Depth Estimation with 3D Visualization

	Powered by Depth-Anything V2 \| Runs on Hugging Face Spaces
	"""

	import streamlit as st
	import numpy as np
	import cv2
	from PIL import Image
	from pathlib import Path
	import sys

	# Page config
	st.set_page_config(
	page_title="DimensioDepth - AI Depth Estimation",
	page_icon="🎨",
	layout="wide"
	)

	# Add backend to path
	sys.path.append(str(Path(__file__).parent / "backend"))

	# Import backend utilities
	from backend.utils.image_processing import (
	depth_to_colormap,
	create_side_by_side
	)

	# Try to import REAL AI model
	@st.cache_resource
	def load_model():
	try:
	print("[*] Attempting to import TransformersDepthEstimator...")
	from backend.utils.transformers_depth import TransformersDepthEstimator
	print("[*] Import successful! Loading REAL AI Depth-Anything V2 BASE model...")
	print("[*] This will download ~372MB on first run (one-time download)")
	depth_estimator = TransformersDepthEstimator(model_size="base")
	print("[+] REAL AI MODE ACTIVE - BASE MODEL!")
	print("[+] Quality: SUPERB (best available)")
	return depth_estimator, True, "BASE (372MB)"
	except Exception as e:
	print(f"[!] FULL ERROR TRACEBACK:")
	import traceback
	traceback.print_exc()
	print(f"[!] Error type: {type(e).__name__}")
	print(f"[!] Error message: {str(e)}")
	print("[*] Falling back to DEMO MODE")
	return None, False, "Demo Mode"

	depth_estimator, USE_REAL_AI, MODEL_SIZE = load_model()


	def estimate_depth(image):
	"""Estimate depth from an input image using REAL AI or DEMO MODE"""
	if image is None:
	return None, None, "Please upload an image first"

	try:
	# Convert PIL to numpy if needed
	if isinstance(image, Image.Image):
	image = np.array(image)

	# Generate depth map
	if USE_REAL_AI:
	depth = depth_estimator.predict(image)
	mode_text = "REAL AI (Depth-Anything V2)"
	else:
	from backend.utils.demo_depth import generate_smart_depth
	depth = generate_smart_depth(image)
	mode_text = "DEMO MODE (Synthetic)"

	# Create colored depth map with Inferno colormap (best for depth)
	depth_colored = depth_to_colormap(depth, cv2.COLORMAP_INFERNO)

	# Create grayscale depth map
	depth_gray = (depth * 255).astype(np.uint8)
	depth_gray = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2RGB)

	return depth_colored, depth_gray, mode_text, image.shape, depth.shape

	except Exception as e:
	st.error(f"Error during depth estimation: {str(e)}")
	import traceback
	traceback.print_exc()
	return None, None, None, None, None


	# Header
	st.title("🎨 DimensioDepth - Add Dimension to Everything")
	st.markdown("### Transform 2D images into stunning 3D depth visualizations")

	# Status banner
	if USE_REAL_AI:
	st.success(f"🚀 REAL AI MODE ACTIVE! - Powered by Depth-Anything V2 {MODEL_SIZE} - SUPERB Quality!")
	else:
	st.info("Running in DEMO MODE - Ultra-fast synthetic depth estimation")

	st.markdown("---")

	# Main interface
	col1, col2 = st.columns(2)

	with col1:
	st.subheader("Input")
	uploaded_file = st.file_uploader("Upload Your Image", type=['png', 'jpg', 'jpeg'])
	process_btn = st.button("🚀 Generate Depth Map", type="primary")

	with col2:
	st.subheader("Output")

	# Processing
	if uploaded_file is not None and process_btn:
	# Load image
	image = Image.open(uploaded_file)

	with col1:
	st.image(image, caption="Original Image", use_column_width=True)

	with st.spinner("Generating depth map..."):
	depth_colored, depth_gray, mode_text, input_shape, output_shape = estimate_depth(image)

	if depth_colored is not None:
	# Store in session state for video export
	st.session_state['depth_colored'] = depth_colored
	st.session_state['depth_gray'] = depth_gray
	st.session_state['original_image'] = np.array(image)

	with col2:
	tab1, tab2 = st.tabs(["Colored", "Grayscale"])

	with tab1:
	st.image(depth_colored, caption="Depth Map (Colored)", use_column_width=True)

	with tab2:
	st.image(depth_gray, caption="Depth Map (Grayscale)", use_column_width=True)

	# Info
	st.success(f"✅ Depth Estimation Complete!")
	st.info(f"""
	Mode: {mode_text}
	Input Size: {input_shape[1]}x{input_shape[0]}
	Output Size: {output_shape[1]}x{output_shape[0]}
	{f'Powered by: Depth-Anything V2 {MODEL_SIZE}' if USE_REAL_AI else 'Processing: Ultra-fast (<50ms) synthetic depth'}
	""")

	# Video Export Section
	st.markdown("---")
	st.subheader("🎬 Video Export")

	if 'depth_colored' in st.session_state:
	with st.expander("Export Depth Map as Video", expanded=True):
	col_vid1, col_vid2 = st.columns(2)

	with col_vid1:
	video_duration = st.slider("Duration (seconds)", 1, 30, 10, help="Length of each animation loop")
	video_fps = st.selectbox("FPS", [24, 30, 60], index=1)
	video_resolution = st.selectbox("Resolution", [
	"Original",
	"4K UHD (3840x2160)",
	"1080p (1920x1080)",
	"720p (1280x720)",
	"Square 1080p (1080x1080)",
	"Portrait 1080p (1080x1920)",
	"Portrait 720p (720x1280)"
	], index=2)

	with col_vid2:
	video_effect = st.selectbox("Camera Effect", [
	"Zoom In",
	"Zoom Out",
	"Pan Left",
	"Pan Right",
	"Pan Up",
	"Pan Down",
	"Rotate CW",
	"Rotate CCW",
	"Ken Burns (Zoom + Pan)",
	"Dolly In",
	"Dolly Out",
	"Tilt Up",
	"Tilt Down",
	"Orbit"
	])

	effect_intensity = st.slider("Effect Intensity", 0.1, 3.0, 1.0, 0.1,
	help="Control how strong the camera movement is (0.5 = subtle, 2.0 = dramatic)")

	# Additional controls row
	col_vid3, col_vid4 = st.columns(2)
	with col_vid3:
	loop_count = st.slider("Number of Loops", 1, 10, 1,
	help="How many times to repeat the animation")

	with col_vid4:
	video_quality = st.selectbox("Video Quality", [
	"High (8 Mbps)",
	"Medium (5 Mbps)",
	"Low (3 Mbps)"
	], index=0)

	if st.button("🎬 Export Video", type="primary"):
	with st.spinner("Generating video..."):
	try:
	import cv2
	import tempfile

	# CRITICAL FIX: Use original image instead of depth map for video export!
	# This ensures we export the real photo with camera effects, not the colored depth visualization
	original_image = st.session_state['original_image']

	# Parse resolution
	if "4K" in video_resolution:
	width, height = 3840, 2160
	elif "1080p" in video_resolution:
	if "Portrait" in video_resolution:
	width, height = 1080, 1920
	elif "Square" in video_resolution:
	width, height = 1080, 1080
	else:
	width, height = 1920, 1080
	elif "720p" in video_resolution:
	if "Portrait" in video_resolution:
	width, height = 720, 1280
	else:
	width, height = 1280, 720
	else: # Original
	height, width = original_image.shape[:2]

	# Parse video quality/bitrate
	if "High" in video_quality:
	bitrate = 8_000_000
	elif "Medium" in video_quality:
	bitrate = 5_000_000
	else: # Low
	bitrate = 3_000_000

	# Resize original image (not depth map!)
	image_resized = cv2.resize(original_image, (width, height))

	# Calculate total frames with loops
	frames_per_loop = video_duration * video_fps
	total_frames = frames_per_loop * loop_count

	with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
	output_path = tmp_file.name

	fourcc = cv2.VideoWriter_fourcc(*'mp4v')
	out = cv2.VideoWriter(output_path, fourcc, video_fps, (width, height))

	for frame_num in range(total_frames):
	# Calculate progress within current loop (0 to 1)
	progress = (frame_num % frames_per_loop) / frames_per_loop

	# Apply effect - NOW USING REAL PHOTO instead of depth map!
	# Effect intensity multiplier allows user to control how dramatic the movement is
	if video_effect == "Zoom In":
	scale = 1.0 + (progress * 0.5 * effect_intensity)
	center_x, center_y = width // 2, height // 2
	new_w, new_h = int(width / scale), int(height / scale)
	x1, y1 = center_x - new_w // 2, center_y - new_h // 2
	x2, y2 = x1 + new_w, y1 + new_h
	cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
	frame = cv2.resize(cropped, (width, height))

	elif video_effect == "Zoom Out":
	scale = 1.5 - (progress * 0.5 * effect_intensity)
	center_x, center_y = width // 2, height // 2
	new_w, new_h = int(width / scale), int(height / scale)
	x1, y1 = center_x - new_w // 2, center_y - new_h // 2
	x2, y2 = x1 + new_w, y1 + new_h
	cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
	frame = cv2.resize(cropped, (width, height))

	elif video_effect == "Ken Burns (Zoom + Pan)":
	# Ken Burns: zoom in while panning
	scale = 1.0 + (progress * 0.4 * effect_intensity)
	pan_x = int(width * progress * 0.2 * effect_intensity)
	pan_y = int(height * progress * 0.1 * effect_intensity)
	center_x = width // 2 + pan_x
	center_y = height // 2 + pan_y
	new_w, new_h = int(width / scale), int(height / scale)
	x1, y1 = center_x - new_w // 2, center_y - new_h // 2
	x2, y2 = x1 + new_w, y1 + new_h
	cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
	frame = cv2.resize(cropped, (width, height))

	elif video_effect == "Dolly In":
	# Dolly in: smooth zoom with slight scale
	scale = 1.0 + (progress * 0.3 * effect_intensity)
	center_x, center_y = width // 2, height // 2
	new_w, new_h = int(width / scale), int(height / scale)
	x1, y1 = center_x - new_w // 2, center_y - new_h // 2
	x2, y2 = x1 + new_w, y1 + new_h
	cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
	frame = cv2.resize(cropped, (width, height))

	elif video_effect == "Dolly Out":
	scale = 1.3 - (progress * 0.3 * effect_intensity)
	center_x, center_y = width // 2, height // 2
	new_w, new_h = int(width / scale), int(height / scale)
	x1, y1 = center_x - new_w // 2, center_y - new_h // 2
	x2, y2 = x1 + new_w, y1 + new_h
	cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
	frame = cv2.resize(cropped, (width, height))

	elif video_effect == "Pan Left":
	offset = int(width * progress * 0.3 * effect_intensity)
	frame = np.roll(image_resized, -offset, axis=1)

	elif video_effect == "Pan Right":
	offset = int(width * progress * 0.3 * effect_intensity)
	frame = np.roll(image_resized, offset, axis=1)

	elif video_effect == "Pan Up":
	offset = int(height * progress * 0.3 * effect_intensity)
	frame = np.roll(image_resized, -offset, axis=0)

	elif video_effect == "Pan Down":
	offset = int(height * progress * 0.3 * effect_intensity)
	frame = np.roll(image_resized, offset, axis=0)

	elif video_effect == "Tilt Up":
	# Tilt up: perspective transformation
	tilt_factor = progress * 0.3 * effect_intensity
	pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
	pts2 = np.float32([
	[0, int(height * tilt_factor)],
	[width, int(height * tilt_factor)],
	[0, height],
	[width, height]
	])
	matrix = cv2.getPerspectiveTransform(pts1, pts2)
	frame = cv2.warpPerspective(image_resized, matrix, (width, height))

	elif video_effect == "Tilt Down":
	tilt_factor = progress * 0.3 * effect_intensity
	pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
	pts2 = np.float32([
	[0, 0],
	[width, 0],
	[0, height - int(height * tilt_factor)],
	[width, height - int(height * tilt_factor)]
	])
	matrix = cv2.getPerspectiveTransform(pts1, pts2)
	frame = cv2.warpPerspective(image_resized, matrix, (width, height))

	elif video_effect == "Rotate CW":
	angle = progress * 360 * effect_intensity
	center = (width // 2, height // 2)
	rotation_matrix = cv2.getRotationMatrix2D(center, -angle, 1.0)
	frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

	elif video_effect == "Rotate CCW":
	angle = progress * 360 * effect_intensity
	center = (width // 2, height // 2)
	rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
	frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

	elif video_effect == "Orbit":
	# Orbit: rotate + slight zoom
	angle = progress * 360 * effect_intensity
	scale = 1.0 + (np.sin(progress * np.pi) * 0.2 * effect_intensity)
	center = (width // 2, height // 2)
	rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
	frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

	else:
	frame = image_resized.copy()

	# Convert RGB to BGR for cv2
	frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
	out.write(frame_bgr)

	out.release()

	# Read video and provide download
	with open(output_path, 'rb') as f:
	video_bytes = f.read()

	total_duration = video_duration * loop_count
	st.success(f"✅ Video generated! {total_frames} frames at {video_fps} FPS ({total_duration}s total, {loop_count} loop{'s' if loop_count > 1 else ''})")
	st.info(f"📊 Settings: {video_resolution} \| {video_quality} \| Effect Intensity: {effect_intensity}x")
	st.download_button(
	label="📥 Download Video",
	data=video_bytes,
	file_name=f"dimensio_{video_effect.lower().replace(' ', '_').replace('(', '').replace(')', '')}_{width}x{height}_{video_fps}fps.mp4",
	mime="video/mp4"
	)

	except Exception as e:
	st.error(f"Error generating video: {str(e)}")
	import traceback
	traceback.print_exc()
	else:
	st.info("👆 Upload an image and generate depth map first to enable video export")

	# Info section
	st.markdown("---")
	st.markdown("""
	## 💡 About DimensioDepth

	### Features:
	- ✅ Real AI depth estimation with Depth-Anything V2 BASE model
	- ✅ Fast processing (~800ms on CPU, ~200ms on GPU)
	- ✅ SUPERB quality depth maps
	- ✅ Professional video export with cinematic camera movements
	- ✅ Advanced controls - Effect intensity, loops, quality settings

	### Video Export Controls:
	- ⏱️ Duration - 1 to 30 seconds per loop
	- 🔁 Loops - Repeat animation 1-10 times
	- 🎚️ Effect Intensity - Control movement strength (0.1x to 3.0x)
	- 0.5x = Subtle, professional movements
	- 1.0x = Default, balanced effects
	- 2.0x = Dramatic, bold camera work
	- 📐 Resolutions - Original, 4K UHD, 1080p, 720p, Square, Portrait modes
	- 🎬 Quality - High (8 Mbps), Medium (5 Mbps), Low (3 Mbps)
	- 🎞️ Frame Rates - 24fps (cinematic), 30fps (standard), 60fps (smooth)

	### Camera Effects:
	- 📹 Zoom In/Out - Smooth zoom controls
	- 🎬 Pan - Left, Right, Up, Down panning
	- 🎥 Dolly - Professional dolly in/out shots
	- 🎞️ Tilt - Up/Down tilt movements
	- 🔄 Rotate - Clockwise/Counter-clockwise rotation
	- ⭐ Ken Burns - Classic zoom + pan effect
	- 🌀 Orbit - Smooth orbital rotation

	### Use Cases:
	- 🎨 Creative & Artistic: Depth-enhanced photos, 3D effects
	- 🎬 VFX & Film: Depth map generation for compositing
	- 🔬 Research: Computer vision, depth perception studies
	- 📱 Content Creation: Engaging 3D effects for social media

	Made with ❤️ for the AI community
	""")