| """ | |
| DimensioDepth - Add Dimension to Everything | |
| Advanced AI Depth Estimation with 3D Visualization | |
| Powered by Depth-Anything V2 | Runs on Hugging Face Spaces | |
| """ | |
import streamlit as st
import numpy as np
import cv2
from PIL import Image
from pathlib import Path
import sys
# Page config
st.set_page_config(
    page_title="DimensioDepth - AI Depth Estimation",
    page_icon="🎨",
    layout="wide"
)
# Add backend to path
sys.path.append(str(Path(__file__).parent / "backend"))

# Import backend utilities
from backend.utils.image_processing import (
    depth_to_colormap,
    create_side_by_side
)
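
# For reference, a minimal sketch of what depth_to_colormap presumably does; the
# real backend implementation may differ, and _depth_to_colormap_sketch is a
# hypothetical name used here only for illustration.
def _depth_to_colormap_sketch(depth, colormap=cv2.COLORMAP_INFERNO):
    """Map a float depth array to an RGB heatmap via an OpenCV colormap."""
    d_min, d_max = float(depth.min()), float(depth.max())
    # Normalize to the 8-bit range, guarding against a constant depth map
    norm = (depth - d_min) / (d_max - d_min) if d_max > d_min else np.zeros_like(depth)
    colored_bgr = cv2.applyColorMap((norm * 255).astype(np.uint8), colormap)
    return cv2.cvtColor(colored_bgr, cv2.COLOR_BGR2RGB)  # Streamlit expects RGB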
# Try to import REAL AI model
@st.cache_resource  # cache the loaded model so Streamlit reruns don't reload ~372MB
def load_model():
    try:
        print("[*] Attempting to import TransformersDepthEstimator...")
        from backend.utils.transformers_depth import TransformersDepthEstimator
        print("[*] Import successful! Loading REAL AI Depth-Anything V2 BASE model...")
        print("[*] This will download ~372MB on first run (one-time download)")
        depth_estimator = TransformersDepthEstimator(model_size="base")
        print("[+] REAL AI MODE ACTIVE - BASE MODEL!")
        print("[+] Quality: SUPERB (best available)")
        return depth_estimator, True, "BASE (372MB)"
    except Exception as e:
        print("[!] FULL ERROR TRACEBACK:")
        import traceback
        traceback.print_exc()
        print(f"[!] Error type: {type(e).__name__}")
        print(f"[!] Error message: {str(e)}")
        print("[*] Falling back to DEMO MODE")
        return None, False, "Demo Mode"

depth_estimator, USE_REAL_AI, MODEL_SIZE = load_model()
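
# For context, a minimal sketch of what TransformersDepthEstimator might wrap: the
# Hugging Face transformers "depth-estimation" pipeline. The model id and the
# _PipelineDepthSketch name are illustrative assumptions, not the project's backend code.
class _PipelineDepthSketch:
    def __init__(self, model_size="base"):
        from transformers import pipeline  # deferred import; transformers is heavy
        # Assumed checkpoint naming, e.g. depth-anything/Depth-Anything-V2-Base-hf
        self.pipe = pipeline(
            "depth-estimation",
            model=f"depth-anything/Depth-Anything-V2-{model_size.capitalize()}-hf",
        )

    def predict(self, image):
        # The pipeline returns a dict whose "depth" entry is a PIL image
        result = self.pipe(Image.fromarray(image))
        depth = np.array(result["depth"], dtype=np.float32)
        # Normalize to [0, 1] to match what estimate_depth() below expects
        return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)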
def estimate_depth(image):
    """Estimate depth from an input image using REAL AI or DEMO MODE."""
    if image is None:
        # Keep the return arity consistent with the success path (5 values)
        return None, None, "Please upload an image first", None, None
    try:
        # Convert PIL to numpy if needed
        if isinstance(image, Image.Image):
            image = np.array(image)
        # Generate depth map
        if USE_REAL_AI:
            depth = depth_estimator.predict(image)
            mode_text = "REAL AI (Depth-Anything V2)"
        else:
            from backend.utils.demo_depth import generate_smart_depth
            depth = generate_smart_depth(image)
            mode_text = "DEMO MODE (Synthetic)"
        # Create colored depth map with Inferno colormap (best for depth)
        depth_colored = depth_to_colormap(depth, cv2.COLORMAP_INFERNO)
        # Create grayscale depth map (clip in case the estimator overshoots [0, 1])
        depth_gray = (np.clip(depth, 0, 1) * 255).astype(np.uint8)
        depth_gray = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2RGB)
        return depth_colored, depth_gray, mode_text, image.shape, depth.shape
    except Exception as e:
        st.error(f"Error during depth estimation: {str(e)}")
        import traceback
        traceback.print_exc()
        return None, None, None, None, None
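
# Similarly, a minimal sketch of the kind of heuristic generate_smart_depth (imported
# lazily above from backend.utils.demo_depth) could implement; the real demo module
# may differ, and _generate_smart_depth_sketch is a hypothetical name.
def _generate_smart_depth_sketch(image):
    """Fake a plausible depth map from a vertical gradient plus blurred luminance."""
    h, w = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY).astype(np.float32) / 255.0
    # Simple top-to-bottom gradient, blended with blur for scene-dependent detail
    gradient = np.tile(np.linspace(1.0, 0.0, h, dtype=np.float32)[:, None], (1, w))
    blurred = cv2.GaussianBlur(gray, (21, 21), 0)
    depth = 0.6 * gradient + 0.4 * blurred
    return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)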
# Header
st.title("🎨 DimensioDepth - Add Dimension to Everything")
st.markdown("### Transform 2D images into stunning 3D depth visualizations")

# Status banner
if USE_REAL_AI:
    st.success(f"🎉 REAL AI MODE ACTIVE! - Powered by Depth-Anything V2 {MODEL_SIZE} - SUPERB Quality!")
else:
    st.info("Running in DEMO MODE - Ultra-fast synthetic depth estimation")

st.markdown("---")
# Main interface
col1, col2 = st.columns(2)
with col1:
    st.subheader("Input")
    uploaded_file = st.file_uploader("Upload Your Image", type=['png', 'jpg', 'jpeg'])
    process_btn = st.button("🚀 Generate Depth Map", type="primary")
with col2:
    st.subheader("Output")
# Processing
if uploaded_file is not None and process_btn:
    # Load image (force RGB so RGBA or palette PNGs don't break downstream cv2 calls)
    image = Image.open(uploaded_file).convert("RGB")
    with col1:
        st.image(image, caption="Original Image", use_column_width=True)
    with st.spinner("Generating depth map..."):
        depth_colored, depth_gray, mode_text, input_shape, output_shape = estimate_depth(image)
    if depth_colored is not None:
        # Store in session state for video export
        st.session_state['depth_colored'] = depth_colored
        st.session_state['depth_gray'] = depth_gray
        st.session_state['original_image'] = np.array(image)
        with col2:
            tab1, tab2 = st.tabs(["Colored", "Grayscale"])
            with tab1:
                st.image(depth_colored, caption="Depth Map (Colored)", use_column_width=True)
            with tab2:
                st.image(depth_gray, caption="Depth Map (Grayscale)", use_column_width=True)
        # Info
        st.success("✅ Depth Estimation Complete!")
        st.info(f"""
        **Mode**: {mode_text}
        **Input Size**: {input_shape[1]}x{input_shape[0]}
        **Output Size**: {output_shape[1]}x{output_shape[0]}
        {f'**Powered by**: Depth-Anything V2 {MODEL_SIZE}' if USE_REAL_AI else '**Processing**: Ultra-fast (<50ms) synthetic depth'}
        """)
# Video Export Section
st.markdown("---")
st.subheader("🎬 Video Export")

if 'depth_colored' in st.session_state:
    with st.expander("Export Depth Map as Video", expanded=True):
        col_vid1, col_vid2 = st.columns(2)
        with col_vid1:
            video_duration = st.slider("Duration (seconds)", 1, 30, 10, help="Length of each animation loop")
            video_fps = st.selectbox("FPS", [24, 30, 60], index=1)
            video_resolution = st.selectbox("Resolution", [
                "Original",
                "4K UHD (3840x2160)",
                "1080p (1920x1080)",
                "720p (1280x720)",
                "Square 1080p (1080x1080)",
                "Portrait 1080p (1080x1920)",
                "Portrait 720p (720x1280)"
            ], index=2)
        with col_vid2:
            video_effect = st.selectbox("Camera Effect", [
                "Zoom In",
                "Zoom Out",
                "Pan Left",
                "Pan Right",
                "Pan Up",
                "Pan Down",
                "Rotate CW",
                "Rotate CCW",
                "Ken Burns (Zoom + Pan)",
                "Dolly In",
                "Dolly Out",
                "Tilt Up",
                "Tilt Down",
                "Orbit"
            ])
            effect_intensity = st.slider("Effect Intensity", 0.1, 3.0, 1.0, 0.1,
                                         help="Control how strong the camera movement is (0.5 = subtle, 2.0 = dramatic)")

        # Additional controls row
        col_vid3, col_vid4 = st.columns(2)
        with col_vid3:
            loop_count = st.slider("Number of Loops", 1, 10, 1,
                                   help="How many times to repeat the animation")
        with col_vid4:
            video_quality = st.selectbox("Video Quality", [
                "High (8 Mbps)",
                "Medium (5 Mbps)",
                "Low (3 Mbps)"
            ], index=0)
        if st.button("🎬 Export Video", type="primary"):
            with st.spinner("Generating video..."):
                try:
                    import tempfile

                    # Use the original photo rather than the colored depth map, so the
                    # exported video shows the real image with camera effects applied
                    original_image = st.session_state['original_image']

                    # Parse resolution
                    if "4K" in video_resolution:
                        width, height = 3840, 2160
                    elif "1080p" in video_resolution:
                        if "Portrait" in video_resolution:
                            width, height = 1080, 1920
                        elif "Square" in video_resolution:
                            width, height = 1080, 1080
                        else:
                            width, height = 1920, 1080
                    elif "720p" in video_resolution:
                        if "Portrait" in video_resolution:
                            width, height = 720, 1280
                        else:
                            width, height = 1280, 720
                    else:  # Original
                        height, width = original_image.shape[:2]

                    # Parse video quality/bitrate
                    # NOTE: cv2.VideoWriter exposes no bitrate control, so this
                    # value is currently informational only
                    if "High" in video_quality:
                        bitrate = 8_000_000
                    elif "Medium" in video_quality:
                        bitrate = 5_000_000
                    else:  # Low
                        bitrate = 3_000_000

                    # Resize the original image (not the depth map!) to the target size
                    image_resized = cv2.resize(original_image, (width, height))

                    # Calculate total frames with loops
                    frames_per_loop = video_duration * video_fps
                    total_frames = frames_per_loop * loop_count

                    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
                        output_path = tmp_file.name
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    out = cv2.VideoWriter(output_path, fourcc, video_fps, (width, height))
                    for frame_num in range(total_frames):
                        # Progress within the current loop (0 to 1)
                        progress = (frame_num % frames_per_loop) / frames_per_loop

                        # Apply the selected effect to the real photo; the intensity
                        # multiplier controls how dramatic the camera movement is
                        if video_effect == "Zoom In":
                            scale = 1.0 + (progress * 0.5 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))
                        elif video_effect == "Zoom Out":
                            # Clamp so high intensities can't push the scale to zero or below
                            scale = max(1.0, 1.5 - (progress * 0.5 * effect_intensity))
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))
                        elif video_effect == "Ken Burns (Zoom + Pan)":
                            # Ken Burns: zoom in while panning
                            scale = 1.0 + (progress * 0.4 * effect_intensity)
                            pan_x = int(width * progress * 0.2 * effect_intensity)
                            pan_y = int(height * progress * 0.1 * effect_intensity)
                            center_x = width // 2 + pan_x
                            center_y = height // 2 + pan_y
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))
                        elif video_effect == "Dolly In":
                            # Dolly in: smooth zoom with slight scale
                            scale = 1.0 + (progress * 0.3 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))
                        elif video_effect == "Dolly Out":
                            # Same clamp as Zoom Out to keep the crop non-degenerate
                            scale = max(1.0, 1.3 - (progress * 0.3 * effect_intensity))
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))
| elif video_effect == "Pan Left": | |
| offset = int(width * progress * 0.3 * effect_intensity) | |
| frame = np.roll(image_resized, -offset, axis=1) | |
| elif video_effect == "Pan Right": | |
| offset = int(width * progress * 0.3 * effect_intensity) | |
| frame = np.roll(image_resized, offset, axis=1) | |
| elif video_effect == "Pan Up": | |
| offset = int(height * progress * 0.3 * effect_intensity) | |
| frame = np.roll(image_resized, -offset, axis=0) | |
| elif video_effect == "Pan Down": | |
| offset = int(height * progress * 0.3 * effect_intensity) | |
| frame = np.roll(image_resized, offset, axis=0) | |
| elif video_effect == "Tilt Up": | |
| # Tilt up: perspective transformation | |
| tilt_factor = progress * 0.3 * effect_intensity | |
| pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]]) | |
| pts2 = np.float32([ | |
| [0, int(height * tilt_factor)], | |
| [width, int(height * tilt_factor)], | |
| [0, height], | |
| [width, height] | |
| ]) | |
| matrix = cv2.getPerspectiveTransform(pts1, pts2) | |
| frame = cv2.warpPerspective(image_resized, matrix, (width, height)) | |
| elif video_effect == "Tilt Down": | |
| tilt_factor = progress * 0.3 * effect_intensity | |
| pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]]) | |
| pts2 = np.float32([ | |
| [0, 0], | |
| [width, 0], | |
| [0, height - int(height * tilt_factor)], | |
| [width, height - int(height * tilt_factor)] | |
| ]) | |
| matrix = cv2.getPerspectiveTransform(pts1, pts2) | |
| frame = cv2.warpPerspective(image_resized, matrix, (width, height)) | |
| elif video_effect == "Rotate CW": | |
| angle = progress * 360 * effect_intensity | |
| center = (width // 2, height // 2) | |
| rotation_matrix = cv2.getRotationMatrix2D(center, -angle, 1.0) | |
| frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height)) | |
| elif video_effect == "Rotate CCW": | |
| angle = progress * 360 * effect_intensity | |
| center = (width // 2, height // 2) | |
| rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0) | |
| frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height)) | |
| elif video_effect == "Orbit": | |
| # Orbit: rotate + slight zoom | |
| angle = progress * 360 * effect_intensity | |
| scale = 1.0 + (np.sin(progress * np.pi) * 0.2 * effect_intensity) | |
| center = (width // 2, height // 2) | |
| rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale) | |
| frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height)) | |
| else: | |
| frame = image_resized.copy() | |
                        # Convert RGB to BGR for cv2
                        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                        out.write(frame_bgr)

                    out.release()

                    # Read the video, clean up the temp file, and provide a download
                    with open(output_path, 'rb') as f:
                        video_bytes = f.read()
                    Path(output_path).unlink(missing_ok=True)

                    total_duration = video_duration * loop_count
                    st.success(f"✅ Video generated! {total_frames} frames at {video_fps} FPS ({total_duration}s total, {loop_count} loop{'s' if loop_count > 1 else ''})")
                    st.info(f"📊 Settings: {video_resolution} | {video_quality} | Effect Intensity: {effect_intensity}x")
                    st.download_button(
                        label="📥 Download Video",
                        data=video_bytes,
                        file_name=f"dimensio_{video_effect.lower().replace(' ', '_').replace('(', '').replace(')', '')}_{width}x{height}_{video_fps}fps.mp4",
                        mime="video/mp4"
                    )
                except Exception as e:
                    st.error(f"Error generating video: {str(e)}")
                    import traceback
                    traceback.print_exc()
else:
    st.info("👆 Upload an image and generate a depth map first to enable video export")
# Info section
st.markdown("---")
st.markdown("""
## 💡 About DimensioDepth

### Features:
- ✅ Real AI depth estimation with the Depth-Anything V2 BASE model
- ✅ Fast processing (~800ms on CPU, ~200ms on GPU)
- ✅ SUPERB quality depth maps
- ✅ **Professional video export** with cinematic camera movements
- ✅ **Advanced controls** - Effect intensity, loops, quality settings

### Video Export Controls:
- ⏱️ **Duration** - 1 to 30 seconds per loop
- 🔁 **Loops** - Repeat the animation 1-10 times
- 🎚️ **Effect Intensity** - Control movement strength (0.1x to 3.0x)
  - 0.5x = Subtle, professional movements
  - 1.0x = Default, balanced effects
  - 2.0x = Dramatic, bold camera work
- 📐 **Resolutions** - Original, 4K UHD, 1080p, 720p, Square, and Portrait modes
- 🎬 **Quality** - High (8 Mbps), Medium (5 Mbps), Low (3 Mbps)
- 🎞️ **Frame Rates** - 24fps (cinematic), 30fps (standard), 60fps (smooth)

### Camera Effects:
- 📹 **Zoom In/Out** - Smooth zoom controls
- 🎬 **Pan** - Left, Right, Up, Down panning
- 🎥 **Dolly** - Professional dolly in/out shots
- 🎛️ **Tilt** - Up/Down tilt movements
- 🔄 **Rotate** - Clockwise/Counter-clockwise rotation
- ⭐ **Ken Burns** - Classic zoom + pan effect
- 🌐 **Orbit** - Smooth orbital rotation

### Use Cases:
- 🎨 **Creative & Artistic**: Depth-enhanced photos, 3D effects
- 🎬 **VFX & Film**: Depth map generation for compositing
- 🔬 **Research**: Computer vision, depth perception studies
- 📱 **Content Creation**: Engaging 3D effects for social media

Made with ❤️ for the AI community
""")