"""
DimensioDepth - Add Dimension to Everything
Advanced AI Depth Estimation with 3D Visualization

Powered by Depth-Anything V2 | Runs on Hugging Face Spaces
"""

import streamlit as st
import numpy as np
import cv2
from PIL import Image
from pathlib import Path
import sys

# Page config
st.set_page_config(
    page_title="DimensioDepth - AI Depth Estimation",
    page_icon="🎨",
    layout="wide"
)

# Add backend to path
sys.path.append(str(Path(__file__).parent / "backend"))

# Import backend utilities
from backend.utils.image_processing import (
    depth_to_colormap,
    create_side_by_side
)

# Try to import REAL AI model
@st.cache_resource
def load_model():
    try:
        print("[*] Attempting to import TransformersDepthEstimator...")
        from backend.utils.transformers_depth import TransformersDepthEstimator
        print("[*] Import successful! Loading REAL AI Depth-Anything V2 BASE model...")
        print("[*] This will download ~372MB on first run (one-time download)")
        depth_estimator = TransformersDepthEstimator(model_size="base")
        print("[+] REAL AI MODE ACTIVE - BASE MODEL!")
        print("[+] Quality: SUPERB (best available)")
        return depth_estimator, True, "BASE (372MB)"
    except Exception as e:
        print(f"[!] FULL ERROR TRACEBACK:")
        import traceback
        traceback.print_exc()
        print(f"[!] Error type: {type(e).__name__}")
        print(f"[!] Error message: {str(e)}")
        print("[*] Falling back to DEMO MODE")
        return None, False, "Demo Mode"

depth_estimator, USE_REAL_AI, MODEL_SIZE = load_model()


def estimate_depth(image):
    """Estimate depth from an input image using REAL AI or DEMO MODE"""
    if image is None:
        return None, None, "Please upload an image first"

    try:
        # Convert PIL to numpy if needed
        if isinstance(image, Image.Image):
            image = np.array(image)

        # Generate depth map
        if USE_REAL_AI:
            depth = depth_estimator.predict(image)
            mode_text = "REAL AI (Depth-Anything V2)"
        else:
            from backend.utils.demo_depth import generate_smart_depth
            depth = generate_smart_depth(image)
            mode_text = "DEMO MODE (Synthetic)"

        # Create colored depth map with Inferno colormap (best for depth)
        depth_colored = depth_to_colormap(depth, cv2.COLORMAP_INFERNO)

        # Create grayscale depth map
        depth_gray = (depth * 255).astype(np.uint8)
        depth_gray = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2RGB)

        return depth_colored, depth_gray, mode_text, image.shape, depth.shape

    except Exception as e:
        st.error(f"Error during depth estimation: {str(e)}")
        import traceback
        traceback.print_exc()
        return None, None, None, None, None


# Header
st.title("🎨 DimensioDepth - Add Dimension to Everything")
st.markdown("### Transform 2D images into stunning 3D depth visualizations")

# Status banner
if USE_REAL_AI:
    st.success(f"🚀 REAL AI MODE ACTIVE! - Powered by Depth-Anything V2 {MODEL_SIZE} - SUPERB Quality!")
else:
    st.info("Running in DEMO MODE - Ultra-fast synthetic depth estimation")

st.markdown("---")

# Main interface
col1, col2 = st.columns(2)

with col1:
    st.subheader("Input")
    uploaded_file = st.file_uploader("Upload Your Image", type=['png', 'jpg', 'jpeg'])
    process_btn = st.button("🚀 Generate Depth Map", type="primary")

with col2:
    st.subheader("Output")

# Processing
if uploaded_file is not None and process_btn:
    # Load image
    image = Image.open(uploaded_file)

    with col1:
        st.image(image, caption="Original Image", use_column_width=True)

    with st.spinner("Generating depth map..."):
        depth_colored, depth_gray, mode_text, input_shape, output_shape = estimate_depth(image)

    if depth_colored is not None:
        # Store in session state for video export
        st.session_state['depth_colored'] = depth_colored
        st.session_state['depth_gray'] = depth_gray
        st.session_state['original_image'] = np.array(image)

        with col2:
            tab1, tab2 = st.tabs(["Colored", "Grayscale"])

            with tab1:
                st.image(depth_colored, caption="Depth Map (Colored)", use_column_width=True)

            with tab2:
                st.image(depth_gray, caption="Depth Map (Grayscale)", use_column_width=True)

        # Info
        st.success(f"✅ Depth Estimation Complete!")
        st.info(f"""
**Mode**: {mode_text}
**Input Size**: {input_shape[1]}x{input_shape[0]}
**Output Size**: {output_shape[1]}x{output_shape[0]}
{f'**Powered by**: Depth-Anything V2 {MODEL_SIZE}' if USE_REAL_AI else '**Processing**: Ultra-fast (<50ms) synthetic depth'}
        """)

# Video Export Section
st.markdown("---")
st.subheader("🎬 Video Export")

if 'depth_colored' in st.session_state:
    with st.expander("Export Depth Map as Video", expanded=True):
        col_vid1, col_vid2 = st.columns(2)

        with col_vid1:
            video_duration = st.slider("Duration (seconds)", 1, 30, 10, help="Length of each animation loop")
            video_fps = st.selectbox("FPS", [24, 30, 60], index=1)
            video_resolution = st.selectbox("Resolution", [
                "Original",
                "4K UHD (3840x2160)",
                "1080p (1920x1080)",
                "720p (1280x720)",
                "Square 1080p (1080x1080)",
                "Portrait 1080p (1080x1920)",
                "Portrait 720p (720x1280)"
            ], index=2)

        with col_vid2:
            video_effect = st.selectbox("Camera Effect", [
                "Zoom In",
                "Zoom Out",
                "Pan Left",
                "Pan Right",
                "Pan Up",
                "Pan Down",
                "Rotate CW",
                "Rotate CCW",
                "Ken Burns (Zoom + Pan)",
                "Dolly In",
                "Dolly Out",
                "Tilt Up",
                "Tilt Down",
                "Orbit"
            ])

            effect_intensity = st.slider("Effect Intensity", 0.1, 3.0, 1.0, 0.1,
                help="Control how strong the camera movement is (0.5 = subtle, 2.0 = dramatic)")

        # Additional controls row
        col_vid3, col_vid4 = st.columns(2)
        with col_vid3:
            loop_count = st.slider("Number of Loops", 1, 10, 1,
                help="How many times to repeat the animation")

        with col_vid4:
            video_quality = st.selectbox("Video Quality", [
                "High (8 Mbps)",
                "Medium (5 Mbps)",
                "Low (3 Mbps)"
            ], index=0)

        if st.button("🎬 Export Video", type="primary"):
            with st.spinner("Generating video..."):
                try:
                    import cv2
                    import tempfile

                    # CRITICAL FIX: Use original image instead of depth map for video export!
                    # This ensures we export the real photo with camera effects, not the colored depth visualization
                    original_image = st.session_state['original_image']

                    # Parse resolution
                    if "4K" in video_resolution:
                        width, height = 3840, 2160
                    elif "1080p" in video_resolution:
                        if "Portrait" in video_resolution:
                            width, height = 1080, 1920
                        elif "Square" in video_resolution:
                            width, height = 1080, 1080
                        else:
                            width, height = 1920, 1080
                    elif "720p" in video_resolution:
                        if "Portrait" in video_resolution:
                            width, height = 720, 1280
                        else:
                            width, height = 1280, 720
                    else:  # Original
                        height, width = original_image.shape[:2]

                    # Parse video quality/bitrate
                    if "High" in video_quality:
                        bitrate = 8_000_000
                    elif "Medium" in video_quality:
                        bitrate = 5_000_000
                    else:  # Low
                        bitrate = 3_000_000

                    # Resize original image (not depth map!)
                    image_resized = cv2.resize(original_image, (width, height))

                    # Calculate total frames with loops
                    frames_per_loop = video_duration * video_fps
                    total_frames = frames_per_loop * loop_count

                    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
                        output_path = tmp_file.name

                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    out = cv2.VideoWriter(output_path, fourcc, video_fps, (width, height))

                    for frame_num in range(total_frames):
                        # Calculate progress within current loop (0 to 1)
                        progress = (frame_num % frames_per_loop) / frames_per_loop

                        # Apply effect - NOW USING REAL PHOTO instead of depth map!
                        # Effect intensity multiplier allows user to control how dramatic the movement is
                        if video_effect == "Zoom In":
                            scale = 1.0 + (progress * 0.5 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))

                        elif video_effect == "Zoom Out":
                            scale = 1.5 - (progress * 0.5 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))

                        elif video_effect == "Ken Burns (Zoom + Pan)":
                            # Ken Burns: zoom in while panning
                            scale = 1.0 + (progress * 0.4 * effect_intensity)
                            pan_x = int(width * progress * 0.2 * effect_intensity)
                            pan_y = int(height * progress * 0.1 * effect_intensity)
                            center_x = width // 2 + pan_x
                            center_y = height // 2 + pan_y
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))

                        elif video_effect == "Dolly In":
                            # Dolly in: smooth zoom with slight scale
                            scale = 1.0 + (progress * 0.3 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))

                        elif video_effect == "Dolly Out":
                            scale = 1.3 - (progress * 0.3 * effect_intensity)
                            center_x, center_y = width // 2, height // 2
                            new_w, new_h = int(width / scale), int(height / scale)
                            x1, y1 = center_x - new_w // 2, center_y - new_h // 2
                            x2, y2 = x1 + new_w, y1 + new_h
                            cropped = image_resized[max(0, y1):min(height, y2), max(0, x1):min(width, x2)]
                            frame = cv2.resize(cropped, (width, height))

                        elif video_effect == "Pan Left":
                            offset = int(width * progress * 0.3 * effect_intensity)
                            frame = np.roll(image_resized, -offset, axis=1)

                        elif video_effect == "Pan Right":
                            offset = int(width * progress * 0.3 * effect_intensity)
                            frame = np.roll(image_resized, offset, axis=1)

                        elif video_effect == "Pan Up":
                            offset = int(height * progress * 0.3 * effect_intensity)
                            frame = np.roll(image_resized, -offset, axis=0)

                        elif video_effect == "Pan Down":
                            offset = int(height * progress * 0.3 * effect_intensity)
                            frame = np.roll(image_resized, offset, axis=0)

                        elif video_effect == "Tilt Up":
                            # Tilt up: perspective transformation
                            tilt_factor = progress * 0.3 * effect_intensity
                            pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
                            pts2 = np.float32([
                                [0, int(height * tilt_factor)],
                                [width, int(height * tilt_factor)],
                                [0, height],
                                [width, height]
                            ])
                            matrix = cv2.getPerspectiveTransform(pts1, pts2)
                            frame = cv2.warpPerspective(image_resized, matrix, (width, height))

                        elif video_effect == "Tilt Down":
                            tilt_factor = progress * 0.3 * effect_intensity
                            pts1 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
                            pts2 = np.float32([
                                [0, 0],
                                [width, 0],
                                [0, height - int(height * tilt_factor)],
                                [width, height - int(height * tilt_factor)]
                            ])
                            matrix = cv2.getPerspectiveTransform(pts1, pts2)
                            frame = cv2.warpPerspective(image_resized, matrix, (width, height))

                        elif video_effect == "Rotate CW":
                            angle = progress * 360 * effect_intensity
                            center = (width // 2, height // 2)
                            rotation_matrix = cv2.getRotationMatrix2D(center, -angle, 1.0)
                            frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

                        elif video_effect == "Rotate CCW":
                            angle = progress * 360 * effect_intensity
                            center = (width // 2, height // 2)
                            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
                            frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

                        elif video_effect == "Orbit":
                            # Orbit: rotate + slight zoom
                            angle = progress * 360 * effect_intensity
                            scale = 1.0 + (np.sin(progress * np.pi) * 0.2 * effect_intensity)
                            center = (width // 2, height // 2)
                            rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
                            frame = cv2.warpAffine(image_resized, rotation_matrix, (width, height))

                        else:
                            frame = image_resized.copy()

                        # Convert RGB to BGR for cv2
                        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                        out.write(frame_bgr)

                    out.release()

                    # Read video and provide download
                    with open(output_path, 'rb') as f:
                        video_bytes = f.read()

                    total_duration = video_duration * loop_count
                    st.success(f"✅ Video generated! {total_frames} frames at {video_fps} FPS ({total_duration}s total, {loop_count} loop{'s' if loop_count > 1 else ''})")
                    st.info(f"📊 Settings: {video_resolution} | {video_quality} | Effect Intensity: {effect_intensity}x")
                    st.download_button(
                        label="📥 Download Video",
                        data=video_bytes,
                        file_name=f"dimensio_{video_effect.lower().replace(' ', '_').replace('(', '').replace(')', '')}_{width}x{height}_{video_fps}fps.mp4",
                        mime="video/mp4"
                    )

                except Exception as e:
                    st.error(f"Error generating video: {str(e)}")
                    import traceback
                    traceback.print_exc()
else:
    st.info("👆 Upload an image and generate depth map first to enable video export")

# Info section
st.markdown("---")
st.markdown("""
## 💡 About DimensioDepth

### Features:
- ✅ Real AI depth estimation with Depth-Anything V2 BASE model
- ✅ Fast processing (~800ms on CPU, ~200ms on GPU)
- ✅ SUPERB quality depth maps
- ✅ **Professional video export** with cinematic camera movements
- ✅ **Advanced controls** - Effect intensity, loops, quality settings

### Video Export Controls:
- ⏱️ **Duration** - 1 to 30 seconds per loop
- 🔁 **Loops** - Repeat animation 1-10 times
- 🎚️ **Effect Intensity** - Control movement strength (0.1x to 3.0x)
  - 0.5x = Subtle, professional movements
  - 1.0x = Default, balanced effects
  - 2.0x = Dramatic, bold camera work
- 📐 **Resolutions** - Original, 4K UHD, 1080p, 720p, Square, Portrait modes
- 🎬 **Quality** - High (8 Mbps), Medium (5 Mbps), Low (3 Mbps)
- 🎞️ **Frame Rates** - 24fps (cinematic), 30fps (standard), 60fps (smooth)

### Camera Effects:
- 📹 **Zoom In/Out** - Smooth zoom controls
- 🎬 **Pan** - Left, Right, Up, Down panning
- 🎥 **Dolly** - Professional dolly in/out shots
- 🎞️ **Tilt** - Up/Down tilt movements
- 🔄 **Rotate** - Clockwise/Counter-clockwise rotation
- ⭐ **Ken Burns** - Classic zoom + pan effect
- 🌀 **Orbit** - Smooth orbital rotation

### Use Cases:
- 🎨 **Creative & Artistic**: Depth-enhanced photos, 3D effects
- 🎬 **VFX & Film**: Depth map generation for compositing
- 🔬 **Research**: Computer vision, depth perception studies
- 📱 **Content Creation**: Engaging 3D effects for social media

Made with ❤️ for the AI community
""")