Spaces:

ddriscoll
/

SOC3242-01_Group_3_Interactive

Sleeping

App Files Files Community

David Driscoll committed on Feb 17

Commit

b37a8e6

1 Parent(s): 8947b35

Caching and lag reduction

Browse files

Files changed (1) hide show

app.py +108 -81

app.py CHANGED Viewed

@@ -7,144 +7,171 @@ from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
 from PIL import Image
 import mediapipe as mp
 from fer import FER  # Facial emotion recognition
-from concurrent.futures import ThreadPoolExecutor
 # -----------------------------
-# Asynchronous Processing Setup
 # -----------------------------
-executor = ThreadPoolExecutor(max_workers=4)
-latest_results = {
-    "posture": None,
-    "emotion": None,
-    "objects": None,
-    "faces": None
-}
-futures = {
-    "posture": None,
-    "emotion": None,
-    "objects": None,
-    "faces": None
-}
-def async_analyze(key, func, image):
-    # If a background task is done, update our cache.
-    if futures[key] is not None and futures[key].done():
-        latest_results[key] = futures[key].result()
-        futures[key] = None
-    # If we already have a cached result, return it immediately and schedule a new update if none is running.
-    if latest_results[key] is not None:
-        if futures[key] is None:
-            futures[key] = executor.submit(func, image)
-        return latest_results[key]
-    # Otherwise, compute synchronously (blocking) to initialize the cache.
-    result = func(image)
-    latest_results[key] = result
-    futures[key] = executor.submit(func, image)
-    return result
 # -----------------------------
 # Initialize Models and Helpers
 # -----------------------------
-# MediaPipe Pose for posture analysis
 mp_pose = mp.solutions.pose
 pose = mp_pose.Pose()
 mp_drawing = mp.solutions.drawing_utils
-# MediaPipe Face Detection for face detection
 mp_face_detection = mp.solutions.face_detection
 face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
-# Object Detection Model: Faster R-CNN (pretrained on COCO)
 object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
     weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
 )
 object_detection_model.eval()
 obj_transform = transforms.Compose([transforms.ToTensor()])
-# Facial Emotion Detection using FER (requires TensorFlow)
 emotion_detector = FER(mtcnn=True)
 # -----------------------------
-# Heavy (Synchronous) Analysis Functions
 # -----------------------------
-def _analyze_posture(image):
     frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    posture_result = "No posture detected"
     pose_results = pose.process(frame_rgb)
     if pose_results.pose_landmarks:
-        posture_result = "Posture detected"
-        mp_drawing.draw_landmarks(
-            output_frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
-            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
-            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
         )
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Posture Analysis: {posture_result}"
-def _analyze_emotion(image):
     frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     emotions = emotion_detector.detect_emotions(frame_rgb)
     if emotions:
         top_emotion, score = max(emotions[0]["emotions"].items(), key=lambda x: x[1])
-        emotion_text = f"{top_emotion} ({score:.2f})"
     else:
-        emotion_text = "No face detected for emotion analysis"
-    annotated_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Emotion Analysis: {emotion_text}"
-def _analyze_objects(image):
     frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     image_pil = Image.fromarray(frame_rgb)
     img_tensor = obj_transform(image_pil)
     with torch.no_grad():
         detections = object_detection_model([img_tensor])[0]
     threshold = 0.8
-    detected_boxes = detections["boxes"][detections["scores"] > threshold]
-    for box in detected_boxes:
-        box = box.int().cpu().numpy()
-        cv2.rectangle(output_frame, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2)
-    object_result = f"Detected {len(detected_boxes)} object(s)" if len(detected_boxes) else "No objects detected"
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Object Detection: {object_result}"
-def _analyze_faces(image):
     frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    output_frame = frame.copy()
     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     face_results = face_detection.process(frame_rgb)
-    face_result = "No faces detected"
     if face_results.detections:
-        face_result = f"Detected {len(face_results.detections)} face(s)"
-        h, w, _ = output_frame.shape
         for detection in face_results.detections:
             bbox = detection.location_data.relative_bounding_box
             x = int(bbox.xmin * w)
             y = int(bbox.ymin * h)
             box_w = int(bbox.width * w)
             box_h = int(bbox.height * h)
-            cv2.rectangle(output_frame, (x, y), (x + box_w, y + box_h), (0, 0, 255), 2)
-    annotated_image = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)
-    return annotated_image, f"Face Detection: {face_result}"
 # -----------------------------
-# Asynchronous Wrappers for Each Analysis
 # -----------------------------
-def analyze_posture_async(image):
-    return async_analyze("posture", _analyze_posture, image)
-def analyze_emotion_async(image):
-    return async_analyze("emotion", _analyze_emotion, image)
-def analyze_objects_async(image):
-    return async_analyze("objects", _analyze_objects, image)
-def analyze_faces_async(image):
-    return async_analyze("faces", _analyze_faces, image)
 # -----------------------------
 # Custom CSS for a High-Tech Look (White Font)
@@ -183,7 +210,7 @@ body {
 # Create Individual Interfaces for Each Analysis
 # -----------------------------
 posture_interface = gr.Interface(
-    fn=analyze_posture_async,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Posture"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
     title="Posture Analysis",
@@ -192,7 +219,7 @@ posture_interface = gr.Interface(
 )
 emotion_interface = gr.Interface(
-    fn=analyze_emotion_async,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
     title="Emotion Analysis",
@@ -201,7 +228,7 @@ emotion_interface = gr.Interface(
 )
 objects_interface = gr.Interface(
-    fn=analyze_objects_async,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture the Scene"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
     title="Object Detection",
@@ -210,7 +237,7 @@ objects_interface = gr.Interface(
 )
 faces_interface = gr.Interface(
-    fn=analyze_faces_async,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
     title="Face Detection",

 from PIL import Image
 import mediapipe as mp
 from fer import FER  # Facial emotion recognition
 # -----------------------------
+# Configuration: Adjust skip rate (lower = more frequent heavy updates)
 # -----------------------------
# Number of streamed frames between two heavy detection passes.
SKIP_RATE = 5

# -----------------------------
# Global caches for overlay info and frame counters
# -----------------------------
# Each cache keeps the last status text and a running frame counter; the
# posture/objects/faces caches additionally keep the last overlay geometry.
posture_cache = dict(landmarks=None, text="Initializing...", counter=0)
emotion_cache = dict(text="Initializing...", counter=0)
objects_cache = dict(boxes=None, text="Initializing...", counter=0)
faces_cache = dict(boxes=None, text="Initializing...", counter=0)
 # -----------------------------
 # Initialize Models and Helpers
 # -----------------------------
 # MediaPipe Pose for posture analysis
 mp_pose = mp.solutions.pose
 pose = mp_pose.Pose()
 mp_drawing = mp.solutions.drawing_utils
 # MediaPipe Face Detection for face detection
 mp_face_detection = mp.solutions.face_detection
 face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
 # Object Detection Model: Faster R-CNN (pretrained on COCO)
 object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
     weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
 )
 # Inference only: put the model in evaluation mode once at start-up.
 object_detection_model.eval()
 # Converts a PIL image to the tensor the detection model is fed with.
 obj_transform = transforms.Compose([transforms.ToTensor()])
 # Facial Emotion Detection using FER (requires TensorFlow)
 emotion_detector = FER(mtcnn=True)
 # -----------------------------
+# Fast Overlay Functions
 # -----------------------------
def draw_posture_overlay(raw_frame, landmarks, radius=4, color=(0, 255, 0)):
    """Draw every pose landmark as a small filled circle on ``raw_frame``.

    Args:
        raw_frame: Image array to draw on; OpenCV draws into it in place.
        landmarks: Iterable of ``(x, y)`` pixel coordinates.
        radius: Circle radius in pixels.
        color: Circle colour as a 3-tuple in the frame's channel order.

    Returns:
        The same ``raw_frame`` object that was passed in, now annotated.
    """
    for x, y in landmarks:
        # Coerce to built-in ints so coordinates held as NumPy scalars are
        # accepted as well; thickness -1 requests a filled circle.
        cv2.circle(raw_frame, (int(x), int(y)), radius, color, -1)
    return raw_frame
def draw_boxes_overlay(raw_frame, boxes, color, thickness=2):
    """Draw axis-aligned rectangles on ``raw_frame``.

    Args:
        raw_frame: Image array to draw on; OpenCV draws into it in place.
        boxes: Iterable of ``(x1, y1, x2, y2)`` pixel corner coordinates.
        color: Outline colour as a 3-tuple in the frame's channel order.
        thickness: Outline thickness in pixels.

    Returns:
        The same ``raw_frame`` object that was passed in, now annotated.
    """
    for x1, y1, x2, y2 in boxes:
        # Cached boxes may hold NumPy integer scalars (e.g. built from
        # ``tuple(ndarray)``); convert so OpenCV always receives plain ints.
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        cv2.rectangle(raw_frame, top_left, bottom_right, color, thickness)
    return raw_frame
+# -----------------------------
+# Heavy (Synchronous) Detection Functions
+# These functions compute the overlay info on the current frame.
+# -----------------------------
def compute_posture_overlay(image):
    """Run MediaPipe Pose on one frame and return landmark pixel positions.

    Args:
        image: RGB frame (anything ``np.array`` turns into an H x W x C array).

    Returns:
        A ``(landmarks, text)`` pair. ``landmarks`` is a list of ``(x, y)``
        integer pixel coordinates, empty when no pose was found; ``text`` is
        the matching status message.
    """
    # NOTE(review): the RGB -> BGR -> RGB round trip looks redundant for
    # 3-channel input; kept as-is so the detector sees exactly what it did.
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    h, w, _ = frame.shape
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pose_results = pose.process(frame_rgb)
    if not pose_results.pose_landmarks:
        return [], "No posture detected"
    # Scale each landmark's x/y by the frame width/height to get pixels.
    landmarks = [
        (int(lm.x * w), int(lm.y * h))
        for lm in pose_results.pose_landmarks.landmark
    ]
    return landmarks, "Posture detected"
def compute_emotion_overlay(image):
    """Return a short description of the strongest emotion in the frame.

    Only the first entry reported by the FER detector is considered. The
    result is ``"<emotion> (<score>)"`` with the score shown to two decimals,
    or ``"No face detected"`` when the detector reports nothing.
    """
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    detected = emotion_detector.detect_emotions(rgb)
    if not detected:
        return "No face detected"
    scores = detected[0]["emotions"]
    # Highest-scoring label; ties resolve to the first one in the mapping.
    top_emotion = max(scores, key=scores.get)
    score = scores[top_emotion]
    return f"{top_emotion} ({score:.2f})"
def compute_objects_overlay(image, threshold=0.8):
    """Run the Faster R-CNN detector on one frame and return confident boxes.

    Args:
        image: RGB frame (anything ``np.array`` turns into an H x W x C array).
        threshold: Detections are kept only when their score is strictly
            greater than this value.

    Returns:
        A ``(boxes, text)`` pair. ``boxes`` is a list of ``(x1, y1, x2, y2)``
        tuples of built-in ints; ``text`` is the matching status message.
    """
    frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_tensor = obj_transform(Image.fromarray(frame_rgb))
    with torch.no_grad():
        detections = object_detection_model([img_tensor])[0]
    # Filter with one boolean mask instead of iterating tensor elements, and
    # convert once with tolist() so callers get plain Python ints.
    keep = detections["scores"] > threshold
    boxes = [tuple(box) for box in detections["boxes"][keep].int().tolist()]
    text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
    return boxes, text
def compute_faces_overlay(image):
    """Run MediaPipe face detection on one frame and return pixel boxes.

    Returns a ``(boxes, text)`` pair: ``boxes`` is a list of
    ``(x1, y1, x2, y2)`` integer tuples (empty when nothing was found) and
    ``text`` is the matching status message.
    """
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    height, width, _ = bgr.shape
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    found = face_detection.process(rgb).detections
    if not found:
        return [], "No faces detected"

    boxes = []
    for hit in found:
        rel = hit.location_data.relative_bounding_box
        # Relative box values are scaled by the frame size to get pixels.
        left = int(rel.xmin * width)
        top = int(rel.ymin * height)
        right = left + int(rel.width * width)
        bottom = top + int(rel.height * height)
        boxes.append((left, top, right, bottom))
    return boxes, f"Detected {len(boxes)} face(s)"
 # -----------------------------
+# Main Analysis Functions (run every frame)
+# They update the cache every SKIP_RATE frames and always return a current frame with overlay.
 # -----------------------------
def analyze_posture_current(image):
    """Annotate the current frame with the most recent pose landmarks.

    The heavy pose detection runs only every ``SKIP_RATE``-th frame (and on
    the first frame, while no landmarks are cached); all other frames reuse
    the cached landmarks so the stream stays responsive.

    Args:
        image: Current RGB frame, or ``None`` when no frame was delivered.

    Returns:
        ``(frame, text)``: a copy of the frame with cached landmarks drawn on
        it (``None`` if no frame was given) and the posture status line.
    """
    if image is None:
        # Nothing to analyse or draw on; keep showing the last known status.
        return None, f"Posture Analysis: {posture_cache['text']}"

    posture_cache["counter"] += 1
    refresh_due = posture_cache["counter"] % SKIP_RATE == 0
    if refresh_due or posture_cache["landmarks"] is None:
        landmarks, text = compute_posture_overlay(image)
        posture_cache["landmarks"] = landmarks
        posture_cache["text"] = text

    # np.array() already yields a fresh array, so drawing on it never
    # touches the caller's frame and no second copy is needed.
    output = np.array(image)
    if posture_cache["landmarks"]:
        output = draw_posture_overlay(output, posture_cache["landmarks"])
    return output, f"Posture Analysis: {posture_cache['text']}"
def analyze_emotion_current(image):
    """Return the current frame together with the most recent emotion text.

    The heavy emotion detection runs on the first frame and then on every
    ``SKIP_RATE``-th frame; other frames reuse the cached text. Nothing is
    drawn on the frame for this analysis.

    Args:
        image: Current RGB frame, or ``None`` when no frame was delivered.

    Returns:
        ``(frame, text)``: the frame as a NumPy array (``None`` if no frame
        was given) and the emotion status line.
    """
    if image is None:
        # Nothing to analyse; keep showing the last known status.
        return None, f"Emotion Analysis: {emotion_cache['text']}"

    emotion_cache["counter"] += 1
    # The cached text starts as a placeholder string rather than None, so
    # "text is None" can never signal an empty cache; use the counter to
    # make sure the very first frame is analysed.
    first_frame = emotion_cache["counter"] == 1
    refresh_due = emotion_cache["counter"] % SKIP_RATE == 0
    if first_frame or refresh_due or emotion_cache["text"] is None:
        emotion_cache["text"] = compute_emotion_overlay(image)

    current_frame = np.array(image)
    return current_frame, f"Emotion Analysis: {emotion_cache['text']}"
def analyze_objects_current(image):
    """Annotate the current frame with the most recent object boxes.

    The heavy object detection runs only every ``SKIP_RATE``-th frame (and on
    the first frame, while no boxes are cached); all other frames reuse the
    cached boxes so the stream stays responsive.

    Args:
        image: Current RGB frame, or ``None`` when no frame was delivered.

    Returns:
        ``(frame, text)``: a copy of the frame with cached boxes drawn on it
        (``None`` if no frame was given) and the object-detection status line.
    """
    if image is None:
        # Nothing to analyse or draw on; keep showing the last known status.
        return None, f"Object Detection: {objects_cache['text']}"

    objects_cache["counter"] += 1
    refresh_due = objects_cache["counter"] % SKIP_RATE == 0
    if refresh_due or objects_cache["boxes"] is None:
        boxes, text = compute_objects_overlay(image)
        objects_cache["boxes"] = boxes
        objects_cache["text"] = text

    # np.array() already yields a fresh array, so drawing on it never
    # touches the caller's frame and no second copy is needed.
    output = np.array(image)
    if objects_cache["boxes"]:
        output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
    return output, f"Object Detection: {objects_cache['text']}"
def analyze_faces_current(image):
    """Annotate the current frame with the most recent face boxes.

    The heavy face detection runs only every ``SKIP_RATE``-th frame (and on
    the first frame, while no boxes are cached); all other frames reuse the
    cached boxes so the stream stays responsive.

    Args:
        image: Current RGB frame, or ``None`` when no frame was delivered.

    Returns:
        ``(frame, text)``: a copy of the frame with cached boxes drawn on it
        (``None`` if no frame was given) and the face-detection status line.
    """
    if image is None:
        # Nothing to analyse or draw on; keep showing the last known status.
        return None, f"Face Detection: {faces_cache['text']}"

    faces_cache["counter"] += 1
    refresh_due = faces_cache["counter"] % SKIP_RATE == 0
    if refresh_due or faces_cache["boxes"] is None:
        boxes, text = compute_faces_overlay(image)
        faces_cache["boxes"] = boxes
        faces_cache["text"] = text

    # np.array() already yields a fresh array, so drawing on it never
    # touches the caller's frame and no second copy is needed.
    output = np.array(image)
    if faces_cache["boxes"]:
        output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
    return output, f"Face Detection: {faces_cache['text']}"
 # -----------------------------
 # Custom CSS for a High-Tech Look (White Font)
 # Create Individual Interfaces for Each Analysis
 # -----------------------------
 posture_interface = gr.Interface(
+    fn=analyze_posture_current,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Posture"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Posture Analysis")],
     title="Posture Analysis",
 )
 emotion_interface = gr.Interface(
+    fn=analyze_emotion_current,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Emotion Analysis")],
     title="Emotion Analysis",
 )
 objects_interface = gr.Interface(
+    fn=analyze_objects_current,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture the Scene"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Object Detection")],
     title="Object Detection",
 )
 faces_interface = gr.Interface(
+    fn=analyze_faces_current,
     inputs=gr.Image(sources=["webcam"], streaming=True, label="Capture Your Face"),
     outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.Textbox(label="Face Detection")],
     title="Face Detection",