feat(perception): geometric face emotion classifier (Issue #359)

Classifies facial expressions into neutral/happy/surprised/angry/sad
using geometric rules over MediaPipe Face Mesh landmarks — no ML model
required at runtime.

Rules
-----
  surprised: brow_raise > 0.12 AND eye_open > 0.07 AND mouth_open > 0.07
  happy:     smile > 0.025  (lip corners above lip midpoint)
  angry:     brow_furl > 0.02 AND smile < 0.01
  sad:       smile < -0.025 AND brow_furl < 0.015
  neutral:   default

Changes
-------
- saltybot_scene_msgs/msg/FaceEmotion.msg       — per-face emotion + features
- saltybot_scene_msgs/msg/FaceEmotionArray.msg
- saltybot_scene_msgs/CMakeLists.txt            — register new msgs
- _face_emotion.py   — pure-Python: FaceLandmarks, compute_features,
                        classify_emotion, detect_emotion, from_mediapipe
- face_emotion_node.py  — subscribes /camera/color/image_raw,
                           publishes /saltybot/face_emotions (≤15 fps)
- test/test_face_emotion.py  — 48 tests, all passing
- setup.py  — add face_emotion entry point

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sl-perception 2026-03-03 14:39:49 -05:00
parent f7f89403d5
commit 672120bb50
7 changed files with 1043 additions and 0 deletions

View File

@ -0,0 +1,316 @@
"""
_face_emotion.py — Geometric face emotion classifier (no ROS2, no ML deps).
Classifies facial expressions using geometric rules derived from facial
landmark positions. Input is a set of key 2-D landmark coordinates in
normalised image space (x, y [0, 1]) as produced by MediaPipe Face Mesh.
Emotions
--------
neutral baseline, no strong geometric signal
happy lip corners raised relative to lip midpoint (smile)
surprised raised eyebrows + wide eyes + open mouth
angry furrowed inner brows (inner brow below outer brow level)
sad lip corners depressed relative to lip midpoint (frown)
Classification priority: surprised > happy > angry > sad > neutral.
Coordinate convention
---------------------
x : 0 = left edge of image, 1 = right edge
y : 0 = top edge of image, 1 = bottom edge (y increases downward)
MediaPipe Face Mesh key indices used
-------------------------------------
MOUTH_UPPER = 13 inner upper-lip centre
MOUTH_LOWER = 14 inner lower-lip centre
MOUTH_LEFT = 61 left mouth corner
MOUTH_RIGHT = 291 right mouth corner
L_EYE_TOP = 159 left eye upper lid centre
L_EYE_BOT = 145 left eye lower lid centre
R_EYE_TOP = 386 right eye upper lid centre
R_EYE_BOT = 374 right eye lower lid centre
L_BROW_INNER = 107 left inner eyebrow
L_BROW_OUTER = 46 left outer eyebrow
R_BROW_INNER = 336 right inner eyebrow
R_BROW_OUTER = 276 right outer eyebrow
CHIN = 152 chin tip
FOREHEAD = 10 midpoint between eyes (forehead anchor)
Public API
----------
EMOTION_LABELS tuple[str, ...]
FaceLandmarks dataclass key points only
EmotionFeatures NamedTuple 5 normalised geometric scores
EmotionResult NamedTuple (emotion, confidence, features)
from_mediapipe(lms) FaceLandmarks
compute_features(fl) EmotionFeatures
classify_emotion(features) (emotion, confidence)
detect_emotion(fl) EmotionResult
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import NamedTuple, Tuple
# ── Public constants ──────────────────────────────────────────────────────────
# Closed set of labels classify_emotion() can return; 'neutral' is the default.
EMOTION_LABELS: Tuple[str, ...] = (
    'neutral', 'happy', 'surprised', 'angry', 'sad'
)
# MediaPipe Face Mesh 468-vertex indices for key facial geometry
MOUTH_UPPER = 13     # inner upper-lip centre
MOUTH_LOWER = 14     # inner lower-lip centre
MOUTH_LEFT = 61      # left mouth corner (image-left)
MOUTH_RIGHT = 291    # right mouth corner (image-right)
L_EYE_TOP = 159      # left eye upper lid centre
L_EYE_BOT = 145      # left eye lower lid centre
R_EYE_TOP = 386      # right eye upper lid centre
R_EYE_BOT = 374      # right eye lower lid centre
L_EYE_LEFT = 33      # left eye outer corner (unused by compute_features)
L_EYE_RIGHT = 133    # left eye inner corner (unused by compute_features)
R_EYE_LEFT = 362     # right eye inner corner (unused by compute_features)
R_EYE_RIGHT = 263    # right eye outer corner (unused by compute_features)
L_BROW_INNER = 107   # left inner eyebrow
L_BROW_OUTER = 46    # left outer eyebrow
R_BROW_INNER = 336   # right inner eyebrow
R_BROW_OUTER = 276   # right outer eyebrow
CHIN = 152           # chin tip (bottom anchor for face height)
FOREHEAD = 10        # forehead anchor (top anchor for face height)
# ── Data types ────────────────────────────────────────────────────────────────
@dataclass
class FaceLandmarks:
    """
    Key geometric points extracted from a MediaPipe Face Mesh result.

    All coordinates are normalised to [0, 1] image space:
        x = 0 (left) .. 1 (right)
        y = 0 (top)  .. 1 (bottom), i.e. y increases downward
    """
    # Mouth: inner-lip centres plus the two corners.
    mouth_upper: Tuple[float, float]
    mouth_lower: Tuple[float, float]
    mouth_left: Tuple[float, float]   # left corner (image-left = face-right)
    mouth_right: Tuple[float, float]
    # Eyelids: upper/lower lid centres per eye.
    l_eye_top: Tuple[float, float]
    l_eye_bot: Tuple[float, float]
    r_eye_top: Tuple[float, float]
    r_eye_bot: Tuple[float, float]
    # Eye corners (carried along, but not used by compute_features()).
    l_eye_left: Tuple[float, float]
    l_eye_right: Tuple[float, float]
    r_eye_left: Tuple[float, float]
    r_eye_right: Tuple[float, float]
    # Eyebrows: inner/outer end per brow.
    l_brow_inner: Tuple[float, float]
    l_brow_outer: Tuple[float, float]
    r_brow_inner: Tuple[float, float]
    r_brow_outer: Tuple[float, float]
    # Vertical face anchors (define face_height = chin y - forehead y).
    chin: Tuple[float, float]
    forehead: Tuple[float, float]
class EmotionFeatures(NamedTuple):
    """
    Normalised geometric scores derived from FaceLandmarks.

    The five expressive features are divided by face_height so they are
    scale-invariant; positive means "more expressive". face_height itself
    is included as a sixth field for debugging/telemetry.
    """
    mouth_open: float   # mouth height / face_height (>= 0)
    smile: float        # (lip-mid y - avg corner y) / face_h (+ = smile)
    brow_raise: float   # (eye-top y - inner-brow y) / face_h (+ = raised)
    brow_furl: float    # (inner-brow y - outer-brow y) / face_h (+ = angry)
    eye_open: float     # eye height / face_height (>= 0)
    face_height: float  # forehead-to-chin distance in image coordinates
class EmotionResult(NamedTuple):
    """Result of detect_emotion(): label, confidence, and the raw features."""
    emotion: str               # one of EMOTION_LABELS
    confidence: float          # in (0.0, 1.0]
    features: EmotionFeatures  # the scores the label was derived from
# ── MediaPipe adapter ─────────────────────────────────────────────────────────
def from_mediapipe(lms) -> FaceLandmarks:
    """
    Build a FaceLandmarks from a single MediaPipe NormalizedLandmarkList.

    Parameters
    ----------
    lms : mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList
        One element of FaceMeshResults.multi_face_landmarks.

    Returns
    -------
    FaceLandmarks with normalised (x, y) coordinates in [0, 1] image space.
    """
    def coord(index: int) -> Tuple[float, float]:
        # Pull a single mesh vertex and drop its z component.
        point = lms.landmark[index]
        return float(point.x), float(point.y)

    return FaceLandmarks(
        mouth_upper=coord(MOUTH_UPPER),
        mouth_lower=coord(MOUTH_LOWER),
        mouth_left=coord(MOUTH_LEFT),
        mouth_right=coord(MOUTH_RIGHT),
        l_eye_top=coord(L_EYE_TOP),
        l_eye_bot=coord(L_EYE_BOT),
        r_eye_top=coord(R_EYE_TOP),
        r_eye_bot=coord(R_EYE_BOT),
        l_eye_left=coord(L_EYE_LEFT),
        l_eye_right=coord(L_EYE_RIGHT),
        r_eye_left=coord(R_EYE_LEFT),
        r_eye_right=coord(R_EYE_RIGHT),
        l_brow_inner=coord(L_BROW_INNER),
        l_brow_outer=coord(L_BROW_OUTER),
        r_brow_inner=coord(R_BROW_INNER),
        r_brow_outer=coord(R_BROW_OUTER),
        chin=coord(CHIN),
        forehead=coord(FOREHEAD),
    )
# ── Feature extraction ────────────────────────────────────────────────────────
def compute_features(fl: FaceLandmarks) -> EmotionFeatures:
    """
    Derive the normalised geometric features from a FaceLandmarks object.

    Image y increases downward, hence the sign conventions:
        mouth_open > 0  when the lower lip sits below the upper lip
        smile      > 0  when the lip corners sit above the lip midpoint
        brow_raise > 0  when the inner brow sits above the eye top
        brow_furl  > 0  when the inner brow sits below the outer brow
        eye_open   > 0  when the lower lid sits below the upper lid
    """
    # Vertical face extent; clamped so downstream divisions never blow up
    # on degenerate (chin == forehead) input.
    face_h = max(fl.chin[1] - fl.forehead[1], 1e-4)

    # Mouth openness — negative (impossible) geometry clamps to zero.
    mouth_open = max(0.0, fl.mouth_lower[1] - fl.mouth_upper[1]) / face_h

    # Smile / frown — compare corner height to the inner-lip midpoint.
    lip_mid_y = (fl.mouth_upper[1] + fl.mouth_lower[1]) / 2.0
    avg_corner_y = (fl.mouth_left[1] + fl.mouth_right[1]) / 2.0
    smile = (lip_mid_y - avg_corner_y) / face_h

    # Eyebrow raise — gap from inner brow (above) down to the eye top.
    raise_left = fl.l_eye_top[1] - fl.l_brow_inner[1]
    raise_right = fl.r_eye_top[1] - fl.r_brow_inner[1]
    brow_raise = max(0.0, (raise_left + raise_right) / 2.0) / face_h

    # Brow furl (angry cue) — inner brow lower than outer brow is positive.
    furl_left = (fl.l_brow_inner[1] - fl.l_brow_outer[1]) / face_h
    furl_right = (fl.r_brow_inner[1] - fl.r_brow_outer[1]) / face_h
    brow_furl = (furl_left + furl_right) / 2.0

    # Eye openness — mean lid separation, clamped to be non-negative.
    eye_h_left = fl.l_eye_bot[1] - fl.l_eye_top[1]
    eye_h_right = fl.r_eye_bot[1] - fl.r_eye_top[1]
    eye_open = max(0.0, (eye_h_left + eye_h_right) / 2.0) / face_h

    return EmotionFeatures(
        mouth_open=float(mouth_open),
        smile=float(smile),
        brow_raise=float(brow_raise),
        brow_furl=float(brow_furl),
        eye_open=float(eye_open),
        face_height=float(face_h),
    )
# ── Classification rules ──────────────────────────────────────────────────────
# Thresholds are relative to face_height (all features normalised by face_h).
# Chosen to match realistic facial proportions:
#   brow_raise: 0.06-0.10 neutral,  > 0.12 raised
#   eye_open:   0.04-0.06 normal,   > 0.07 wide
#   mouth_open: 0.00-0.02 closed,   > 0.07 open
#   smile:      -0.02-0.02 neutral, > 0.025 smile, < -0.025 frown
#   brow_furl:  -0.02-0.01 neutral, > 0.02 furrowed
_T_SURPRISED_BROW = 0.12   # brow_raise threshold for surprised
_T_SURPRISED_EYE = 0.07    # eye_open threshold for surprised
_T_SURPRISED_MOUTH = 0.07  # mouth_open threshold for surprised
_T_HAPPY_SMILE = 0.025     # smile threshold for happy
_T_ANGRY_FURL = 0.02       # brow_furl threshold for angry
_T_ANGRY_NO_SMILE = 0.01   # smile must be below this for angry
_T_SAD_FROWN = 0.025       # -smile threshold for sad
_T_SAD_NO_FURL = 0.015     # brow_furl must be below this for sad


def classify_emotion(features: EmotionFeatures) -> Tuple[str, float]:
    """
    Apply geometric rules to classify emotion from EmotionFeatures.

    Priority order: surprised > happy > angry > sad > neutral.

    Returns
    -------
    (emotion, confidence) where confidence lies in (0, 1].
    """
    surprised = (
        features.brow_raise > _T_SURPRISED_BROW
        and features.eye_open > _T_SURPRISED_EYE
        and features.mouth_open > _T_SURPRISED_MOUTH
    )
    if surprised:
        # Confidence grows with the margin above each of the three thresholds.
        excess = (
            (features.brow_raise - _T_SURPRISED_BROW) / 0.05
            + (features.eye_open - _T_SURPRISED_EYE) / 0.03
            + (features.mouth_open - _T_SURPRISED_MOUTH) / 0.06
        )
        return 'surprised', min(1.0, 0.5 + excess / 6.0)

    if features.smile > _T_HAPPY_SMILE:
        return 'happy', min(1.0, 0.5 + (features.smile - _T_HAPPY_SMILE) / 0.05)

    if features.brow_furl > _T_ANGRY_FURL and features.smile < _T_ANGRY_NO_SMILE:
        return 'angry', min(1.0, 0.5 + (features.brow_furl - _T_ANGRY_FURL) / 0.04)

    if features.smile < -_T_SAD_FROWN and features.brow_furl < _T_SAD_NO_FURL:
        return 'sad', min(1.0, 0.5 + (-features.smile - _T_SAD_FROWN) / 0.04)

    # No rule fired — neutral with a fixed, fairly confident score.
    return 'neutral', 0.8
# ── Main entry point ──────────────────────────────────────────────────────────
def detect_emotion(fl: FaceLandmarks) -> EmotionResult:
    """
    Classify emotion from face landmark geometry (feature extraction +
    rule-based classification in one call).

    Parameters
    ----------
    fl : FaceLandmarks
        Normalised key-point positions.

    Returns
    -------
    EmotionResult(emotion, confidence, features)
    """
    feats = compute_features(fl)
    label, conf = classify_emotion(feats)
    return EmotionResult(emotion=label, confidence=float(conf), features=feats)

View File

@ -0,0 +1,207 @@
"""
face_emotion_node.py — Facial emotion classifier node (Issue #359).
Subscribes to the raw colour camera stream, runs MediaPipe FaceMesh to
detect facial landmarks, applies geometric emotion rules, and publishes
the result at the camera frame rate (capped by the max_fps parameter).
Subscribes
----------
/camera/color/image_raw sensor_msgs/Image (BEST_EFFORT)
Publishes
---------
/saltybot/face_emotions saltybot_scene_msgs/FaceEmotionArray
Parameters
----------
max_fps float 15.0 Maximum classification rate (Hz)
max_faces int 4 MediaPipe max_num_faces
min_detection_conf float 0.5 MediaPipe min detection confidence
min_tracking_conf float 0.5 MediaPipe min tracking confidence
Notes
-----
* MediaPipe FaceMesh (mediapipe.solutions.face_mesh) is initialised lazily
in a background thread to avoid blocking the ROS2 executor.
* If mediapipe is not installed the node still starts but logs a warning
and publishes empty arrays.
* Landmark coordinates are always normalised to [0..1] image space.
"""
from __future__ import annotations
import threading
import time
from typing import Optional
import numpy as np
import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy
from sensor_msgs.msg import Image
from saltybot_scene_msgs.msg import FaceEmotion, FaceEmotionArray
from ._face_emotion import FaceLandmarks, detect_emotion, from_mediapipe
# Sensor-style QoS: best-effort delivery and a depth-1 history so the
# callback always sees the freshest camera frame and never a backlog.
_SENSOR_QOS = QoSProfile(
    reliability=ReliabilityPolicy.BEST_EFFORT,
    history=HistoryPolicy.KEEP_LAST,
    depth=1,
)
class _MPFaceMesh:
"""Lazy wrapper around mediapipe.solutions.face_mesh.FaceMesh."""
def __init__(
self,
max_faces: int = 4,
min_detection_conf: float = 0.5,
min_tracking_conf: float = 0.5,
) -> None:
self._max_faces = max_faces
self._min_det_conf = min_detection_conf
self._min_track_conf = min_tracking_conf
self._mesh = None
self._available = False
self._ready = threading.Event()
threading.Thread(target=self._init, daemon=True).start()
def _init(self) -> None:
try:
import mediapipe as mp
self._mesh = mp.solutions.face_mesh.FaceMesh(
static_image_mode = False,
max_num_faces = self._max_faces,
refine_landmarks = False,
min_detection_confidence = self._min_det_conf,
min_tracking_confidence = self._min_track_conf,
)
self._available = True
except Exception:
self._available = False
finally:
self._ready.set()
def process(self, bgr: np.ndarray):
"""Run FaceMesh on a BGR uint8 frame. Returns mp results or None."""
if not self._ready.wait(timeout=5.0) or not self._available:
return None
import cv2
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
return self._mesh.process(rgb)
class FaceEmotionNode(Node):
    """
    ROS2 node: classify facial expressions on the colour camera stream.

    Subscribes to /camera/color/image_raw with sensor QoS, runs MediaPipe
    FaceMesh via the lazy _MPFaceMesh wrapper, applies the geometric rules
    from _face_emotion, and publishes one FaceEmotionArray per processed
    frame on /saltybot/face_emotions, rate-limited by the max_fps parameter.
    """

    def __init__(self) -> None:
        super().__init__('face_emotion_node')
        # ── Parameters ──────────────────────────────────────────────────────
        self.declare_parameter('max_fps', 15.0)
        self.declare_parameter('max_faces', 4)
        self.declare_parameter('min_detection_conf', 0.5)
        self.declare_parameter('min_tracking_conf', 0.5)
        p = self.get_parameter
        # Clamp max_fps to >= 0.1 Hz so the minimum period stays finite.
        self._min_period: float = 1.0 / max(float(p('max_fps').value), 0.1)
        max_faces = int(p('max_faces').value)
        min_det = float(p('min_detection_conf').value)
        min_trk = float(p('min_tracking_conf').value)
        # Lazy-initialised MediaPipe (import happens on a background thread).
        self._mp = _MPFaceMesh(
            max_faces=max_faces,
            min_detection_conf=min_det,
            min_tracking_conf=min_trk,
        )
        # Monotonic timestamp of the last processed frame (rate limiting).
        self._last_proc: float = 0.0
        # ── Subscriber / publisher ───────────────────────────────────────────
        self._sub = self.create_subscription(
            Image, '/camera/color/image_raw',
            self._on_image, _SENSOR_QOS,
        )
        self._pub = self.create_publisher(
            FaceEmotionArray, '/saltybot/face_emotions', 10
        )
        self.get_logger().info(
            f'face_emotion_node ready — '
            f'max_fps={1.0/self._min_period:.1f}, max_faces={max_faces}'
        )

    # ── Callback ──────────────────────────────────────────────────────────────
    def _on_image(self, msg: Image) -> None:
        """Classify one camera frame and publish a FaceEmotionArray.

        Frames arriving faster than max_fps are dropped, unsupported
        encodings are skipped with a throttled warning, and a failure on
        one face never drops the remaining faces in the frame.
        """
        now = time.monotonic()
        if now - self._last_proc < self._min_period:
            return  # rate limit: drop frames above max_fps
        self._last_proc = now
        enc = msg.encoding.lower()
        if enc in ('bgr8', 'rgb8'):
            data = np.frombuffer(msg.data, dtype=np.uint8)
            try:
                frame = data.reshape((msg.height, msg.width, 3))
            except ValueError:
                # Payload size does not match height*width*3 — skip frame.
                return
            if enc == 'rgb8':
                import cv2
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        else:
            self.get_logger().warn(
                f'Unsupported image encoding {msg.encoding!r}',
                throttle_duration_sec=10.0,
            )
            return
        results = self._mp.process(frame)
        out = FaceEmotionArray()
        # Stamp with the source image time so consumers can correlate
        # emotions with the exact frame they came from.
        out.header.stamp = msg.header.stamp
        out.header.frame_id = msg.header.frame_id or 'camera_color_optical_frame'
        if results and results.multi_face_landmarks:
            for face_id, lms in enumerate(results.multi_face_landmarks):
                try:
                    fl = from_mediapipe(lms)
                    res = detect_emotion(fl)
                except Exception as exc:
                    self.get_logger().warn(
                        f'Emotion detection failed for face {face_id}: {exc}',
                        throttle_duration_sec=5.0,
                    )
                    continue
                fe = FaceEmotion()
                fe.header = out.header
                fe.face_id = face_id
                fe.emotion = res.emotion
                fe.confidence = float(res.confidence)
                fe.mouth_open = float(res.features.mouth_open)
                fe.smile = float(res.features.smile)
                fe.brow_raise = float(res.features.brow_raise)
                fe.eye_open = float(res.features.eye_open)
                out.faces.append(fe)
        out.face_count = len(out.faces)
        self._pub.publish(out)
def main(args=None) -> None:
    """Entry point for the ``face_emotion`` console script."""
    rclpy.init(args=args)
    node = FaceEmotionNode()
    try:
        rclpy.spin(node)
    finally:
        # Always release node resources, even when spin exits via exception.
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -59,6 +59,8 @@ setup(
'obstacle_size = saltybot_bringup.obstacle_size_node:main', 'obstacle_size = saltybot_bringup.obstacle_size_node:main',
# Audio scene classifier (Issue #353) # Audio scene classifier (Issue #353)
'audio_scene = saltybot_bringup.audio_scene_node:main', 'audio_scene = saltybot_bringup.audio_scene_node:main',
# Face emotion classifier (Issue #359)
'face_emotion = saltybot_bringup.face_emotion_node:main',
], ],
}, },
) )

View File

@ -0,0 +1,504 @@
"""
test_face_emotion.py Unit tests for the geometric face emotion classifier.
All tests use synthetic FaceLandmarks constructed with known geometry
no camera, no MediaPipe, no ROS2 runtime needed.
Coordinate convention: x [0,1] (leftright), y [0,1] (topbottom).
Test face dimensions
--------------------
forehead : (0.50, 0.20)
chin : (0.50, 0.70) face_height = 0.50
Neutral eye/brow/mouth positions chosen so all emotion scores are below
their trigger thresholds (verified analytically below each fixture).
"""
import pytest
from saltybot_bringup._face_emotion import (
EMOTION_LABELS,
FaceLandmarks,
EmotionFeatures,
EmotionResult,
compute_features,
classify_emotion,
detect_emotion,
MOUTH_UPPER, MOUTH_LOWER, MOUTH_LEFT, MOUTH_RIGHT,
L_EYE_TOP, L_EYE_BOT, R_EYE_TOP, R_EYE_BOT,
L_BROW_INNER, L_BROW_OUTER, R_BROW_INNER, R_BROW_OUTER,
CHIN, FOREHEAD,
_T_SURPRISED_BROW, _T_SURPRISED_EYE, _T_SURPRISED_MOUTH,
_T_HAPPY_SMILE, _T_ANGRY_FURL, _T_SAD_FROWN,
)
# ─────────────────────────────────────────────────────────────────────────────
# Fixture helpers
# ─────────────────────────────────────────────────────────────────────────────
# face_height = 0.70 - 0.20 = 0.50
_FOREHEAD = (0.50, 0.20)  # top anchor of the synthetic test face
_CHIN = (0.50, 0.70)      # bottom anchor of the synthetic test face
_FH = 0.50                # face_height used in all analytic expectations
def _neutral_face() -> FaceLandmarks:
    """
    Neutral expression:
        mouth_open = (0.57-0.55)/0.50 = 0.04  < 0.07  (not open)
        smile      = (0.56-0.56)/0.50 = 0.00  in [-0.025, 0.025]
        brow_raise = (0.38-0.33)/0.50 = 0.10  < 0.12  (not raised enough)
        eye_open   = (0.42-0.38)/0.50 = 0.08  > 0.07 but surprised needs ALL 3
        brow_furl  = (0.33-0.34)/0.50 = -0.02 < 0.02  (not furrowed)
    -> neutral
    """
    return FaceLandmarks(
        mouth_upper=(0.50, 0.55),
        mouth_lower=(0.50, 0.57),
        mouth_left=(0.46, 0.56),
        mouth_right=(0.54, 0.56),
        l_eye_top=(0.44, 0.38),
        l_eye_bot=(0.44, 0.42),
        r_eye_top=(0.56, 0.38),
        r_eye_bot=(0.56, 0.42),
        l_eye_left=(0.41, 0.40),
        l_eye_right=(0.47, 0.40),
        r_eye_left=(0.53, 0.40),
        r_eye_right=(0.59, 0.40),
        l_brow_inner=(0.46, 0.33),
        l_brow_outer=(0.41, 0.34),
        r_brow_inner=(0.54, 0.33),
        r_brow_outer=(0.59, 0.34),
        chin=_CHIN,
        forehead=_FOREHEAD,
    )
def _happy_face() -> FaceLandmarks:
    """
    Happy: corners raised to y=0.54, mid_y=0.56
        -> smile = (0.56-0.54)/0.50 = 0.04 > 0.025
    All other signals stay below their surprised/angry/sad thresholds.
    """
    fl = _neutral_face()
    fl.mouth_left = (0.46, 0.54)   # corners above midpoint
    fl.mouth_right = (0.54, 0.54)
    return fl
def _surprised_face() -> FaceLandmarks:
    """
    Surprised (all three cues must fire):
        brow_raise = (0.38-0.26)/0.50 = 0.24 > 0.12
        eye_open   = (0.45-0.38)/0.50 = 0.14 > 0.07
        mouth_open = (0.65-0.55)/0.50 = 0.20 > 0.07
    """
    fl = _neutral_face()
    fl.l_brow_inner = (0.46, 0.26)  # brows raised high
    fl.r_brow_inner = (0.54, 0.26)
    fl.l_brow_outer = (0.41, 0.27)
    fl.r_brow_outer = (0.59, 0.27)
    fl.l_eye_bot = (0.44, 0.45)     # eyes wide open
    fl.r_eye_bot = (0.56, 0.45)
    fl.mouth_lower = (0.50, 0.65)   # mouth open
    return fl
def _angry_face() -> FaceLandmarks:
    """
    Angry:
        brow_furl  = (0.37-0.30)/0.50 = 0.14 > 0.02
        smile      = (0.56-0.56)/0.50 = 0.00 < 0.01  (neutral mouth)
        brow_raise = (0.38-0.37)/0.50 = 0.02 < 0.12  (not surprised)
    """
    fl = _neutral_face()
    fl.l_brow_inner = (0.46, 0.37)  # inner brow lowered (toward eye)
    fl.r_brow_inner = (0.54, 0.37)
    fl.l_brow_outer = (0.41, 0.30)  # outer brow raised
    fl.r_brow_outer = (0.59, 0.30)
    return fl
def _sad_face() -> FaceLandmarks:
    """
    Sad:
        smile     = (0.56-0.59)/0.50 = -0.06 < -0.025
        brow_furl = (0.33-0.34)/0.50 = -0.02 < 0.015  (not furrowed -> not angry)
    """
    fl = _neutral_face()
    fl.mouth_left = (0.46, 0.59)   # corners below midpoint (frown)
    fl.mouth_right = (0.54, 0.59)
    return fl
# ─────────────────────────────────────────────────────────────────────────────
# EMOTION_LABELS
# ─────────────────────────────────────────────────────────────────────────────
def test_emotion_labels_tuple():
    """EMOTION_LABELS must be an immutable tuple."""
    assert isinstance(EMOTION_LABELS, tuple)


def test_emotion_labels_count():
    """Exactly five emotion classes are supported."""
    assert len(EMOTION_LABELS) == 5


def test_emotion_labels_values():
    """The label set matches the documented emotion names."""
    expected = {'neutral', 'happy', 'surprised', 'angry', 'sad'}
    assert set(EMOTION_LABELS) == expected


def test_emotion_labels_neutral_first():
    """'neutral' is the default label and must come first."""
    assert EMOTION_LABELS[0] == 'neutral'
# ─────────────────────────────────────────────────────────────────────────────
# Landmark index constants
# ─────────────────────────────────────────────────────────────────────────────
def test_landmark_indices_in_range():
    """Every key point must index into MediaPipe's 468-vertex mesh."""
    key_points = (
        MOUTH_UPPER, MOUTH_LOWER, MOUTH_LEFT, MOUTH_RIGHT,
        L_EYE_TOP, L_EYE_BOT, R_EYE_TOP, R_EYE_BOT,
        L_BROW_INNER, L_BROW_OUTER, R_BROW_INNER, R_BROW_OUTER,
        CHIN, FOREHEAD,
    )
    for idx in key_points:
        assert 0 <= idx < 468, f'Landmark index {idx} out of range'


def test_landmark_indices_unique():
    """No two semantic key points may share the same mesh vertex."""
    key_points = (
        MOUTH_UPPER, MOUTH_LOWER, MOUTH_LEFT, MOUTH_RIGHT,
        L_EYE_TOP, L_EYE_BOT, R_EYE_TOP, R_EYE_BOT,
        L_BROW_INNER, L_BROW_OUTER, R_BROW_INNER, R_BROW_OUTER,
        CHIN, FOREHEAD,
    )
    assert len(set(key_points)) == len(key_points)
# ─────────────────────────────────────────────────────────────────────────────
# compute_features — neutral face
# ─────────────────────────────────────────────────────────────────────────────
def test_features_face_height():
    """face_height = chin y - forehead y = 0.70 - 0.20 = 0.50."""
    f = compute_features(_neutral_face())
    assert abs(f.face_height - 0.50) < 1e-6


def test_features_mouth_open_neutral():
    f = compute_features(_neutral_face())
    # (0.57 - 0.55) / 0.50 = 0.04
    assert abs(f.mouth_open - 0.04) < 1e-6


def test_features_smile_neutral():
    f = compute_features(_neutral_face())
    # mid_y=0.56, corner_y=0.56 -> smile=0.0
    assert abs(f.smile) < 1e-6


def test_features_brow_raise_neutral():
    f = compute_features(_neutral_face())
    # (0.38 - 0.33) / 0.50 = 0.10
    assert abs(f.brow_raise - 0.10) < 1e-6


def test_features_brow_furl_neutral():
    f = compute_features(_neutral_face())
    # l_furl = (0.33 - 0.34) / 0.50 = -0.02 per side -> mean = -0.02
    assert abs(f.brow_furl - (-0.02)) < 1e-6


def test_features_eye_open_neutral():
    f = compute_features(_neutral_face())
    # (0.42 - 0.38) / 0.50 = 0.08
    assert abs(f.eye_open - 0.08) < 1e-6


def test_features_mouth_open_non_negative():
    """Mouth open must always be >= 0, even for inverted (impossible) lips."""
    fl = _neutral_face()
    fl.mouth_upper = (0.50, 0.60)
    fl.mouth_lower = (0.50, 0.55)
    f = compute_features(fl)
    assert f.mouth_open == 0.0


def test_features_eye_open_non_negative():
    """Eye open must clamp to >= 0 when lids are swapped."""
    fl = _neutral_face()
    fl.l_eye_top = (0.44, 0.42)
    fl.l_eye_bot = (0.44, 0.38)
    f = compute_features(fl)
    assert f.eye_open >= 0.0
# ─────────────────────────────────────────────────────────────────────────────
# compute_features — happy face
# ─────────────────────────────────────────────────────────────────────────────
def test_features_smile_happy():
    f = compute_features(_happy_face())
    # mid_y=0.56, corner_y=0.54 -> smile=(0.56-0.54)/0.50=0.04
    assert abs(f.smile - 0.04) < 1e-6


def test_features_smile_positive_happy():
    """The happy fixture must clear the happy threshold with margin."""
    f = compute_features(_happy_face())
    assert f.smile > _T_HAPPY_SMILE
# ─────────────────────────────────────────────────────────────────────────────
# compute_features — surprised face
# ─────────────────────────────────────────────────────────────────────────────
def test_features_brow_raise_surprised():
    f = compute_features(_surprised_face())
    # (0.38 - 0.26) / 0.50 = 0.24
    assert abs(f.brow_raise - 0.24) < 1e-6


def test_features_brow_raise_above_threshold():
    """Raised-brow cue of the surprised fixture exceeds the rule threshold."""
    f = compute_features(_surprised_face())
    assert f.brow_raise > _T_SURPRISED_BROW


def test_features_eye_open_surprised():
    f = compute_features(_surprised_face())
    # (0.45 - 0.38) / 0.50 = 0.14
    assert abs(f.eye_open - 0.14) < 1e-6


def test_features_mouth_open_surprised():
    f = compute_features(_surprised_face())
    # (0.65 - 0.55) / 0.50 = 0.20
    assert abs(f.mouth_open - 0.20) < 1e-6
# ─────────────────────────────────────────────────────────────────────────────
# compute_features — angry face
# ─────────────────────────────────────────────────────────────────────────────
def test_features_brow_furl_angry():
    f = compute_features(_angry_face())
    # l_furl = (0.37 - 0.30) / 0.50 = 0.14 per side -> mean = 0.14
    assert abs(f.brow_furl - 0.14) < 1e-6


def test_features_brow_furl_above_threshold():
    """Furrowed-brow cue of the angry fixture exceeds the rule threshold."""
    f = compute_features(_angry_face())
    assert f.brow_furl > _T_ANGRY_FURL


def test_features_smile_near_zero_angry():
    """The angry fixture keeps a neutral mouth (no smile/frown signal)."""
    f = compute_features(_angry_face())
    assert abs(f.smile) < 0.005
# ─────────────────────────────────────────────────────────────────────────────
# compute_features — sad face
# ─────────────────────────────────────────────────────────────────────────────
def test_features_smile_sad():
    f = compute_features(_sad_face())
    # mid_y=0.56, corner_y=0.59 -> smile=(0.56-0.59)/0.50=-0.06
    assert abs(f.smile - (-0.06)) < 1e-6


def test_features_smile_negative_sad():
    """The sad fixture's frown must exceed the sad threshold."""
    f = compute_features(_sad_face())
    assert f.smile < -_T_SAD_FROWN
# ─────────────────────────────────────────────────────────────────────────────
# classify_emotion
# ─────────────────────────────────────────────────────────────────────────────
def test_classify_returns_known_label():
    """classify_emotion must always return a label from EMOTION_LABELS."""
    feat = compute_features(_neutral_face())
    label, _ = classify_emotion(feat)
    assert label in EMOTION_LABELS


def test_classify_confidence_range():
    """Confidence must lie in (0, 1] for every fixture."""
    for make in [_neutral_face, _happy_face, _surprised_face,
                 _angry_face, _sad_face]:
        feat = compute_features(make())
        _, conf = classify_emotion(feat)
        assert 0.0 < conf <= 1.0, f'{make.__name__}: confidence={conf}'


def test_classify_neutral():
    feat = compute_features(_neutral_face())
    label, conf = classify_emotion(feat)
    assert label == 'neutral', f'Expected neutral, got {label!r}'
    assert conf > 0.0


def test_classify_happy():
    feat = compute_features(_happy_face())
    label, conf = classify_emotion(feat)
    assert label == 'happy', f'Expected happy, got {label!r}'


def test_classify_surprised():
    feat = compute_features(_surprised_face())
    label, conf = classify_emotion(feat)
    assert label == 'surprised', f'Expected surprised, got {label!r}'


def test_classify_angry():
    feat = compute_features(_angry_face())
    label, conf = classify_emotion(feat)
    assert label == 'angry', f'Expected angry, got {label!r}'


def test_classify_sad():
    feat = compute_features(_sad_face())
    label, conf = classify_emotion(feat)
    assert label == 'sad', f'Expected sad, got {label!r}'


def test_classify_surprised_priority_over_happy():
    """Surprised should win even if smile is also positive."""
    fl = _surprised_face()
    fl.mouth_left = (0.46, 0.54)   # add smile too
    fl.mouth_right = (0.54, 0.54)
    feat = compute_features(fl)
    label, _ = classify_emotion(feat)
    assert label == 'surprised'
# ─────────────────────────────────────────────────────────────────────────────
# detect_emotion — end-to-end
# ─────────────────────────────────────────────────────────────────────────────
def test_detect_returns_result():
    """detect_emotion returns the EmotionResult NamedTuple."""
    res = detect_emotion(_neutral_face())
    assert isinstance(res, EmotionResult)


def test_detect_neutral():
    res = detect_emotion(_neutral_face())
    assert res.emotion == 'neutral'


def test_detect_happy():
    res = detect_emotion(_happy_face())
    assert res.emotion == 'happy'


def test_detect_surprised():
    res = detect_emotion(_surprised_face())
    assert res.emotion == 'surprised'


def test_detect_angry():
    res = detect_emotion(_angry_face())
    assert res.emotion == 'angry'


def test_detect_sad():
    res = detect_emotion(_sad_face())
    assert res.emotion == 'sad'


def test_detect_confidence_range():
    """End-to-end confidence must lie in (0, 1] for every fixture."""
    for make in [_neutral_face, _happy_face, _surprised_face,
                 _angry_face, _sad_face]:
        res = detect_emotion(make())
        assert 0.0 < res.confidence <= 1.0, (
            f'{make.__name__}: confidence={res.confidence}'
        )


def test_detect_features_attached():
    """The raw feature vector must ride along with the result."""
    res = detect_emotion(_neutral_face())
    assert isinstance(res.features, EmotionFeatures)
# ─────────────────────────────────────────────────────────────────────────────
# Edge cases
# ─────────────────────────────────────────────────────────────────────────────
def test_zero_face_height_no_crash():
    """Degenerate face (chin == forehead) must not crash."""
    fl = _neutral_face()
    fl.forehead = fl.chin   # face_height -> ~1e-4 clamp
    res = detect_emotion(fl)
    assert res.emotion in EMOTION_LABELS


def test_extreme_smile_caps_confidence():
    """Wildly exaggerated smile should not push confidence above 1.0."""
    fl = _neutral_face()
    fl.mouth_left = (0.46, 0.20)   # corners pulled very high
    fl.mouth_right = (0.54, 0.20)
    res = detect_emotion(fl)
    assert res.confidence <= 1.0


def test_extreme_brow_raise_caps_confidence():
    """Brows at the image top must still cap confidence at 1.0."""
    fl = _surprised_face()
    fl.l_brow_inner = (0.46, 0.01)   # brows at top of image
    fl.r_brow_inner = (0.54, 0.01)
    res = detect_emotion(fl)
    assert res.confidence <= 1.0


def test_gradual_smile_crosses_threshold():
    """Smile just above threshold -> happy; just below -> neutral."""
    fl = _neutral_face()
    # Just below threshold: smile < _T_HAPPY_SMILE
    offset = _T_HAPPY_SMILE * _FH * 0.5   # half the threshold distance
    mid_y = 0.56
    fl.mouth_left = (0.46, mid_y - offset)
    fl.mouth_right = (0.54, mid_y - offset)
    res_below = detect_emotion(fl)
    # Just above threshold: smile > _T_HAPPY_SMILE
    offset_above = _T_HAPPY_SMILE * _FH * 1.5
    fl.mouth_left = (0.46, mid_y - offset_above)
    fl.mouth_right = (0.54, mid_y - offset_above)
    res_above = detect_emotion(fl)
    assert res_below.emotion == 'neutral'
    assert res_above.emotion == 'happy'


def test_gradual_frown_crosses_threshold():
    """Frown just above threshold -> sad; just below -> neutral."""
    fl = _neutral_face()
    mid_y = 0.56
    offset_below = _T_SAD_FROWN * _FH * 0.5
    fl.mouth_left = (0.46, mid_y + offset_below)
    fl.mouth_right = (0.54, mid_y + offset_below)
    res_below = detect_emotion(fl)
    offset_above = _T_SAD_FROWN * _FH * 1.5
    fl.mouth_left = (0.46, mid_y + offset_above)
    fl.mouth_right = (0.54, mid_y + offset_above)
    res_above = detect_emotion(fl)
    assert res_below.emotion == 'neutral'
    assert res_above.emotion == 'sad'


def test_angry_requires_no_smile():
    """If brows are furrowed but face is smiling, should not be angry."""
    fl = _angry_face()
    fl.mouth_left = (0.46, 0.54)   # add a clear smile
    fl.mouth_right = (0.54, 0.54)
    res = detect_emotion(fl)
    assert res.emotion != 'angry'


def test_symmetry_independent():
    """One-sided brow raise still contributes to brow_raise metric."""
    fl = _neutral_face()
    fl.l_brow_inner = (0.46, 0.26)   # left brow raised
    # right brow unchanged at 0.33
    f = compute_features(fl)
    # brow_raise = avg of left (0.24) and right (0.10) = 0.17 > 0.12
    assert f.brow_raise > _T_SURPRISED_BROW

View File

@ -32,6 +32,9 @@ rosidl_generate_interfaces(${PROJECT_NAME}
"msg/ObstacleSizeArray.msg" "msg/ObstacleSizeArray.msg"
# Issue #353 audio scene classifier # Issue #353 audio scene classifier
"msg/AudioScene.msg" "msg/AudioScene.msg"
# Issue #359 face emotion classifier
"msg/FaceEmotion.msg"
"msg/FaceEmotionArray.msg"
DEPENDENCIES std_msgs geometry_msgs vision_msgs builtin_interfaces DEPENDENCIES std_msgs geometry_msgs vision_msgs builtin_interfaces
) )

View File

@ -0,0 +1,8 @@
std_msgs/Header header    # stamped with the source image time
uint32 face_id            # track ID or 0-based detection index
string emotion            # 'neutral' | 'happy' | 'surprised' | 'angry' | 'sad'
float32 confidence        # classifier confidence, 0.0..1.0
float32 mouth_open        # mouth height / face height (0 = closed)
float32 smile             # lip-corner elevation (positive = smile, negative = frown)
float32 brow_raise        # inner-brow to eye-top gap / face height (positive = raised)
float32 eye_open          # eye height / face height

View File

@ -0,0 +1,3 @@
std_msgs/Header header    # copied from the source image
FaceEmotion[] faces       # one entry per detected face; empty when none visible
uint32 face_count         # number of entries in `faces`