Merge pull request 'feat(social): hand gesture pointing direction node (Issue #221)' (#226) from sl-perception/issue-221-pointing into main
Some checks failed
Some checks failed
This commit is contained in:
commit
213d7fe13d
@ -0,0 +1,180 @@
|
|||||||
|
"""
|
||||||
|
_pointing_ray.py — 3-D pointing ray computation from hand landmarks + depth (no ROS2 deps).
|
||||||
|
|
||||||
|
Given MediaPipe Hands 21-landmark output, a depth image (float32, metres),
|
||||||
|
and camera intrinsic parameters, computes:
|
||||||
|
- 3-D position of the INDEX_MCP (knuckle) — ray origin
|
||||||
|
- 3-D direction (unit vector) INDEX_MCP → INDEX_TIP
|
||||||
|
- Estimated pointing target (origin + direction * range)
|
||||||
|
|
||||||
|
Coordinate frame: camera_color_optical_frame (standard ROS optical frame,
|
||||||
|
+X right, +Y down, +Z into the scene).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
# MediaPipe Hands landmark indices (matches gesture_classifier.py)
_WRIST = 0       # hand root (currently unused here; kept for parity with gesture_classifier)
_INDEX_MCP = 5   # index-finger knuckle — used as the pointing-ray origin
_INDEX_TIP = 8   # index fingertip — used to form the ray direction
|
||||||
|
|
||||||
|
|
||||||
|
def unproject(
    u: float, v: float, depth_m: float,
    fx: float, fy: float, cx: float, cy: float,
) -> Tuple[float, float, float]:
    """
    Back-project pixel (u, v) at depth ``depth_m`` into the camera frame.

    Standard pinhole model: lateral offsets scale with depth over focal
    length, and Z is the depth itself. Frame is camera_color_optical_frame
    (+X right, +Y down, +Z into the scene).

    Parameters
    ----------
    u, v : pixel coordinates (float, origin top-left)
    depth_m : depth in metres
    fx, fy : focal lengths (pixels)
    cx, cy : principal point (pixels)

    Returns
    -------
    (X, Y, Z) in metres, camera_color_optical_frame
    """
    return (
        (u - cx) * depth_m / fx,
        (v - cy) * depth_m / fy,
        depth_m,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def sample_depth(
    depth_img,
    u: float,
    v: float,
    window: int = 5,
    d_min: float = 0.1,
    d_max: float = 8.0,
) -> float:
    """
    Median of the valid depths in a (window × window) patch centred at (u, v).

    Values outside the open interval (d_min, d_max) are treated as invalid
    (zeros from missing depth, far-range noise). The patch is clamped to the
    image bounds, so sampling at an edge pixel is safe.

    Parameters
    ----------
    depth_img : np.ndarray (H, W) float32, depth in metres
    u, v : centre pixel (float)
    window : patch side length (pixels)
    d_min : minimum valid depth (m), exclusive
    d_max : maximum valid depth (m), exclusive

    Returns
    -------
    float : median depth, or NaN if no valid samples in the patch
    """
    import numpy as np  # local import keeps this helper free of hard top-level deps

    rows, cols = depth_img.shape[:2]
    half = window // 2
    uc = int(round(u))
    vc = int(round(v))

    # Clamp patch extents so edge/corner samples never index out of bounds.
    col_lo = max(0, uc - half)
    col_hi = min(cols, uc + half + 1)
    row_lo = max(0, vc - half)
    row_hi = min(rows, vc + half + 1)

    patch = depth_img[row_lo:row_hi, col_lo:col_hi]
    valid = patch[(patch > d_min) & (patch < d_max)]
    return float(np.median(valid)) if valid.size else float('nan')
|
||||||
|
|
||||||
|
|
||||||
|
def compute_pointing_ray(
    landmarks: List[Tuple[float, float, float]],
    depth_img,
    fx: float,
    fy: float,
    cx: float,
    cy: float,
    img_w: int,
    img_h: int,
    ref_distance: float = 2.0,
    depth_window: int = 5,
) -> Optional[dict]:
    """
    Compute a 3-D pointing ray from MediaPipe Hands landmarks and a depth image.

    The ray runs from INDEX_MCP (knuckle) toward INDEX_TIP, both unprojected
    into camera_color_optical_frame using median-sampled depth, and is
    extended by ``ref_distance`` metres to form an estimated target.

    Parameters
    ----------
    landmarks : list of 21 (nx, ny, nz) normalised MediaPipe landmarks
                (nx ∈ [0,1] left→right, ny ∈ [0,1] top→bottom)
    depth_img : np.ndarray (H, W) float32, depth in metres
    fx, fy : focal lengths (pixels)
    cx, cy : principal point (pixels)
    img_w, img_h : image dimensions (pixels)
    ref_distance : range along the ray used for the target estimate (m)
    depth_window : window size for depth median sampling (pixels)

    Returns
    -------
    dict with keys:
        origin_3d : (X, Y, Z) INDEX_MCP in camera frame (m)
        direction_3d : (dx, dy, dz) unit vector MCP → TIP
        target_3d : (X, Y, Z) estimated pointing target (m)
        range_m : distance from origin to target (m)
        mcp_uv : (u, v) pixel coords of INDEX_MCP
        tip_uv : (u, v) pixel coords of INDEX_TIP
    or None if the ray cannot be computed (both depths invalid).
    """
    # Normalised landmark coords → pixel coords for knuckle and fingertip.
    mcp_nx, mcp_ny = landmarks[_INDEX_MCP][0], landmarks[_INDEX_MCP][1]
    tip_nx, tip_ny = landmarks[_INDEX_TIP][0], landmarks[_INDEX_TIP][1]
    mcp_u = mcp_nx * img_w
    mcp_v = mcp_ny * img_h
    tip_u = tip_nx * img_w
    tip_v = tip_ny * img_h

    d_mcp = sample_depth(depth_img, mcp_u, mcp_v, depth_window)
    d_tip = sample_depth(depth_img, tip_u, tip_v, depth_window)

    mcp_ok = not math.isnan(d_mcp)
    tip_ok = not math.isnan(d_tip)

    # Bail out only when neither landmark has usable depth.
    if not mcp_ok and not tip_ok:
        return None

    # Substitute the single valid measurement for the missing one.
    if not mcp_ok:
        d_mcp = d_tip
    if not tip_ok:
        d_tip = d_mcp

    mcp_3d = unproject(mcp_u, mcp_v, d_mcp, fx, fy, cx, cy)
    tip_3d = unproject(tip_u, tip_v, d_tip, fx, fy, cx, cy)

    # Ray direction: MCP → TIP.
    vec = [tip_3d[i] - mcp_3d[i] for i in range(3)]
    length = math.sqrt(vec[0] ** 2 + vec[1] ** 2 + vec[2] ** 2)

    if length < 1e-4:
        # Degenerate: MCP and TIP collapse to the same 3-D point.
        # Fall back to the normalised image-plane offset with +Z forward.
        vec = [
            (tip_u - mcp_u) / (img_w or 1),
            (tip_v - mcp_v) / (img_h or 1),
            1.0,
        ]
        length = math.sqrt(vec[0] ** 2 + vec[1] ** 2 + vec[2] ** 2) or 1.0

    dx = vec[0] / length
    dy = vec[1] / length
    dz = vec[2] / length

    range_m = ref_distance
    target_3d = (
        mcp_3d[0] + dx * range_m,
        mcp_3d[1] + dy * range_m,
        mcp_3d[2] + dz * range_m,
    )

    return {
        'origin_3d': mcp_3d,
        'direction_3d': (dx, dy, dz),
        'target_3d': target_3d,
        'range_m': range_m,
        'mcp_uv': (mcp_u, mcp_v),
        'tip_uv': (tip_u, tip_v),
    }
|
||||||
@ -0,0 +1,264 @@
|
|||||||
|
"""
|
||||||
|
pointing_node.py — Hand gesture pointing direction (Issue #221).
|
||||||
|
|
||||||
|
Converts a 'point' gesture into a precise 3-D ray using MediaPipe Hands
|
||||||
|
re-run on the D435i colour stream and the registered depth image.
|
||||||
|
|
||||||
|
Subscribes:
|
||||||
|
/social/gestures saltybot_social_msgs/GestureArray
|
||||||
|
/camera/color/image_raw sensor_msgs/Image (D435i colour)
|
||||||
|
/camera/depth/image_rect_raw sensor_msgs/Image (D435i depth, float32 m)
|
||||||
|
/camera/color/camera_info sensor_msgs/CameraInfo
|
||||||
|
|
||||||
|
Publishes:
|
||||||
|
/saltybot/pointing_target saltybot_social_msgs/PointingTarget (5 Hz)
|
||||||
|
|
||||||
|
Algorithm
|
||||||
|
---------
|
||||||
|
1. Cache the most recent 'point' Gesture from /social/gestures.
|
||||||
|
2. Synchronise colour + depth frames (ApproximateTimeSynchronizer, 50 ms slop).
|
||||||
|
3. When a 'point' gesture was received within `gesture_timeout_s`, run
|
||||||
|
MediaPipe Hands on the colour frame.
|
||||||
|
4. Locate the hand closest to the gesture anchor (hand_x, hand_y).
|
||||||
|
5. Call compute_pointing_ray(): unproject INDEX_MCP and INDEX_TIP using depth,
|
||||||
|
form the unit direction vector, extend to `ref_distance_m`.
|
||||||
|
6. A 5 Hz timer publishes the latest PointingTarget (is_active=false if stale).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
ref_distance_m float 2.0 Default range when depth is valid (m)
|
||||||
|
gesture_timeout_s float 1.0 Max age of a cached 'point' gesture (s)
|
||||||
|
min_confidence float 0.50 MediaPipe Hands min detection confidence
|
||||||
|
model_complexity int 0 MediaPipe model complexity (0=lite)
|
||||||
|
publish_hz float 5.0 Output publication rate (Hz)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy, DurabilityPolicy
|
||||||
|
|
||||||
|
import message_filters
|
||||||
|
import numpy as np
|
||||||
|
from cv_bridge import CvBridge
|
||||||
|
|
||||||
|
from sensor_msgs.msg import Image, CameraInfo
|
||||||
|
from saltybot_social_msgs.msg import GestureArray, PointingTarget
|
||||||
|
|
||||||
|
from ._pointing_ray import compute_pointing_ray
|
||||||
|
|
||||||
|
# Optional MediaPipe — absent in CI
|
||||||
|
try:
|
||||||
|
import cv2
|
||||||
|
import mediapipe as mp
|
||||||
|
_HAS_MP = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_MP = False
|
||||||
|
|
||||||
|
|
||||||
|
# QoS for high-rate image streams: best-effort delivery with a shallow
# queue so stale frames are dropped instead of buffered.
_SENSOR_QOS = QoSProfile(
    reliability=ReliabilityPolicy.BEST_EFFORT,
    history=HistoryPolicy.KEEP_LAST,
    depth=4,
)
# QoS for camera_info: reliable + transient-local so a late-joining
# subscriber still receives the last (effectively static) intrinsics.
_LATCHED_QOS = QoSProfile(
    reliability=ReliabilityPolicy.RELIABLE,
    history=HistoryPolicy.KEEP_LAST,
    depth=1,
    durability=DurabilityPolicy.TRANSIENT_LOCAL,
)
|
||||||
|
|
||||||
|
|
||||||
|
class PointingNode(Node):
    """Convert 'point' gestures into a precise 3-D pointing ray.

    Caches the most recent 'point' gesture from /social/gestures; while that
    gesture is fresh (within `gesture_timeout_s`), re-runs MediaPipe Hands on
    synchronised colour+depth frames, computes the ray via
    compute_pointing_ray(), and publishes a PointingTarget at `publish_hz`
    (is_active=False once the gesture is stale).
    """

    def __init__(self):
        """Declare parameters, set up MediaPipe, subscriptions, publisher, timer."""
        super().__init__('pointing_node')

        self.declare_parameter('ref_distance_m', 2.0)
        self.declare_parameter('gesture_timeout_s', 1.0)
        self.declare_parameter('min_confidence', 0.50)
        self.declare_parameter('model_complexity', 0)
        self.declare_parameter('publish_hz', 5.0)

        self._ref_dist = self.get_parameter('ref_distance_m').value
        self._gesture_tmo = self.get_parameter('gesture_timeout_s').value
        self._min_conf = self.get_parameter('min_confidence').value
        self._complexity = self.get_parameter('model_complexity').value
        publish_hz = self.get_parameter('publish_hz').value

        self._bridge = CvBridge()
        self._camera_info: Optional[CameraInfo] = None  # latest intrinsics, None until received

        # Cached state
        self._last_gesture = None   # most recent Gesture with type='point'
        self._last_gesture_t = 0.0  # time.monotonic() stamp
        self._last_result: Optional[dict] = None  # last compute_pointing_ray() output

        # MediaPipe Hands (lazy init) — stays None when mediapipe is absent (CI)
        self._hands = None
        if _HAS_MP:
            self._hands = mp.solutions.hands.Hands(
                static_image_mode=False,
                max_num_hands=2,
                min_detection_confidence=self._min_conf,
                min_tracking_confidence=0.5,
                model_complexity=self._complexity,
            )

        # Subscribers
        self.create_subscription(
            GestureArray, '/social/gestures', self._on_gestures,
            QoSProfile(depth=10),
        )
        self.create_subscription(
            CameraInfo, '/camera/color/camera_info', self._on_camera_info,
            _LATCHED_QOS,
        )

        # Colour + depth must be paired: approximate sync with 50 ms slop.
        color_sub = message_filters.Subscriber(
            self, Image, '/camera/color/image_raw', qos_profile=_SENSOR_QOS)
        depth_sub = message_filters.Subscriber(
            self, Image, '/camera/depth/image_rect_raw', qos_profile=_SENSOR_QOS)
        self._sync = message_filters.ApproximateTimeSynchronizer(
            [color_sub, depth_sub], queue_size=4, slop=0.05)
        self._sync.registerCallback(self._on_frame)

        # Publisher + timer
        self._pub = self.create_publisher(PointingTarget, '/saltybot/pointing_target', 10)
        self.create_timer(1.0 / publish_hz, self._tick)

        self.get_logger().info(
            f'pointing_node ready — mediapipe={"on" if _HAS_MP else "off"} '
            f'ref={self._ref_dist}m timeout={self._gesture_tmo}s'
        )

    # ── Callbacks ─────────────────────────────────────────────────────────────

    def _on_gestures(self, msg: GestureArray) -> None:
        """Cache the first 'point' gesture in the array with a monotonic timestamp."""
        for g in msg.gestures:
            if g.gesture_type == 'point':
                self._last_gesture = g
                self._last_gesture_t = time.monotonic()
                break  # first point gesture wins

    def _on_camera_info(self, msg: CameraInfo) -> None:
        """Store the latest camera intrinsics (latched topic)."""
        self._camera_info = msg

    def _on_frame(self, color_msg: Image, depth_msg: Image) -> None:
        """Synchronised colour+depth callback: run MediaPipe and update the ray.

        Skips all work until intrinsics have arrived and while no recent
        'point' gesture is cached, keeping the idle-path cheap.
        """
        if self._camera_info is None:
            return
        age = time.monotonic() - self._last_gesture_t
        if age > self._gesture_tmo:
            return  # no recent point gesture — skip expensive MediaPipe call

        try:
            bgr = self._bridge.imgmsg_to_cv2(color_msg, 'bgr8')
            depth = self._bridge.imgmsg_to_cv2(depth_msg, 'passthrough').astype(np.float32)
        except Exception as exc:
            self.get_logger().error(f'cv_bridge: {exc}', throttle_duration_sec=5.0)
            return

        # Heuristic unit fix: values > 100 are assumed to be millimetres
        # (raw uint16 depth) — convert to metres in place.
        if depth.max() > 100.0:  # uint16 mm → float32 m
            depth /= 1000.0

        lms = self._run_mediapipe(bgr)
        if lms is None:
            return

        # Select hand closest to gesture anchor
        g = self._last_gesture
        best_lm = self._pick_hand(lms, g.hand_x, g.hand_y)
        if best_lm is None:
            return

        h, w = bgr.shape[:2]
        # Intrinsics from the 3×3 K matrix: fx, fy on the diagonal, cx, cy in the last column.
        K = np.array(self._camera_info.k).reshape(3, 3)
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

        result = compute_pointing_ray(
            landmarks = best_lm,
            depth_img = depth,
            fx=fx, fy=fy, cx=cx, cy=cy,
            img_w=w, img_h=h,
            ref_distance = self._ref_dist,
        )
        if result is not None:
            self._last_result = result

    # ── MediaPipe helpers ──────────────────────────────────────────────────────

    def _run_mediapipe(self, bgr: np.ndarray):
        """Run MediaPipe Hands on a BGR frame; returns list of landmark lists or None."""
        if self._hands is None:
            return None
        import cv2
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        # Marking the buffer read-only lets MediaPipe avoid an internal copy.
        rgb.flags.writeable = False
        results = self._hands.process(rgb)
        if not results.multi_hand_landmarks:
            return None
        return [
            [(lm.x, lm.y, lm.z) for lm in hand.landmark]
            for hand in results.multi_hand_landmarks
        ]

    def _pick_hand(self, landmark_sets, anchor_x: float, anchor_y: float):
        """Return the landmark set whose INDEX_TIP is closest to (anchor_x, anchor_y)."""
        best, best_dist = None, float('inf')
        for lms in landmark_sets:
            tip = lms[8]  # INDEX_TIP = 8
            dist = math.hypot(tip[0] - anchor_x, tip[1] - anchor_y)
            if dist < best_dist:
                best, best_dist = lms, dist
        return best

    # ── 5 Hz publish tick ─────────────────────────────────────────────────────

    def _tick(self) -> None:
        """Publish the latest PointingTarget; is_active=False when the gesture is stale."""
        msg = PointingTarget()
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.header.frame_id = 'camera_color_optical_frame'

        age = time.monotonic() - self._last_gesture_t
        msg.is_active = age <= self._gesture_tmo

        # Identity fields are kept even when inactive, so consumers can see
        # who pointed last.
        if self._last_gesture is not None:
            msg.person_id = self._last_gesture.person_id
            msg.confidence = self._last_gesture.confidence
            msg.coarse_direction = self._last_gesture.direction

        # Ray fields are only filled while the gesture is fresh AND a ray
        # has actually been computed; otherwise they stay at message defaults.
        if msg.is_active and self._last_result is not None:
            ox, oy, oz = self._last_result['origin_3d']
            dx, dy, dz = self._last_result['direction_3d']
            tx, ty, tz = self._last_result['target_3d']

            msg.origin.x = ox; msg.origin.y = oy; msg.origin.z = oz
            msg.direction.x = dx; msg.direction.y = dy; msg.direction.z = dz
            msg.target.x = tx; msg.target.y = ty; msg.target.z = tz
            msg.range_m = self._last_result['range_m']

        self._pub.publish(msg)

    def destroy_node(self) -> None:
        """Release MediaPipe resources before tearing the node down."""
        if self._hands:
            self._hands.close()
        super().destroy_node()
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
    """CLI entry point — initialise rclpy, spin the node, always clean up."""
    rclpy.init(args=args)
    pointing = PointingNode()
    try:
        rclpy.spin(pointing)
    finally:
        # Runs on normal exit and on Ctrl-C alike.
        pointing.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
|
||||||
@ -37,6 +37,8 @@ setup(
|
|||||||
'voice_command_node = saltybot_social.voice_command_node:main',
|
'voice_command_node = saltybot_social.voice_command_node:main',
|
||||||
# Multi-camera gesture recognition (Issue #140)
|
# Multi-camera gesture recognition (Issue #140)
|
||||||
'gesture_node = saltybot_social.gesture_node:main',
|
'gesture_node = saltybot_social.gesture_node:main',
|
||||||
|
# Hand gesture pointing direction (Issue #221)
|
||||||
|
'pointing_node = saltybot_social.pointing_node:main',
|
||||||
# Facial expression recognition (Issue #161)
|
# Facial expression recognition (Issue #161)
|
||||||
'emotion_node = saltybot_social.emotion_node:main',
|
'emotion_node = saltybot_social.emotion_node:main',
|
||||||
# Robot mesh communication (Issue #171)
|
# Robot mesh communication (Issue #171)
|
||||||
|
|||||||
193
jetson/ros2_ws/src/saltybot_social/test/test_pointing.py
Normal file
193
jetson/ros2_ws/src/saltybot_social/test/test_pointing.py
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
"""
|
||||||
|
test_pointing.py — Unit tests for pointing ray helpers (no ROS2 required).
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- unproject: pixel + depth → 3-D point
|
||||||
|
- sample_depth: median depth sampling with outlier rejection
|
||||||
|
- compute_pointing_ray: full 3-D ray computation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||||
|
|
||||||
|
from saltybot_social._pointing_ray import unproject, sample_depth, compute_pointing_ray
|
||||||
|
|
||||||
|
|
||||||
|
# ── unproject ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestUnproject:
    """Pinhole back-projection: pixel + depth → camera-frame point."""

    _K = dict(fx=500.0, fy=500.0, cx=320.0, cy=240.0)

    def test_principal_point_gives_zero_xy(self):
        X, Y, Z = unproject(320.0, 240.0, 1.0, **self._K)
        assert X == pytest.approx(0.0)
        assert Y == pytest.approx(0.0)
        assert Z == pytest.approx(1.0)

    def test_z_equals_depth(self):
        _, _, Z = unproject(0.0, 0.0, 3.5, fx=600.0, fy=600.0, cx=320.0, cy=240.0)
        assert Z == pytest.approx(3.5)

    def test_right_of_principal_gives_positive_x(self):
        X, _, _ = unproject(420.0, 240.0, 1.0, **self._K)
        assert X > 0.0

    def test_below_principal_gives_positive_y(self):
        _, Y, _ = unproject(320.0, 340.0, 1.0, **self._K)
        assert Y > 0.0

    def test_x_scales_linearly_with_depth(self):
        x_near, _, _ = unproject(400.0, 240.0, 1.0, **self._K)
        x_far, _, _ = unproject(400.0, 240.0, 2.0, **self._K)
        assert x_far == pytest.approx(x_near * 2.0)

    def test_known_value(self):
        # (u=370, v=290, d=2.0, fx=fy=500, cx=320, cy=240)
        # X = (370-320)*2/500 = 0.2, Y = (290-240)*2/500 = 0.2, Z = 2.0
        X, Y, Z = unproject(370.0, 290.0, 2.0, **self._K)
        assert X == pytest.approx(0.2)
        assert Y == pytest.approx(0.2)
        assert Z == pytest.approx(2.0)
|
||||||
|
|
||||||
|
|
||||||
|
# ── sample_depth ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestSampleDepth:
    """Median-patch depth sampling with range-based outlier rejection."""

    def _uniform_depth(self, h=480, w=640, val=2.0):
        """Constant-valued float32 depth image of shape (h, w)."""
        return np.full((h, w), val, dtype=np.float32)

    def test_uniform_image_returns_value(self):
        depth = sample_depth(self._uniform_depth(val=1.5), 320.0, 240.0)
        assert depth == pytest.approx(1.5)

    def test_out_of_range_returns_nan(self):
        all_invalid = np.zeros((480, 640), dtype=np.float32)  # all zeros (< d_min=0.1)
        assert math.isnan(sample_depth(all_invalid, 320.0, 240.0))

    def test_ignores_outliers_above_d_max(self):
        img = self._uniform_depth(val=1.0)
        img[240, 320] = 100.0  # large spike at the sample centre
        depth = sample_depth(img, 320.0, 240.0, window=3, d_max=8.0)
        assert depth == pytest.approx(1.0)

    def test_edge_pixel_does_not_crash(self):
        depth = sample_depth(self._uniform_depth(val=2.0), 0.0, 0.0)
        assert depth == pytest.approx(2.0)

    def test_median_of_mixed_values(self):
        img = np.zeros((10, 10), dtype=np.float32)
        img[5, 5], img[5, 6], img[6, 5] = 1.0, 2.0, 3.0
        # Valid samples: 1.0, 2.0, 3.0 — median = 2.0
        assert sample_depth(img, 5.0, 5.0, window=3, d_min=0.0) == pytest.approx(2.0)
|
||||||
|
|
||||||
|
|
||||||
|
# ── compute_pointing_ray ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _make_landmarks(mcp_nx=0.5, mcp_ny=0.5, tip_nx=0.6, tip_ny=0.4):
|
||||||
|
"""21 dummy landmarks; only MCP (idx 5) and TIP (idx 8) matter."""
|
||||||
|
lms = [(0.5, 0.5, 0.0)] * 21
|
||||||
|
lms[5] = (mcp_nx, mcp_ny, 0.0) # INDEX_MCP
|
||||||
|
lms[8] = (tip_nx, tip_ny, 0.0) # INDEX_TIP
|
||||||
|
return lms
|
||||||
|
|
||||||
|
|
||||||
|
def _depth_img(val=2.0, h=480, w=640):
|
||||||
|
return np.full((h, w), val, dtype=np.float32)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared pinhole intrinsics used by all compute_pointing_ray tests below.
_FX = 600.0; _FY = 600.0; _CX = 320.0; _CY = 240.0  # focal lengths / principal point (px)
_W = 640; _H = 480  # image dimensions (px)
|
||||||
|
|
||||||
|
|
||||||
|
class TestComputePointingRay:
    """End-to-end checks of compute_pointing_ray: result shape, unit
    direction, target arithmetic, depth fallback, and ref_distance use."""

    def test_returns_dict_on_valid_input(self):
        """Uniform valid depth must yield a result with all documented keys."""
        lms = _make_landmarks()
        result = compute_pointing_ray(
            lms, _depth_img(), _FX, _FY, _CX, _CY, _W, _H)
        assert result is not None
        assert 'origin_3d' in result
        assert 'direction_3d' in result
        assert 'target_3d' in result
        assert 'range_m' in result

    def test_returns_none_when_depth_all_zero(self):
        """All-zero depth is invalid everywhere → no ray can be formed."""
        lms = _make_landmarks()
        result = compute_pointing_ray(
            lms, np.zeros((_H, _W), dtype=np.float32),
            _FX, _FY, _CX, _CY, _W, _H)
        assert result is None

    def test_direction_is_unit_vector(self):
        lms = _make_landmarks(mcp_nx=0.4, mcp_ny=0.5, tip_nx=0.6, tip_ny=0.4)
        result = compute_pointing_ray(
            lms, _depth_img(), _FX, _FY, _CX, _CY, _W, _H)
        assert result is not None
        dx, dy, dz = result['direction_3d']
        norm = math.sqrt(dx*dx + dy*dy + dz*dz)
        assert norm == pytest.approx(1.0, abs=1e-5)

    def test_target_is_origin_plus_range_times_direction(self):
        """target_3d must satisfy the documented identity origin + dir * range."""
        lms = _make_landmarks()
        result = compute_pointing_ray(
            lms, _depth_img(val=2.0), _FX, _FY, _CX, _CY, _W, _H,
            ref_distance=3.0)
        ox, oy, oz = result['origin_3d']
        dx, dy, dz = result['direction_3d']
        tx, ty, tz = result['target_3d']
        r = result['range_m']
        assert tx == pytest.approx(ox + dx * r, abs=1e-5)
        assert ty == pytest.approx(oy + dy * r, abs=1e-5)
        assert tz == pytest.approx(oz + dz * r, abs=1e-5)

    def test_mcp_uv_matches_landmark_projection(self):
        """mcp_uv should be the normalised landmark scaled by image size."""
        lms = _make_landmarks(mcp_nx=0.5, mcp_ny=0.5)
        result = compute_pointing_ray(
            lms, _depth_img(), _FX, _FY, _CX, _CY, _W, _H)
        mu, mv = result['mcp_uv']
        assert mu == pytest.approx(0.5 * _W)
        assert mv == pytest.approx(0.5 * _H)

    def test_fallback_when_only_one_depth_valid(self):
        """Only the MCP pixel has valid depth; TIP is in an invalid region."""
        depth = np.zeros((_H, _W), dtype=np.float32)
        # Set valid depth only at the MCP location (a 7×7 patch covers the
        # default 5×5 sampling window with margin)
        mcp_u, mcp_v = int(0.5 * _W), int(0.5 * _H)
        for du in range(-3, 4):
            for dv in range(-3, 4):
                u_ = max(0, min(_W - 1, mcp_u + du))
                v_ = max(0, min(_H - 1, mcp_v + dv))
                depth[v_, u_] = 2.0

        lms = _make_landmarks(mcp_nx=0.5, mcp_ny=0.5, tip_nx=0.9, tip_ny=0.1)
        result = compute_pointing_ray(
            lms, depth, _FX, _FY, _CX, _CY, _W, _H)
        # Should still return a result (using MCP depth for TIP fallback)
        assert result is not None
        dx, dy, dz = result['direction_3d']
        norm = math.sqrt(dx*dx + dy*dy + dz*dz)
        assert norm == pytest.approx(1.0, abs=1e-4)

    def test_ref_distance_used_as_range(self):
        """range_m must echo the ref_distance argument."""
        lms = _make_landmarks()
        result = compute_pointing_ray(
            lms, _depth_img(), _FX, _FY, _CX, _CY, _W, _H,
            ref_distance=5.0)
        assert result['range_m'] == pytest.approx(5.0)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this file directly without the pytest CLI.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])
|
||||||
@ -44,6 +44,8 @@ rosidl_generate_interfaces(${PROJECT_NAME}
|
|||||||
# Issue #171 — robot mesh comms
|
# Issue #171 — robot mesh comms
|
||||||
"msg/MeshPeer.msg"
|
"msg/MeshPeer.msg"
|
||||||
"msg/MeshHandoff.msg"
|
"msg/MeshHandoff.msg"
|
||||||
|
# Issue #221 — hand gesture pointing direction
|
||||||
|
"msg/PointingTarget.msg"
|
||||||
DEPENDENCIES std_msgs geometry_msgs builtin_interfaces
|
DEPENDENCIES std_msgs geometry_msgs builtin_interfaces
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,21 @@
|
|||||||
|
# PointingTarget.msg — 3-D pointing ray derived from hand landmarks + depth (Issue #221).
|
||||||
|
#
|
||||||
|
# Published on /saltybot/pointing_target at 5 Hz.
|
||||||
|
# Frame: camera_color_optical_frame (D435i colour stream).
|
||||||
|
#
|
||||||
|
# is_active is false when no 'point' gesture has been received recently.
|
||||||
|
|
||||||
|
std_msgs/Header header
|
||||||
|
|
||||||
|
int32 person_id # from Gesture.msg (-1 = unidentified)
|
||||||
|
float32 confidence # detection confidence from the source Gesture
|
||||||
|
bool is_active # true only when a recent 'point' gesture is present
|
||||||
|
string coarse_direction # Gesture.msg direction field: "left"/"right"/"up"/"forward"/"down"
|
||||||
|
|
||||||
|
# 3-D ray in camera_color_optical_frame (metres)
|
||||||
|
geometry_msgs/Point origin # INDEX_MCP (knuckle) position — ray origin
|
||||||
|
geometry_msgs/Vector3 direction # unit vector from MCP toward INDEX_TIP
|
||||||
|
|
||||||
|
# Estimated pointing target
|
||||||
|
geometry_msgs/Point target # origin + direction * range_m
|
||||||
|
float32 range_m # distance along the ray (m)
|
||||||
Loading…
x
Reference in New Issue
Block a user