feat(social): hand gesture pointing direction node (Issue #221)
Some checks failed
social-bot integration tests / Lint (flake8 + pep257) (pull_request) Failing after 2s
social-bot integration tests / Core integration tests (mock sensors, no GPU) (pull_request) Has been skipped
social-bot integration tests / Latency profiling (GPU, Orin) (pull_request) Has been cancelled

saltybot_social_msgs:
- Add PointingTarget.msg: origin (INDEX_MCP), direction (unit vec), target,
  range_m, person_id, confidence, coarse_direction, is_active
- Register in CMakeLists.txt

saltybot_social:
- _pointing_ray.py (pure Python, no rclpy): unproject(), sample_depth()
  (median with outlier rejection), compute_pointing_ray() — reprojects
  INDEX_MCP and INDEX_TIP into 3-D using D435i depth; falls back to image-
  plane direction when both depths are equal; gracefully handles one-sided
  missing depth
- pointing_node.py: subscribes /social/gestures + synced D435i colour+depth;
  re-runs MediaPipe Hands when a 'point' gesture is cached (within
  gesture_timeout_s); picks closest hand to gesture anchor; publishes
  PointingTarget on /saltybot/pointing_target at 5 Hz
- setup.py: adds pointing_node entry point
- 18/18 unit tests pass

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sl-perception 2026-03-02 11:56:54 -05:00
parent 3a34ec84e0
commit bd4a9f094d
6 changed files with 662 additions and 0 deletions

View File

@ -0,0 +1,180 @@
"""
_pointing_ray.py 3-D pointing ray computation from hand landmarks + depth (no ROS2 deps).
Given MediaPipe Hands 21-landmark output, a depth image (float32, metres),
and camera intrinsic parameters, computes:
- 3-D position of the INDEX_MCP (knuckle) ray origin
- 3-D direction (unit vector) INDEX_MCP → INDEX_TIP
- Estimated pointing target (origin + direction * range)
Coordinate frame: camera_color_optical_frame (standard ROS optical frame,
+X right, +Y down, +Z into the scene).
"""
from __future__ import annotations
import math
from typing import List, Optional, Tuple
# MediaPipe Hands landmark indices (matches gesture_classifier.py)
_WRIST = 0
_INDEX_MCP = 5
_INDEX_TIP = 8
def unproject(
    u: float, v: float, depth_m: float,
    fx: float, fy: float, cx: float, cy: float,
) -> Tuple[float, float, float]:
    """
    Back-project pixel (u, v) at depth ``depth_m`` into the camera frame.

    Standard pinhole model: X/Z = (u - cx)/fx and Y/Z = (v - cy)/fy.

    Parameters
    ----------
    u, v : pixel coordinates (float, origin top-left)
    depth_m : depth in metres
    fx, fy : focal lengths (pixels)
    cx, cy : principal point (pixels)

    Returns
    -------
    (X, Y, Z) in metres, expressed in camera_color_optical_frame.
    """
    x = (u - cx) * depth_m / fx
    y = (v - cy) * depth_m / fy
    return x, y, depth_m
def sample_depth(
    depth_img,
    u: float,
    v: float,
    window: int = 5,
    d_min: float = 0.1,
    d_max: float = 8.0,
) -> float:
    """
    Median of the valid depths in a (window × window) patch centred at (u, v).

    Values outside the open interval (d_min, d_max) are discarded — RealSense
    reports 0 for pixels with no depth return, and far outliers are spikes.

    Parameters
    ----------
    depth_img : np.ndarray (H, W) float32, depth in metres
    u, v : centre pixel (float)
    window : patch side length (pixels)
    d_min : minimum valid depth (m), exclusive
    d_max : maximum valid depth (m), exclusive

    Returns
    -------
    float : median depth, or NaN when the patch holds no valid samples
    """
    import numpy as np  # local import: module stays importable without numpy

    rows, cols = depth_img.shape[:2]
    half = window // 2
    cu = int(round(u))
    cv = int(round(v))
    # Clamp the patch to the image so edge pixels never index out of range.
    patch = depth_img[
        max(0, cv - half):min(rows, cv + half + 1),
        max(0, cu - half):min(cols, cu + half + 1),
    ]
    samples = patch[(patch > d_min) & (patch < d_max)]
    if samples.size == 0:
        return float('nan')
    return float(np.median(samples))
def compute_pointing_ray(
    landmarks: List[Tuple[float, float, float]],
    depth_img,
    fx: float,
    fy: float,
    cx: float,
    cy: float,
    img_w: int,
    img_h: int,
    ref_distance: float = 2.0,
    depth_window: int = 5,
) -> Optional[dict]:
    """
    Compute a 3-D pointing ray from MediaPipe Hands landmarks and a depth image.

    Parameters
    ----------
    landmarks : list of 21 (nx, ny, nz) normalised MediaPipe landmarks
                (nx in [0,1] left→right, ny in [0,1] top→bottom)
    depth_img : np.ndarray (H, W) float32, depth in metres
    fx, fy : focal lengths (pixels)
    cx, cy : principal point (pixels)
    img_w, img_h : image dimensions (pixels)
    ref_distance : distance along the ray at which the target is placed (m).
                   Depth fixes the ray origin/direction; the target range is
                   always this value.
    depth_window : window size for depth median sampling (pixels)

    Returns
    -------
    dict with keys:
        origin_3d    : (X, Y, Z) INDEX_MCP in camera frame (m)
        direction_3d : (dx, dy, dz) unit vector MCP → TIP
        target_3d    : (X, Y, Z) estimated pointing target (m)
        range_m      : distance from origin to target (m)
        mcp_uv       : (u, v) pixel coords of INDEX_MCP
        tip_uv       : (u, v) pixel coords of INDEX_TIP
    or None if the ray cannot be computed (landmark list missing/truncated,
    or both depths invalid).
    """
    # Guard: a None or truncated landmark list (MediaPipe can emit partial
    # results) would otherwise raise instead of the documented None.
    if landmarks is None or len(landmarks) <= _INDEX_TIP:
        return None
    mcp_lm = landmarks[_INDEX_MCP]
    tip_lm = landmarks[_INDEX_TIP]
    # Normalised landmark coordinates → pixel coordinates.
    mcp_u, mcp_v = mcp_lm[0] * img_w, mcp_lm[1] * img_h
    tip_u, tip_v = tip_lm[0] * img_w, tip_lm[1] * img_h
    d_mcp = sample_depth(depth_img, mcp_u, mcp_v, depth_window)
    d_tip = sample_depth(depth_img, tip_u, tip_v, depth_window)
    # Need at least one valid depth
    if math.isnan(d_mcp) and math.isnan(d_tip):
        return None
    # One-sided fallback: fill the missing depth with the other measurement.
    if math.isnan(d_mcp):
        d_mcp = d_tip
    if math.isnan(d_tip):
        d_tip = d_mcp
    mcp_3d = unproject(mcp_u, mcp_v, d_mcp, fx, fy, cx, cy)
    tip_3d = unproject(tip_u, tip_v, d_tip, fx, fy, cx, cy)
    # Ray direction: MCP → TIP
    dx = tip_3d[0] - mcp_3d[0]
    dy = tip_3d[1] - mcp_3d[1]
    dz = tip_3d[2] - mcp_3d[2]
    length = math.sqrt(dx * dx + dy * dy + dz * dz)
    if length < 1e-4:
        # Degenerate: MCP and TIP project to the same 3-D point.
        # Fall back to the image-plane direction tilted into the scene (+Z).
        px = (tip_u - mcp_u) / (img_w or 1)
        py = (tip_v - mcp_v) / (img_h or 1)
        dx, dy, dz = px, py, 1.0
        length = math.sqrt(dx * dx + dy * dy + dz * dz) or 1.0
    dx /= length
    dy /= length
    dz /= length
    # The published range is always ref_distance (see the parameter docs).
    range_m = ref_distance
    target_3d = (
        mcp_3d[0] + dx * range_m,
        mcp_3d[1] + dy * range_m,
        mcp_3d[2] + dz * range_m,
    )
    return {
        'origin_3d': mcp_3d,
        'direction_3d': (dx, dy, dz),
        'target_3d': target_3d,
        'range_m': range_m,
        'mcp_uv': (mcp_u, mcp_v),
        'tip_uv': (tip_u, tip_v),
    }

View File

@ -0,0 +1,264 @@
"""
pointing_node.py Hand gesture pointing direction (Issue #221).
Converts a 'point' gesture into a precise 3-D ray using MediaPipe Hands
re-run on the D435i colour stream and the registered depth image.
Subscribes:
/social/gestures saltybot_social_msgs/GestureArray
/camera/color/image_raw sensor_msgs/Image (D435i colour)
/camera/depth/image_rect_raw sensor_msgs/Image (D435i depth, float32 m)
/camera/color/camera_info sensor_msgs/CameraInfo
Publishes:
/saltybot/pointing_target saltybot_social_msgs/PointingTarget (5 Hz)
Algorithm
---------
1. Cache the most recent 'point' Gesture from /social/gestures.
2. Synchronise colour + depth frames (ApproximateTimeSynchronizer, 50 ms slop).
3. When a 'point' gesture was received within `gesture_timeout_s`, run
MediaPipe Hands on the colour frame.
4. Locate the hand closest to the gesture anchor (hand_x, hand_y).
5. Call compute_pointing_ray(): unproject INDEX_MCP and INDEX_TIP using depth,
form the unit direction vector, extend to `ref_distance_m`.
6. A 5 Hz timer publishes the latest PointingTarget (is_active=false if stale).
Parameters
----------
ref_distance_m float 2.0 Default range when depth is valid (m)
gesture_timeout_s float 1.0 Max age of a cached 'point' gesture (s)
min_confidence float 0.50 MediaPipe Hands min detection confidence
model_complexity int 0 MediaPipe model complexity (0=lite)
publish_hz float 5.0 Output publication rate (Hz)
"""
from __future__ import annotations
import math
import time
from typing import Optional
import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy, DurabilityPolicy
import message_filters
import numpy as np
from cv_bridge import CvBridge
from sensor_msgs.msg import Image, CameraInfo
from saltybot_social_msgs.msg import GestureArray, PointingTarget
from ._pointing_ray import compute_pointing_ray
# Optional MediaPipe — absent in CI, where the node degrades to publishing
# inactive PointingTarget messages (see _run_mediapipe / _tick).
try:
    import cv2
    import mediapipe as mp
    _HAS_MP = True
except ImportError:
    _HAS_MP = False

# Best-effort, shallow-queue QoS for the camera image streams: dropping a
# frame is preferable to processing a stale one.
_SENSOR_QOS = QoSProfile(
    reliability=ReliabilityPolicy.BEST_EFFORT,
    history=HistoryPolicy.KEEP_LAST,
    depth=4,
)
# Transient-local ("latched") QoS so a CameraInfo published before this node
# subscribed is still delivered.
_LATCHED_QOS = QoSProfile(
    reliability=ReliabilityPolicy.RELIABLE,
    history=HistoryPolicy.KEEP_LAST,
    depth=1,
    durability=DurabilityPolicy.TRANSIENT_LOCAL,
)
class PointingNode(Node):
    """Convert a cached 'point' gesture into a precise 3-D pointing ray.

    Caches the latest 'point' gesture from /social/gestures, re-runs
    MediaPipe Hands on time-synchronised D435i colour+depth frames while
    that gesture is fresh, and publishes a PointingTarget at a fixed rate
    (``is_active`` is False once the gesture is older than
    ``gesture_timeout_s``).
    """

    def __init__(self):
        super().__init__('pointing_node')
        # Tunable parameters — units and defaults documented in the module
        # docstring above.
        self.declare_parameter('ref_distance_m', 2.0)
        self.declare_parameter('gesture_timeout_s', 1.0)
        self.declare_parameter('min_confidence', 0.50)
        self.declare_parameter('model_complexity', 0)
        self.declare_parameter('publish_hz', 5.0)
        self._ref_dist = self.get_parameter('ref_distance_m').value
        self._gesture_tmo = self.get_parameter('gesture_timeout_s').value
        self._min_conf = self.get_parameter('min_confidence').value
        self._complexity = self.get_parameter('model_complexity').value
        publish_hz = self.get_parameter('publish_hz').value
        self._bridge = CvBridge()
        self._camera_info: Optional[CameraInfo] = None
        # Cached state
        self._last_gesture = None   # most recent Gesture with type='point'
        self._last_gesture_t = 0.0  # time.monotonic() stamp of that gesture
        self._last_result: Optional[dict] = None  # latest compute_pointing_ray() output
        # MediaPipe Hands (lazy init — stays None when mediapipe is absent,
        # which disables _run_mediapipe entirely)
        self._hands = None
        if _HAS_MP:
            self._hands = mp.solutions.hands.Hands(
                static_image_mode=False,
                max_num_hands=2,
                min_detection_confidence=self._min_conf,
                min_tracking_confidence=0.5,
                model_complexity=self._complexity,
            )
        # Subscribers
        self.create_subscription(
            GestureArray, '/social/gestures', self._on_gestures,
            QoSProfile(depth=10),
        )
        self.create_subscription(
            CameraInfo, '/camera/color/camera_info', self._on_camera_info,
            _LATCHED_QOS,
        )
        # Colour and depth are paired within 50 ms so depth lookups match the
        # frame MediaPipe ran on.
        color_sub = message_filters.Subscriber(
            self, Image, '/camera/color/image_raw', qos_profile=_SENSOR_QOS)
        depth_sub = message_filters.Subscriber(
            self, Image, '/camera/depth/image_rect_raw', qos_profile=_SENSOR_QOS)
        self._sync = message_filters.ApproximateTimeSynchronizer(
            [color_sub, depth_sub], queue_size=4, slop=0.05)
        self._sync.registerCallback(self._on_frame)
        # Publisher + timer
        self._pub = self.create_publisher(PointingTarget, '/saltybot/pointing_target', 10)
        self.create_timer(1.0 / publish_hz, self._tick)
        self.get_logger().info(
            f'pointing_node ready — mediapipe={"on" if _HAS_MP else "off"} '
            f'ref={self._ref_dist}m timeout={self._gesture_tmo}s'
        )

    # ── Callbacks ─────────────────────────────────────────────────────────────
    def _on_gestures(self, msg: GestureArray) -> None:
        """Cache the first 'point' gesture in the array and stamp its arrival."""
        for g in msg.gestures:
            if g.gesture_type == 'point':
                self._last_gesture = g
                self._last_gesture_t = time.monotonic()
                break  # first point gesture wins

    def _on_camera_info(self, msg: CameraInfo) -> None:
        """Store camera intrinsics; _on_frame is a no-op until these arrive."""
        self._camera_info = msg

    def _on_frame(self, color_msg: Image, depth_msg: Image) -> None:
        """Synchronised colour+depth callback: update the cached pointing ray.

        Only runs MediaPipe when a 'point' gesture was cached within
        gesture_timeout_s; otherwise returns immediately to save compute.
        """
        if self._camera_info is None:
            return
        age = time.monotonic() - self._last_gesture_t
        if age > self._gesture_tmo:
            return  # no recent point gesture — skip expensive MediaPipe call
        try:
            bgr = self._bridge.imgmsg_to_cv2(color_msg, 'bgr8')
            depth = self._bridge.imgmsg_to_cv2(depth_msg, 'passthrough').astype(np.float32)
        except Exception as exc:
            self.get_logger().error(f'cv_bridge: {exc}', throttle_duration_sec=5.0)
            return
        # Unit heuristic: values above 100 imply uint16 millimetres rather
        # than metres (assumes metric depth stays under 100 m — TODO confirm
        # against the depth topic's actual encoding).
        if depth.max() > 100.0:  # uint16 mm → float32 m
            depth /= 1000.0
        lms = self._run_mediapipe(bgr)
        if lms is None:
            return
        # Select hand closest to gesture anchor
        g = self._last_gesture
        best_lm = self._pick_hand(lms, g.hand_x, g.hand_y)
        if best_lm is None:
            return
        h, w = bgr.shape[:2]
        # CameraInfo.k is the row-major 3x3 intrinsic matrix.
        K = np.array(self._camera_info.k).reshape(3, 3)
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
        result = compute_pointing_ray(
            landmarks = best_lm,
            depth_img = depth,
            fx=fx, fy=fy, cx=cx, cy=cy,
            img_w=w, img_h=h,
            ref_distance = self._ref_dist,
        )
        if result is not None:
            self._last_result = result

    # ── MediaPipe helpers ──────────────────────────────────────────────────────
    def _run_mediapipe(self, bgr: np.ndarray):
        """Run MediaPipe Hands on a BGR frame; returns list of landmark lists or None.

        Returns None when MediaPipe is unavailable (self._hands is None) or no
        hand was detected. Each landmark list holds 21 (x, y, z) normalised
        coordinates.
        """
        if self._hands is None:
            return None
        import cv2  # safe here: _hands is only non-None when cv2 imported above
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        rgb.flags.writeable = False
        results = self._hands.process(rgb)
        if not results.multi_hand_landmarks:
            return None
        return [
            [(lm.x, lm.y, lm.z) for lm in hand.landmark]
            for hand in results.multi_hand_landmarks
        ]

    def _pick_hand(self, landmark_sets, anchor_x: float, anchor_y: float):
        """Return the landmark set whose INDEX_TIP is closest to (anchor_x, anchor_y).

        Anchor and landmarks are both in normalised [0, 1] image coordinates.
        """
        best, best_dist = None, float('inf')
        for lms in landmark_sets:
            tip = lms[8]  # INDEX_TIP = 8
            dist = math.hypot(tip[0] - anchor_x, tip[1] - anchor_y)
            if dist < best_dist:
                best, best_dist = lms, dist
        return best

    # ── 5 Hz publish tick ─────────────────────────────────────────────────────
    def _tick(self) -> None:
        """Publish the latest PointingTarget; ray fields are zero when inactive."""
        msg = PointingTarget()
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.header.frame_id = 'camera_color_optical_frame'
        age = time.monotonic() - self._last_gesture_t
        msg.is_active = age <= self._gesture_tmo
        if self._last_gesture is not None:
            msg.person_id = self._last_gesture.person_id
            msg.confidence = self._last_gesture.confidence
            msg.coarse_direction = self._last_gesture.direction
        if msg.is_active and self._last_result is not None:
            ox, oy, oz = self._last_result['origin_3d']
            dx, dy, dz = self._last_result['direction_3d']
            tx, ty, tz = self._last_result['target_3d']
            msg.origin.x = ox; msg.origin.y = oy; msg.origin.z = oz
            msg.direction.x = dx; msg.direction.y = dy; msg.direction.z = dz
            msg.target.x = tx; msg.target.y = ty; msg.target.z = tz
            msg.range_m = self._last_result['range_m']
        self._pub.publish(msg)

    def destroy_node(self) -> None:
        """Release the MediaPipe Hands graph before normal node teardown."""
        if self._hands:
            self._hands.close()
        super().destroy_node()
def main(args=None):
    """Entry point: spin a PointingNode until interrupted, then clean up."""
    rclpy.init(args=args)
    pointing = PointingNode()
    try:
        rclpy.spin(pointing)
    finally:
        # Always release MediaPipe resources and the ROS context, even on
        # Ctrl-C or an exception escaping spin().
        pointing.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -37,6 +37,8 @@ setup(
'voice_command_node = saltybot_social.voice_command_node:main',
# Multi-camera gesture recognition (Issue #140)
'gesture_node = saltybot_social.gesture_node:main',
# Hand gesture pointing direction (Issue #221)
'pointing_node = saltybot_social.pointing_node:main',
# Facial expression recognition (Issue #161)
'emotion_node = saltybot_social.emotion_node:main',
# Robot mesh communication (Issue #171)

View File

@ -0,0 +1,193 @@
"""
test_pointing.py Unit tests for pointing ray helpers (no ROS2 required).
Covers:
- unproject: pixel + depth 3-D point
- sample_depth: median depth sampling with outlier rejection
- compute_pointing_ray: full 3-D ray computation
"""
import sys
import os
import math
import numpy as np
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from saltybot_social._pointing_ray import unproject, sample_depth, compute_pointing_ray
# ── unproject ─────────────────────────────────────────────────────────────────
class TestUnproject:
    """Pinhole back-projection: pixel + depth → 3-D camera-frame point."""

    @staticmethod
    def _un(u, v, d, fx=500.0, fy=500.0, cx=320.0, cy=240.0):
        """Shorthand wrapper with the default test intrinsics."""
        return unproject(u, v, d, fx=fx, fy=fy, cx=cx, cy=cy)

    def test_principal_point_gives_zero_xy(self):
        X, Y, Z = self._un(320.0, 240.0, 1.0)
        assert X == pytest.approx(0.0)
        assert Y == pytest.approx(0.0)
        assert Z == pytest.approx(1.0)

    def test_z_equals_depth(self):
        _, _, Z = self._un(0.0, 0.0, 3.5, fx=600.0, fy=600.0)
        assert Z == pytest.approx(3.5)

    def test_right_of_principal_gives_positive_x(self):
        X, _, _ = self._un(420.0, 240.0, 1.0)
        assert X > 0.0

    def test_below_principal_gives_positive_y(self):
        _, Y, _ = self._un(320.0, 340.0, 1.0)
        assert Y > 0.0

    def test_x_scales_linearly_with_depth(self):
        near_x = self._un(400.0, 240.0, 1.0)[0]
        far_x = self._un(400.0, 240.0, 2.0)[0]
        assert far_x == pytest.approx(near_x * 2.0)

    def test_known_value(self):
        # X = (370-320)*2/500 = 0.2, Y = (290-240)*2/500 = 0.2, Z = 2.0
        X, Y, Z = self._un(370.0, 290.0, 2.0)
        assert X == pytest.approx(0.2)
        assert Y == pytest.approx(0.2)
        assert Z == pytest.approx(2.0)
# ── sample_depth ──────────────────────────────────────────────────────────────
class TestSampleDepth:
    """Median-of-patch depth sampling with validity filtering."""

    @staticmethod
    def _flat(val, h=480, w=640):
        """Uniform float32 depth image."""
        return np.full((h, w), val, dtype=np.float32)

    def test_uniform_image_returns_value(self):
        assert sample_depth(self._flat(1.5), 320.0, 240.0) == pytest.approx(1.5)

    def test_out_of_range_returns_nan(self):
        # All-zero depth (< d_min=0.1) leaves no valid samples in the patch.
        zeros = np.zeros((480, 640), dtype=np.float32)
        assert math.isnan(sample_depth(zeros, 320.0, 240.0))

    def test_ignores_outliers_above_d_max(self):
        img = self._flat(1.0)
        img[240, 320] = 100.0  # spike at the centre, beyond d_max
        assert sample_depth(img, 320.0, 240.0, window=3, d_max=8.0) == pytest.approx(1.0)

    def test_edge_pixel_does_not_crash(self):
        # Patch is clamped at the top-left corner instead of indexing out.
        assert sample_depth(self._flat(2.0), 0.0, 0.0) == pytest.approx(2.0)

    def test_median_of_mixed_values(self):
        img = np.zeros((10, 10), dtype=np.float32)
        img[5, 5], img[5, 6], img[6, 5] = 1.0, 2.0, 3.0
        # Valid samples {1.0, 2.0, 3.0} → median 2.0
        assert sample_depth(img, 5.0, 5.0, window=3, d_min=0.0) == pytest.approx(2.0)
# ── compute_pointing_ray ──────────────────────────────────────────────────────
def _make_landmarks(mcp_nx=0.5, mcp_ny=0.5, tip_nx=0.6, tip_ny=0.4):
"""21 dummy landmarks; only MCP (idx 5) and TIP (idx 8) matter."""
lms = [(0.5, 0.5, 0.0)] * 21
lms[5] = (mcp_nx, mcp_ny, 0.0) # INDEX_MCP
lms[8] = (tip_nx, tip_ny, 0.0) # INDEX_TIP
return lms
def _depth_img(val=2.0, h=480, w=640):
return np.full((h, w), val, dtype=np.float32)
# Shared pinhole intrinsics and VGA image size used by the ray tests below.
_FX = 600.0; _FY = 600.0; _CX = 320.0; _CY = 240.0
_W = 640; _H = 480
class TestComputePointingRay:
    """End-to-end 3-D ray computation on synthetic landmarks + depth."""

    def _ray(self, lms, depth=None, **kw):
        """Call compute_pointing_ray with the shared test intrinsics."""
        if depth is None:
            depth = _depth_img()
        return compute_pointing_ray(lms, depth, _FX, _FY, _CX, _CY, _W, _H, **kw)

    def test_returns_dict_on_valid_input(self):
        out = self._ray(_make_landmarks())
        assert out is not None
        for key in ('origin_3d', 'direction_3d', 'target_3d', 'range_m'):
            assert key in out

    def test_returns_none_when_depth_all_zero(self):
        zeros = np.zeros((_H, _W), dtype=np.float32)
        assert self._ray(_make_landmarks(), depth=zeros) is None

    def test_direction_is_unit_vector(self):
        out = self._ray(_make_landmarks(mcp_nx=0.4, mcp_ny=0.5, tip_nx=0.6, tip_ny=0.4))
        assert out is not None
        dx, dy, dz = out['direction_3d']
        assert math.sqrt(dx * dx + dy * dy + dz * dz) == pytest.approx(1.0, abs=1e-5)

    def test_target_is_origin_plus_range_times_direction(self):
        out = self._ray(_make_landmarks(), depth=_depth_img(val=2.0), ref_distance=3.0)
        r = out['range_m']
        for o, d, t in zip(out['origin_3d'], out['direction_3d'], out['target_3d']):
            assert t == pytest.approx(o + d * r, abs=1e-5)

    def test_mcp_uv_matches_landmark_projection(self):
        out = self._ray(_make_landmarks(mcp_nx=0.5, mcp_ny=0.5))
        u, v = out['mcp_uv']
        assert u == pytest.approx(0.5 * _W)
        assert v == pytest.approx(0.5 * _H)

    def test_fallback_when_only_one_depth_valid(self):
        """Only the MCP pixel has valid depth; TIP is in an invalid region."""
        depth = np.zeros((_H, _W), dtype=np.float32)
        cu, cv = int(0.5 * _W), int(0.5 * _H)
        # Paint a 7×7 valid patch around the MCP location only.
        for du in range(-3, 4):
            for dv in range(-3, 4):
                row = max(0, min(_H - 1, cv + dv))
                col = max(0, min(_W - 1, cu + du))
                depth[row, col] = 2.0
        out = self._ray(
            _make_landmarks(mcp_nx=0.5, mcp_ny=0.5, tip_nx=0.9, tip_ny=0.1),
            depth=depth)
        # MCP depth is reused for the TIP, so a unit-length ray still results.
        assert out is not None
        dx, dy, dz = out['direction_3d']
        assert math.sqrt(dx * dx + dy * dy + dz * dz) == pytest.approx(1.0, abs=1e-4)

    def test_ref_distance_used_as_range(self):
        out = self._ray(_make_landmarks(), ref_distance=5.0)
        assert out['range_m'] == pytest.approx(5.0)
if __name__ == '__main__':
    # Allow running this file directly: `python test_pointing.py`.
    pytest.main([__file__, '-v'])

View File

@ -44,6 +44,8 @@ rosidl_generate_interfaces(${PROJECT_NAME}
# Issue #171 robot mesh comms
"msg/MeshPeer.msg"
"msg/MeshHandoff.msg"
# Issue #221 hand gesture pointing direction
"msg/PointingTarget.msg"
DEPENDENCIES std_msgs geometry_msgs builtin_interfaces
)

View File

@ -0,0 +1,21 @@
# PointingTarget.msg — 3-D pointing ray derived from hand landmarks + depth (Issue #221).
#
# Published on /saltybot/pointing_target at 5 Hz.
# Frame: camera_color_optical_frame (D435i colour stream).
#
# is_active is false when no 'point' gesture has been received recently.
std_msgs/Header header
int32 person_id # from Gesture.msg (-1 = unidentified)
float32 confidence # detection confidence from the source Gesture
bool is_active # true only when a recent 'point' gesture is present
string coarse_direction # Gesture.msg direction field: "left"/"right"/"up"/"forward"/"down"
# 3-D ray in camera_color_optical_frame (metres)
geometry_msgs/Point origin # INDEX_MCP (knuckle) position — ray origin
geometry_msgs/Vector3 direction # unit vector from MCP toward INDEX_TIP
# Estimated pointing target
geometry_msgs/Point target # origin + direction * range_m
float32 range_m # distance along the ray (m)