Merge pull request 'feat: person detection + tracking (YOLOv8n TensorRT)' (#54) from sl-jetson/person-detection into main
This commit is contained in:
commit
9014b6738d
11
jetson/ros2_ws/src/saltybot_perception/.gitignore
vendored
Normal file
11
jetson/ros2_ws/src/saltybot_perception/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
# TensorRT engines are hardware-specific — don't commit them
|
||||
models/*.engine
|
||||
models/*.onnx
|
||||
|
||||
# Python bytecode
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
|
||||
# Test cache
|
||||
.pytest_cache/
|
||||
@ -0,0 +1,37 @@
|
||||
person_detector:
|
||||
ros__parameters:
|
||||
# ── Model paths ──────────────────────────────────────────────────────────
|
||||
# TensorRT FP16 engine (built with scripts/build_trt_engine.py)
|
||||
# Stored on NVMe for fast load and hardware-specific optimisation.
|
||||
engine_path: "/mnt/nvme/saltybot/models/yolov8n.engine"
|
||||
|
||||
# ONNX fallback — used when engine_path not found (dev / CI environments)
|
||||
onnx_path: "/mnt/nvme/saltybot/models/yolov8n.onnx"
|
||||
|
||||
# ── Detection thresholds ─────────────────────────────────────────────────
|
||||
confidence_threshold: 0.40 # YOLO class confidence (0–1). Lower → more detections but more FP.
|
||||
nms_iou_threshold: 0.45 # NMS IoU threshold. Higher → fewer suppressed boxes.
|
||||
|
||||
# ── Depth filtering ───────────────────────────────────────────────────────
|
||||
# Only consider persons within this depth range (metres).
|
||||
# RealSense D435i reliable range: 0.3–5.0 m
|
||||
min_depth: 0.5 # ignore very close objects (robot body artefacts)
|
||||
max_depth: 5.0 # ignore persons beyond following range
|
||||
|
||||
# ── Tracker settings ──────────────────────────────────────────────────────
|
||||
# Hold last known track position for this many seconds after losing detection.
|
||||
# Handles brief occlusion (person walks behind furniture).
|
||||
track_hold_duration: 2.0 # seconds
|
||||
|
||||
# Minimum IoU between current detection and existing track to re-associate.
|
||||
track_iou_threshold: 0.25
|
||||
|
||||
# ── Output coordinate frame ───────────────────────────────────────────────
|
||||
# Frame for /person/target PoseStamped. Must be reachable via TF.
|
||||
# sl-controls follow loop expects base_link.
|
||||
target_frame: "base_link"
|
||||
|
||||
# ── Debug ─────────────────────────────────────────────────────────────────
|
||||
# Publish annotated RGB image to /person/debug_image.
|
||||
# Adds ~5ms overhead. Disable on production hardware.
|
||||
publish_debug_image: false
|
||||
@ -0,0 +1,98 @@
|
||||
"""
|
||||
person_detection.launch.py — Launch person detection node with config.
|
||||
|
||||
Usage:
|
||||
ros2 launch saltybot_perception person_detection.launch.py
|
||||
|
||||
# Override engine path:
|
||||
ros2 launch saltybot_perception person_detection.launch.py \\
|
||||
engine_path:=/mnt/nvme/saltybot/models/yolov8n.engine
|
||||
|
||||
# Use ONNX fallback (dev/CI):
|
||||
ros2 launch saltybot_perception person_detection.launch.py \\
|
||||
onnx_path:=/mnt/nvme/saltybot/models/yolov8n.onnx
|
||||
|
||||
# Enable debug image stream:
|
||||
ros2 launch saltybot_perception person_detection.launch.py \\
|
||||
publish_debug_image:=true
|
||||
|
||||
Prerequisites:
|
||||
- RealSense D435i node running and publishing:
|
||||
/camera/color/image_raw
|
||||
/camera/depth/image_rect_raw
|
||||
/camera/color/camera_info
|
||||
- TF tree containing base_link ← camera_color_optical_frame
|
||||
- YOLOv8n TensorRT engine (build with scripts/build_trt_engine.py)
|
||||
"""
|
||||
|
||||
import os
|
||||
from launch import LaunchDescription
|
||||
from launch.actions import DeclareLaunchArgument
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
from launch_ros.actions import Node
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
|
||||
def generate_launch_description():
    """Assemble the launch description: declared arguments plus the detector node.

    The YAML config file provides defaults; any launch argument listed in
    `overridable` is layered on top as a parameter override.
    """
    pkg_dir = get_package_share_directory('saltybot_perception')
    default_config = os.path.join(pkg_dir, 'config', 'person_detection_params.yaml')

    declared_args = [
        DeclareLaunchArgument(
            'engine_path',
            default_value='/mnt/nvme/saltybot/models/yolov8n.engine',
            description='Path to TensorRT .engine file (built by build_trt_engine.py)',
        ),
        DeclareLaunchArgument(
            'onnx_path',
            default_value='/mnt/nvme/saltybot/models/yolov8n.onnx',
            description='Path to ONNX model (fallback when engine_path not found)',
        ),
        DeclareLaunchArgument(
            'publish_debug_image',
            default_value='false',
            description='Publish annotated debug image on /person/debug_image',
        ),
        DeclareLaunchArgument(
            'target_frame',
            default_value='base_link',
            description='TF frame for /person/target PoseStamped output',
        ),
        DeclareLaunchArgument(
            'confidence_threshold',
            default_value='0.4',
            description='Minimum YOLO detection confidence',
        ),
        DeclareLaunchArgument(
            'max_depth',
            default_value='5.0',
            description='Maximum person tracking distance in metres',
        ),
    ]

    # Launch-argument overrides applied on top of the YAML defaults.
    overridable = ('engine_path', 'onnx_path', 'publish_debug_image',
                   'target_frame', 'confidence_threshold', 'max_depth')
    overrides = {name: LaunchConfiguration(name) for name in overridable}

    detector_node = Node(
        package='saltybot_perception',
        executable='person_detector',
        name='person_detector',
        output='screen',
        parameters=[default_config, overrides],
        # Standard RealSense topic names — no remapping needed by default.
        remappings=[],
    )

    return LaunchDescription(declared_args + [detector_node])
|
||||
42
jetson/ros2_ws/src/saltybot_perception/package.xml
Normal file
42
jetson/ros2_ws/src/saltybot_perception/package.xml
Normal file
@ -0,0 +1,42 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>saltybot_perception</name>
|
||||
<version>0.1.0</version>
|
||||
<description>
|
||||
Person detection and tracking for saltybot person-following mode.
|
||||
Uses YOLOv8n with TensorRT FP16 on Jetson Orin Nano Super (67 TOPS).
|
||||
Publishes person bounding boxes and 3D target position for the follow loop.
|
||||
</description>
|
||||
<maintainer email="seb@vayrette.com">seb</maintainer>
|
||||
<license>MIT</license>
|
||||
|
||||
<depend>rclpy</depend>
|
||||
<depend>sensor_msgs</depend>
|
||||
<depend>geometry_msgs</depend>
|
||||
<depend>vision_msgs</depend>
|
||||
<depend>tf2_ros</depend>
|
||||
<depend>tf2_geometry_msgs</depend>
|
||||
<depend>cv_bridge</depend>
|
||||
<depend>image_transport</depend>
|
||||
|
||||
<exec_depend>python3-numpy</exec_depend>
|
||||
<exec_depend>python3-opencv</exec_depend>
|
||||
<exec_depend>python3-launch-ros</exec_depend>
|
||||
|
||||
<!-- TensorRT (Jetson) — optional, falls back to onnxruntime -->
|
||||
<!-- exec_depend>python3-tensorrt</exec_depend -->
|
||||
<!-- exec_depend>python3-pycuda</exec_depend -->
|
||||
|
||||
<!-- ONNX Runtime fallback -->
|
||||
<!-- exec_depend>python3-onnxruntime</exec_depend -->
|
||||
|
||||
<test_depend>ament_copyright</test_depend>
|
||||
<test_depend>ament_flake8</test_depend>
|
||||
<test_depend>ament_pep257</test_depend>
|
||||
<test_depend>python3-pytest</test_depend>
|
||||
|
||||
<export>
|
||||
<build_type>ament_python</build_type>
|
||||
</export>
|
||||
</package>
|
||||
@ -0,0 +1,124 @@
|
||||
"""
|
||||
detection_utils.py — Pure-Python helpers with no ROS2 dependencies.
|
||||
Importable in tests without a running ROS2 environment.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def nms(boxes: np.ndarray, scores: np.ndarray,
        iou_threshold: float = 0.45) -> list[int]:
    """
    Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other box whose IoU with it reaches `iou_threshold`.

    Args:
        boxes: [N, 4] float array of (x1, y1, x2, y2) boxes
        scores: [N] float array of confidence scores
        iou_threshold: suppress boxes with IoU > this value

    Returns:
        List of kept indices (sorted by descending score).
    """
    if len(boxes) == 0:
        return []

    xa, ya, xb, yb = (boxes[:, k] for k in range(4))
    box_area = (xb - xa) * (yb - ya)

    # Candidate indices, best score first.
    candidates = scores.argsort()[::-1]
    kept: list[int] = []

    while candidates.size > 0:
        best = candidates[0]
        kept.append(int(best))
        rest = candidates[1:]
        if rest.size == 0:
            break

        # Intersection of `best` with every remaining candidate.
        ov_x1 = np.maximum(xa[best], xa[rest])
        ov_y1 = np.maximum(ya[best], ya[rest])
        ov_x2 = np.minimum(xb[best], xb[rest])
        ov_y2 = np.minimum(yb[best], yb[rest])
        ov_w = np.maximum(0.0, ov_x2 - ov_x1)
        ov_h = np.maximum(0.0, ov_y2 - ov_y1)
        overlap = ov_w * ov_h

        # Small epsilon guards against division by zero for degenerate boxes.
        iou = overlap / (box_area[best] + box_area[rest] - overlap + 1e-6)
        candidates = rest[iou < iou_threshold]

    return kept
|
||||
|
||||
|
||||
def letterbox(image: np.ndarray, size: int = 640, pad_value: int = 114):
    """
    Letterbox-resize `image` to `size`×`size`, preserving aspect ratio.

    Args:
        image: uint8 HxWx3 (color) or HxW (grayscale) image
        size: output side length in pixels
        pad_value: fill value for the padded border

    Returns:
        (canvas, scale, pad_w, pad_h)
        canvas: uint8 [size, size, C] for 3-D input, [size, size] for 2-D input
        scale: float — resize scale factor
        pad_w: int — horizontal padding applied
        pad_h: int — vertical padding applied
    """
    import cv2
    h, w = image.shape[:2]
    scale = min(size / w, size / h)
    new_w = int(round(w * scale))
    new_h = int(round(h * scale))
    pad_w = (size - new_w) // 2
    pad_h = (size - new_h) // 2

    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    if image.ndim == 3:
        canvas = np.full((size, size, image.shape[2]), pad_value, dtype=np.uint8)
    else:
        # Bug fix: cv2.resize returns a 2-D array for grayscale input, which
        # cannot be assigned into a (size, size, 1) canvas (shape mismatch).
        # Use a 2-D canvas for 2-D input instead of crashing.
        canvas = np.full((size, size), pad_value, dtype=np.uint8)
    canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized
    return canvas, scale, pad_w, pad_h
|
||||
|
||||
|
||||
def remap_bbox(x1, y1, x2, y2, scale, pad_w, pad_h, orig_w, orig_h):
    """Map bbox from letterboxed-image space back to original image space.

    Undoes the padding, then the resize, clamping each coordinate to the
    original image bounds.
    """
    def undo(coord, pad, limit):
        # Remove the letterbox offset, rescale, clamp to [0, limit].
        return float(np.clip((coord - pad) / scale, 0, limit))

    return (undo(x1, pad_w, orig_w),
            undo(y1, pad_h, orig_h),
            undo(x2, pad_w, orig_w),
            undo(y2, pad_h, orig_h))
|
||||
|
||||
|
||||
def get_depth_at(depth_img: np.ndarray, u: float, v: float,
                 window: int = 7, min_d: float = 0.3, max_d: float = 6.0) -> float:
    """
    Median depth in a `window`×`window` region around pixel (u, v).

    Args:
        depth_img: float32 depth image in metres
        u, v: pixel coordinates
        window: patch side length
        min_d, max_d: valid depth range

    Returns:
        Median depth in metres, or 0.0 if no valid pixels.
    """
    rows, cols = depth_img.shape
    col, row = int(u), int(v)
    half = window // 2

    # Patch bounds clamped to the image; slicing handles the edges.
    patch = depth_img[max(0, row - half):min(rows, row + half + 1),
                      max(0, col - half):min(cols, col + half + 1)]

    in_range = patch[(patch > min_d) & (patch < max_d)]
    if in_range.size == 0:
        return 0.0
    return float(np.median(in_range))
|
||||
|
||||
|
||||
def pixel_to_3d(u: float, v: float, depth_m: float, K) -> tuple[float, float, float]:
    """
    Back-project pixel (u, v) at depth_m to a 3D point in the camera frame.

    Uses the standard pinhole model: X = (u - cx) * Z / fx, etc.

    Args:
        u, v: pixel coordinates
        depth_m: depth in metres
        K: camera intrinsic matrix (row-major, 9 elements or 3×3 array)

    Returns:
        (X, Y, Z) in camera optical frame
    """
    intr = np.asarray(K).ravel()
    fx, cx = intr[0], intr[2]
    fy, cy = intr[4], intr[5]
    return ((u - cx) * depth_m / fx,
            (v - cy) * depth_m / fy,
            depth_m)
|
||||
@ -0,0 +1,471 @@
|
||||
"""
|
||||
person_detector_node.py — Person detection + tracking for saltybot.
|
||||
|
||||
Pipeline:
|
||||
1. Receive synchronized color + depth frames from RealSense D435i.
|
||||
2. Run YOLOv8n (TensorRT FP16 on Orin Nano Super, ONNX fallback elsewhere).
|
||||
3. Filter detections for class 'person' (COCO class 0).
|
||||
4. Estimate 3D position from aligned depth at bounding box centre.
|
||||
5. Track target person across frames (SimplePersonTracker).
|
||||
6. Publish:
|
||||
/person/detections — vision_msgs/Detection2DArray (all detected persons)
|
||||
/person/target — geometry_msgs/PoseStamped (tracked person 3D pos)
|
||||
/person/debug_image — sensor_msgs/Image (annotated RGB, lazy)
|
||||
|
||||
TensorRT engine:
|
||||
- Build once with scripts/build_trt_engine.py
|
||||
- Place engine at path specified by `engine_path` param
|
||||
- Falls back to ONNX Runtime (onnxruntime[-gpu]) if engine not found
|
||||
|
||||
Coordinate frame:
|
||||
/person/target is published in `base_link` frame.
|
||||
If TF unavailable, falls back to `camera_color_optical_frame`.
|
||||
"""
|
||||
|
||||
import os
|
||||
import math
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from rclpy.duration import Duration
|
||||
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy, DurabilityPolicy
|
||||
|
||||
import message_filters
|
||||
import cv2
|
||||
from cv_bridge import CvBridge
|
||||
|
||||
from sensor_msgs.msg import Image, CameraInfo
|
||||
from geometry_msgs.msg import PoseStamped, PointStamped
|
||||
from vision_msgs.msg import (
|
||||
Detection2D,
|
||||
Detection2DArray,
|
||||
ObjectHypothesisWithPose,
|
||||
BoundingBox2D,
|
||||
)
|
||||
import tf2_ros
|
||||
import tf2_geometry_msgs # noqa: F401 — registers PointStamped transform support
|
||||
|
||||
from .tracker import SimplePersonTracker
|
||||
from .detection_utils import nms, letterbox, remap_bbox, get_depth_at, pixel_to_3d
|
||||
|
||||
_PERSON_CLASS_ID = 0 # COCO class index for 'person'
|
||||
_YOLO_INPUT_SIZE = 640
|
||||
|
||||
|
||||
# ── Inference backends ─────────────────────────────────────────────────────────
|
||||
|
||||
class _TRTBackend:
    """TensorRT inference engine (Jetson Orin).

    Deserializes a prebuilt .engine file and allocates one page-locked host
    buffer plus one device buffer per engine I/O tensor at construction;
    those buffers are reused for every infer() call.
    """

    def __init__(self, engine_path: str):
        # Imported lazily so this module can still load on machines
        # without TensorRT/pycuda — callers catch the ImportError.
        import tensorrt as trt
        import pycuda.driver as cuda
        import pycuda.autoinit  # noqa: F401

        self._cuda = cuda
        logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, 'rb') as f, trt.Runtime(logger) as runtime:
            self._engine = runtime.deserialize_cuda_engine(f.read())
        self._context = self._engine.create_execution_context()

        # Per-tensor buffers: host (pinned) + device, plus raw device
        # pointers in binding order for execute_async_v2.
        self._inputs = []
        self._outputs = []
        self._bindings = []
        for i in range(self._engine.num_io_tensors):
            name = self._engine.get_tensor_name(i)
            dtype = trt.nptype(self._engine.get_tensor_dtype(name))
            shape = tuple(self._engine.get_tensor_shape(name))
            nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize
            # Page-locked host memory allows async H2D/D2H copies.
            host_mem = cuda.pagelocked_empty(shape, dtype)
            device_mem = cuda.mem_alloc(nbytes)
            self._bindings.append(int(device_mem))
            if self._engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                self._inputs.append({'host': host_mem, 'device': device_mem})
            else:
                self._outputs.append({'host': host_mem, 'device': device_mem})
        self._stream = cuda.Stream()

    def infer(self, input_data: np.ndarray) -> list[np.ndarray]:
        """Run one synchronous inference; returns copies of all output tensors.

        Assumes a single input tensor (only self._inputs[0] is filled).
        NOTE(review): np.copyto copies the *raveled* input into a host buffer
        allocated with the engine's N-D shape — confirm the shapes are
        compatible for the engines actually used. Also, execute_async_v2 was
        removed in TensorRT 10; this code targets the TRT 8.x API — verify
        against the JetPack version deployed.
        """
        np.copyto(self._inputs[0]['host'], input_data.ravel())
        self._cuda.memcpy_htod_async(
            self._inputs[0]['device'], self._inputs[0]['host'], self._stream)
        self._context.execute_async_v2(self._bindings, self._stream.handle)
        for out in self._outputs:
            self._cuda.memcpy_dtoh_async(out['host'], out['device'], self._stream)
        self._stream.synchronize()
        # Copy so callers keep valid data after the next infer() reuses buffers.
        return [out['host'].copy() for out in self._outputs]
|
||||
|
||||
|
||||
class _ONNXBackend:
    """ONNX Runtime inference (CPU / CUDA EP — fallback for non-Jetson)."""

    def __init__(self, onnx_path: str):
        # Lazy import: onnxruntime is only needed when this fallback is used.
        import onnxruntime as ort
        self._session = ort.InferenceSession(
            onnx_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        self._input_name = self._session.get_inputs()[0].name

    def infer(self, input_data: np.ndarray) -> list[np.ndarray]:
        """Run one inference and return every output tensor."""
        feed = {self._input_name: input_data}
        return self._session.run(None, feed)
|
||||
|
||||
|
||||
# ── Node ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
class PersonDetectorNode(Node):
|
||||
|
||||
def __init__(self):
    """Declare parameters, create tracker/TF/publishers/subscribers,
    and load the inference backend."""
    super().__init__('person_detector')
    self._bridge = CvBridge()
    self._camera_info: CameraInfo | None = None
    self._backend = None  # set by _load_backend(); None disables detection

    # ── Parameters ──────────────────────────────────────────────────────
    self.declare_parameter('engine_path', '')
    self.declare_parameter('onnx_path', '')
    self.declare_parameter('confidence_threshold', 0.4)
    self.declare_parameter('nms_iou_threshold', 0.45)
    self.declare_parameter('min_depth', 0.5)
    self.declare_parameter('max_depth', 5.0)
    self.declare_parameter('track_hold_duration', 2.0)
    self.declare_parameter('track_iou_threshold', 0.25)
    self.declare_parameter('target_frame', 'base_link')
    self.declare_parameter('publish_debug_image', False)

    # Values are read once here; no parameter callback is registered, so
    # runtime parameter changes have no effect until restart.
    self._conf_thresh = self.get_parameter('confidence_threshold').value
    self._nms_thresh = self.get_parameter('nms_iou_threshold').value
    self._min_depth = self.get_parameter('min_depth').value
    self._max_depth = self.get_parameter('max_depth').value
    self._target_frame = self.get_parameter('target_frame').value
    self._pub_debug = self.get_parameter('publish_debug_image').value

    hold_dur = self.get_parameter('track_hold_duration').value
    track_iou = self.get_parameter('track_iou_threshold').value
    self._tracker = SimplePersonTracker(
        hold_duration=hold_dur,
        iou_threshold=track_iou,
        min_depth=self._min_depth,
        max_depth=self._max_depth,
    )

    # Letterbox state (written by _preprocess, read by _remap_bbox)
    self._scale = 1.0
    self._pad_w = 0
    self._pad_h = 0
    self._orig_w = 0
    self._orig_h = 0

    # ── TF ──────────────────────────────────────────────────────────────
    self._tf_buffer = tf2_ros.Buffer()
    self._tf_listener = tf2_ros.TransformListener(self._tf_buffer, self)

    # ── Publishers ──────────────────────────────────────────────────────
    # BEST_EFFORT + KEEP_LAST(1): consumers only ever need the latest frame.
    best_effort_qos = QoSProfile(
        reliability=ReliabilityPolicy.BEST_EFFORT,
        history=HistoryPolicy.KEEP_LAST,
        depth=1,
    )
    self._pub_detections = self.create_publisher(
        Detection2DArray, '/person/detections', best_effort_qos)
    self._pub_target = self.create_publisher(
        PoseStamped, '/person/target', best_effort_qos)
    # Debug publisher exists only when enabled at startup.
    if self._pub_debug:
        self._pub_debug_img = self.create_publisher(
            Image, '/person/debug_image', best_effort_qos)

    # ── Camera info subscriber ───────────────────────────────────────────
    self.create_subscription(
        CameraInfo,
        '/camera/color/camera_info',
        self._on_camera_info,
        QoSProfile(reliability=ReliabilityPolicy.BEST_EFFORT,
                   history=HistoryPolicy.KEEP_LAST, depth=1),
    )

    # ── Synchronized color + depth subscribers ───────────────────────────
    color_sub = message_filters.Subscriber(
        self, Image, '/camera/color/image_raw',
        qos_profile=QoSProfile(
            reliability=ReliabilityPolicy.BEST_EFFORT,
            history=HistoryPolicy.KEEP_LAST, depth=4))
    depth_sub = message_filters.Subscriber(
        self, Image, '/camera/depth/image_rect_raw',
        qos_profile=QoSProfile(
            reliability=ReliabilityPolicy.BEST_EFFORT,
            history=HistoryPolicy.KEEP_LAST, depth=4))
    # slop=0.05: accept color/depth pairs stamped within 50 ms of each other.
    self._sync = message_filters.ApproximateTimeSynchronizer(
        [color_sub, depth_sub], queue_size=4, slop=0.05)
    self._sync.registerCallback(self._on_frame)

    # ── Load model ───────────────────────────────────────────────────────
    self._load_backend()
    self.get_logger().info('PersonDetectorNode ready.')
|
||||
|
||||
# ── Model loading ─────────────────────────────────────────────────────────
|
||||
|
||||
def _load_backend(self):
    """Select an inference backend: TensorRT engine preferred, ONNX fallback.

    Leaves self._backend as None (detection disabled) when neither loads.
    """
    engine_path = self.get_parameter('engine_path').value
    onnx_path = self.get_parameter('onnx_path').value

    if engine_path and os.path.isfile(engine_path):
        try:
            self._backend = _TRTBackend(engine_path)
        except Exception as e:
            self.get_logger().warn(f'TRT load failed ({e}), falling back to ONNX')
        else:
            self.get_logger().info(f'TensorRT backend loaded: {engine_path}')
            return

    if onnx_path and os.path.isfile(onnx_path):
        try:
            self._backend = _ONNXBackend(onnx_path)
        except Exception as e:
            self.get_logger().error(f'ONNX load failed: {e}')
        else:
            self.get_logger().info(f'ONNX backend loaded: {onnx_path}')
            return

    self.get_logger().error(
        'No model found. Set engine_path or onnx_path parameter. '
        'Detection disabled — node spinning without publishing.'
    )
|
||||
|
||||
# ── Callbacks ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _on_camera_info(self, msg: CameraInfo):
    """Cache the latest camera intrinsics; _on_frame waits for this."""
    self._camera_info = msg
|
||||
|
||||
def _on_frame(self, color_msg: Image, depth_msg: Image):
    """Process one synchronized color+depth pair: detect, track, publish.

    Silently drops frames until both a model backend and camera
    intrinsics are available.
    """
    if self._backend is None or self._camera_info is None:
        return

    t0 = time.monotonic()

    # Decode images
    try:
        bgr = self._bridge.imgmsg_to_cv2(color_msg, desired_encoding='bgr8')
        depth = self._bridge.imgmsg_to_cv2(depth_msg, desired_encoding='passthrough')
    except Exception as e:
        self.get_logger().error(f'Image decode error: {e}', throttle_duration_sec=5.0)
        return

    # Depth image should be float32 metres (realsense2_camera default)
    if depth.dtype != np.float32:
        depth = depth.astype(np.float32)
        # Heuristic unit conversion: values above 100 are assumed to be
        # uint16 millimetres. NOTE(review): a non-float32 frame where every
        # pixel is < 100 mm would be misread as metres — confirm acceptable.
        if depth.max() > 100.0:  # uint16 mm → float32 m
            depth /= 1000.0

    # Run detection
    tensor = self._preprocess(bgr)
    try:
        raw_outputs = self._backend.infer(tensor)
    except Exception as e:
        self.get_logger().error(f'Inference error: {e}', throttle_duration_sec=5.0)
        return

    detections_px = self._postprocess_yolov8(raw_outputs[0])

    # Get depth for each detection (median patch around bbox centre)
    detections = []
    for x1, y1, x2, y2, conf in detections_px:
        cx = (x1 + x2) / 2.0
        cy = (y1 + y2) / 2.0
        d = self._get_depth_at(depth, cx, cy)
        detections.append(((x1, y1, x2, y2), d, conf))

    # Update tracker
    track = self._tracker.update(detections)

    # Publish Detection2DArray (all persons, even without valid depth)
    det_array = Detection2DArray()
    det_array.header = color_msg.header
    for (x1, y1, x2, y2), d, conf in detections:
        det = self._make_detection2d(
            color_msg.header, x1, y1, x2, y2, conf)
        det_array.detections.append(det)
    self._pub_detections.publish(det_array)

    # Publish target PoseStamped — only when the track has a valid depth.
    if track is not None:
        x1, y1, x2, y2 = track.bbox
        cx = (x1 + x2) / 2.0
        cy = (y1 + y2) / 2.0
        d = track.depth

        if d > 0:
            X, Y, Z = self._pixel_to_3d(cx, cy, d)
            track.position_3d = (X, Y, Z)

            # Pose starts in the camera optical frame; identity orientation.
            pose = PoseStamped()
            pose.header = color_msg.header
            pose.header.frame_id = 'camera_color_optical_frame'
            pose.pose.position.x = X
            pose.pose.position.y = Y
            pose.pose.position.z = Z
            pose.pose.orientation.w = 1.0

            # Transform to target_frame (falls back to camera frame on TF failure)
            if self._target_frame != 'camera_color_optical_frame':
                pose = self._transform_pose(pose)

            self._pub_target.publish(pose)

    # Debug image (publisher only exists when enabled at startup)
    if self._pub_debug and hasattr(self, '_pub_debug_img'):
        debug = self._draw_debug(bgr, detections, track)
        self._pub_debug_img.publish(
            self._bridge.cv2_to_imgmsg(debug, encoding='bgr8'))

    dt = (time.monotonic() - t0) * 1000
    self.get_logger().debug(
        f'Frame: {len(detections)} persons, track={track is not None}, {dt:.1f}ms',
        throttle_duration_sec=1.0,
    )
|
||||
|
||||
# ── Preprocessing ─────────────────────────────────────────────────────────
|
||||
|
||||
def _preprocess(self, bgr: np.ndarray) -> np.ndarray:
    """Letterbox to 640×640, scale to [0, 1], HWC→CHW, add batch dim.

    Side effect: records the letterbox geometry (scale, padding, original
    size) on self so _remap_bbox can invert the transform later.
    """
    self._orig_h, self._orig_w = bgr.shape[:2]
    canvas, self._scale, self._pad_w, self._pad_h = letterbox(bgr, _YOLO_INPUT_SIZE)

    rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    chw = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)  # HWC → CHW
    return np.ascontiguousarray(chw[np.newaxis])  # [1, 3, H, W]
|
||||
|
||||
def _remap_bbox(self, x1, y1, x2, y2):
    """Map bbox from 640×640 letterbox space back to original image space,
    using the geometry recorded by the most recent _preprocess call."""
    return remap_bbox(
        x1, y1, x2, y2,
        self._scale, self._pad_w, self._pad_h,
        self._orig_w, self._orig_h,
    )
|
||||
|
||||
# ── Post-processing ───────────────────────────────────────────────────────
|
||||
|
||||
def _postprocess_yolov8(self, raw: np.ndarray) -> list:
    """
    Parse a YOLOv8n output tensor and return person detections.

    YOLOv8n output shape: [1, 84, 8400] or [84, 8400]
      rows 0-3: cx, cy, w, h (in 640×640 input space)
      rows 4-83: class scores (no objectness score in v8)

    Returns:
        list of (x1, y1, x2, y2, confidence) in original image space
    """
    pred = raw.squeeze()  # → [84, 8400]
    if pred.ndim != 2 or pred.shape[0] < 5:
        return []

    # Threshold on the 'person' class score only.
    person_conf = pred[4 + _PERSON_CLASS_ID, :]
    selected = person_conf > self._conf_thresh
    if not selected.any():
        return []

    scores = person_conf[selected]
    cx, cy, bw, bh = pred[:4, selected]

    # Centre/size → corner representation for NMS.
    half_w = bw / 2.0
    half_h = bh / 2.0
    corners = np.stack(
        [cx - half_w, cy - half_h, cx + half_w, cy + half_h], axis=1)  # [N, 4]

    out = []
    for idx in nms(corners, scores, self._nms_thresh):
        bx1, by1, bx2, by2 = self._remap_bbox(
            corners[idx, 0], corners[idx, 1], corners[idx, 2], corners[idx, 3])
        # Skip degenerate boxes (thinner than 4 px after remapping)
        if bx2 - bx1 < 4 or by2 - by1 < 4:
            continue
        out.append((bx1, by1, bx2, by2, float(scores[idx])))
    return out
|
||||
|
||||
# ── Depth & 3D ────────────────────────────────────────────────────────────
|
||||
|
||||
def _get_depth_at(self, depth_img: np.ndarray, u: float, v: float,
                  window: int = 7) -> float:
    """Median depth in a window around pixel (u, v). Returns 0 if invalid.

    Thin wrapper around detection_utils.get_depth_at that applies this
    node's configured min/max depth range.
    """
    return get_depth_at(depth_img, u, v, window,
                        self._min_depth, self._max_depth)
|
||||
|
||||
def _pixel_to_3d(self, u: float, v: float, depth_m: float) -> tuple[float, float, float]:
    """Back-project pixel (u, v) at depth_m to 3D point in camera frame.

    Requires self._camera_info to be set (guaranteed by the guard in
    _on_frame before this is called).
    """
    return pixel_to_3d(u, v, depth_m, self._camera_info.k)
|
||||
|
||||
# ── TF transform ──────────────────────────────────────────────────────────
|
||||
|
||||
def _transform_pose(self, pose_in: PoseStamped) -> PoseStamped:
    """Transform a pose into self._target_frame via TF.

    On any TF failure the input pose is returned unchanged (camera frame),
    with a throttled warning.
    """
    try:
        transformed = self._tf_buffer.transform(
            pose_in, self._target_frame,
            timeout=Duration(seconds=0.05))
    except Exception as e:
        self.get_logger().warn(
            f'TF {pose_in.header.frame_id}→{self._target_frame} failed: {e}',
            throttle_duration_sec=5.0)
        return pose_in  # publish in camera frame as fallback
    return transformed
|
||||
|
||||
# ── Message builders ──────────────────────────────────────────────────────
|
||||
|
||||
def _make_detection2d(self, header, x1, y1, x2, y2, conf) -> Detection2D:
    """Build a vision_msgs Detection2D for one 'person' bounding box."""
    hyp = ObjectHypothesisWithPose()
    hyp.hypothesis.class_id = 'person'
    hyp.hypothesis.score = conf

    det = Detection2D()
    det.header = header
    det.results.append(hyp)

    bbox = det.bbox
    bbox.center.position.x = (x1 + x2) / 2.0
    bbox.center.position.y = (y1 + y2) / 2.0
    bbox.center.theta = 0.0
    bbox.size_x = x2 - x1
    bbox.size_y = y2 - y1
    return det
|
||||
|
||||
# ── Debug visualisation ───────────────────────────────────────────────────
|
||||
|
||||
def _draw_debug(self, bgr, detections, track):
    """Return a copy of `bgr` annotated with all detections (green) and
    the active track (red, with ID / depth / hold-time label)."""
    vis = bgr.copy()
    green = (0, 255, 0)
    red = (0, 0, 255)

    for (bx1, by1, bx2, by2), dist, conf in detections:
        top_left = (int(bx1), int(by1))
        cv2.rectangle(vis, top_left, (int(bx2), int(by2)), green, 2)
        cv2.putText(vis, f'{conf:.2f} {dist:.1f}m',
                    (top_left[0], top_left[1] - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)

    if track is not None:
        tx1, ty1, tx2, ty2 = track.bbox
        cv2.rectangle(vis, (int(tx1), int(ty1)), (int(tx2), int(ty2)), red, 3)
        label = f'ID:{track.track_id} {track.depth:.1f}m'
        if track.age > 0.05:
            label += f' (held {track.age:.1f}s)'
        cv2.putText(vis, label, (int(tx1), int(ty1) - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, red, 2)
    return vis
|
||||
|
||||
|
||||
# ── Entry point ───────────────────────────────────────────────────────────────
|
||||
|
||||
def main(args=None):
    """Entry point: initialise rclpy, spin the detector node, clean up."""
    rclpy.init(args=args)
    detector = PersonDetectorNode()
    try:
        rclpy.spin(detector)
    except KeyboardInterrupt:
        pass  # Ctrl-C is a normal shutdown path
    finally:
        detector.destroy_node()
        rclpy.shutdown()
|
||||
@ -0,0 +1,179 @@
|
||||
"""
|
||||
tracker.py — Single-target person tracker for saltybot person-following mode.
|
||||
|
||||
Strategy:
|
||||
- Select closest valid detection each frame (smallest depth within range)
|
||||
- Re-associate across frames using IoU matching with existing track
|
||||
- Hold last known track for `hold_duration` seconds when detections are lost
|
||||
- Assign monotonically increasing track IDs
|
||||
|
||||
Usage:
|
||||
tracker = SimplePersonTracker(hold_duration=2.0)
|
||||
track = tracker.update(detections) # detections: list of (bbox, depth, conf)
|
||||
if track is not None:
|
||||
print(track.bbox, track.depth, track.track_id)
|
||||
"""
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
|
||||
class PersonTrack:
    """A single tracked person: bbox + depth + timing bookkeeping."""

    def __init__(self, bbox, depth, confidence, track_id):
        """
        Args:
            bbox: (x1, y1, x2, y2) in pixels
            depth: distance to person in metres
            confidence: detection confidence 0–1
            track_id: unique integer ID
        """
        self.bbox = bbox
        self.depth = depth
        self.confidence = confidence
        self.track_id = track_id
        self.position_3d = None  # (X, Y, Z) in camera frame — set by node
        now = time.monotonic()
        self._first_seen = now
        self._last_seen = now

    def touch(self, bbox, depth, confidence):
        """Refresh the track with a newly associated detection."""
        self.bbox = bbox
        self.depth = depth
        self.confidence = confidence
        self._last_seen = time.monotonic()

    @property
    def age(self):
        """Seconds since last detection."""
        return time.monotonic() - self._last_seen

    @property
    def is_stale(self):
        return self.age > 0  # Always check externally against hold_duration

    @property
    def center(self):
        """Bounding-box centre as (cx, cy) in pixels."""
        xa, ya, xb, yb = self.bbox
        return ((xa + xb) / 2.0, (ya + yb) / 2.0)

    @property
    def area(self):
        """Bounding-box area in px²; clamped to 0 for inverted boxes."""
        xa, ya, xb, yb = self.bbox
        return max(0.0, (xb - xa) * (yb - ya))
|
||||
|
||||
|
||||
class SimplePersonTracker:
    """
    Lightweight single-target person tracker.

    Maintains one active PersonTrack at a time. On each update:
      1. Filter detections to valid depth range.
      2. If a track is active, attempt IoU re-association.
      3. If re-association fails and track is within hold_duration, keep stale.
      4. If no active track, initialise from closest detection.
    """

    def __init__(
        self,
        hold_duration: float = 2.0,
        iou_threshold: float = 0.25,
        min_depth: float = 0.3,
        max_depth: float = 5.0,
    ):
        """
        Args:
            hold_duration: seconds to hold last known position after losing track
            iou_threshold: minimum IoU to accept a re-association (inclusive)
            min_depth: minimum valid depth in metres (exclusive bound)
            max_depth: maximum valid depth in metres (exclusive bound)
        """
        self._hold_duration = hold_duration
        self._iou_threshold = iou_threshold
        self._min_depth = min_depth
        self._max_depth = max_depth
        self._track: PersonTrack | None = None
        self._next_id = 1

    def update(self, detections):
        """
        Update tracker with new detections.

        Args:
            detections: list of (bbox, depth, confidence) where
                bbox = (x1, y1, x2, y2) pixels,
                depth = float metres (0 = invalid),
                confidence = float 0–1

        Returns:
            PersonTrack or None.
            Returns a stale PersonTrack (track.age > 0) during hold period.
            Returns None after hold_duration or if never seen a person.
        """
        # Depth gate: zero/invalid depths fall outside (min, max) and drop out.
        valid = [
            (b, d, c) for b, d, c in detections
            if self._min_depth < d < self._max_depth
        ]

        if not valid:
            # No valid detections this frame — hold the last known track
            # while it is younger than hold_duration, otherwise drop it.
            if self._track is not None:
                if self._track.age <= self._hold_duration:
                    return self._track  # hold last known
                self._track = None
            return None

        if self._track is not None:
            # Attempt re-association: find the detection with the highest IoU
            # against the current track. BUGFIX: accept when IoU >= threshold
            # (the parameter is documented as the *minimum* IoU to accept, so
            # the boundary value must re-associate; the previous strict `>`
            # comparison rejected exact-threshold matches).
            best_iou = -1.0
            best = None
            for bbox, depth, conf in valid:
                iou = _iou(bbox, self._track.bbox)
                if iou > best_iou:
                    best_iou = iou
                    best = (bbox, depth, conf)

            if best is not None and best_iou >= self._iou_threshold:
                self._track.touch(*best)
                return self._track

            # No IoU match — keep stale track within hold window
            if self._track.age <= self._hold_duration:
                return self._track
            self._track = None  # lost — start fresh

        # No active track: pick closest valid detection
        bbox, depth, conf = min(valid, key=lambda x: x[1])
        self._track = PersonTrack(bbox, depth, conf, self._next_id)
        self._next_id += 1
        return self._track

    def reset(self):
        """Drop current track."""
        self._track = None

    @property
    def active(self):
        """True while a track (fresh or held) is being maintained."""
        return self._track is not None
|
||||
|
||||
|
||||
def _iou(bbox_a, bbox_b):
|
||||
"""Compute Intersection-over-Union of two bounding boxes (x1,y1,x2,y2)."""
|
||||
ax1, ay1, ax2, ay2 = bbox_a
|
||||
bx1, by1, bx2, by2 = bbox_b
|
||||
|
||||
ix1 = max(ax1, bx1)
|
||||
iy1 = max(ay1, by1)
|
||||
ix2 = min(ax2, bx2)
|
||||
iy2 = min(ay2, by2)
|
||||
|
||||
if ix2 <= ix1 or iy2 <= iy1:
|
||||
return 0.0
|
||||
|
||||
intersection = (ix2 - ix1) * (iy2 - iy1)
|
||||
area_a = (ax2 - ax1) * (ay2 - ay1)
|
||||
area_b = (bx2 - bx1) * (by2 - by1)
|
||||
union = area_a + area_b - intersection
|
||||
return intersection / union if union > 0 else 0.0
|
||||
@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
build_trt_engine.py — Convert ONNX model to TensorRT .engine file.
|
||||
|
||||
Run this ONCE on the Jetson Orin Nano Super to build the optimised engine.
|
||||
The engine is hardware-specific and cannot be shared between GPU families.
|
||||
|
||||
Usage:
|
||||
python3 build_trt_engine.py --onnx yolov8n.onnx --engine yolov8n.engine
|
||||
python3 build_trt_engine.py --onnx yolov8n.onnx --engine yolov8n.engine --fp16
|
||||
python3 build_trt_engine.py --onnx yolov8n.onnx --engine yolov8n.engine --fp16 --batch 1
|
||||
|
||||
Alternatively, use the trtexec CLI tool (ships with JetPack):
|
||||
/usr/src/tensorrt/bin/trtexec \\
|
||||
--onnx=yolov8n.onnx \\
|
||||
--fp16 \\
|
||||
--saveEngine=yolov8n.engine \\
|
||||
--workspace=2048
|
||||
|
||||
Model download (YOLOv8n):
|
||||
pip3 install ultralytics
|
||||
python3 -c "from ultralytics import YOLO; YOLO('yolov8n.pt').export(format='onnx', imgsz=640)"
|
||||
|
||||
Model download (YOLOv5s):
|
||||
python3 -c "
|
||||
import torch
|
||||
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
|
||||
model.export(format='onnx')
|
||||
"
|
||||
|
||||
Engine location:
|
||||
Place the built .engine file at the path specified by `engine_path`
|
||||
in person_detection_params.yaml (default: models/yolov8n.engine)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def build_engine(onnx_path: str, engine_path: str, fp16: bool, batch_size: int,
                 workspace_mb: int) -> bool:
    """Build TensorRT engine from ONNX model.

    Args:
        onnx_path: path to the input ONNX model.
        engine_path: destination for the serialized .engine file.
        fp16: request FP16 precision (silently falls back to FP32 when the
            platform has no fast FP16 support).
        batch_size: optimisation-profile batch size (1 for real-time use).
        workspace_mb: TensorRT builder workspace limit in megabytes.

    Returns:
        True on success, False on any failure (missing tensorrt, ONNX parse
        error, or engine build failure).
    """
    try:
        import tensorrt as trt
    except ImportError:
        print('ERROR: tensorrt not found. Run on Jetson with JetPack installed.')
        print('       Alternatively use trtexec (see script header).')
        return False

    trt_logger = trt.Logger(trt.Logger.VERBOSE)
    builder = trt.Builder(trt_logger)
    # ONNX models require an explicit-batch network definition.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)
    onnx_parser = trt.OnnxParser(network, trt_logger)

    print(f'[+] Parsing ONNX: {onnx_path}')
    with open(onnx_path, 'rb') as model_file:
        parsed_ok = onnx_parser.parse(model_file.read())
    if not parsed_ok:
        for i in range(onnx_parser.num_errors):
            print(f'    Parse error {i}: {onnx_parser.get_error(i)}')
        return False
    print(f'    Network inputs: {network.num_inputs}')
    print(f'    Network outputs: {network.num_outputs}')

    config = builder.create_builder_config()
    config.set_memory_pool_limit(
        trt.MemoryPoolType.WORKSPACE, workspace_mb * 1024 * 1024)

    if fp16:
        if builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
            print('[+] FP16 mode enabled')
        else:
            print('[!] FP16 not supported on this platform, using FP32')

    # Dynamic batch profile (batch_size = 1 for real-time inference)
    profile = builder.create_optimization_profile()
    model_input = network.get_input(0)
    shape = model_input.shape  # e.g., [-1, 3, 640, 640]
    chw = (shape[1], shape[2], shape[3])
    profile.set_shape(
        model_input.name,
        (1,) + chw,           # min
        (batch_size,) + chw,  # opt
        (batch_size,) + chw,  # max
    )
    config.add_optimization_profile(profile)

    print(f'[+] Building engine (this may take 5–15 minutes on first run)...')
    engine_blob = builder.build_serialized_network(network, config)
    if engine_blob is None:
        print('ERROR: Engine build failed.')
        return False

    os.makedirs(os.path.dirname(os.path.abspath(engine_path)), exist_ok=True)
    with open(engine_path, 'wb') as out:
        out.write(engine_blob)

    size_mb = os.path.getsize(engine_path) / (1024 * 1024)
    print(f'[+] Engine saved: {engine_path} ({size_mb:.1f} MB)')
    return True
|
||||
|
||||
|
||||
def verify_engine(engine_path: str) -> bool:
    """Quick sanity check: deserialise the engine and print binding info.

    Args:
        engine_path: path to a serialized TensorRT .engine file.

    Returns:
        True when the engine deserialises and its I/O tensors can be listed;
        False on any failure (missing tensorrt, missing/corrupt file).
        Best-effort by design: errors are reported, never raised.
    """
    try:
        import tensorrt as trt
        # (removed unused `import numpy as np` — nothing here touches numpy)
        logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, 'rb') as f, trt.Runtime(logger) as rt:
            engine = rt.deserialize_cuda_engine(f.read())
            print(f'\n[+] Engine verified: {engine.num_io_tensors} tensors')
            for i in range(engine.num_io_tensors):
                name = engine.get_tensor_name(i)
                shape = engine.get_tensor_shape(name)
                dtype = engine.get_tensor_dtype(name)
                mode = 'IN ' if engine.get_tensor_mode(name).name == 'INPUT' else 'OUT'
                print(f'    {mode} [{i}] {name}: {list(shape)} {dtype}')
            return True
    except Exception as e:  # broad on purpose: verification is best-effort
        print(f'[!] Engine verification failed: {e}')
        return False
|
||||
|
||||
|
||||
def main():
    """CLI entry: parse arguments, build the engine, optionally verify it."""
    ap = argparse.ArgumentParser(
        description='Build TensorRT engine from ONNX model')
    ap.add_argument('--onnx', required=True,
                    help='Path to input ONNX model')
    ap.add_argument('--engine', required=True,
                    help='Path for output .engine file')
    ap.add_argument('--fp16', action='store_true', default=True,
                    help='Enable FP16 precision (default: True)')
    ap.add_argument('--no-fp16', dest='fp16', action='store_false',
                    help='Disable FP16, use FP32')
    ap.add_argument('--batch', type=int, default=1,
                    help='Batch size (default: 1)')
    ap.add_argument('--workspace', type=int, default=2048,
                    help='Builder workspace in MB (default: 2048)')
    ap.add_argument('--verify', action='store_true',
                    help='Verify engine after build')
    args = ap.parse_args()

    # Fail fast with download instructions when the model is missing.
    if not os.path.isfile(args.onnx):
        print(f'ERROR: ONNX model not found: {args.onnx}')
        print()
        print('Download YOLOv8n ONNX:')
        print('    pip3 install ultralytics')
        print('    python3 -c "from ultralytics import YOLO; '
              'YOLO(\'yolov8n.pt\').export(format=\'onnx\', imgsz=640)"')
        sys.exit(1)

    built = build_engine(args.onnx, args.engine, args.fp16, args.batch,
                         args.workspace)
    if not built:
        sys.exit(1)

    if args.verify:
        verify_engine(args.engine)

    print('\nNext step: update person_detection_params.yaml:')
    print(f'    engine_path: "{os.path.abspath(args.engine)}"')


if __name__ == '__main__':
    main()
|
||||
4
jetson/ros2_ws/src/saltybot_perception/setup.cfg
Normal file
4
jetson/ros2_ws/src/saltybot_perception/setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/saltybot_perception
|
||||
[install]
|
||||
install_scripts=$base/lib/saltybot_perception
|
||||
32
jetson/ros2_ws/src/saltybot_perception/setup.py
Normal file
32
jetson/ros2_ws/src/saltybot_perception/setup.py
Normal file
@ -0,0 +1,32 @@
|
||||
from setuptools import setup
|
||||
import os
|
||||
from glob import glob
|
||||
|
||||
package_name = 'saltybot_perception'


# Standard ament_python package manifest for a ROS2 Python package.
setup(
    name=package_name,
    version='0.1.0',
    packages=[package_name],
    data_files=[
        # Marker file so the ament resource index can discover this package.
        ('share/ament_index/resource_index/packages',
         ['resource/' + package_name]),
        ('share/' + package_name, ['package.xml']),
        # Install launch and YAML parameter files under share/<pkg>/.
        (os.path.join('share', package_name, 'launch'),
         glob('launch/*.py')),
        (os.path.join('share', package_name, 'config'),
         glob('config/*.yaml')),
    ],
    install_requires=['setuptools'],
    zip_safe=True,
    maintainer='seb',
    maintainer_email='seb@vayrette.com',
    description='Person detection and tracking for saltybot (YOLOv8n + TensorRT)',
    license='MIT',
    # NOTE(review): tests_require is deprecated in modern setuptools; kept for
    # compatibility with the colcon/ament_python workflow.
    tests_require=['pytest'],
    entry_points={
        # Exposed as: ros2 run saltybot_perception person_detector
        'console_scripts': [
            'person_detector = saltybot_perception.person_detector_node:main',
        ],
    },
)
|
||||
@ -0,0 +1,67 @@
|
||||
"""
|
||||
test_postprocess.py — Tests for YOLOv8 post-processing and NMS.
|
||||
|
||||
Tests the _nms() helper and validate post-processing logic without
|
||||
requiring a GPU, TRT, or running ROS2 node.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from saltybot_perception.detection_utils import nms as _nms
|
||||
|
||||
|
||||
class TestNMS:
    """Unit tests for the nms() helper (greedy score-ordered suppression)."""

    def test_single_box_kept(self):
        # A lone box can never be suppressed.
        boxes = np.array([[0, 0, 10, 10]], dtype=float)
        scores = np.array([0.9])
        assert _nms(boxes, scores) == [0]

    def test_empty_input(self):
        # No boxes in, no indices out.
        assert _nms(np.zeros((0, 4)), np.array([])) == []

    def test_suppresses_overlapping_box(self):
        # Two heavily overlapping boxes — keep highest score
        boxes = np.array([
            [0, 0, 10, 10],   # score 0.9 — keep
            [1, 1, 11, 11],   # score 0.8 — suppress (high IoU with first)
        ], dtype=float)
        scores = np.array([0.9, 0.8])
        keep = _nms(boxes, scores, iou_threshold=0.45)
        assert keep == [0]

    def test_keeps_non_overlapping_boxes(self):
        # Disjoint boxes must all survive regardless of score order.
        boxes = np.array([
            [0, 0, 10, 10],
            [50, 50, 60, 60],
            [100, 100, 110, 110],
        ], dtype=float)
        scores = np.array([0.9, 0.85, 0.8])
        keep = _nms(boxes, scores, iou_threshold=0.45)
        assert sorted(keep) == [0, 1, 2]

    def test_score_ordering(self):
        # Lower score box overlaps with higher — higher should be kept
        boxes = np.array([
            [1, 1, 11, 11],   # score 0.6
            [0, 0, 10, 10],   # score 0.95 — should be kept
        ], dtype=float)
        scores = np.array([0.6, 0.95])
        keep = _nms(boxes, scores, iou_threshold=0.45)
        assert 1 in keep      # higher score (index 1) kept
        assert 0 not in keep  # lower score (index 0) suppressed

    def test_iou_threshold_controls_suppression(self):
        # Two boxes with IoU ~0.14 (intersection 5×5=25, union 175)
        boxes = np.array([
            [0, 0, 10, 10],
            [5, 5, 15, 15],
        ], dtype=float)
        scores = np.array([0.9, 0.8])
        # High threshold — both boxes kept (IoU ~0.14 < 0.5)
        keep_high = _nms(boxes, scores, iou_threshold=0.5)
        assert sorted(keep_high) == [0, 1]
        # Low threshold — only first kept
        keep_low = _nms(boxes, scores, iou_threshold=0.0)
        assert keep_low == [0]
|
||||
156
jetson/ros2_ws/src/saltybot_perception/test/test_tracker.py
Normal file
156
jetson/ros2_ws/src/saltybot_perception/test/test_tracker.py
Normal file
@ -0,0 +1,156 @@
|
||||
"""
|
||||
test_tracker.py — Unit tests for SimplePersonTracker.
|
||||
|
||||
Run with: pytest test/test_tracker.py -v
|
||||
No ROS2 runtime required.
|
||||
"""
|
||||
|
||||
import time
|
||||
import pytest
|
||||
from saltybot_perception.tracker import SimplePersonTracker, PersonTrack, _iou
|
||||
|
||||
|
||||
# ── IoU helper ────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestIoU:
    """Unit tests for the _iou bounding-box overlap helper."""

    def test_identical_boxes(self):
        box = (0, 0, 10, 10)
        assert _iou(box, box) == pytest.approx(1.0)

    def test_no_overlap(self):
        assert _iou((0, 0, 5, 5), (10, 10, 15, 15)) == pytest.approx(0.0)

    def test_partial_overlap(self):
        # Intersection 5×5=25, each area=100, union=175
        expected = 25 / 175
        assert _iou((0, 0, 10, 10), (5, 5, 15, 15)) == pytest.approx(expected)

    def test_contained_box(self):
        # Inner box fully inside outer box
        # intersection = 6×6=36, area_outer=100, area_inner=36, union=100
        outer = (0, 0, 10, 10)
        inner = (2, 2, 8, 8)
        assert _iou(outer, inner) == pytest.approx(36 / 100)

    def test_touching_edges(self):
        # Boxes touch at a single edge — no area overlap
        left_box = (0, 0, 5, 5)
        right_box = (5, 0, 10, 5)
        assert _iou(left_box, right_box) == pytest.approx(0.0)
|
||||
|
||||
|
||||
# ── Tracker ───────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSimplePersonTracker:
    """Behavioural tests for SimplePersonTracker — no ROS2 runtime required."""

    def _make_det(self, x1=10, y1=10, x2=60, y2=160, depth=2.0, conf=0.85):
        # Helper: build one detection tuple in the (bbox, depth, confidence)
        # format expected by SimplePersonTracker.update().
        return ((x1, y1, x2, y2), depth, conf)

    # ── Basic update ──────────────────────────────────────────────────────────

    def test_no_detections_returns_none(self):
        t = SimplePersonTracker()
        assert t.update([]) is None

    def test_single_detection_creates_track(self):
        t = SimplePersonTracker()
        track = t.update([self._make_det()])
        assert track is not None
        # Track IDs start at 1.
        assert track.track_id == 1
        assert track.depth == pytest.approx(2.0)

    def test_track_id_increments(self):
        # hold_duration=0 → a single missed frame drops the track, so the
        # next detection must start a fresh track with the next ID.
        t = SimplePersonTracker(hold_duration=0.0)
        t.update([self._make_det()])
        t.update([])  # lose track immediately
        track2 = t.update([self._make_det(x1=100, y1=100, x2=150, y2=250)])
        assert track2.track_id == 2

    # ── Closest-first selection ───────────────────────────────────────────────

    def test_picks_closest_person(self):
        # With no active track, the tracker must lock onto the nearest person.
        t = SimplePersonTracker()
        dets = [
            ((10, 10, 60, 160), 4.0, 0.8),    # far
            ((70, 10, 120, 160), 1.5, 0.9),   # closest
            ((130, 10, 180, 160), 3.0, 0.75), # mid
        ]
        track = t.update(dets)
        assert track.depth == pytest.approx(1.5)

    # ── Depth filtering ───────────────────────────────────────────────────────

    def test_rejects_beyond_max_depth(self):
        t = SimplePersonTracker(max_depth=5.0)
        assert t.update([self._make_det(depth=6.0)]) is None

    def test_rejects_below_min_depth(self):
        t = SimplePersonTracker(min_depth=0.3)
        assert t.update([self._make_det(depth=0.1)]) is None

    def test_accepts_within_depth_range(self):
        t = SimplePersonTracker(min_depth=0.3, max_depth=5.0)
        track = t.update([self._make_det(depth=2.5)])
        assert track is not None

    # ── Re-association ────────────────────────────────────────────────────────

    def test_iou_reassociation_keeps_same_id(self):
        t = SimplePersonTracker(iou_threshold=0.2)
        # Frame 1
        track1 = t.update([self._make_det(x1=10, y1=10, x2=60, y2=160)])
        id1 = track1.track_id
        # Frame 2 — slightly shifted (good IoU)
        track2 = t.update([self._make_det(x1=12, y1=12, x2=62, y2=162)])
        assert track2.track_id == id1

    def test_poor_iou_loses_track_after_hold(self):
        t = SimplePersonTracker(hold_duration=0.0, iou_threshold=0.5)
        # Frame 1 — track person at left
        t.update([self._make_det(x1=0, y1=0, x2=50, y2=150)])
        # Frame 2 — completely different position, bad IoU
        # hold_duration=0, so old track expires, new track started
        track = t.update([self._make_det(x1=500, y1=0, x2=550, y2=150)])
        assert track.track_id == 2  # new track

    # ── Hold duration ─────────────────────────────────────────────────────────

    def test_holds_last_known_within_duration(self):
        # A long hold window guarantees the empty frame falls inside it.
        t = SimplePersonTracker(hold_duration=10.0)
        track = t.update([self._make_det()])
        track_id = track.track_id
        # No detections — should hold
        held = t.update([])
        assert held is not None
        assert held.track_id == track_id
        assert held.age >= 0

    def test_releases_track_after_hold_duration(self):
        t = SimplePersonTracker(hold_duration=0.0)
        t.update([self._make_det()])
        # Immediately lose
        result = t.update([])
        assert result is None

    # ── Reset ────────────────────────────────────────────────────────────────

    def test_reset_clears_track(self):
        t = SimplePersonTracker()
        t.update([self._make_det()])
        t.reset()
        assert t.active is False
        assert t.update([]) is None

    # ── PersonTrack properties ────────────────────────────────────────────────

    def test_track_center(self):
        track = PersonTrack((10, 20, 50, 100), 2.0, 0.9, 1)
        cx, cy = track.center
        assert cx == pytest.approx(30.0)
        assert cy == pytest.approx(60.0)

    def test_track_area(self):
        track = PersonTrack((10, 20, 50, 100), 2.0, 0.9, 1)
        assert track.area == pytest.approx(40 * 80)

    def test_track_age_increases(self):
        # age is wall-clock based (time.monotonic), so a short sleep must
        # grow it; allow slack for coarse clock resolution.
        track = PersonTrack((0, 0, 50, 100), 2.0, 0.9, 1)
        time.sleep(0.05)
        assert track.age >= 0.04
|
||||
Loading…
x
Reference in New Issue
Block a user