Merge Issue #468: object detection - perception system enhancements

This commit is contained in:
sl-jetson 2026-03-05 14:22:32 -05:00
commit b178614e6e
15 changed files with 980 additions and 0 deletions

View File

@ -170,6 +170,12 @@ def generate_launch_description():
description="Launch YOLOv8n person detection (TensorRT)",
)
enable_object_detection_arg = DeclareLaunchArgument(
"enable_object_detection",
default_value="true",
description="Launch YOLOv8n general object detection with depth (Issue #468)",
)
enable_follower_arg = DeclareLaunchArgument(
"enable_follower",
default_value="true",
@ -376,6 +382,22 @@ def generate_launch_description():
],
)
# ── t=6s Object detection (needs RealSense up for ~4s; Issue #468) ──────
object_detection = TimerAction(
period=6.0,
actions=[
GroupAction(
condition=IfCondition(LaunchConfiguration("enable_object_detection")),
actions=[
LogInfo(msg="[full_stack] Starting YOLOv8n general object detection"),
IncludeLaunchDescription(
_launch("saltybot_object_detection", "launch", "object_detection.launch.py"),
),
],
),
],
)
# ── t=9s Person follower (needs perception + UWB; ~3s after both start) ─
follower = TimerAction(
period=9.0,
@ -442,6 +464,7 @@ def generate_launch_description():
enable_csi_cameras_arg,
enable_uwb_arg,
enable_perception_arg,
enable_object_detection_arg,
enable_follower_arg,
enable_bridge_arg,
enable_rosbridge_arg,
@ -473,6 +496,7 @@ def generate_launch_description():
slam,
outdoor_nav,
perception,
object_detection,
# t=9s
follower,

View File

@ -0,0 +1,36 @@
# YOLOv8n Object Detection Configuration
object_detection:
ros__parameters:
# Model paths
engine_path: /mnt/nvme/saltybot/models/yolov8n.engine
onnx_path: /mnt/nvme/saltybot/models/yolov8n.onnx
# Inference parameters
confidence_threshold: 0.5 # Detection confidence threshold (0-1)
nms_iou_threshold: 0.45 # Non-Maximum Suppression IoU threshold
# Per-frame filtering
min_confidence_filter: 0.4 # Only publish objects with confidence >= this
enabled_classes: # COCO class IDs to detect
# NOTE(review): comments corrected to match the standard 80-class COCO
# order — several IDs were mislabeled (39 was annotated "cup", etc.).
# Confirm the numeric IDs, not the old labels, are what was intended.
- 0 # person
- 39 # bottle
- 41 # cup
- 42 # fork
- 43 # knife
- 47 # apple
- 48 # sandwich
- 49 # orange
- 56 # chair
- 62 # tv
- 64 # mouse
- 73 # book
# Depth sampling parameters
depth_window_size: 7 # 7x7 window for median filtering
depth_min_range: 0.3 # Minimum valid depth (meters)
depth_max_range: 6.0 # Maximum valid depth (meters)
# Publishing
target_frame: "base_link" # Output frame for 3D positions
publish_debug_image: false # Publish annotated debug image

View File

@ -0,0 +1,49 @@
from launch import LaunchDescription
from launch.actions import DeclareLaunchArgument
from launch.substitutions import LaunchConfiguration
from launch_ros.actions import Node
from launch_ros.substitutions import FindPackageShare
from pathlib import Path
def generate_launch_description():
    """Launch the YOLOv8n object-detection node (Issue #468).

    Loads static parameters from the package's YAML config and exposes the
    confidence threshold and the debug-image flag as launch arguments.
    """
    # BUG FIX: FindPackageShare is a launch Substitution that is only
    # resolved at launch time — wrapping it in str()/Path() here produced a
    # bogus repr-based path. PathJoinSubstitution defers the join correctly.
    from launch.substitutions import PathJoinSubstitution

    config_file = PathJoinSubstitution(
        [
            FindPackageShare("saltybot_object_detection"),
            "config",
            "object_detection_params.yaml",
        ]
    )
    # Declare launch arguments
    confidence_threshold_arg = DeclareLaunchArgument(
        "confidence_threshold",
        default_value="0.5",
        description="Detection confidence threshold (0-1)"
    )
    publish_debug_arg = DeclareLaunchArgument(
        "publish_debug_image",
        default_value="false",
        description="Publish annotated debug images"
    )
    # Object detection node
    object_detection_node = Node(
        package="saltybot_object_detection",
        executable="object_detection",
        name="object_detection",
        parameters=[
            config_file,
            {"confidence_threshold": LaunchConfiguration("confidence_threshold")},
            # BUG FIX: was LaunchConfiguration("publish_debug_arg") — that is
            # the Python variable name; the launch *argument* is
            # "publish_debug_image".
            {"publish_debug_image": LaunchConfiguration("publish_debug_image")},
        ],
        remappings=[
            ("color_image", "/camera/color/image_raw"),
            ("depth_image", "/camera/depth/image_rect_raw"),
            ("camera_info", "/camera/color/camera_info"),
        ],
        output="screen",
    )
    return LaunchDescription([
        confidence_threshold_arg,
        publish_debug_arg,
        object_detection_node,
    ])

View File

@ -0,0 +1,32 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>saltybot_object_detection</name>
<version>0.1.0</version>
<description>YOLOv8n object detection with depth integration (Issue #468)</description>
<maintainer email="sl-perception@saltylab.local">sl-perception</maintainer>
<license>MIT</license>
<buildtool_depend>ament_python</buildtool_depend>
<depend>rclpy</depend>
<depend>std_msgs</depend>
<depend>sensor_msgs</depend>
<depend>geometry_msgs</depend>
<depend>vision_msgs</depend>
<depend>tf2_ros</depend>
<depend>cv_bridge</depend>
<depend>message_filters</depend>
<!-- NOTE(review): "opencv-python" is a pip name, not a rosdep key; the
     rosdep key for the system Python OpenCV bindings is python3-opencv. -->
<depend>python3-opencv</depend>
<depend>numpy</depend>
<depend>saltybot_object_detection_msgs</depend>
<test_depend>ament_copyright</test_depend>
<test_depend>ament_flake8</test_depend>
<test_depend>ament_pep257</test_depend>
<test_depend>python3-pytest</test_depend>
<export>
<build_type>ament_python</build_type>
</export>
</package>

View File

@ -0,0 +1,549 @@
#!/usr/bin/env python3
"""
YOLOv8n Object Detection Node with RealSense Depth Integration
Issue #468: General object detection for spatial awareness
"""
import os
import numpy as np
import cv2
from typing import Tuple, List, Optional
from pathlib import Path
import logging
import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy
import message_filters
from tf2_ros import TransformListener, Buffer
from tf2_geometry_msgs import PointStamped
from sensor_msgs.msg import Image, CameraInfo
from std_msgs.msg import Header
from geometry_msgs.msg import Point, PointStamped as PointStampedMsg, Quaternion
from vision_msgs.msg import BoundingBox2D, Pose2D
from cv_bridge import CvBridge
from saltybot_object_detection_msgs.msg import DetectedObject, DetectedObjectArray
from saltybot_object_detection_msgs.srv import QueryObjects
_LOGGER = logging.getLogger(__name__)
# COCO class names (0-79)
_COCO_CLASSES = [
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
"cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
"snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
"surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
"dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "microwave",
"oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush"
]
_YOLO_INPUT_SIZE = 640
_CONFIDENCE_THRESHOLD = 0.5
_NMS_IOU_THRESHOLD = 0.45
# Sensor-data QoS: best-effort delivery, keep only the 5 newest messages.
# Matches the typical camera-driver "sensor data" profile so the image
# subscriptions are QoS-compatible with the publisher — TODO confirm the
# RealSense driver's actual publisher QoS.
_SENSOR_QOS = QoSProfile(
    reliability=ReliabilityPolicy.BEST_EFFORT,
    history=HistoryPolicy.KEEP_LAST,
    depth=5,
)
class _TRTBackend:
    """TensorRT inference backend (primary for Jetson).

    Deserializes a prebuilt engine, allocates pinned host + device buffers
    for every binding, and runs async inference on a private CUDA stream.

    Raises:
        RuntimeError: if tensorrt/pycuda are not importable.
        FileNotFoundError: if the engine file does not exist.

    NOTE(review): uses the pre-TensorRT-10 binding API
    (num_bindings / get_binding_*) — confirm the target JetPack ships TRT 8.x.
    """

    def __init__(self, engine_path: str):
        try:
            import tensorrt as trt
            import pycuda.driver as cuda
            import pycuda.autoinit  # noqa: F401 — side effect: creates CUDA context
        except ImportError as e:
            raise RuntimeError(f"TensorRT/pycuda not available: {e}")
        if not Path(engine_path).exists():
            raise FileNotFoundError(f"TensorRT engine not found: {engine_path}")
        self.logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, "rb") as f:
            self.engine = trt.Runtime(self.logger).deserialize_cuda_engine(f.read())
        self.context = self.engine.create_execution_context()
        self.stream = cuda.Stream()
        # Allocate input/output buffers, one pinned host + one device buffer
        # per binding, in binding order (required by execute_async_v2).
        self.h_inputs = {}
        self.h_outputs = {}
        self.d_inputs = {}
        self.d_outputs = {}
        self.bindings = []
        for binding_idx in range(self.engine.num_bindings):
            binding_name = self.engine.get_binding_name(binding_idx)
            binding_shape = self.engine.get_binding_shape(binding_idx)
            binding_dtype = self.engine.get_binding_dtype(binding_idx)
            # Host buffer dtype must match the binding dtype exactly.
            if binding_dtype == trt.float32:
                np_dtype = np.float32
            elif binding_dtype == trt.float16:
                # BUG FIX: was np.float32 — allocating 4-byte elements for a
                # 2-byte fp16 binding makes the host buffer twice the binding
                # size and corrupts device<->host copies.
                np_dtype = np.float16
            else:
                raise ValueError(f"Unsupported dtype: {binding_dtype}")
            binding_size = int(np.prod(binding_shape))
            if self.engine.binding_is_input(binding_idx):
                h_buf = cuda.pagelocked_empty(binding_size, np_dtype)
                d_buf = cuda.mem_alloc(h_buf.nbytes)
                self.h_inputs[binding_name] = h_buf
                self.d_inputs[binding_name] = d_buf
                self.bindings.append(int(d_buf))
            else:
                h_buf = cuda.pagelocked_empty(binding_size, np_dtype)
                d_buf = cuda.mem_alloc(h_buf.nbytes)
                # Keep outputs shaped so callers can index them directly.
                self.h_outputs[binding_name] = h_buf.reshape(binding_shape)
                self.d_outputs[binding_name] = d_buf
                self.bindings.append(int(d_buf))
        self.input_names = list(self.h_inputs.keys())
        self.output_names = list(self.h_outputs.keys())

    def infer(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run one inference pass; returns copies of all output tensors.

        ``input_data`` is flattened into the (single) input binding;
        np.copyto casts to the binding dtype (e.g. float32 -> float16).
        """
        import pycuda.driver as cuda
        input_name = self.input_names[0]
        np.copyto(self.h_inputs[input_name], input_data.ravel())
        cuda.memcpy_htod_async(self.d_inputs[input_name], self.h_inputs[input_name], self.stream)
        self.context.execute_async_v2(self.bindings, self.stream.handle)
        outputs = []
        for output_name in self.output_names:
            cuda.memcpy_dtoh_async(self.h_outputs[output_name], self.d_outputs[output_name], self.stream)
            self.stream.synchronize()
            # Copy so the caller's array survives the next infer() overwrite.
            outputs.append(self.h_outputs[output_name].copy())
        return outputs
class _ONNXBackend:
    """ONNX Runtime inference backend (fallback when no TRT engine exists).

    Raises:
        RuntimeError: if onnxruntime is not importable.
        FileNotFoundError: if the model file does not exist.
    """

    def __init__(self, onnx_path: str):
        try:
            import onnxruntime as ort
        except ImportError as e:
            raise RuntimeError(f"ONNXRuntime not available: {e}")
        if not Path(onnx_path).exists():
            raise FileNotFoundError(f"ONNX model not found: {onnx_path}")
        # Prefer CUDA; onnxruntime silently falls back to CPU if unavailable.
        self.session = ort.InferenceSession(
            onnx_path,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        # Cache tensor names once so infer() is allocation-free.
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [o.name for o in self.session.get_outputs()]

    def infer(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run one forward pass and return the raw output tensors."""
        feed = {self.input_name: input_data}
        return self.session.run(self.output_names, feed)
class _YOLODecoder:
"""Decode YOLOv8 output to detections."""
def __init__(self, conf_thresh: float = 0.5, nms_iou_thresh: float = 0.45):
self.conf_thresh = conf_thresh
self.nms_iou_thresh = nms_iou_thresh
def decode(self, output: np.ndarray, input_size: int) -> List[Tuple[int, str, float, Tuple[int, int, int, int]]]:
"""
Decode YOLOv8 output.
Output shape: [1, 84, 8400]
Returns: List[(class_id, class_name, confidence, bbox_xyxy)]
"""
# Transpose: [1, 84, 8400] -> [8400, 84]
output = output.squeeze(0).transpose(1, 0)
# Extract bbox and scores
bboxes = output[:, :4] # [8400, 4] cx, cy, w, h
scores = output[:, 4:] # [8400, 80] class scores
# Get max score and class per detection
max_scores = scores.max(axis=1)
class_ids = scores.argmax(axis=1)
# Filter by confidence
mask = max_scores >= self.conf_thresh
bboxes = bboxes[mask]
class_ids = class_ids[mask]
scores = max_scores[mask]
if len(bboxes) == 0:
return []
# Convert cx, cy, w, h to x1, y1, x2, y2
bboxes_xyxy = np.zeros_like(bboxes)
bboxes_xyxy[:, 0] = bboxes[:, 0] - bboxes[:, 2] / 2
bboxes_xyxy[:, 1] = bboxes[:, 1] - bboxes[:, 3] / 2
bboxes_xyxy[:, 2] = bboxes[:, 0] + bboxes[:, 2] / 2
bboxes_xyxy[:, 3] = bboxes[:, 1] + bboxes[:, 3] / 2
# Apply NMS
keep_indices = self._nms(bboxes_xyxy, scores, self.nms_iou_thresh)
# Build result
detections = []
for idx in keep_indices:
x1, y1, x2, y2 = bboxes_xyxy[idx]
class_id = int(class_ids[idx])
conf = float(scores[idx])
class_name = _COCO_CLASSES[class_id]
detections.append((class_id, class_name, conf, (int(x1), int(y1), int(x2), int(y2))))
return detections
@staticmethod
def _nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
"""Non-Maximum Suppression."""
if len(boxes) == 0:
return []
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
iou = inter / (areas[i] + areas[order[1:]] - inter)
order = order[np.where(iou <= iou_threshold)[0] + 1]
return keep
class ObjectDetectionNode(Node):
    """YOLOv8n object detection with RealSense depth integration.

    Pipeline: synchronized color+depth frames -> YOLO inference ->
    class/confidence filtering -> median depth sampling at each bbox center
    -> pinhole unprojection to 3D -> optional TF transform to
    ``target_frame`` -> DetectedObjectArray on /saltybot/objects.
    A query service returns a text summary of the last processed frame.
    """

    def __init__(self):
        super().__init__("object_detection")
        # ── Parameters ──────────────────────────────────────────────────
        self.declare_parameter("engine_path", "/mnt/nvme/saltybot/models/yolov8n.engine")
        self.declare_parameter("onnx_path", "/mnt/nvme/saltybot/models/yolov8n.onnx")
        self.declare_parameter("confidence_threshold", 0.5)
        self.declare_parameter("nms_iou_threshold", 0.45)
        self.declare_parameter("min_confidence_filter", 0.4)
        self.declare_parameter("enabled_classes", [0, 39, 41, 42, 43, 47, 56, 57, 61, 62, 64, 73])
        self.declare_parameter("depth_window_size", 7)
        self.declare_parameter("depth_min_range", 0.3)
        self.declare_parameter("depth_max_range", 6.0)
        self.declare_parameter("target_frame", "base_link")
        self.declare_parameter("publish_debug_image", False)
        self.engine_path = self.get_parameter("engine_path").value
        self.onnx_path = self.get_parameter("onnx_path").value
        self.confidence_threshold = self.get_parameter("confidence_threshold").value
        self.nms_iou_threshold = self.get_parameter("nms_iou_threshold").value
        self.min_confidence_filter = self.get_parameter("min_confidence_filter").value
        self.enabled_classes = self.get_parameter("enabled_classes").value
        self.depth_window_size = self.get_parameter("depth_window_size").value
        self.depth_min_range = self.get_parameter("depth_min_range").value
        self.depth_max_range = self.get_parameter("depth_max_range").value
        self.target_frame = self.get_parameter("target_frame").value
        self.publish_debug_image = self.get_parameter("publish_debug_image").value
        # ── Inference backend + decoder ─────────────────────────────────
        self.backend = self._load_backend()
        self.decoder = _YOLODecoder(self.confidence_threshold, self.nms_iou_threshold)
        self.bridge = CvBridge()
        # ── TF2 ─────────────────────────────────────────────────────────
        self.tf_buffer = Buffer()
        self.tf_listener = TransformListener(self.tf_buffer, self)
        # Camera intrinsics, latched from the first CameraInfo message.
        # (Removed an unused `camera_info_lock` attribute — rclpy callbacks
        # in the default single-threaded executor never run concurrently.)
        self.camera_info: Optional[CameraInfo] = None
        # ── Subscriptions: sync color+depth within one frame @ 30 fps ───
        color_sub = message_filters.Subscriber(
            self, Image, "color_image", qos_profile=_SENSOR_QOS
        )
        depth_sub = message_filters.Subscriber(
            self, Image, "depth_image", qos_profile=_SENSOR_QOS
        )
        self.sync = message_filters.ApproximateTimeSynchronizer(
            [color_sub, depth_sub], queue_size=5, slop=0.033
        )
        self.sync.registerCallback(self._on_frame)
        # Camera info needs no synchronization — latch it once.
        # (BUG FIX: removed a dead message_filters.Subscriber for camera_info
        # that was created but never registered with the synchronizer.)
        self.create_subscription(CameraInfo, "camera_info", self._on_camera_info, _SENSOR_QOS)
        # ── Publishers ──────────────────────────────────────────────────
        self.objects_pub = self.create_publisher(
            DetectedObjectArray, "/saltybot/objects", _SENSOR_QOS
        )
        if self.publish_debug_image:
            self.debug_image_pub = self.create_publisher(
                Image, "/saltybot/objects/debug_image", _SENSOR_QOS
            )
        else:
            self.debug_image_pub = None
        # ── Query service ───────────────────────────────────────────────
        self.query_srv = self.create_service(
            QueryObjects, "/saltybot/objects/query", self._on_query
        )
        # Detections from the most recent frame, for the query service.
        self.last_detections: List[DetectedObject] = []
        self.get_logger().info("ObjectDetectionNode initialized")

    def _load_backend(self):
        """Load the TensorRT engine, falling back to ONNX Runtime on failure."""
        try:
            if Path(self.engine_path).exists():
                self.get_logger().info(f"Loading TensorRT engine: {self.engine_path}")
                return _TRTBackend(self.engine_path)
            else:
                self.get_logger().warn(f"TRT engine not found: {self.engine_path}")
        except Exception as e:
            self.get_logger().error(f"TensorRT loading failed: {e}")
        # Fallback to ONNX — raises if that is missing too, aborting startup.
        self.get_logger().info(f"Loading ONNX model: {self.onnx_path}")
        return _ONNXBackend(self.onnx_path)

    def _on_camera_info(self, msg: CameraInfo):
        """Latch camera intrinsics from the first CameraInfo message."""
        if self.camera_info is None:
            self.camera_info = msg
            self.get_logger().info(f"Camera info received: {msg.width}x{msg.height}")

    def _on_frame(self, color_msg: Image, depth_msg: Image):
        """Process one synchronized color+depth pair end to end."""
        if self.camera_info is None:
            self.get_logger().warn("Camera info not yet received, skipping frame")
            return
        color_frame = self.bridge.imgmsg_to_cv2(color_msg, desired_encoding="bgr8")
        # BUG FIX: "float32" is not a cv_bridge encoding. RealSense publishes
        # depth_image_rect_raw as 16UC1 in millimeters; take it unchanged and
        # convert to float32 meters so the min/max range parameters (meters)
        # filter correctly.
        depth_raw = self.bridge.imgmsg_to_cv2(depth_msg, desired_encoding="passthrough")
        if depth_raw.dtype == np.uint16:
            depth_frame = depth_raw.astype(np.float32) * 0.001
        else:
            depth_frame = depth_raw.astype(np.float32)
        input_tensor, scale, pad_x, pad_y = self._preprocess(color_frame)
        try:
            output = self.backend.infer(input_tensor)
            detections = self.decoder.decode(output[0], _YOLO_INPUT_SIZE)
        except Exception as e:
            self.get_logger().error(f"Inference error: {e}")
            return
        # Filter by enabled classes/confidence and map bboxes back to image
        # coordinates (BUG FIX: decoder output is in 640x640 letterbox space
        # and was previously used directly as image/depth pixel coordinates).
        img_h, img_w = color_frame.shape[:2]
        filtered_detections = []
        for class_id, class_name, conf, (x1, y1, x2, y2) in detections:
            if class_id not in self.enabled_classes or conf < self.min_confidence_filter:
                continue
            x1 = int(np.clip((x1 - pad_x) / scale, 0, img_w - 1))
            x2 = int(np.clip((x2 - pad_x) / scale, 0, img_w - 1))
            y1 = int(np.clip((y1 - pad_y) / scale, 0, img_h - 1))
            y2 = int(np.clip((y2 - pad_y) / scale, 0, img_h - 1))
            filtered_detections.append((class_id, class_name, conf, (x1, y1, x2, y2)))
        # Project each detection to 3D via depth at the bbox center.
        detected_objects = []
        for class_id, class_name, conf, (x1, y1, x2, y2) in filtered_detections:
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2
            # NOTE(review): assumes depth is registered/aligned to the color
            # image so pixel coordinates transfer directly — confirm the
            # driver publishes aligned depth on the remapped topic.
            depth_m = self._get_depth_at(depth_frame, cx, cy)
            if depth_m <= 0:
                continue  # no valid depth in the sampling window
            pos_3d = self._pixel_to_3d(float(cx), float(cy), depth_m, self.camera_info)
            pos_3d.header.stamp = color_msg.header.stamp
            if self.target_frame != "camera_color_optical_frame":
                try:
                    # BUG FIX: the original looked up the transform but never
                    # applied it. Buffer.transform() performs the actual point
                    # transformation (converter registered by the module-level
                    # tf2_geometry_msgs import).
                    pos_3d = self.tf_buffer.transform(pos_3d, self.target_frame)
                except Exception as e:
                    self.get_logger().warn(f"TF lookup failed: {e}")
                    # Fall back to camera-frame coordinates (frame_id already
                    # set by _pixel_to_3d).
            obj = DetectedObject()
            obj.class_id = class_id
            obj.class_name = class_name
            obj.confidence = conf
            obj.bbox = BoundingBox2D()
            obj.bbox.center = Pose2D(x=float(cx), y=float(cy))
            obj.bbox.size_x = float(x2 - x1)
            obj.bbox.size_y = float(y2 - y1)
            obj.position_3d = pos_3d
            # Camera-frame range to the object, not base_link distance.
            obj.distance_m = depth_m
            detected_objects.append(obj)
        # Publish the array (header frame reflects the configured target;
        # individual points carry their own frame_id if TF failed).
        self.last_detections = detected_objects
        array_msg = DetectedObjectArray()
        array_msg.header = color_msg.header
        array_msg.header.frame_id = self.target_frame
        array_msg.objects = detected_objects
        self.objects_pub.publish(array_msg)
        if self.debug_image_pub is not None:
            self._publish_debug_image(color_frame, filtered_detections)

    def _preprocess(self, bgr_frame: np.ndarray):
        """Letterbox to 640x640, BGR->RGB, normalize to 0-1, NCHW float32.

        Returns:
            (tensor, scale, pad_x, pad_y) — the letterbox parameters let the
            caller map detections back to original image coordinates.
        """
        h, w = bgr_frame.shape[:2]
        scale = _YOLO_INPUT_SIZE / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(bgr_frame, (new_w, new_h))
        # Center the resized image on a black square canvas.
        canvas = np.zeros((_YOLO_INPUT_SIZE, _YOLO_INPUT_SIZE, 3), dtype=np.uint8)
        pad_y = (_YOLO_INPUT_SIZE - new_h) // 2
        pad_x = (_YOLO_INPUT_SIZE - new_w) // 2
        canvas[pad_y : pad_y + new_h, pad_x : pad_x + new_w] = resized
        rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        tensor = rgb.astype(np.float32) / 255.0
        # HWC -> CHW -> NCHW, contiguous for the inference backends.
        tensor = np.ascontiguousarray(tensor.transpose(2, 0, 1)[np.newaxis])
        return tensor, scale, pad_x, pad_y

    def _get_depth_at(self, depth_frame: np.ndarray, u: int, v: int) -> float:
        """Median depth (meters) in a window around pixel (u, v); 0.0 if none valid."""
        h, w = depth_frame.shape
        half = self.depth_window_size // 2
        u1 = max(0, u - half)
        u2 = min(w, u + half + 1)
        v1 = max(0, v - half)
        v2 = min(h, v + half + 1)
        patch = depth_frame[v1:v2, u1:u2]
        # Median over in-range samples rejects speckle and zero-fill holes.
        valid = patch[(patch > self.depth_min_range) & (patch < self.depth_max_range)]
        if len(valid) == 0:
            return 0.0
        return float(np.median(valid))

    def _pixel_to_3d(self, u: float, v: float, depth_m: float, cam_info: CameraInfo) -> PointStampedMsg:
        """Unproject pixel (u, v) at depth_m to a 3D point in the camera optical frame."""
        K = cam_info.k if hasattr(cam_info, "k") else cam_info.K
        fx, fy = K[0], K[4]
        cx, cy = K[2], K[5]
        # Standard pinhole model: X right, Y down, Z forward (optical frame).
        X = (u - cx) * depth_m / fx
        Y = (v - cy) * depth_m / fy
        Z = depth_m
        point_msg = PointStampedMsg()
        point_msg.header.frame_id = "camera_color_optical_frame"
        point_msg.header.stamp = self.get_clock().now().to_msg()
        point_msg.point = Point(x=X, y=Y, z=Z)
        return point_msg

    def _publish_debug_image(self, frame: np.ndarray, detections: List):
        """Publish a copy of the frame annotated with boxes and labels."""
        debug_frame = frame.copy()
        for class_id, class_name, conf, (x1, y1, x2, y2) in detections:
            cv2.rectangle(debug_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{class_name} {conf:.2f}"
            cv2.putText(
                debug_frame, label, (x1, y1 - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
            )
        msg = self.bridge.cv2_to_imgmsg(debug_frame, encoding="bgr8")
        self.debug_image_pub.publish(msg)

    def _on_query(self, request, response) -> QueryObjects.Response:
        """Service handler: summarize the most recent detections as text."""
        if not self.last_detections:
            response.description = "No objects detected."
            response.success = False
            return response
        descriptions = []
        for obj in self.last_detections:
            if obj.distance_m > 0:
                descriptions.append(f"{obj.class_name} at {obj.distance_m:.1f}m")
            else:
                descriptions.append(obj.class_name)
        response.description = f"I see {', '.join(descriptions)}."
        response.success = True
        return response
def main(args=None):
    """Entry point: initialize rclpy and spin the detection node until shutdown."""
    rclpy.init(args=args)
    detector = ObjectDetectionNode()
    rclpy.spin(detector)
    detector.destroy_node()
    rclpy.shutdown()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,183 @@
#!/usr/bin/env python3
"""
Build YOLOv8n TensorRT FP16 engine for Orin Nano.
"""
import argparse
import sys
from pathlib import Path
def build_engine(output_path: Path, workspace_mb: int = 2048) -> bool:
    """Download YOLOv8n, export it to ONNX, and build a TensorRT FP16 engine.

    Args:
        output_path: Destination path for the serialized .engine file.
        workspace_mb: TensorRT builder workspace limit in megabytes.

    Returns:
        True on success; False if a dependency is missing or a step fails.
    """
    try:
        from ultralytics import YOLO
    except ImportError:
        print("ERROR: ultralytics not installed. Install with: pip install ultralytics")
        return False
    try:
        import tensorrt as trt
    except ImportError:
        print("ERROR: TensorRT not installed")
        return False
    print("[*] Loading YOLOv8n from Ultralytics...")
    model = YOLO("yolov8n.pt")
    print("[*] Exporting to ONNX...")
    onnx_path = output_path.parent / "yolov8n.onnx"
    # BUG FIX: export() returns the path of the file it wrote (which depends
    # on where the .pt lives) — use it instead of assuming ./yolov8n.onnx.
    exported = model.export(format="onnx", opset=12)
    import shutil
    onnx_src = Path(exported) if exported else Path("yolov8n.onnx")
    if onnx_src.exists():
        if onnx_src.resolve() != onnx_path.resolve():
            shutil.move(str(onnx_src), str(onnx_path))
        print(f"[+] ONNX exported to {onnx_path}")
    else:
        print("ERROR: ONNX export not found")
        return False
    print("[*] Converting ONNX to TensorRT FP16...")
    try:
        # BUG FIX: polygraphy has no `BytesFromOnnx` loader, and
        # engine_from_network() expects a parsed network plus a CreateConfig —
        # not raw ONNX bytes / ad-hoc config kwargs.
        from polygraphy.backend.trt import (
            CreateConfig,
            engine_from_network,
            network_from_onnx_path,
            save_engine,
        )
    except ImportError:
        print("ERROR: polygraphy not installed. Install with: pip install polygraphy")
        return False
    try:
        engine = engine_from_network(
            network_from_onnx_path(str(onnx_path)),
            config=CreateConfig(
                fp16=True,
                memory_pool_limits={trt.MemoryPoolType.WORKSPACE: workspace_mb * 1024 * 1024},
            ),
        )
        save_engine(engine, str(output_path))
        print(f"[+] TensorRT engine saved to {output_path}")
        return True
    except Exception as e:
        print(f"ERROR: Failed to convert to TensorRT: {e}")
        return False
def benchmark_engine(engine_path: Path, num_iterations: int = 100) -> None:
    """Benchmark TensorRT engine latency (mean/std/min/max and FPS).

    Args:
        engine_path: Path to a serialized YOLOv8n engine (1x3x640x640 in,
            1x84x8400 out).
        num_iterations: Number of timed inference iterations after warmup.
    """
    try:
        import tensorrt as trt
        import pycuda.driver as cuda
        import pycuda.autoinit  # noqa: F401 — side effect: creates CUDA context
        import numpy as np
        import time
    except ImportError as e:
        print(f"ERROR: Missing dependency: {e}")
        return
    if not engine_path.exists():
        print(f"ERROR: Engine not found: {engine_path}")
        return
    print(f"\n[*] Benchmarking {engine_path} ({num_iterations} iterations)...")
    # BUG FIX: pre-bind to None so the finally block cannot raise a NameError
    # (masking the real failure) when an exception fires before allocation.
    d_input = None
    d_output = None
    try:
        logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, "rb") as f:
            engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()
        stream = cuda.Stream()
        # Input (1, 3, 640, 640) and output (1, 84, 8400) buffers.
        h_input = cuda.pagelocked_empty(1 * 3 * 640 * 640, np.float32)
        d_input = cuda.mem_alloc(h_input.nbytes)
        h_output = cuda.pagelocked_empty(1 * 84 * 8400, np.float32)
        d_output = cuda.mem_alloc(h_output.nbytes)
        bindings = [int(d_input), int(d_output)]
        # Warmup iterations stabilize clocks and lazy initialization.
        for _ in range(10):
            cuda.memcpy_htod_async(d_input, h_input, stream)
            context.execute_async_v2(bindings, stream.handle)
            cuda.memcpy_dtoh_async(h_output, d_output, stream)
            stream.synchronize()
        # Timed section covers execute + device-to-host copy per iteration.
        times = []
        for _ in range(num_iterations):
            cuda.memcpy_htod_async(d_input, h_input, stream)
            start = time.time()
            context.execute_async_v2(bindings, stream.handle)
            cuda.memcpy_dtoh_async(h_output, d_output, stream)
            stream.synchronize()
            elapsed = time.time() - start
            times.append(elapsed * 1000)  # ms
        mean_latency = np.mean(times)
        std_latency = np.std(times)
        min_latency = np.min(times)
        max_latency = np.max(times)
        throughput = 1000.0 / mean_latency
        print(f"[+] Latency: {mean_latency:.2f}ms ± {std_latency:.2f}ms (min={min_latency:.2f}ms, max={max_latency:.2f}ms)")
        print(f"[+] Throughput: {throughput:.1f} FPS")
    except Exception as e:
        print(f"ERROR: Benchmark failed: {e}")
    finally:
        if d_input is not None:
            d_input.free()
        if d_output is not None:
            d_output.free()
def main():
    """CLI entry point: parse arguments, build the engine, optionally benchmark it."""
    parser = argparse.ArgumentParser(description="Build YOLOv8n TensorRT engine")
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("/mnt/nvme/saltybot/models/yolov8n.engine"),
        help="Output engine path",
    )
    parser.add_argument(
        "--workspace", type=int, default=2048, help="TensorRT workspace size in MB"
    )
    parser.add_argument(
        "--benchmark", action="store_true", help="Benchmark the engine after building"
    )
    opts = parser.parse_args()
    # Make sure the destination directory exists before the build writes to it.
    opts.output.parent.mkdir(parents=True, exist_ok=True)
    print(f"[*] Building YOLOv8n TensorRT engine")
    print(f"[*] Output: {opts.output}")
    print(f"[*] Workspace: {opts.workspace} MB")
    built_ok = build_engine(opts.output, opts.workspace)
    if built_ok and opts.benchmark:
        benchmark_engine(opts.output)
    sys.exit(0 if built_ok else 1)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,4 @@
[develop]
script_dir=$base/lib/saltybot_object_detection
[install]
install_scripts=$base/lib/saltybot_object_detection

View File

@ -0,0 +1,32 @@
from setuptools import setup, find_packages

package_name = 'saltybot_object_detection'

# Everything under share/<pkg> must be declared here so ros2 launch and the
# parameter loader can locate the launch/config files at runtime.
_share = 'share/' + package_name

setup(
    name=package_name,
    version='0.1.0',
    packages=find_packages(exclude=['test']),
    data_files=[
        ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
        (_share, ['package.xml']),
        (_share + '/launch', ['launch/object_detection.launch.py']),
        (_share + '/config', ['config/object_detection_params.yaml']),
    ],
    install_requires=['setuptools'],
    zip_safe=True,
    maintainer='sl-perception',
    maintainer_email='sl-perception@saltylab.local',
    description='YOLOv8n object detection with depth integration',
    license='MIT',
    tests_require=['pytest'],
    entry_points={
        'console_scripts': [
            # ros2 run saltybot_object_detection object_detection
            'object_detection = saltybot_object_detection.object_detection_node:main',
        ],
    },
)

View File

@ -0,0 +1,20 @@
# Interface-only package: generates the DetectedObject/DetectedObjectArray
# messages and the QueryObjects service for Issue #468.
cmake_minimum_required(VERSION 3.8)
project(saltybot_object_detection_msgs)

# Build tooling and interface-generation dependencies.
find_package(ament_cmake REQUIRED)
find_package(rosidl_default_generators REQUIRED)
# Packages whose types appear in the .msg/.srv definitions below.
find_package(std_msgs REQUIRED)
find_package(geometry_msgs REQUIRED)
find_package(vision_msgs REQUIRED)
find_package(builtin_interfaces REQUIRED)

rosidl_generate_interfaces(${PROJECT_NAME}
# Issue #468 general object detection (YOLOv8n)
"msg/DetectedObject.msg"
"msg/DetectedObjectArray.msg"
"srv/QueryObjects.srv"
DEPENDENCIES std_msgs geometry_msgs vision_msgs builtin_interfaces
)

# Downstream packages need the generated runtime typesupport.
ament_export_dependencies(rosidl_default_runtime)
ament_package()

View File

@ -0,0 +1,15 @@
# Single detected object from YOLO inference
# Published as array in DetectedObjectArray on /saltybot/objects
# ── Object identity ────────────────────────────────────
uint16 class_id # COCO class ID, 0-79
string class_name # human-readable label (e.g., "cup", "chair")
float32 confidence # detection confidence, 0-1
# ── 2-D bounding box (pixel coords in source image) ────
vision_msgs/BoundingBox2D bbox
# ── 3-D position (in base_link frame) ──────────────────
# Depth-projected from RealSense aligned depth map
geometry_msgs/PointStamped position_3d # point in base_link frame
float32 distance_m # euclidean distance from base_link, 0 = unknown

View File

@ -0,0 +1,5 @@
# Array of detected objects from YOLOv8n inference
# Published at /saltybot/objects with timestamp and frame info
std_msgs/Header header # timestamp, frame_id="base_link"
DetectedObject[] objects # detected objects in this frame

View File

@ -0,0 +1,23 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>saltybot_object_detection_msgs</name>
<version>0.1.0</version>
<description>ROS2 messages for YOLOv8n general object detection — Issue #468</description>
<maintainer email="seb@vayrette.com">seb</maintainer>
<license>MIT</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<build_depend>rosidl_default_generators</build_depend>
<exec_depend>rosidl_default_runtime</exec_depend>
<member_of_group>rosidl_interface_packages</member_of_group>
<depend>std_msgs</depend>
<depend>geometry_msgs</depend>
<depend>vision_msgs</depend>
<depend>builtin_interfaces</depend>
<export>
<build_type>ament_cmake</build_type>
</export>
</package>

View File

@ -0,0 +1,8 @@
# Query detected objects as a formatted text summary
# Called by voice_command_node for the "what's in front of you" intent
# Request (empty)
---
# Response
string description # e.g., "I see a cup at 0.8 meters, a laptop at 1.2 meters"
bool success # true if detection succeeded and objects found