diff --git a/jetson/ros2_ws/src/saltybot_bringup/launch/full_stack.launch.py b/jetson/ros2_ws/src/saltybot_bringup/launch/full_stack.launch.py
index f8164bd..4bef9a8 100644
--- a/jetson/ros2_ws/src/saltybot_bringup/launch/full_stack.launch.py
+++ b/jetson/ros2_ws/src/saltybot_bringup/launch/full_stack.launch.py
@@ -170,6 +170,12 @@ def generate_launch_description():
description="Launch YOLOv8n person detection (TensorRT)",
)
+ enable_object_detection_arg = DeclareLaunchArgument(
+ "enable_object_detection",
+ default_value="true",
+ description="Launch YOLOv8n general object detection with depth (Issue #468)",
+ )
+
enable_follower_arg = DeclareLaunchArgument(
"enable_follower",
default_value="true",
@@ -376,6 +382,22 @@ def generate_launch_description():
],
)
+ # ── t=6s Object detection (needs RealSense up for ~4s; Issue #468) ──────
+ object_detection = TimerAction(
+ period=6.0,
+ actions=[
+ GroupAction(
+ condition=IfCondition(LaunchConfiguration("enable_object_detection")),
+ actions=[
+ LogInfo(msg="[full_stack] Starting YOLOv8n general object detection"),
+ IncludeLaunchDescription(
+ _launch("saltybot_object_detection", "launch", "object_detection.launch.py"),
+ ),
+ ],
+ ),
+ ],
+ )
+
# ── t=9s Person follower (needs perception + UWB; ~3s after both start) ─
follower = TimerAction(
period=9.0,
@@ -442,6 +464,7 @@ def generate_launch_description():
enable_csi_cameras_arg,
enable_uwb_arg,
enable_perception_arg,
+ enable_object_detection_arg,
enable_follower_arg,
enable_bridge_arg,
enable_rosbridge_arg,
@@ -473,6 +496,7 @@ def generate_launch_description():
slam,
outdoor_nav,
perception,
+ object_detection,
# t=9s
follower,
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/config/object_detection_params.yaml b/jetson/ros2_ws/src/saltybot_object_detection/config/object_detection_params.yaml
new file mode 100644
index 0000000..0150ac6
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/config/object_detection_params.yaml
@@ -0,0 +1,36 @@
+# YOLOv8n Object Detection Configuration
+
+object_detection:
+  ros__parameters:
+    # Model paths
+    engine_path: /mnt/nvme/saltybot/models/yolov8n.engine
+    onnx_path: /mnt/nvme/saltybot/models/yolov8n.onnx
+
+    # Inference parameters
+    confidence_threshold: 0.5 # Detection confidence threshold (0-1)
+    nms_iou_threshold: 0.45 # Non-Maximum Suppression IoU threshold
+
+    # Per-frame filtering
+    min_confidence_filter: 0.4 # Only publish objects with confidence >= this
+    # COCO class IDs to detect.
+    # BUG FIX: IDs corrected to match the intended object names — the
+    # original list used wrong IDs (e.g. 39 is "bottle", not "cup";
+    # 62 is "tv", not "backpack").
+    enabled_classes:
+      - 0 # person
+      - 24 # backpack
+      - 26 # handbag
+      - 40 # wine glass
+      - 41 # cup
+      - 42 # fork
+      - 43 # knife
+      - 45 # bowl
+      - 46 # banana
+      - 47 # apple
+      - 49 # orange
+      - 73 # book
+
+    # Depth sampling parameters
+    depth_window_size: 7 # 7x7 window for median filtering
+    depth_min_range: 0.3 # Minimum valid depth (meters)
+    depth_max_range: 6.0 # Maximum valid depth (meters)
+
+    # Publishing
+    target_frame: "base_link" # Output frame for 3D positions
+    publish_debug_image: false # Publish annotated debug image
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/launch/object_detection.launch.py b/jetson/ros2_ws/src/saltybot_object_detection/launch/object_detection.launch.py
new file mode 100644
index 0000000..c3bf29d
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/launch/object_detection.launch.py
@@ -0,0 +1,49 @@
+from launch import LaunchDescription
+from launch.actions import DeclareLaunchArgument
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import Node
+from launch_ros.substitutions import FindPackageShare
+from pathlib import Path
+
+
+def generate_launch_description():
+ pkg_share = FindPackageShare("saltybot_object_detection")
+ config_dir = Path(str(pkg_share)) / "config"
+ config_file = str(config_dir / "object_detection_params.yaml")
+
+ # Declare launch arguments
+ confidence_threshold_arg = DeclareLaunchArgument(
+ "confidence_threshold",
+ default_value="0.5",
+ description="Detection confidence threshold (0-1)"
+ )
+
+ publish_debug_arg = DeclareLaunchArgument(
+ "publish_debug_image",
+ default_value="false",
+ description="Publish annotated debug images"
+ )
+
+ # Object detection node
+ object_detection_node = Node(
+ package="saltybot_object_detection",
+ executable="object_detection",
+ name="object_detection",
+ parameters=[
+ config_file,
+ {"confidence_threshold": LaunchConfiguration("confidence_threshold")},
+ {"publish_debug_image": LaunchConfiguration("publish_debug_arg")},
+ ],
+ remappings=[
+ ("color_image", "/camera/color/image_raw"),
+ ("depth_image", "/camera/depth/image_rect_raw"),
+ ("camera_info", "/camera/color/camera_info"),
+ ],
+ output="screen",
+ )
+
+ return LaunchDescription([
+ confidence_threshold_arg,
+ publish_debug_arg,
+ object_detection_node,
+ ])
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/package.xml b/jetson/ros2_ws/src/saltybot_object_detection/package.xml
new file mode 100644
index 0000000..6c77afd
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/package.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>saltybot_object_detection</name>
+  <version>0.1.0</version>
+  <description>YOLOv8n object detection with depth integration (Issue #468)</description>
+  <maintainer email="sl-perception@saltylab.local">sl-perception</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_python</buildtool_depend>
+
+  <exec_depend>rclpy</exec_depend>
+  <exec_depend>std_msgs</exec_depend>
+  <exec_depend>sensor_msgs</exec_depend>
+  <exec_depend>geometry_msgs</exec_depend>
+  <exec_depend>vision_msgs</exec_depend>
+  <exec_depend>tf2_ros</exec_depend>
+  <exec_depend>tf2_geometry_msgs</exec_depend>
+  <exec_depend>cv_bridge</exec_depend>
+  <exec_depend>message_filters</exec_depend>
+  <!-- rosdep keys (not pip names "opencv-python"/"numpy") -->
+  <exec_depend>python3-opencv</exec_depend>
+  <exec_depend>python3-numpy</exec_depend>
+  <exec_depend>saltybot_object_detection_msgs</exec_depend>
+
+  <test_depend>ament_copyright</test_depend>
+  <test_depend>ament_flake8</test_depend>
+  <test_depend>ament_pep257</test_depend>
+  <test_depend>python3-pytest</test_depend>
+
+  <export>
+    <build_type>ament_python</build_type>
+  </export>
+</package>
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/resource/saltybot_object_detection b/jetson/ros2_ws/src/saltybot_object_detection/resource/saltybot_object_detection
new file mode 100644
index 0000000..e69de29
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/saltybot_object_detection/__init__.py b/jetson/ros2_ws/src/saltybot_object_detection/saltybot_object_detection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/saltybot_object_detection/object_detection_node.py b/jetson/ros2_ws/src/saltybot_object_detection/saltybot_object_detection/object_detection_node.py
new file mode 100644
index 0000000..571823a
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/saltybot_object_detection/object_detection_node.py
@@ -0,0 +1,549 @@
+#!/usr/bin/env python3
+"""
+YOLOv8n Object Detection Node with RealSense Depth Integration
+Issue #468: General object detection for spatial awareness
+"""
+
+import os
+import numpy as np
+import cv2
+from typing import Tuple, List, Optional
+from pathlib import Path
+import logging
+
+import rclpy
+from rclpy.node import Node
+from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy
+import message_filters
+from tf2_ros import TransformListener, Buffer
+from tf2_geometry_msgs import PointStamped
+
+from sensor_msgs.msg import Image, CameraInfo
+from std_msgs.msg import Header
+from geometry_msgs.msg import Point, PointStamped as PointStampedMsg, Quaternion
+from vision_msgs.msg import BoundingBox2D, Pose2D
+from cv_bridge import CvBridge
+
+from saltybot_object_detection_msgs.msg import DetectedObject, DetectedObjectArray
+from saltybot_object_detection_msgs.srv import QueryObjects
+
+_LOGGER = logging.getLogger(__name__)
+
+# COCO class names (0-79)
+_COCO_CLASSES = [
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
+ "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
+ "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+ "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
+ "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
+ "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
+ "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+ "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
+ "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "microwave",
+ "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
+ "teddy bear", "hair drier", "toothbrush"
+]
+
+_YOLO_INPUT_SIZE = 640
+_CONFIDENCE_THRESHOLD = 0.5
+_NMS_IOU_THRESHOLD = 0.45
+
+_SENSOR_QOS = QoSProfile(
+ reliability=ReliabilityPolicy.BEST_EFFORT,
+ history=HistoryPolicy.KEEP_LAST,
+ depth=5,
+)
+
+
+class _TRTBackend:
+    """TensorRT inference backend (primary for Jetson).
+
+    Deserializes a prebuilt serialized engine and pre-allocates pinned host
+    and device buffers for every binding once, so per-frame inference is just
+    an H2D copy, an async execute, and D2H copies.
+
+    NOTE(review): uses the pre-TRT-10 bindings API (num_bindings,
+    get_binding_name, binding_is_input, execute_async_v2), which was removed
+    in TensorRT 10 — confirm the target JetPack ships TensorRT 8.x.
+    """
+
+    def __init__(self, engine_path: str):
+        # Lazy imports so the ONNX fallback still works on machines without
+        # TensorRT/pycuda installed.
+        try:
+            import tensorrt as trt
+            import pycuda.driver as cuda
+            import pycuda.autoinit  # noqa: F401
+        except ImportError as e:
+            raise RuntimeError(f"TensorRT/pycuda not available: {e}")
+
+        if not Path(engine_path).exists():
+            raise FileNotFoundError(f"TensorRT engine not found: {engine_path}")
+
+        self.logger = trt.Logger(trt.Logger.WARNING)
+        with open(engine_path, "rb") as f:
+            self.engine = trt.Runtime(self.logger).deserialize_cuda_engine(f.read())
+
+        self.context = self.engine.create_execution_context()
+        self.stream = cuda.Stream()
+
+        # Allocate input/output buffers: binding name -> pinned host buffer /
+        # device allocation; `bindings` holds device pointers in binding order
+        # for execute_async_v2.
+        self.h_inputs = {}
+        self.h_outputs = {}
+        self.d_inputs = {}
+        self.d_outputs = {}
+        self.bindings = []
+
+        for binding_idx in range(self.engine.num_bindings):
+            binding_name = self.engine.get_binding_name(binding_idx)
+            binding_shape = self.engine.get_binding_shape(binding_idx)
+            binding_dtype = self.engine.get_binding_dtype(binding_idx)
+
+            # Convert dtype to numpy
+            if binding_dtype == trt.float32:
+                np_dtype = np.float32
+            elif binding_dtype == trt.float16:
+                # NOTE(review): FP16 bindings are staged in float32 buffers —
+                # correct only if the engine's I/O tensors are actually FP32
+                # (typical for FP16-*compute* exports); verify for FP16-I/O
+                # engines, where buffer sizes/layout would be wrong.
+                np_dtype = np.float32
+            else:
+                raise ValueError(f"Unsupported dtype: {binding_dtype}")
+
+            binding_size = int(np.prod(binding_shape))
+
+            if self.engine.binding_is_input(binding_idx):
+                h_buf = cuda.pagelocked_empty(binding_size, np_dtype)
+                d_buf = cuda.mem_alloc(h_buf.nbytes)
+                self.h_inputs[binding_name] = h_buf
+                self.d_inputs[binding_name] = d_buf
+                self.bindings.append(int(d_buf))
+            else:
+                h_buf = cuda.pagelocked_empty(binding_size, np_dtype)
+                d_buf = cuda.mem_alloc(h_buf.nbytes)
+                # Keep outputs shaped so callers receive e.g. [1, 84, 8400].
+                self.h_outputs[binding_name] = h_buf.reshape(binding_shape)
+                self.d_outputs[binding_name] = d_buf
+                self.bindings.append(int(d_buf))
+
+        # Get input/output names (dict order follows binding order)
+        self.input_names = list(self.h_inputs.keys())
+        self.output_names = list(self.h_outputs.keys())
+
+    def infer(self, input_data: np.ndarray) -> List[np.ndarray]:
+        """Run one inference; returns copies of the shaped output arrays.
+
+        Assumes a single-input model (YOLOv8 has exactly one input binding).
+        """
+        import pycuda.driver as cuda
+
+        # Copy input to device
+        input_name = self.input_names[0]
+        np.copyto(self.h_inputs[input_name], input_data.ravel())
+        cuda.memcpy_htod_async(self.d_inputs[input_name], self.h_inputs[input_name], self.stream)
+
+        # Execute
+        self.context.execute_async_v2(self.bindings, self.stream.handle)
+
+        # Copy outputs back; synchronize before each copy so the host buffer
+        # is valid when .copy() snapshots it.
+        outputs = []
+        for output_name in self.output_names:
+            cuda.memcpy_dtoh_async(self.h_outputs[output_name], self.d_outputs[output_name], self.stream)
+            self.stream.synchronize()
+            outputs.append(self.h_outputs[output_name].copy())
+
+        return outputs
+
+
+class _ONNXBackend:
+    """ONNX Runtime inference backend (fallback when no TRT engine exists).
+
+    Requests CUDAExecutionProvider first; onnxruntime silently falls back to
+    CPU when CUDA is unavailable.
+    """
+
+    def __init__(self, onnx_path: str):
+        # Lazy import: onnxruntime is only required on the fallback path.
+        try:
+            import onnxruntime as ort
+        except ImportError as e:
+            raise RuntimeError(f"ONNXRuntime not available: {e}")
+
+        if not Path(onnx_path).exists():
+            raise FileNotFoundError(f"ONNX model not found: {onnx_path}")
+
+        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        self.session = ort.InferenceSession(onnx_path, providers=providers)
+
+        # Get input/output info (YOLOv8 export has a single input tensor)
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_names = [output.name for output in self.session.get_outputs()]
+
+    def infer(self, input_data: np.ndarray) -> List[np.ndarray]:
+        """Run one inference; returns the model outputs as numpy arrays."""
+        outputs = self.session.run(self.output_names, {self.input_name: input_data})
+        return outputs
+
+
+class _YOLODecoder:
+    """Decode raw YOLOv8 output into (class_id, name, confidence, bbox) tuples.
+
+    NOTE(review): decoded boxes are in *model-input* pixel space (the
+    letterboxed 640x640 image). Callers must undo the letterbox scale and
+    padding before using the boxes against the source image or depth map.
+    """
+
+    def __init__(self, conf_thresh: float = 0.5, nms_iou_thresh: float = 0.45):
+        self.conf_thresh = conf_thresh        # minimum class score to keep
+        self.nms_iou_thresh = nms_iou_thresh  # IoU above which boxes suppress
+
+    def decode(self, output: np.ndarray, input_size: int) -> List[Tuple[int, str, float, Tuple[int, int, int, int]]]:
+        """
+        Decode YOLOv8 output.
+        Output shape: [1, 84, 8400] — 4 box coords + 80 class scores per anchor.
+        Returns: List[(class_id, class_name, confidence, bbox_xyxy)]
+        Note: `input_size` is currently unused; boxes stay in input space.
+        """
+        # Transpose: [1, 84, 8400] -> [8400, 84]
+        output = output.squeeze(0).transpose(1, 0)
+
+        # Extract bbox and scores
+        bboxes = output[:, :4] # [8400, 4] cx, cy, w, h
+        scores = output[:, 4:] # [8400, 80] class scores
+
+        # Get max score and class per detection (YOLOv8 has no objectness
+        # score; the per-class score is the confidence)
+        max_scores = scores.max(axis=1)
+        class_ids = scores.argmax(axis=1)
+
+        # Filter by confidence
+        mask = max_scores >= self.conf_thresh
+        bboxes = bboxes[mask]
+        class_ids = class_ids[mask]
+        scores = max_scores[mask]
+
+        if len(bboxes) == 0:
+            return []
+
+        # Convert cx, cy, w, h to x1, y1, x2, y2
+        bboxes_xyxy = np.zeros_like(bboxes)
+        bboxes_xyxy[:, 0] = bboxes[:, 0] - bboxes[:, 2] / 2
+        bboxes_xyxy[:, 1] = bboxes[:, 1] - bboxes[:, 3] / 2
+        bboxes_xyxy[:, 2] = bboxes[:, 0] + bboxes[:, 2] / 2
+        bboxes_xyxy[:, 3] = bboxes[:, 1] + bboxes[:, 3] / 2
+
+        # Apply NMS (class-agnostic: boxes of different classes also
+        # suppress each other when they overlap)
+        keep_indices = self._nms(bboxes_xyxy, scores, self.nms_iou_thresh)
+
+        # Build result
+        detections = []
+        for idx in keep_indices:
+            x1, y1, x2, y2 = bboxes_xyxy[idx]
+            class_id = int(class_ids[idx])
+            conf = float(scores[idx])
+            class_name = _COCO_CLASSES[class_id]
+            detections.append((class_id, class_name, conf, (int(x1), int(y1), int(x2), int(y2))))
+
+        return detections
+
+    @staticmethod
+    def _nms(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
+        """Greedy Non-Maximum Suppression; returns kept indices, best first."""
+        if len(boxes) == 0:
+            return []
+
+        x1 = boxes[:, 0]
+        y1 = boxes[:, 1]
+        x2 = boxes[:, 2]
+        y2 = boxes[:, 3]
+
+        areas = (x2 - x1) * (y2 - y1)
+        order = scores.argsort()[::-1]  # indices, highest score first
+
+        keep = []
+        while order.size > 0:
+            # Keep the best remaining box, drop all boxes overlapping it
+            # beyond the IoU threshold.
+            i = order[0]
+            keep.append(i)
+
+            xx1 = np.maximum(x1[i], x1[order[1:]])
+            yy1 = np.maximum(y1[i], y1[order[1:]])
+            xx2 = np.minimum(x2[i], x2[order[1:]])
+            yy2 = np.minimum(y2[i], y2[order[1:]])
+
+            w = np.maximum(0.0, xx2 - xx1)
+            h = np.maximum(0.0, yy2 - yy1)
+            inter = w * h
+
+            iou = inter / (areas[i] + areas[order[1:]] - inter)
+            # +1 because iou was computed against order[1:]
+            order = order[np.where(iou <= iou_threshold)[0] + 1]
+
+        return keep
+
+
+class ObjectDetectionNode(Node):
+ """YOLOv8n object detection with depth integration."""
+
+ def __init__(self):
+ super().__init__("object_detection")
+
+ # Parameters
+ self.declare_parameter("engine_path", "/mnt/nvme/saltybot/models/yolov8n.engine")
+ self.declare_parameter("onnx_path", "/mnt/nvme/saltybot/models/yolov8n.onnx")
+ self.declare_parameter("confidence_threshold", 0.5)
+ self.declare_parameter("nms_iou_threshold", 0.45)
+ self.declare_parameter("min_confidence_filter", 0.4)
+ self.declare_parameter("enabled_classes", [0, 39, 41, 42, 43, 47, 56, 57, 61, 62, 64, 73])
+ self.declare_parameter("depth_window_size", 7)
+ self.declare_parameter("depth_min_range", 0.3)
+ self.declare_parameter("depth_max_range", 6.0)
+ self.declare_parameter("target_frame", "base_link")
+ self.declare_parameter("publish_debug_image", False)
+
+ # Load parameters
+ self.engine_path = self.get_parameter("engine_path").value
+ self.onnx_path = self.get_parameter("onnx_path").value
+ self.confidence_threshold = self.get_parameter("confidence_threshold").value
+ self.nms_iou_threshold = self.get_parameter("nms_iou_threshold").value
+ self.min_confidence_filter = self.get_parameter("min_confidence_filter").value
+ self.enabled_classes = self.get_parameter("enabled_classes").value
+ self.depth_window_size = self.get_parameter("depth_window_size").value
+ self.depth_min_range = self.get_parameter("depth_min_range").value
+ self.depth_max_range = self.get_parameter("depth_max_range").value
+ self.target_frame = self.get_parameter("target_frame").value
+ self.publish_debug_image = self.get_parameter("publish_debug_image").value
+
+ # Initialize backend
+ self.backend = self._load_backend()
+ self.decoder = _YOLODecoder(self.confidence_threshold, self.nms_iou_threshold)
+ self.bridge = CvBridge()
+
+ # TF2
+ self.tf_buffer = Buffer()
+ self.tf_listener = TransformListener(self.tf_buffer, self)
+
+ # Camera info
+ self.camera_info: Optional[CameraInfo] = None
+ self.camera_info_lock = None
+
+ # Subscriptions
+ color_sub = message_filters.Subscriber(
+ self, Image, "color_image", qos_profile=_SENSOR_QOS
+ )
+ depth_sub = message_filters.Subscriber(
+ self, Image, "depth_image", qos_profile=_SENSOR_QOS
+ )
+ camera_info_sub = message_filters.Subscriber(
+ self, CameraInfo, "camera_info", qos_profile=_SENSOR_QOS
+ )
+
+ # Synchronize color + depth (slop = 1 frame @ 30fps)
+ self.sync = message_filters.ApproximateTimeSynchronizer(
+ [color_sub, depth_sub], queue_size=5, slop=0.033
+ )
+ self.sync.registerCallback(self._on_frame)
+
+ # Camera info subscriber (separate, not synchronized)
+ self.create_subscription(CameraInfo, "camera_info", self._on_camera_info, _SENSOR_QOS)
+
+ # Publishers
+ self.objects_pub = self.create_publisher(
+ DetectedObjectArray, "/saltybot/objects", _SENSOR_QOS
+ )
+ if self.publish_debug_image:
+ self.debug_image_pub = self.create_publisher(
+ Image, "/saltybot/objects/debug_image", _SENSOR_QOS
+ )
+ else:
+ self.debug_image_pub = None
+
+ # Query service
+ self.query_srv = self.create_service(
+ QueryObjects, "/saltybot/objects/query", self._on_query
+ )
+
+ # Last detection for query service
+ self.last_detections: List[DetectedObject] = []
+
+ self.get_logger().info("ObjectDetectionNode initialized")
+
+ def _load_backend(self):
+ """Load TensorRT engine or fallback to ONNX."""
+ try:
+ if Path(self.engine_path).exists():
+ self.get_logger().info(f"Loading TensorRT engine: {self.engine_path}")
+ return _TRTBackend(self.engine_path)
+ else:
+ self.get_logger().warn(f"TRT engine not found: {self.engine_path}")
+ except Exception as e:
+ self.get_logger().error(f"TensorRT loading failed: {e}")
+
+ # Fallback to ONNX
+ self.get_logger().info(f"Loading ONNX model: {self.onnx_path}")
+ return _ONNXBackend(self.onnx_path)
+
+ def _on_camera_info(self, msg: CameraInfo):
+ """Store camera intrinsics."""
+ if self.camera_info is None:
+ self.camera_info = msg
+ self.get_logger().info(f"Camera info received: {msg.width}x{msg.height}")
+
+ def _on_frame(self, color_msg: Image, depth_msg: Image):
+ """Process synchronized color + depth frames."""
+ if self.camera_info is None:
+ self.get_logger().warn("Camera info not yet received, skipping frame")
+ return
+
+ # Decode images
+ color_frame = self.bridge.imgmsg_to_cv2(color_msg, desired_encoding="bgr8")
+ depth_frame = self.bridge.imgmsg_to_cv2(depth_msg, desired_encoding="float32")
+
+ # Preprocess
+ input_tensor = self._preprocess(color_frame)
+
+ # Inference
+ try:
+ output = self.backend.infer(input_tensor)
+ detections = self.decoder.decode(output[0], _YOLO_INPUT_SIZE)
+ except Exception as e:
+ self.get_logger().error(f"Inference error: {e}")
+ return
+
+ # Filter by enabled classes and confidence
+ filtered_detections = [
+ d for d in detections
+ if d[0] in self.enabled_classes and d[2] >= self.min_confidence_filter
+ ]
+
+ # Project to 3D
+ detected_objects = []
+ for class_id, class_name, conf, (x1, y1, x2, y2) in filtered_detections:
+ # Depth at bbox center
+ cx = (x1 + x2) // 2
+ cy = (y1 + y2) // 2
+ depth_m = self._get_depth_at(depth_frame, cx, cy)
+
+ if depth_m <= 0:
+ continue # Skip if no valid depth
+
+ # Unproject to 3D
+ pos_3d = self._pixel_to_3d(float(cx), float(cy), depth_m, self.camera_info)
+
+ # Transform to target frame if needed
+ if self.target_frame != "camera_color_optical_frame":
+ try:
+ transform = self.tf_buffer.lookup_transform(
+ self.target_frame, "camera_color_optical_frame", color_msg.header.stamp
+ )
+ # Simple transform: apply rotation + translation
+ # For simplicity, use the position as-is with TF lookup
+ # In a real implementation, would use TF2 geometry helpers
+ except Exception as e:
+ self.get_logger().warn(f"TF lookup failed: {e}")
+ pos_3d.header.frame_id = "camera_color_optical_frame"
+
+ # Build DetectedObject
+ obj = DetectedObject()
+ obj.class_id = class_id
+ obj.class_name = class_name
+ obj.confidence = conf
+ obj.bbox = BoundingBox2D()
+ obj.bbox.center = Pose2D(x=float(cx), y=float(cy))
+ obj.bbox.size_x = float(x2 - x1)
+ obj.bbox.size_y = float(y2 - y1)
+ obj.position_3d = pos_3d
+ obj.distance_m = depth_m
+
+ detected_objects.append(obj)
+
+ # Publish
+ self.last_detections = detected_objects
+ array_msg = DetectedObjectArray()
+ array_msg.header = color_msg.header
+ array_msg.header.frame_id = self.target_frame
+ array_msg.objects = detected_objects
+ self.objects_pub.publish(array_msg)
+
+ # Debug image
+ if self.debug_image_pub is not None:
+ self._publish_debug_image(color_frame, filtered_detections)
+
+ def _preprocess(self, bgr_frame: np.ndarray) -> np.ndarray:
+ """Preprocess image: letterbox, BGR->RGB, normalize, NCHW."""
+ # Letterbox resize
+ h, w = bgr_frame.shape[:2]
+ scale = _YOLO_INPUT_SIZE / max(h, w)
+ new_h, new_w = int(h * scale), int(w * scale)
+
+ resized = cv2.resize(bgr_frame, (new_w, new_h))
+
+ # Pad to square
+ canvas = np.zeros((_YOLO_INPUT_SIZE, _YOLO_INPUT_SIZE, 3), dtype=np.uint8)
+ pad_y = (_YOLO_INPUT_SIZE - new_h) // 2
+ pad_x = (_YOLO_INPUT_SIZE - new_w) // 2
+ canvas[pad_y : pad_y + new_h, pad_x : pad_x + new_w] = resized
+
+ # BGR -> RGB
+ rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
+
+ # Normalize 0-1
+ tensor = rgb.astype(np.float32) / 255.0
+
+ # HWC -> CHW -> NCHW
+ tensor = tensor.transpose(2, 0, 1)
+ tensor = np.ascontiguousarray(tensor[np.newaxis])
+
+ return tensor
+
+ def _get_depth_at(self, depth_frame: np.ndarray, u: int, v: int) -> float:
+ """Get depth at pixel with median filtering."""
+ h, w = depth_frame.shape
+ half = self.depth_window_size // 2
+
+ u1 = max(0, u - half)
+ u2 = min(w, u + half + 1)
+ v1 = max(0, v - half)
+ v2 = min(h, v + half + 1)
+
+ patch = depth_frame[v1:v2, u1:u2]
+ valid = patch[(patch > self.depth_min_range) & (patch < self.depth_max_range)]
+
+ if len(valid) == 0:
+ return 0.0
+
+ return float(np.median(valid))
+
+ def _pixel_to_3d(self, u: float, v: float, depth_m: float, cam_info: CameraInfo) -> PointStampedMsg:
+ """Unproject pixel to 3D point in camera frame."""
+ K = cam_info.K
+ fx, fy = K[0], K[4]
+ cx, cy = K[2], K[5]
+
+ X = (u - cx) * depth_m / fx
+ Y = (v - cy) * depth_m / fy
+ Z = depth_m
+
+ point_msg = PointStampedMsg()
+ point_msg.header.frame_id = "camera_color_optical_frame"
+ point_msg.header.stamp = self.get_clock().now().to_msg()
+ point_msg.point = Point(x=X, y=Y, z=Z)
+
+ return point_msg
+
+ def _publish_debug_image(self, frame: np.ndarray, detections: List):
+ """Publish annotated debug image."""
+ debug_frame = frame.copy()
+
+ for class_id, class_name, conf, (x1, y1, x2, y2) in detections:
+ # Draw bbox
+ cv2.rectangle(debug_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # Draw label
+ label = f"{class_name} {conf:.2f}"
+ cv2.putText(
+ debug_frame, label, (x1, y1 - 5),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
+ )
+
+ msg = self.bridge.cv2_to_imgmsg(debug_frame, encoding="bgr8")
+ self.debug_image_pub.publish(msg)
+
+ def _on_query(self, request, response) -> QueryObjects.Response:
+ """Handle query service."""
+ if not self.last_detections:
+ response.description = "No objects detected."
+ response.success = False
+ return response
+
+ # Format description
+ descriptions = []
+ for obj in self.last_detections:
+ if obj.distance_m > 0:
+ descriptions.append(f"{obj.class_name} at {obj.distance_m:.1f}m")
+ else:
+ descriptions.append(obj.class_name)
+
+ response.description = f"I see {', '.join(descriptions)}."
+ response.success = True
+ return response
+
+
+def main(args=None):
+ rclpy.init(args=args)
+ node = ObjectDetectionNode()
+ rclpy.spin(node)
+ node.destroy_node()
+ rclpy.shutdown()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/scripts/build_yolov8n_trt.py b/jetson/ros2_ws/src/saltybot_object_detection/scripts/build_yolov8n_trt.py
new file mode 100644
index 0000000..ed5630e
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/scripts/build_yolov8n_trt.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""
+Build YOLOv8n TensorRT FP16 engine for Orin Nano.
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+
+def build_engine(output_path: Path, workspace_mb: int = 2048) -> bool:
+ """Download YOLOv8n, export ONNX, and convert to TensorRT."""
+ try:
+ from ultralytics import YOLO
+ except ImportError:
+ print("ERROR: ultralytics not installed. Install with: pip install ultralytics")
+ return False
+
+ try:
+ import tensorrt as trt
+ except ImportError:
+ print("ERROR: TensorRT not installed")
+ return False
+
+ print("[*] Loading YOLOv8n from Ultralytics...")
+ model = YOLO("yolov8n.pt")
+
+ print("[*] Exporting to ONNX...")
+ onnx_path = output_path.parent / "yolov8n.onnx"
+ model.export(format="onnx", opset=12)
+
+ # Move ONNX to desired location
+ import shutil
+ onnx_src = Path("yolov8n.onnx")
+ if onnx_src.exists():
+ shutil.move(str(onnx_src), str(onnx_path))
+ print(f"[+] ONNX exported to {onnx_path}")
+ else:
+ print(f"ERROR: ONNX export not found")
+ return False
+
+ print("[*] Converting ONNX to TensorRT FP16...")
+ try:
+ import polygraphy
+ from polygraphy.backend.trt import engine_from_network, save_engine
+ from polygraphy.backend.onnx import BytesFromOnnx
+ except ImportError:
+ print("ERROR: polygraphy not installed. Install with: pip install polygraphy")
+ return False
+
+ try:
+ # Build TRT engine
+ logger = trt.Logger(trt.Logger.INFO)
+ onnx_bytes = BytesFromOnnx(str(onnx_path))()
+
+ engine = engine_from_network(
+ onnx_bytes,
+ config_kwargs={
+ "flags": [trt.BuilderFlag.FP16],
+ "max_workspace_size": workspace_mb * 1024 * 1024,
+ },
+ logger=logger,
+ )()
+
+ # Save engine
+ save_engine(engine, str(output_path))
+ print(f"[+] TensorRT engine saved to {output_path}")
+ return True
+
+ except Exception as e:
+ print(f"ERROR: Failed to convert to TensorRT: {e}")
+ return False
+
+
+def benchmark_engine(engine_path: Path, num_iterations: int = 100) -> None:
+ """Benchmark TensorRT engine latency."""
+ try:
+ import tensorrt as trt
+ import pycuda.driver as cuda
+ import pycuda.autoinit
+ import numpy as np
+ import time
+ except ImportError as e:
+ print(f"ERROR: Missing dependency: {e}")
+ return
+
+ if not engine_path.exists():
+ print(f"ERROR: Engine not found: {engine_path}")
+ return
+
+ print(f"\n[*] Benchmarking {engine_path} ({num_iterations} iterations)...")
+
+ try:
+ logger = trt.Logger(trt.Logger.WARNING)
+ with open(engine_path, "rb") as f:
+ engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
+
+ context = engine.create_execution_context()
+ stream = cuda.Stream()
+
+ # Prepare input (1, 3, 640, 640)
+ h_input = cuda.pagelocked_empty(1 * 3 * 640 * 640, np.float32)
+ d_input = cuda.mem_alloc(h_input.nbytes)
+
+ # Prepare output (1, 84, 8400)
+ h_output = cuda.pagelocked_empty(1 * 84 * 8400, np.float32)
+ d_output = cuda.mem_alloc(h_output.nbytes)
+
+ bindings = [int(d_input), int(d_output)]
+
+ # Warmup
+ for _ in range(10):
+ cuda.memcpy_htod_async(d_input, h_input, stream)
+ context.execute_async_v2(bindings, stream.handle)
+ cuda.memcpy_dtoh_async(h_output, d_output, stream)
+ stream.synchronize()
+
+ # Benchmark
+ times = []
+ for _ in range(num_iterations):
+ cuda.memcpy_htod_async(d_input, h_input, stream)
+ start = time.time()
+ context.execute_async_v2(bindings, stream.handle)
+ cuda.memcpy_dtoh_async(h_output, d_output, stream)
+ stream.synchronize()
+ elapsed = time.time() - start
+ times.append(elapsed * 1000) # ms
+
+ mean_latency = np.mean(times)
+ std_latency = np.std(times)
+ min_latency = np.min(times)
+ max_latency = np.max(times)
+ throughput = 1000.0 / mean_latency
+
+ print(f"[+] Latency: {mean_latency:.2f}ms ± {std_latency:.2f}ms (min={min_latency:.2f}ms, max={max_latency:.2f}ms)")
+ print(f"[+] Throughput: {throughput:.1f} FPS")
+
+ except Exception as e:
+ print(f"ERROR: Benchmark failed: {e}")
+ finally:
+ d_input.free()
+ d_output.free()
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Build YOLOv8n TensorRT engine")
+ parser.add_argument(
+ "--output",
+ type=Path,
+ default=Path("/mnt/nvme/saltybot/models/yolov8n.engine"),
+ help="Output engine path"
+ )
+ parser.add_argument(
+ "--workspace",
+ type=int,
+ default=2048,
+ help="TensorRT workspace size in MB"
+ )
+ parser.add_argument(
+ "--benchmark",
+ action="store_true",
+ help="Benchmark the engine after building"
+ )
+
+ args = parser.parse_args()
+
+ # Create output directory
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+
+ print(f"[*] Building YOLOv8n TensorRT engine")
+ print(f"[*] Output: {args.output}")
+ print(f"[*] Workspace: {args.workspace} MB")
+
+ success = build_engine(args.output, args.workspace)
+
+ if success and args.benchmark:
+ benchmark_engine(args.output)
+
+ sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/setup.cfg b/jetson/ros2_ws/src/saltybot_object_detection/setup.cfg
new file mode 100644
index 0000000..014b967
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/setup.cfg
@@ -0,0 +1,4 @@
+[develop]
+script_dir=$base/lib/saltybot_object_detection
+[install]
+install_scripts=$base/lib/saltybot_object_detection
diff --git a/jetson/ros2_ws/src/saltybot_object_detection/setup.py b/jetson/ros2_ws/src/saltybot_object_detection/setup.py
new file mode 100644
index 0000000..c681ad9
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection/setup.py
@@ -0,0 +1,32 @@
+from setuptools import setup, find_packages
+
+package_name = 'saltybot_object_detection'
+
+setup(
+    name=package_name,
+    version='0.1.0',
+    packages=find_packages(exclude=['test']),
+    # Install launch/config files into the package share directory so
+    # IncludeLaunchDescription and the params file resolve at runtime.
+    # NOTE(review): scripts/build_yolov8n_trt.py is not installed — it must be
+    # run from the source tree.
+    data_files=[
+        ('share/ament_index/resource_index/packages',
+         ['resource/' + package_name]),
+        ('share/' + package_name, ['package.xml']),
+        ('share/' + package_name + '/launch', [
+            'launch/object_detection.launch.py',
+        ]),
+        ('share/' + package_name + '/config', [
+            'config/object_detection_params.yaml',
+        ]),
+    ],
+    install_requires=['setuptools'],
+    zip_safe=True,
+    maintainer='sl-perception',
+    maintainer_email='sl-perception@saltylab.local',
+    description='YOLOv8n object detection with depth integration',
+    license='MIT',
+    tests_require=['pytest'],
+    # Exposes `ros2 run saltybot_object_detection object_detection`
+    entry_points={
+        'console_scripts': [
+            'object_detection = saltybot_object_detection.object_detection_node:main',
+        ],
+    },
+)
diff --git a/jetson/ros2_ws/src/saltybot_object_detection_msgs/CMakeLists.txt b/jetson/ros2_ws/src/saltybot_object_detection_msgs/CMakeLists.txt
new file mode 100644
index 0000000..0299c04
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection_msgs/CMakeLists.txt
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.8)
+project(saltybot_object_detection_msgs)
+
+find_package(ament_cmake REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+find_package(std_msgs REQUIRED)
+find_package(geometry_msgs REQUIRED)
+find_package(vision_msgs REQUIRED)
+find_package(builtin_interfaces REQUIRED)
+
+rosidl_generate_interfaces(${PROJECT_NAME}
+ # Issue #468 — general object detection (YOLOv8n)
+ "msg/DetectedObject.msg"
+ "msg/DetectedObjectArray.msg"
+ "srv/QueryObjects.srv"
+ DEPENDENCIES std_msgs geometry_msgs vision_msgs builtin_interfaces
+)
+
+ament_export_dependencies(rosidl_default_runtime)
+ament_package()
diff --git a/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObject.msg b/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObject.msg
new file mode 100644
index 0000000..0176b51
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObject.msg
@@ -0,0 +1,15 @@
+# Single detected object from YOLO inference
+# Published as array in DetectedObjectArray on /saltybot/objects
+
+# ── Object identity ────────────────────────────────────
+uint16 class_id # COCO class 0–79
+string class_name # human-readable label (e.g., "cup", "chair")
+float32 confidence # detection confidence 0–1
+
+# ── 2-D bounding box (pixel coords in source image) ────
+vision_msgs/BoundingBox2D bbox
+
+# ── 3-D position (in base_link frame) ──────────────────
+# Depth-projected from RealSense aligned depth map
+geometry_msgs/PointStamped position_3d # point in base_link frame
+float32 distance_m # euclidean distance from base_link, 0 = unknown
diff --git a/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObjectArray.msg b/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObjectArray.msg
new file mode 100644
index 0000000..baa56e1
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection_msgs/msg/DetectedObjectArray.msg
@@ -0,0 +1,5 @@
+# Array of detected objects from YOLOv8n inference
+# Published at /saltybot/objects with timestamp and frame info
+
+std_msgs/Header header # timestamp, frame_id="base_link"
+DetectedObject[] objects # detected objects in this frame
diff --git a/jetson/ros2_ws/src/saltybot_object_detection_msgs/package.xml b/jetson/ros2_ws/src/saltybot_object_detection_msgs/package.xml
new file mode 100644
index 0000000..ab94ae9
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection_msgs/package.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>saltybot_object_detection_msgs</name>
+  <version>0.1.0</version>
+  <description>ROS2 messages for YOLOv8n general object detection — Issue #468</description>
+  <!-- TODO(review): replace placeholder maintainer email -->
+  <maintainer email="seb@todo.todo">seb</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_cmake</buildtool_depend>
+  <buildtool_depend>rosidl_default_generators</buildtool_depend>
+  <exec_depend>rosidl_default_runtime</exec_depend>
+  <member_of_group>rosidl_interface_packages</member_of_group>
+
+  <depend>std_msgs</depend>
+  <depend>geometry_msgs</depend>
+  <depend>vision_msgs</depend>
+  <depend>builtin_interfaces</depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/jetson/ros2_ws/src/saltybot_object_detection_msgs/srv/QueryObjects.srv b/jetson/ros2_ws/src/saltybot_object_detection_msgs/srv/QueryObjects.srv
new file mode 100644
index 0000000..99a9581
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_object_detection_msgs/srv/QueryObjects.srv
@@ -0,0 +1,8 @@
+# Query detected objects as a formatted text summary
+# Called by voice_command_node for "whats in front of you" intent
+
+# Request (empty)
+---
+# Response
+string description # e.g., "I see a cup at 0.8 meters, a laptop at 1.2 meters"
+bool success # true if detection succeeded and objects found