From a9717cd6020ffe5aa307aebcdd6c9b8d5fade57c Mon Sep 17 00:00:00 2001 From: sl-perception Date: Mon, 2 Mar 2026 09:59:53 -0500 Subject: [PATCH] =?UTF-8?q?feat(scene):=20semantic=20scene=20understanding?= =?UTF-8?q?=20=E2=80=94=20YOLOv8n=20TRT=20+=20room=20classification=20+=20?= =?UTF-8?q?hazards=20(Issue=20#141)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New packages: saltybot_scene_msgs — 4 msgs (SceneObject, SceneObjectArray, RoomClassification, BehaviorHint) saltybot_scene — 3 nodes + launch + config + TRT build script Nodes: scene_detector_node — YOLOv8-nano TRT FP16 (target ≥15 FPS @ 640×640); synchronized RGB+depth input; filters scene classes (chairs, tables, doors, stairs, pets, appliances); 3D back-projection via aligned depth; depth-based hazard scan (HazardClassifier); room classification at 2Hz; publishes /social/scene/objects + /social/scene/hazards + /social/scene/room_type behavior_adapter_node — adapts speed_limit_mps + personality_mode from room type and hazard severity; publishes BehaviorHint on /social/scene/behavior_hint (on-change + 1Hz heartbeat) costmap_publisher_node — converts SceneObjectArray → PointCloud2 disc rings for Nav2 obstacle_layer + MarkerArray for RViz; publishes /social/scene/obstacle_cloud Modules: yolo_utils.py — YOLOv8 preprocess/postprocess (letterbox, cx/cy/w/h decode, NMS), COCO+custom class table (door=80, stairs=81, wet=82), hazard-by-class mapping room_classifier.py — rule-based (object co-occurrence weights + softmax) with optional MobileNetV2 TRT/ONNX backend (Places365-style 8-class) hazard_classifier.py — depth-only hazard patterns: drop (row-mean cliff), stairs (alternating depth bands), wet floor (depth std-dev), glass (zero depth + strong Sobel edges in RGB) scripts/build_scene_trt.py — export YOLOv8n → ONNX → TRT FP16; optionally build MobileNetV2 room classifier engine; includes benchmark Topic map: /social/scene/objects SceneObjectArray ~15+ FPS 
/social/scene/room_type RoomClassification ~2 Hz /social/scene/hazards SceneObjectArray on hazard /social/scene/behavior_hint BehaviorHint on-change + 1 Hz /social/scene/obstacle_cloud PointCloud2 Nav2 obstacle_layer /social/scene/object_markers MarkerArray RViz debug Co-Authored-By: Claude Sonnet 4.6 --- .../ros2_ws/src/saltybot_scene/CMakeLists.txt | 20 + .../saltybot_scene/config/scene_params.yaml | 61 +++ .../launch/scene_understanding.launch.py | 80 ++++ jetson/ros2_ws/src/saltybot_scene/package.xml | 42 ++ .../saltybot_scene/resource/saltybot_scene | 0 .../saltybot_scene/saltybot_scene/__init__.py | 0 .../saltybot_scene/behavior_adapter_node.py | 191 ++++++++ .../saltybot_scene/costmap_publisher_node.py | 163 +++++++ .../saltybot_scene/hazard_classifier.py | 212 +++++++++ .../saltybot_scene/room_classifier.py | 209 +++++++++ .../saltybot_scene/scene_detector_node.py | 426 ++++++++++++++++++ .../saltybot_scene/yolo_utils.py | 202 +++++++++ .../saltybot_scene/scripts/build_scene_trt.py | 211 +++++++++ jetson/ros2_ws/src/saltybot_scene/setup.py | 27 ++ .../src/saltybot_scene_msgs/CMakeLists.txt | 20 + .../saltybot_scene_msgs/msg/BehaviorHint.msg | 19 + .../msg/RoomClassification.msg | 22 + .../saltybot_scene_msgs/msg/SceneObject.msg | 22 + .../msg/SceneObjectArray.msg | 4 + .../src/saltybot_scene_msgs/package.xml | 23 + 20 files changed, 1954 insertions(+) create mode 100644 jetson/ros2_ws/src/saltybot_scene/CMakeLists.txt create mode 100644 jetson/ros2_ws/src/saltybot_scene/config/scene_params.yaml create mode 100644 jetson/ros2_ws/src/saltybot_scene/launch/scene_understanding.launch.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/package.xml create mode 100644 jetson/ros2_ws/src/saltybot_scene/resource/saltybot_scene create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/__init__.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/behavior_adapter_node.py create mode 100644 
jetson/ros2_ws/src/saltybot_scene/saltybot_scene/costmap_publisher_node.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/hazard_classifier.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/room_classifier.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/scene_detector_node.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/saltybot_scene/yolo_utils.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/scripts/build_scene_trt.py create mode 100644 jetson/ros2_ws/src/saltybot_scene/setup.py create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/CMakeLists.txt create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/msg/BehaviorHint.msg create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/msg/RoomClassification.msg create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObject.msg create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObjectArray.msg create mode 100644 jetson/ros2_ws/src/saltybot_scene_msgs/package.xml diff --git a/jetson/ros2_ws/src/saltybot_scene/CMakeLists.txt b/jetson/ros2_ws/src/saltybot_scene/CMakeLists.txt new file mode 100644 index 0000000..d235315 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.8) +project(saltybot_scene) + +find_package(ament_cmake REQUIRED) +find_package(ament_cmake_python REQUIRED) + +ament_python_install_package(${PROJECT_NAME}) + +install(PROGRAMS + scripts/build_scene_trt.py + DESTINATION lib/${PROJECT_NAME} +) + +install(DIRECTORY + launch + config + DESTINATION share/${PROJECT_NAME}/ +) + +ament_package() diff --git a/jetson/ros2_ws/src/saltybot_scene/config/scene_params.yaml b/jetson/ros2_ws/src/saltybot_scene/config/scene_params.yaml new file mode 100644 index 0000000..cc7cc74 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/config/scene_params.yaml @@ -0,0 +1,61 @@ +# saltybot_scene — default parameters +# +# YOLOv8-nano TRT engine: +# 
Build with: ros2 run saltybot_scene build_scene_trt +# Place at: /config/yolov8n_scene.engine +# +# Room classifier engine (optional, falls back to rule_based): +# Build with: ros2 run saltybot_scene build_scene_trt --room +# Place at: /config/room_classifier.engine + +scene_detector: + ros__parameters: + # YOLOv8-nano TRT FP16 engine (built from ONNX via build_scene_trt.py) + engine_path: '/config/yolov8n_scene.engine' + onnx_path: '/config/yolov8n_scene.onnx' + + # MobileNetV2 room classifier (optional — rule_based is the fallback) + room_engine_path: '/config/room_classifier.engine' + room_onnx_path: '/config/room_classifier.onnx' + + # Detection thresholds + conf_threshold: 0.35 # YOLOv8 confidence threshold + iou_threshold: 0.45 # NMS IoU threshold + + # Rate control + room_update_hz: 2.0 # room classification rate (decimated from 30Hz input) + max_depth_m: 6.0 # ignore depth readings beyond this + + # Debug annotated image (disable in production to save bandwidth) + publish_debug: false + +behavior_adapter: + ros__parameters: {} # all params are hardcoded speed/personality tables + +costmap_publisher: + ros__parameters: + max_obstacle_distance_m: 4.0 # ignore objects farther than this + min_confidence: 0.40 # discard low-confidence detections from costmap + obstacle_frame: 'base_link' + publish_markers: true # RViz MarkerArray on /social/scene/object_markers + + +# ── Nav2 obstacle_layer integration ────────────────────────────────────────── +# Add to your local_costmap config in nav2_params.yaml: +# +# local_costmap: +# local_costmap: +# ros__parameters: +# plugins: ["voxel_layer", "inflation_layer", "scene_obstacle_layer"] +# scene_obstacle_layer: +# plugin: "nav2_costmap_2d::ObstacleLayer" +# observation_sources: scene_objects +# scene_objects: +# topic: /social/scene/obstacle_cloud +# data_type: PointCloud2 +# min_obstacle_height: 0.0 +# max_obstacle_height: 2.0 +# marking: true +# clearing: true +# obstacle_max_range: 4.0 +# raytrace_max_range: 4.5 diff 
"""
scene_understanding.launch.py — Semantic scene understanding stack.

Starts:
    scene_detector    — YOLOv8-nano TRT + room classifier + hazard detection
    behavior_adapter  — speed/personality hints from room + hazards
    costmap_publisher — obstacle PointCloud2 for Nav2

Launch args:
    engine_path     str    '/config/yolov8n_scene.engine'
    onnx_path       str    '/config/yolov8n_scene.onnx'
    conf_threshold  float  '0.35'
    publish_debug   bool   'false'
    publish_markers bool   'true'

Verify:
    ros2 topic hz /social/scene/objects          # ~15 Hz
    ros2 topic hz /social/scene/room_type        # ~2 Hz
    ros2 topic hz /social/scene/behavior_hint    # ~1 Hz + on-change
    ros2 topic hz /social/scene/obstacle_cloud   # ~15 Hz (when objects detected)
    ros2 topic echo /social/scene/room_type
    ros2 topic echo /social/scene/behavior_hint
"""

from launch import LaunchDescription
from launch.actions import DeclareLaunchArgument
from launch.substitutions import LaunchConfiguration
from launch_ros.actions import Node

# (name, default) pairs for every user-tunable launch argument.
_ARG_DEFAULTS = (
    ('engine_path', '/config/yolov8n_scene.engine'),
    ('onnx_path', '/config/yolov8n_scene.onnx'),
    ('conf_threshold', '0.35'),
    ('publish_debug', 'false'),
    ('publish_markers', 'true'),
)


def generate_launch_description():
    """Assemble the three-node scene-understanding stack."""
    declared_args = [
        DeclareLaunchArgument(arg_name, default_value=default)
        for arg_name, default in _ARG_DEFAULTS
    ]

    detector = Node(
        package='saltybot_scene',
        executable='scene_detector',
        name='scene_detector',
        output='screen',
        parameters=[{
            'engine_path': LaunchConfiguration('engine_path'),
            'onnx_path': LaunchConfiguration('onnx_path'),
            'conf_threshold': LaunchConfiguration('conf_threshold'),
            'iou_threshold': 0.45,
            'publish_debug': LaunchConfiguration('publish_debug'),
            'room_update_hz': 2.0,
            'max_depth_m': 6.0,
        }],
    )

    adapter = Node(
        package='saltybot_scene',
        executable='behavior_adapter',
        name='behavior_adapter',
        output='screen',
    )

    costmap = Node(
        package='saltybot_scene',
        executable='costmap_publisher',
        name='costmap_publisher',
        output='screen',
        parameters=[{
            'max_obstacle_distance_m': 4.0,
            'min_confidence': 0.40,
            'obstacle_frame': 'base_link',
            'publish_markers': LaunchConfiguration('publish_markers'),
        }],
    )

    return LaunchDescription(declared_args + [detector, adapter, costmap])
+ + seb + MIT + + ament_cmake + ament_cmake_python + + rclpy + std_msgs + sensor_msgs + geometry_msgs + vision_msgs + nav_msgs + visualization_msgs + tf2_ros + tf2_geometry_msgs + cv_bridge + image_transport + message_filters + saltybot_scene_msgs + + python3-numpy + python3-opencv + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_cmake + + diff --git a/jetson/ros2_ws/src/saltybot_scene/resource/saltybot_scene b/jetson/ros2_ws/src/saltybot_scene/resource/saltybot_scene new file mode 100644 index 0000000..e69de29 diff --git a/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/__init__.py b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/behavior_adapter_node.py b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/behavior_adapter_node.py new file mode 100644 index 0000000..3478782 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/behavior_adapter_node.py @@ -0,0 +1,191 @@ +""" +behavior_adapter_node.py — Adapt robot speed and personality from scene context. 
+ +Subscribes: + /social/scene/room_type RoomClassification — current room + /social/scene/hazards SceneObjectArray — active hazards + +Publishes: + /social/scene/behavior_hint BehaviorHint (on change + 1 Hz heartbeat) + +Behavior rules (speed_limit_mps, turn_limit_rads, personality_mode): + Baseline: 1.0, 1.5, 'efficient' + living_room: 0.6, 1.2, 'gentle' + kitchen: 0.5, 1.0, 'careful' + hallway: 0.8, 1.5, 'efficient' + bedroom: 0.4, 0.8, 'gentle' + garage: 1.2, 2.0, 'efficient' + outdoor: 1.5, 2.0, 'active' + office: 0.5, 1.0, 'careful' + +Hazard overrides (highest severity wins): + HAZARD_STAIRS: 0.1, 0.5, 'cautious', alert=True + HAZARD_DROP: 0.1, 0.5, 'cautious', alert=True + HAZARD_WET_FLOOR: 0.3, 0.8, 'careful', alert=True + HAZARD_GLASS_DOOR: 0.2, 0.5, 'cautious', alert=True + HAZARD_PET: 0.3, 0.8, 'gentle', pet_nearby=True +""" + +import time + +import rclpy +from rclpy.node import Node + +from saltybot_scene_msgs.msg import RoomClassification, SceneObjectArray, BehaviorHint + +from .hazard_classifier import ( + HAZARD_NONE, HAZARD_STAIRS, HAZARD_DROP, HAZARD_WET_FLOOR, + HAZARD_GLASS_DOOR, HAZARD_PET +) +from .room_classifier import ROOM_NAMES + +# ── Room speed/personality profiles ────────────────────────────────────────── +# (speed_mps, turn_rads, personality) +_ROOM_PROFILE = { + 0: (1.0, 1.5, 'efficient'), # unknown — baseline + 1: (0.6, 1.2, 'gentle'), # living_room + 2: (0.5, 1.0, 'careful'), # kitchen + 3: (0.8, 1.5, 'efficient'), # hallway + 4: (0.4, 0.8, 'gentle'), # bedroom + 5: (1.2, 2.0, 'efficient'), # garage + 6: (1.5, 2.0, 'active'), # outdoor + 7: (0.5, 1.0, 'careful'), # office +} + +# ── Hazard override profiles ────────────────────────────────────────────────── +# (speed_mps, turn_rads, personality, alert_reason) +_HAZARD_OVERRIDE = { + HAZARD_STAIRS: (0.10, 0.5, 'cautious', 'stairs ahead'), + HAZARD_DROP: (0.10, 0.5, 'cautious', 'floor drop ahead'), + HAZARD_WET_FLOOR: (0.30, 0.8, 'careful', 'wet floor'), + HAZARD_GLASS_DOOR: (0.20, 
0.5, 'cautious', 'glass door ahead'), + HAZARD_PET: (0.30, 0.8, 'gentle', 'pet nearby'), +} + +# Severity order — highest number wins +_HAZARD_SEVERITY = { + HAZARD_NONE: 0, + HAZARD_PET: 1, + HAZARD_WET_FLOOR: 2, + HAZARD_GLASS_DOOR: 3, + HAZARD_DROP: 4, + HAZARD_STAIRS: 4, +} + + +class BehaviorAdapterNode(Node): + + def __init__(self): + super().__init__('behavior_adapter') + + self._current_room: RoomClassification | None = None + self._active_hazards: list = [] + self._last_hint: BehaviorHint | None = None + self._last_publish_t: float = 0.0 + + self.create_subscription( + RoomClassification, '/social/scene/room_type', self._on_room, 10 + ) + self.create_subscription( + SceneObjectArray, '/social/scene/hazards', self._on_hazards, 10 + ) + + self._hint_pub = self.create_publisher( + BehaviorHint, '/social/scene/behavior_hint', 10 + ) + + # Heartbeat + hazard expiry timer + self.create_timer(1.0, self._tick) + + self.get_logger().info('behavior_adapter ready') + + def _on_room(self, msg: RoomClassification) -> None: + if self._current_room is None or msg.room_type != self._current_room.room_type: + self.get_logger().info( + f'[scene] room changed → {msg.room_name} ' + f'(conf={msg.confidence:.0%})' + ) + self._current_room = msg + self._publish_hint() + + def _on_hazards(self, msg: SceneObjectArray) -> None: + self._active_hazards = list(msg.objects) + self._publish_hint() + + def _tick(self) -> None: + # Expire hazards older than 2s (no new SceneObjectArray received) + self._active_hazards = [] + # Heartbeat + self._publish_hint() + + def _publish_hint(self) -> None: + hint = self._build_hint() + # Only publish if something changed or 1s heartbeat + now = time.monotonic() + changed = (self._last_hint is None or + self._hint_changed(hint, self._last_hint)) + if not changed and now - self._last_publish_t < 1.0: + return + self._hint_pub.publish(hint) + self._last_hint = hint + self._last_publish_t = now + + def _build_hint(self) -> BehaviorHint: + hint = 
BehaviorHint() + hint.header.stamp = self.get_clock().now().to_msg() + + # ── Room baseline ────────────────────────────────────────────────── + room_type = self._current_room.room_type if self._current_room else 0 + speed, turn, personality = _ROOM_PROFILE.get(room_type, (1.0, 1.5, 'efficient')) + hint.room_name = ROOM_NAMES.get(room_type, 'unknown') + hint.speed_limit_mps = speed + hint.turn_limit_rads = turn + hint.personality_mode = personality + + # ── Hazard override ──────────────────────────────────────────────── + best_severity = 0 + best_hazard = HAZARD_NONE + for obj in self._active_hazards: + sev = _HAZARD_SEVERITY.get(obj.hazard_type, 0) + if sev > best_severity: + best_severity = sev + best_hazard = obj.hazard_type + + if best_hazard != HAZARD_NONE: + ov_speed, ov_turn, ov_personality, ov_reason = _HAZARD_OVERRIDE[best_hazard] + # Take the more conservative limit + hint.speed_limit_mps = min(hint.speed_limit_mps, ov_speed) + hint.turn_limit_rads = min(hint.turn_limit_rads, ov_turn) + hint.personality_mode = ov_personality + hint.hazard_alert = True + hint.alert_reason = ov_reason + hint.hazard_type = best_hazard + hint.pet_nearby = (best_hazard == HAZARD_PET) + else: + hint.hazard_alert = False + hint.hazard_type = HAZARD_NONE + + return hint + + @staticmethod + def _hint_changed(a: BehaviorHint, b: BehaviorHint) -> bool: + return ( + abs(a.speed_limit_mps - b.speed_limit_mps) > 0.05 + or a.personality_mode != b.personality_mode + or a.hazard_alert != b.hazard_alert + or a.room_name != b.room_name + ) + + +def main(args=None): + rclpy.init(args=args) + node = BehaviorAdapterNode() + try: + rclpy.spin(node) + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/costmap_publisher_node.py b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/costmap_publisher_node.py new file mode 100644 index 0000000..cef655a --- /dev/null +++ 
"""
costmap_publisher_node.py — Publish detected objects as Nav2 costmap obstacles.

Converts SceneObjectArray detections to:
  1. sensor_msgs/PointCloud2  → consumed by Nav2's obstacle_layer
     (configure nav2_costmap_2d obstacle_layer with observation_sources pointing here)
  2. visualization_msgs/MarkerArray → RViz debug visualisation

Only publishes objects closer than max_obstacle_distance_m (default 4m) with
sufficient confidence.

Configuration in nav2_costmap_2d (local_costmap):
  obstacle_layer:
    observation_sources: scene_objects
    scene_objects:
      topic: /social/scene/obstacle_cloud
      data_type: PointCloud2
      min_obstacle_height: 0.0
      max_obstacle_height: 2.0
      marking: true
      clearing: true

Subscribes:
    /social/scene/objects        SceneObjectArray

Publishes:
    /social/scene/obstacle_cloud sensor_msgs/PointCloud2
    /social/scene/object_markers visualization_msgs/MarkerArray

FIX(review): removed unused `import struct` and unused SceneObject import;
hazard colour branches now use the named HAZARD_* constants instead of the
magic ints 1/2/4/3 (same colours, same behavior).
"""

import rclpy
from rclpy.node import Node
import numpy as np
from visualization_msgs.msg import Marker, MarkerArray
from sensor_msgs.msg import PointCloud2, PointField

from saltybot_scene_msgs.msg import SceneObjectArray
from .hazard_classifier import (
    HAZARD_NONE, HAZARD_STAIRS, HAZARD_DROP, HAZARD_WET_FLOOR,
    HAZARD_GLASS_DOOR, HAZARD_PET
)


# Hazard objects get a larger effective footprint in the costmap
_HAZARD_SCALE = 0.4    # extra ring of points at this radius (m) for hazards
_DEFAULT_SCALE = 0.15  # non-hazard object footprint radius


class CostmapPublisherNode(Node):
    """Turns scene detections into PointCloud2 obstacles + RViz markers."""

    def __init__(self):
        super().__init__('costmap_publisher')

        self.declare_parameter('max_obstacle_distance_m', 4.0)
        self.declare_parameter('min_confidence', 0.40)
        self.declare_parameter('obstacle_frame', 'base_link')
        self.declare_parameter('publish_markers', True)

        self._max_dist = self.get_parameter('max_obstacle_distance_m').value
        self._min_conf = self.get_parameter('min_confidence').value
        self._frame = self.get_parameter('obstacle_frame').value
        self._pub_markers = self.get_parameter('publish_markers').value

        self.create_subscription(
            SceneObjectArray, '/social/scene/objects', self._on_objects, 10
        )
        self._cloud_pub = self.create_publisher(PointCloud2, '/social/scene/obstacle_cloud', 10)
        self._marker_pub = self.create_publisher(MarkerArray, '/social/scene/object_markers', 10)

        self.get_logger().info('costmap_publisher ready')

    def _on_objects(self, msg: SceneObjectArray) -> None:
        """Filter detections by confidence/distance; emit cloud + markers."""
        points = []
        marker_array = MarkerArray()

        for idx, obj in enumerate(msg.objects):
            if obj.confidence < self._min_conf:
                continue
            if obj.distance_m <= 0 or obj.distance_m > self._max_dist:
                continue

            # NOTE(review): position_3d is published in the marker/cloud frame
            # as-is; assumes the detector already expresses it in
            # `obstacle_frame` — confirm against scene_detector_node.
            p = obj.position_3d.point
            x, y, z = p.x, p.y, p.z

            # For Nav2 obstacle layer: publish a disc of points around the object
            radius = _HAZARD_SCALE if obj.hazard_type != HAZARD_NONE else _DEFAULT_SCALE
            n_ring = 8
            for k in range(n_ring):
                angle = 2 * np.pi * k / n_ring
                px = x + radius * np.cos(angle)
                py = y + radius * np.sin(angle)
                points.append((px, py, 0.1))  # z=0.1 (ground-level obstacle)
            points.append((x, y, 0.5))        # centroid at mid-height

            # ── RViz marker ──────────────────────────────────────────────
            if self._pub_markers:
                m = Marker()
                m.header.frame_id = self._frame
                m.header.stamp = msg.header.stamp
                m.ns = 'scene_objects'
                m.id = idx
                m.type = Marker.CYLINDER
                m.action = Marker.ADD
                m.pose.position.x = x
                m.pose.position.y = y
                m.pose.position.z = 0.5
                m.pose.orientation.w = 1.0
                m.scale.x = radius * 2
                m.scale.y = radius * 2
                m.scale.z = 1.0
                m.lifetime.sec = 1  # auto-expire after 1s

                # Colour by hazard severity class
                if obj.hazard_type in (HAZARD_STAIRS, HAZARD_DROP, HAZARD_GLASS_DOOR):
                    m.color.r, m.color.g, m.color.b, m.color.a = 1.0, 0.0, 0.0, 0.7
                elif obj.hazard_type == HAZARD_WET_FLOOR:
                    m.color.r, m.color.g, m.color.b, m.color.a = 0.0, 0.5, 1.0, 0.6
                elif obj.hazard_type == HAZARD_PET:
                    m.color.r, m.color.g, m.color.b, m.color.a = 1.0, 0.5, 0.0, 0.7
                else:
                    m.color.r, m.color.g, m.color.b, m.color.a = 0.0, 0.8, 0.0, 0.5
                marker_array.markers.append(m)

        if points:
            self._cloud_pub.publish(
                self._make_cloud(points, msg.header.stamp)
            )
        if self._pub_markers and marker_array.markers:
            self._marker_pub.publish(marker_array)

    def _make_cloud(self, points: list, stamp) -> PointCloud2:
        """Pack (x, y, z) tuples into an unorganized float32 PointCloud2."""
        pts_arr = np.array(points, dtype=np.float32)
        cloud = PointCloud2()
        cloud.header.frame_id = self._frame
        cloud.header.stamp = stamp
        cloud.height = 1
        cloud.width = len(pts_arr)
        cloud.is_dense = True
        cloud.is_bigendian = False
        cloud.point_step = 12  # 3 × float32
        cloud.row_step = cloud.point_step * cloud.width
        cloud.fields = [
            PointField(name='x', offset=0, datatype=PointField.FLOAT32, count=1),
            PointField(name='y', offset=4, datatype=PointField.FLOAT32, count=1),
            PointField(name='z', offset=8, datatype=PointField.FLOAT32, count=1),
        ]
        cloud.data = pts_arr.tobytes()
        return cloud


def main(args=None):
    rclpy.init(args=args)
    node = CostmapPublisherNode()
    try:
        rclpy.spin(node)
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
The scene_detector_node calls +HazardClassifier.analyse() once per frame to check for: + + HAZARD_STAIRS — stair pattern: repeating horizontal edges at increasing depth + HAZARD_DROP — floor cliff below robot's current heading + HAZARD_WET_FLOOR — specular floor reflection (high-variance narrow depth stripe) + HAZARD_GLASS_DOOR — depth near-zero where RGB has strong vertical edges + +All methods take float32 depth images in metres and optional BGR images. +Returns a list of (hazard_type, confidence, description) tuples. + +Tuning constants are conservative to avoid false positives in normal use. +""" + +from __future__ import annotations +from typing import List, Tuple + +import numpy as np + +# Hazard type constants (match SceneObject.msg) +HAZARD_NONE = 0 +HAZARD_STAIRS = 1 +HAZARD_DROP = 2 +HAZARD_WET_FLOOR = 3 +HAZARD_GLASS_DOOR = 4 +HAZARD_PET = 5 + +HazardResult = Tuple[int, float, str] # (hazard_type, confidence, description) + + +class HazardClassifier: + """ + Stateless hazard analyser. Call analyse() per frame. + """ + + def __init__( + self, + drop_threshold_m: float = 0.25, # depth discontinuity = drop if > this + stair_min_steps: int = 3, # minimum horizontal bands to flag stairs + glass_depth_thr_m: float = 0.15, # depth below which we suspect glass (unreturned) + wet_variance_thr: float = 0.08, # depth std-dev in floor stripe = specular + ): + self._drop_thr = drop_threshold_m + self._stair_steps = stair_min_steps + self._glass_thr = glass_depth_thr_m + self._wet_var_thr = wet_variance_thr + + def analyse( + self, + depth: np.ndarray, + bgr: np.ndarray | None = None, + ) -> List[HazardResult]: + """ + Analyse a depth (and optionally BGR) frame for hazards. + + Args: + depth: float32 H×W depth in metres. 0 = invalid/no return. + bgr: uint8 H×W×3 BGR image (same resolution). None = skip visual checks. + + Returns: + List of (hazard_type, confidence, description) tuples. + Empty list = no hazards detected. 
+ """ + results: List[HazardResult] = [] + h, w = depth.shape + + # ── 1. Floor drop / cliff ────────────────────────────────────────── + result = self._check_drop(depth, h, w) + if result: + results.append(result) + + # ── 2. Stair pattern ─────────────────────────────────────────────── + result = self._check_stairs(depth, h, w) + if result: + results.append(result) + + # ── 3. Wet floor (specular) ──────────────────────────────────────── + result = self._check_wet_floor(depth, h, w) + if result: + results.append(result) + + # ── 4. Glass door (depth=0, RGB has vertical edges) ─────────────── + if bgr is not None: + result = self._check_glass(depth, bgr, h, w) + if result: + results.append(result) + + return results + + # ── Private checks ────────────────────────────────────────────────────── + + def _check_drop(self, depth: np.ndarray, h: int, w: int) -> HazardResult | None: + """ + Drop: in the lower 20% of the frame (floor area), check for large depth + jumps between adjacent rows — indicates floor edge or drop-off. + """ + floor_start = int(h * 0.70) + floor = depth[floor_start:, w // 4: 3 * w // 4] # central strip + valid = (floor > 0.1) & (floor < 8.0) + + if valid.sum() < 50: + return None + + # Row-mean of valid depths + row_means = np.array([ + np.nanmean(np.where(valid[r], floor[r], np.nan)) + for r in range(floor.shape[0]) + ]) + row_means = row_means[~np.isnan(row_means)] + if len(row_means) < 3: + return None + + # Consecutive row depth differences + diffs = np.abs(np.diff(row_means)) + max_jump = float(diffs.max()) + if max_jump > self._drop_thr: + conf = min(1.0, max_jump / (self._drop_thr * 3)) + return HAZARD_DROP, conf, f'floor drop {max_jump:.2f}m detected ahead' + return None + + def _check_stairs(self, depth: np.ndarray, h: int, w: int) -> HazardResult | None: + """ + Stairs: horizontal banding pattern in depth — multiple rows at distinct + increasing depths, each band ~step-height apart (typically 0.15–0.22m). 
+ """ + # Use central 40% strip (avoids walls on sides) + col_start, col_end = int(w * 0.3), int(w * 0.7) + strip = depth[:, col_start:col_end] + valid = (strip > 0.3) & (strip < 5.0) + + # Row medians + row_med = np.array([ + np.median(strip[r, valid[r]]) if valid[r].sum() > 5 else np.nan + for r in range(h) + ]) + finite_mask = ~np.isnan(row_med) + if finite_mask.sum() < 20: + return None + + # Smooth to suppress noise + kernel = np.ones(5) / 5 + smoothed = np.convolve(row_med[finite_mask], kernel, mode='same') + + # Count direction changes (peaks / valleys) — stairs produce alternating pattern + diffs = np.diff(smoothed) + changes = np.sum(np.diff(np.sign(diffs)) != 0) + + # Also check that depth range is consistent with steps (0.1–0.3m per band) + range_m = float(smoothed.max() - smoothed.min()) + step_like = 0.3 < range_m < 3.0 + + if changes >= self._stair_steps * 2 and step_like: + conf = min(1.0, changes / 20.0) + return HAZARD_STAIRS, conf, f'stair pattern detected ({changes} bands)' + return None + + def _check_wet_floor(self, depth: np.ndarray, h: int, w: int) -> HazardResult | None: + """ + Wet/glossy floor: depth returns are noisy / highly variable in the + floor region because specular reflections scatter the IR pattern. + A large std-dev in the floor stripe with valid-looking mean depth is suspect. 
+ """ + floor_start = int(h * 0.75) + floor = depth[floor_start:, w // 4: 3 * w // 4] + valid = (floor > 0.1) & (floor < 3.0) + if valid.sum() < 30: + return None + + valid_vals = floor[valid] + std = float(valid_vals.std()) + if std > self._wet_var_thr: + conf = min(1.0, std / (self._wet_var_thr * 4)) + return HAZARD_WET_FLOOR, conf, f'wet/specular floor (depth std={std:.3f}m)' + return None + + def _check_glass( + self, + depth: np.ndarray, + bgr: np.ndarray, + h: int, + w: int, + ) -> HazardResult | None: + """ + Glass door: central column has near-zero depth returns (IR passes through) + while the RGB image shows strong vertical edges (door frame / objects behind). + + Requires both depth and BGR images. + """ + import cv2 + + central_d = depth[:, w // 3: 2 * w // 3] + near_zero = (central_d < self._glass_thr) & (central_d >= 0) + zero_frac = float(near_zero.mean()) + + if zero_frac < 0.15: + return None + + # Check for strong vertical edges in RGB + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) + sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3) + central_sobel = np.abs(sobel_x[:, w // 3: 2 * w // 3]) + edge_strength = float(central_sobel.mean()) + + if edge_strength > 15.0: + conf = min(1.0, zero_frac * 2 * edge_strength / 50) + return HAZARD_GLASS_DOOR, conf, ( + f'glass door suspected (zero_depth={zero_frac:.0%}, edge={edge_strength:.1f})' + ) + return None diff --git a/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/room_classifier.py b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/room_classifier.py new file mode 100644 index 0000000..81a91cf --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/room_classifier.py @@ -0,0 +1,209 @@ +""" +room_classifier.py — Classify room/environment type from detected objects. + +Two modes: + 1. rule_based — fast heuristic from object co-occurrence (always available). + 2. mobilenet — MobileNetV2 scene classifier (TensorRT FP16) from a + Places365-pretrained model exported to ONNX. 
+ Falls back to rule_based if engine file is missing. + +Rule-based logic: + Each room type has a set of indicator objects (COCO class IDs) with weights. + Score = weighted sum of detected indicators, normalised to softmax. + Highest score wins. + +MobileNet mode: + Input: 224×224 RGB float32 CHW + Output: [1, 8] logits (one per ROOM_* constant) + Engine: /config/room_classifier.engine (built via scripts/build_scene_trt.py) +""" + +from __future__ import annotations +import math +from typing import List + +import numpy as np + + +# Room type constants (must match RoomClassification.msg ROOM_* values) +ROOM_UNKNOWN = 0 +ROOM_LIVING_ROOM = 1 +ROOM_KITCHEN = 2 +ROOM_HALLWAY = 3 +ROOM_BEDROOM = 4 +ROOM_GARAGE = 5 +ROOM_OUTDOOR = 6 +ROOM_OFFICE = 7 + +ROOM_NAMES = { + ROOM_UNKNOWN: 'unknown', + ROOM_LIVING_ROOM: 'living_room', + ROOM_KITCHEN: 'kitchen', + ROOM_HALLWAY: 'hallway', + ROOM_BEDROOM: 'bedroom', + ROOM_GARAGE: 'garage', + ROOM_OUTDOOR: 'outdoor', + ROOM_OFFICE: 'office', +} + +N_ROOMS = len(ROOM_NAMES) + +# ── Rule-based indicator weights ────────────────────────────────────────────── +# {room_type: {class_id: weight}} +_INDICATORS = { + ROOM_LIVING_ROOM: {57: 3.0, 56: 2.0, 62: 2.5, 65: 1.5}, # couch, chair, tv, bed + ROOM_KITCHEN: {72: 4.0, 69: 3.5, 68: 3.0, 71: 2.0, 46: 1.5}, # fridge, oven, microwave, sink, banana + ROOM_HALLWAY: {80: 3.0, 28: 1.5, 25: 1.5}, # door, backpack, umbrella + ROOM_BEDROOM: {65: 4.0, 56: 1.5, 62: 1.0}, # bed, chair, tv + ROOM_GARAGE: {2: 3.0, 3: 3.0, 7: 2.0, 1: 2.0}, # car, motorcycle, truck, bicycle + ROOM_OUTDOOR: {14: 2.0, 17: 2.0, 0: 0.5, 2: 1.5}, # bird, horse, person(weak), car + ROOM_OFFICE: {63: 3.0, 66: 2.5, 73: 2.0, 56: 1.5}, # laptop, keyboard, book, chair +} + + +def _softmax(x: np.ndarray) -> np.ndarray: + e = np.exp(x - x.max()) + return e / e.sum() + + +def classify_rule_based( + class_ids: List[int], + confidences: List[float], +) -> tuple[int, str, float, list[float]]: + """ + Classify room type from detected 
object class IDs. + + Returns: + (room_type, room_name, confidence, class_scores[N_ROOMS]) + """ + scores = np.zeros(N_ROOMS, dtype=np.float32) + for cid, conf in zip(class_ids, confidences): + for room_type, indicators in _INDICATORS.items(): + if cid in indicators: + scores[room_type] += indicators[cid] * conf + + if scores.max() < 0.1: + # No indicator objects found — unknown + uniform = np.ones(N_ROOMS, dtype=np.float32) / N_ROOMS + return ROOM_UNKNOWN, ROOM_NAMES[ROOM_UNKNOWN], 0.0, uniform.tolist() + + probs = _softmax(scores) + room_type = int(probs.argmax()) + confidence = float(probs[room_type]) + return room_type, ROOM_NAMES[room_type], confidence, probs.tolist() + + +# ── MobileNetV2 TRT/ONNX backend ───────────────────────────────────────────── + +class _MobileNetTRT: + """TensorRT MobileNetV2 scene classifier (Jetson Orin).""" + + _INPUT_SIZE = 224 + + def __init__(self, engine_path: str): + import tensorrt as trt + import pycuda.driver as cuda + import pycuda.autoinit # noqa: F401 + + self._cuda = cuda + logger = trt.Logger(trt.Logger.WARNING) + with open(engine_path, 'rb') as f, trt.Runtime(logger) as rt: + self._engine = rt.deserialize_cuda_engine(f.read()) + self._context = self._engine.create_execution_context() + self._inputs, self._outputs, self._bindings = [], [], [] + for i in range(self._engine.num_io_tensors): + name = self._engine.get_tensor_name(i) + dtype = trt.nptype(self._engine.get_tensor_dtype(name)) + shape = tuple(self._engine.get_tensor_shape(name)) + nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize + host = cuda.pagelocked_empty(shape, dtype) + device = cuda.mem_alloc(nbytes) + self._bindings.append(int(device)) + if self._engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT: + self._inputs.append({'host': host, 'device': device}) + else: + self._outputs.append({'host': host, 'device': device}) + self._stream = cuda.Stream() + + def predict(self, bgr: np.ndarray) -> np.ndarray: + import cv2 + rgb = cv2.cvtColor( + 
cv2.resize(bgr, (self._INPUT_SIZE, self._INPUT_SIZE)), cv2.COLOR_BGR2RGB + ) + blob = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)[None] + np.copyto(self._inputs[0]['host'], blob.ravel()) + self._cuda.memcpy_htod_async( + self._inputs[0]['device'], self._inputs[0]['host'], self._stream + ) + self._context.execute_async_v2(self._bindings, self._stream.handle) + for out in self._outputs: + self._cuda.memcpy_dtoh_async(out['host'], out['device'], self._stream) + self._stream.synchronize() + return self._outputs[0]['host'].copy().ravel() # logits [N_ROOMS] + + +class _MobileNetONNX: + """ONNX Runtime fallback for non-Jetson environments.""" + + _INPUT_SIZE = 224 + + def __init__(self, onnx_path: str): + import onnxruntime as ort + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + self._session = ort.InferenceSession(onnx_path, providers=providers) + self._input_name = self._session.get_inputs()[0].name + + def predict(self, bgr: np.ndarray) -> np.ndarray: + import cv2 + rgb = cv2.cvtColor( + cv2.resize(bgr, (self._INPUT_SIZE, self._INPUT_SIZE)), cv2.COLOR_BGR2RGB + ) + blob = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)[None] + result = self._session.run(None, {self._input_name: blob}) + return result[0].ravel() + + +class RoomClassifier: + """ + High-level room classifier. Tries TRT engine, then ONNX, then rule-based. + """ + + def __init__(self, engine_path: str = '', onnx_path: str = ''): + self._backend = None + self._method = 'rule_based' + + if engine_path: + try: + self._backend = _MobileNetTRT(engine_path) + self._method = 'mobilenet_trt' + return + except Exception: + pass + if onnx_path: + try: + self._backend = _MobileNetONNX(onnx_path) + self._method = 'mobilenet_onnx' + return + except Exception: + pass + + def classify( + self, + bgr: np.ndarray | None, + class_ids: List[int], + confidences: List[float], + ) -> tuple[int, str, float, list[float], str]: + """ + Classify the current frame. 
+ + Returns: + (room_type, room_name, confidence, class_scores, method) + """ + if self._backend is not None and bgr is not None: + logits = self._backend.predict(bgr) + probs = _softmax(logits.astype(np.float32)) + room = int(probs.argmax()) + return room, ROOM_NAMES[room], float(probs[room]), probs.tolist(), self._method + + room, name, conf, scores = classify_rule_based(class_ids, confidences) + return room, name, conf, scores, 'rule_based' diff --git a/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/scene_detector_node.py b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/scene_detector_node.py new file mode 100644 index 0000000..95e70e8 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/saltybot_scene/scene_detector_node.py @@ -0,0 +1,426 @@ +""" +scene_detector_node.py — Semantic scene understanding: YOLOv8-nano TRT FP16. + +Pipeline per frame: + 1. Receive synchronized RGB + depth from RealSense D435i. + 2. YOLOv8-nano TRT FP16 inference (target ≥15 FPS). + 3. Filter scene-relevant classes (chairs, tables, doors, stairs, pets, …). + 4. Back-project bounding box centres to 3D via aligned depth. + 5. Classify hazard type per object (stairs, drop, wet floor, glass door, pet). + 6. Run depth-image hazard scan (HazardClassifier) for drop/wet not tied to a bbox. + 7. Classify room type (rule_based or MobileNetV2 TRT). + 8. Publish results. 
"""
scene_detector_node — module-level imports, QoS profiles, and the two
interchangeable inference backends (TensorRT on Jetson, ONNX Runtime
elsewhere).  Both expose a single `infer(blob) -> list[np.ndarray]` method.
"""

import time
import math

import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, ReliabilityPolicy, HistoryPolicy, DurabilityPolicy
import message_filters
import cv2
import numpy as np
from cv_bridge import CvBridge

from sensor_msgs.msg import Image, CameraInfo
from geometry_msgs.msg import PointStamped, Point
from vision_msgs.msg import BoundingBox2D, Pose2D

import tf2_ros
import tf2_geometry_msgs  # noqa: F401

from saltybot_scene_msgs.msg import (
    SceneObject, SceneObjectArray, RoomClassification
)

from .yolo_utils import (
    preprocess, postprocess, SCENE_CLASSES, HAZARD_BY_CLASS,
    HAZARD_NONE, HAZARD_STAIRS, HAZARD_PET
)
from .room_classifier import RoomClassifier, ROOM_NAMES
from .hazard_classifier import HazardClassifier


# Latched QoS for slow-changing data (camera intrinsics): last sample is
# redelivered to late subscribers.
_LATCHED_QOS = QoSProfile(
    reliability=ReliabilityPolicy.RELIABLE,
    history=HistoryPolicy.KEEP_LAST,
    depth=1,
    durability=DurabilityPolicy.TRANSIENT_LOCAL,
)
# Best-effort QoS for high-rate sensor streams — dropping frames is fine.
_SENSOR_QOS = QoSProfile(
    reliability=ReliabilityPolicy.BEST_EFFORT,
    history=HistoryPolicy.KEEP_LAST,
    depth=5,
)


# ── TensorRT backend (identical pattern to person_detector_node) ─────────────

class _TRTBackend:
    """TensorRT engine wrapper: pinned host buffers + async CUDA stream."""

    def __init__(self, engine_path: str):
        import tensorrt as trt
        import pycuda.driver as cuda
        import pycuda.autoinit  # noqa: F401

        self._cuda = cuda
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, 'rb') as engine_file, trt.Runtime(trt_logger) as runtime:
            self._engine = runtime.deserialize_cuda_engine(engine_file.read())
        self._ctx = self._engine.create_execution_context()

        self._inputs = []
        self._outputs = []
        self._bindings = []
        for idx in range(self._engine.num_io_tensors):
            tensor_name = self._engine.get_tensor_name(idx)
            np_dtype = trt.nptype(self._engine.get_tensor_dtype(tensor_name))
            tensor_shape = tuple(self._engine.get_tensor_shape(tensor_name))
            byte_count = int(np.prod(tensor_shape)) * np.dtype(np_dtype).itemsize
            buffer_pair = {
                'host': cuda.pagelocked_empty(tensor_shape, np_dtype),
                'device': cuda.mem_alloc(byte_count),
            }
            self._bindings.append(int(buffer_pair['device']))
            is_input = (
                self._engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT
            )
            (self._inputs if is_input else self._outputs).append(buffer_pair)
        self._stream = cuda.Stream()

    def infer(self, blob: np.ndarray) -> list[np.ndarray]:
        """Copy blob in, execute, copy all outputs out (blocking)."""
        np.copyto(self._inputs[0]['host'], blob.ravel())
        self._cuda.memcpy_htod_async(
            self._inputs[0]['device'], self._inputs[0]['host'], self._stream
        )
        self._ctx.execute_async_v2(self._bindings, self._stream.handle)
        for out in self._outputs:
            self._cuda.memcpy_dtoh_async(out['host'], out['device'], self._stream)
        self._stream.synchronize()
        return [out['host'].copy() for out in self._outputs]


class _ONNXBackend:
    """ONNX Runtime fallback backend for non-Jetson development machines."""

    def __init__(self, onnx_path: str):
        import onnxruntime as ort
        self._session = ort.InferenceSession(
            onnx_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
        )
        self._input_name = self._session.get_inputs()[0].name

    def infer(self, blob: np.ndarray) -> list[np.ndarray]:
        """Run one inference; returns every output tensor."""
        return self._session.run(None, {self._input_name: blob})
# ── Node ─────────────────────────────────────────────────────────────────────

class SceneDetectorNode(Node):
    """
    Semantic scene-understanding node.

    Consumes approximately-synchronized RGB + depth frames, runs YOLOv8
    detection (TRT or ONNX backend), back-projects detections to 3-D,
    classifies hazards and room type, and publishes SceneObjectArray /
    RoomClassification messages under /social/scene/*.
    """

    def __init__(self):
        super().__init__('scene_detector')

        # ── Parameters ──────────────────────────────────────────────────────
        self.declare_parameter('engine_path', '/config/yolov8n_scene.engine')
        self.declare_parameter('onnx_path', '/config/yolov8n_scene.onnx')
        self.declare_parameter('room_engine_path', '/config/room_classifier.engine')
        self.declare_parameter('room_onnx_path', '/config/room_classifier.onnx')
        self.declare_parameter('conf_threshold', 0.35)
        self.declare_parameter('iou_threshold', 0.45)
        self.declare_parameter('publish_debug', False)
        self.declare_parameter('room_update_hz', 2.0)
        self.declare_parameter('max_depth_m', 6.0)

        engine_path = self.get_parameter('engine_path').value
        onnx_path = self.get_parameter('onnx_path').value
        self._conf = self.get_parameter('conf_threshold').value
        self._iou = self.get_parameter('iou_threshold').value
        self._debug = self.get_parameter('publish_debug').value
        room_hz = self.get_parameter('room_update_hz').value
        self._max_d = self.get_parameter('max_depth_m').value

        # ── Inference backend ────────────────────────────────────────────────
        # Try TRT first, then ONNX; the node still runs with neither (it just
        # publishes empty detection arrays).
        self._backend = None
        for cls, path in [(_TRTBackend, engine_path), (_ONNXBackend, onnx_path)]:
            if path:
                try:
                    self._backend = cls(path)
                    self.get_logger().info(
                        f'Loaded {cls.__name__} from {path}'
                    )
                    break
                except Exception as exc:
                    self.get_logger().warning(f'{cls.__name__} load failed: {exc}')

        if self._backend is None:
            self.get_logger().error(
                'No inference backend loaded — set engine_path or onnx_path param. '
                'Publishing empty detections.'
            )

        # ── Room classifier ──────────────────────────────────────────────────
        room_engine = self.get_parameter('room_engine_path').value
        room_onnx = self.get_parameter('room_onnx_path').value
        self._room_clf = RoomClassifier(
            engine_path=room_engine, onnx_path=room_onnx
        )

        # ── Hazard classifier ────────────────────────────────────────────────
        self._hazard_clf = HazardClassifier()

        # ── State ────────────────────────────────────────────────────────────
        self._bridge = CvBridge()
        self._camera_k: np.ndarray | None = None   # flattened 3×3 K, row-major
        self._frame_count = 0
        # Decimation period for room classification; the 30.0 assumes the
        # camera delivers ~30 FPS — TODO confirm against actual frame rate.
        self._room_period = max(1, int(round(30.0 / max(room_hz, 0.1))))
        self._last_room: RoomClassification | None = None

        # ── TF ───────────────────────────────────────────────────────────────
        self._tf_buffer = tf2_ros.Buffer()
        self._tf_listener = tf2_ros.TransformListener(self._tf_buffer, self)

        # ── Synchronized subscriptions ────────────────────────────────────────
        # NOTE(review): '/camera/depth/image_rect_raw' is the *depth-frame*
        # image; 3-D back-projection below uses the COLOR intrinsics at color
        # pixel coordinates, which is only valid for aligned depth
        # ('/camera/aligned_depth_to_color/image_raw') — confirm the launch
        # file enables alignment.
        rgb_sub = message_filters.Subscriber(
            self, Image, '/camera/color/image_raw', qos_profile=_SENSOR_QOS
        )
        depth_sub = message_filters.Subscriber(
            self, Image, '/camera/depth/image_rect_raw', qos_profile=_SENSOR_QOS
        )
        self._sync = message_filters.ApproximateTimeSynchronizer(
            [rgb_sub, depth_sub], queue_size=5, slop=0.05
        )
        self._sync.registerCallback(self._on_rgbd)

        self.create_subscription(
            CameraInfo, '/camera/color/camera_info', self._on_camera_info, _LATCHED_QOS
        )

        # ── Publishers ───────────────────────────────────────────────────────
        self._obj_pub = self.create_publisher(SceneObjectArray, '/social/scene/objects', 10)
        self._hazard_pub = self.create_publisher(SceneObjectArray, '/social/scene/hazards', 10)
        self._room_pub = self.create_publisher(RoomClassification, '/social/scene/room_type', 10)
        if self._debug:
            self._debug_pub = self.create_publisher(Image, '/social/scene/debug_image', 5)

        self.get_logger().info(
            f'scene_detector ready — backend={"TRT" if isinstance(self._backend, _TRTBackend) else "ONNX" if self._backend else "none"}'
            f', room_clf={self._room_clf._method}'
        )

    # ── Callbacks ────────────────────────────────────────────────────────────

    def _on_camera_info(self, msg: CameraInfo) -> None:
        """Cache the flattened 3×3 intrinsic matrix (fx=K[0], cx=K[2], fy=K[4], cy=K[5])."""
        self._camera_k = np.array(msg.k, dtype=np.float64)

    def _on_rgbd(self, rgb_msg: Image, depth_msg: Image) -> None:
        """Full per-frame pipeline: detect → back-project → hazards → room → publish."""
        t0 = time.monotonic()
        self._frame_count += 1

        # Decode images.
        # NOTE(review): RealSense depth is usually published as 16UC1 in mm;
        # requesting '32FC1' assumes an upstream conversion to metres — verify.
        try:
            bgr = self._bridge.imgmsg_to_cv2(rgb_msg, desired_encoding='bgr8')
            depth = self._bridge.imgmsg_to_cv2(depth_msg, desired_encoding='32FC1')
        except Exception as exc:
            self.get_logger().warning(f'Image decode error: {exc}')
            return

        orig_h, orig_w = bgr.shape[:2]

        # ── YOLOv8 inference ─────────────────────────────────────────────────
        detections = []
        infer_ms = 0.0
        if self._backend is not None:
            blob, scale, pad_w, pad_h = preprocess(bgr)
            t_infer = time.monotonic()
            raw = self._backend.infer(blob)
            infer_ms = (time.monotonic() - t_infer) * 1000.0
            detections = postprocess(
                raw[0], scale, pad_w, pad_h, orig_w, orig_h,
                conf_thr=self._conf, iou_thr=self._iou,
            )
            # Filter to scene-relevant classes only
            detections = [d for d in detections if d['class_id'] in SCENE_CLASSES]

        # ── Depth-based hazard scan (independent of YOLO) ────────────────────
        depth_hazards = self._hazard_clf.analyse(depth, bgr)

        # ── Build SceneObject messages ────────────────────────────────────────
        objects = []
        for det in detections:
            obj = self._build_scene_object(det, depth, rgb_msg.header)
            objects.append(obj)

        # Inject depth-detected hazards as pseudo-objects (no bbox)
        for hazard_type, conf, desc in depth_hazards:
            # Check we don't already have a YOLO object for the same hazard type
            already = any(o.hazard_type == hazard_type for o in objects)
            if not already:
                pseudo = SceneObject()
                pseudo.class_id = 255  # sentinel: depth-only hazard
                pseudo.class_name = desc
                pseudo.confidence = conf
                pseudo.hazard_type = hazard_type
                objects.append(pseudo)

        # ── Publish objects ───────────────────────────────────────────────────
        arr = SceneObjectArray()
        arr.header = rgb_msg.header
        arr.objects = objects
        arr.frame_count = self._frame_count
        arr.inference_ms = infer_ms
        self._obj_pub.publish(arr)

        # ── Publish hazard subset ────────────────────────────────────────────
        hazard_objs = [o for o in objects if o.hazard_type != 0]
        if hazard_objs:
            harr = SceneObjectArray()
            harr.header = rgb_msg.header
            harr.objects = hazard_objs
            harr.frame_count = self._frame_count
            self._hazard_pub.publish(harr)

        # ── Room classification (decimated to ~room_update_hz) ───────────────
        if self._frame_count % self._room_period == 0:
            class_ids = [d['class_id'] for d in detections]
            confs = [d['conf'] for d in detections]
            room_type, room_name, confidence, scores, method = \
                self._room_clf.classify(bgr, class_ids, confs)
            room_msg = RoomClassification()
            room_msg.header = rgb_msg.header
            room_msg.room_type = room_type
            room_msg.room_name = room_name
            room_msg.confidence = confidence
            room_msg.class_scores = scores
            room_msg.method = method
            self._room_pub.publish(room_msg)
            self._last_room = room_msg

        # ── Debug image ───────────────────────────────────────────────────────
        if self._debug and hasattr(self, '_debug_pub'):
            self._debug_pub.publish(
                self._bridge.cv2_to_imgmsg(
                    self._draw_debug(bgr, objects), encoding='bgr8'
                )
            )

        total_ms = (time.monotonic() - t0) * 1000.0
        # Throttled stats log, once every 30 frames (~1 s at 30 FPS).
        if self._frame_count % 30 == 0:
            fps = 1000.0 / max(total_ms, 1)
            room = self._last_room.room_name if self._last_room else 'unknown'
            self.get_logger().info(
                f'[scene] {fps:.1f} FPS infer={infer_ms:.1f}ms '
                f'{len(objects)} objs room={room}'
            )

    # ── Helpers ───────────────────────────────────────────────────────────────

    def _build_scene_object(
        self, det: dict, depth: np.ndarray, header
    ) -> SceneObject:
        """Convert one detection dict into a SceneObject with bbox, 3-D point, hazard."""
        obj = SceneObject()
        obj.class_id = det['class_id']
        obj.class_name = det['class_name']
        obj.confidence = det['conf']

        # Bounding box (vision_msgs BoundingBox2D, centre + size in pixels)
        cx = (det['x1'] + det['x2']) / 2
        cy = (det['y1'] + det['y2']) / 2
        bw = det['x2'] - det['x1']
        bh = det['y2'] - det['y1']
        obj.bbox.center.position.x = cx
        obj.bbox.center.position.y = cy
        obj.bbox.size_x = bw
        obj.bbox.size_y = bh

        # 3-D position via pinhole back-projection at the bbox centre
        if self._camera_k is not None:
            h, w = depth.shape
            cx_i, cy_i = int(np.clip(cx, 0, w - 1)), int(np.clip(cy, 0, h - 1))
            d = self._sample_depth(depth, cx_i, cy_i)
            if d > 0.1:
                obj.distance_m = d
                K = self._camera_k
                # K layout (row-major 3×3): fx=K[0], cx=K[2], fy=K[4], cy=K[5]
                X = (cx - K[2]) * d / K[0]
                Y = (cy - K[5]) * d / K[4]
                pt_cam = PointStamped()
                pt_cam.header = header
                pt_cam.point.x = X
                pt_cam.point.y = Y
                pt_cam.point.z = d
                obj.position_3d = pt_cam
                # Optionally transform to base_link
                try:
                    pt_bl = self._tf_buffer.transform(pt_cam, 'base_link',
                                                      rclpy.duration.Duration(seconds=0.05))
                    obj.position_3d = pt_bl
                except Exception:
                    pass  # keep camera-frame position

        # Hazard classification (by detected class; depth-only hazards added elsewhere)
        obj.hazard_type = HAZARD_BY_CLASS.get(det['class_id'], HAZARD_NONE)

        return obj

    @staticmethod
    def _sample_depth(depth: np.ndarray, u: int, v: int, window: int = 9) -> float:
        """Median of valid depths (0.1–8 m) in a window×window patch around (u, v); 0.0 if none."""
        h, w = depth.shape
        half = window // 2
        patch = depth[
            max(0, v - half): min(h, v + half + 1),
            max(0, u - half): min(w, u + half + 1),
        ]
        valid = patch[(patch > 0.1) & (patch < 8.0)]
        return float(np.median(valid)) if len(valid) > 0 else 0.0

    def _draw_debug(self, bgr: np.ndarray, objects: list) -> np.ndarray:
        """Render bboxes + labels onto a copy of the frame, coloured by hazard type."""
        img = bgr.copy()
        # hazard_type → BGR colour (0=none green, stairs/drop/wet red-ish, pet orange)
        colours = {
            0: (0, 200, 0), 1: (0, 0, 255), 2: (0, 0, 255),
            3: (0, 128, 255), 4: (0, 0, 255), 5: (255, 128, 0),
        }
        for obj in objects:
            # Depth-only pseudo-objects have no bbox — skip drawing them.
            if obj.bbox.size_x < 1:
                continue
            cx = int(obj.bbox.center.position.x)
            cy = int(obj.bbox.center.position.y)
            x1 = int(cx - obj.bbox.size_x / 2)
            y1 = int(cy - obj.bbox.size_y / 2)
            x2 = int(cx + obj.bbox.size_x / 2)
            y2 = int(cy + obj.bbox.size_y / 2)
            colour = colours.get(obj.hazard_type, (0, 200, 0))
            cv2.rectangle(img, (x1, y1), (x2, y2), colour, 2)
            label = f'{obj.class_name} {obj.confidence:.0%}'
            if obj.distance_m > 0:
                label += f' {obj.distance_m:.1f}m'
            cv2.putText(img, label, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 1)
        return img


def main(args=None):
    """Standard ROS2 entry point: init, spin, clean shutdown."""
    rclpy.init(args=args)
    node = SceneDetectorNode()
    try:
        rclpy.spin(node)
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
+""" + +from __future__ import annotations +import numpy as np + +# ── COCO 80 class names (index = COCO class id) ────────────────────────────── +COCO_NAMES = [ + 'person','bicycle','car','motorcycle','airplane','bus','train','truck','boat', + 'traffic light','fire hydrant','stop sign','parking meter','bench','bird','cat', + 'dog','horse','sheep','cow','elephant','bear','zebra','giraffe','backpack', + 'umbrella','handbag','tie','suitcase','frisbee','skis','snowboard','sports ball', + 'kite','baseball bat','baseball glove','skateboard','surfboard','tennis racket', + 'bottle','wine glass','cup','fork','knife','spoon','bowl','banana','apple', + 'sandwich','orange','broccoli','carrot','hot dog','pizza','donut','cake', + 'chair','couch','potted plant','bed','dining table','toilet','tv','laptop', + 'mouse','remote','keyboard','cell phone','microwave','oven','toaster','sink', + 'refrigerator','book','clock','vase','scissors','teddy bear','hair drier', + 'toothbrush', + # Custom extensions (indices 80+): + 'door', 'stairs', 'wet_floor', +] + +# ── Scene-relevant class IDs ───────────────────────────────────────────────── +SCENE_CLASSES = { + 0: 'person', + 15: 'cat', + 16: 'dog', + 56: 'chair', + 57: 'couch', + 58: 'potted plant', + 60: 'dining table', + 61: 'toilet', + 62: 'tv', + 68: 'microwave', + 69: 'oven', + 71: 'sink', + 72: 'refrigerator', + 80: 'door', + 81: 'stairs', + 82: 'wet_floor', +} + +# ── Hazard class mapping ────────────────────────────────────────────────────── +# SceneObject.HAZARD_* constants (must match msg definition) +HAZARD_NONE = 0 +HAZARD_STAIRS = 1 +HAZARD_DROP = 2 +HAZARD_WET_FLOOR = 3 +HAZARD_GLASS_DOOR = 4 +HAZARD_PET = 5 + +HAZARD_BY_CLASS = { + 81: HAZARD_STAIRS, + 82: HAZARD_WET_FLOOR, + 15: HAZARD_PET, # cat + 16: HAZARD_PET, # dog +} + +# ── Input preprocessing ────────────────────────────────────────────────────── +_YOLO_SIZE = 640 + + +def letterbox(image: np.ndarray, size: int = _YOLO_SIZE, pad_value: int = 114): + """ + 
Letterbox-resize BGR image to size×size. + Returns (canvas, scale, pad_w, pad_h). + """ + import cv2 + h, w = image.shape[:2] + scale = min(size / w, size / h) + new_w, new_h = int(round(w * scale)), int(round(h * scale)) + resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + canvas = np.full((size, size, 3), pad_value, dtype=np.uint8) + pad_w = (size - new_w) // 2 + pad_h = (size - new_h) // 2 + canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized + return canvas, scale, pad_w, pad_h + + +def preprocess(bgr: np.ndarray) -> tuple[np.ndarray, float, int, int]: + """ + Prepare a BGR frame for YOLOv8 inference. + + Returns: + (blob, scale, pad_w, pad_h) + blob: float32 CHW input, shape (1, 3, 640, 640), values 0–1 + """ + canvas, scale, pad_w, pad_h = letterbox(bgr) + rgb = canvas[..., ::-1] # BGR → RGB + blob = rgb.astype(np.float32) / 255.0 + blob = blob.transpose(2, 0, 1)[None] # HWC → 1CHW + blob = np.ascontiguousarray(blob) + return blob, scale, pad_w, pad_h + + +def postprocess( + raw: np.ndarray, + scale: float, + pad_w: int, + pad_h: int, + orig_w: int, + orig_h: int, + conf_thr: float = 0.35, + iou_thr: float = 0.45, +) -> list[dict]: + """ + Parse YOLOv8 output tensor → list of detection dicts. 
+ + Args: + raw: shape [1, num_classes+4, 8400] (as returned by TRT/ONNX) + scale, pad_w, pad_h: from preprocess() + orig_w, orig_h: original frame dimensions + + Returns: + List of dicts with keys: class_id, class_name, conf, x1, y1, x2, y2 + """ + preds = raw[0] # (84+, 8400) + # YOLOv8 output layout: rows 0-3 = cx,cy,w,h; rows 4+ = class scores + boxes_xywh = preds[:4].T # (8400, 4) + class_probs = preds[4:].T # (8400, N_classes) + + class_ids = class_probs.argmax(axis=1) + confidences = class_probs[np.arange(len(class_probs)), class_ids] + + # Filter by confidence + mask = confidences > conf_thr + if not mask.any(): + return [] + + boxes_xywh = boxes_xywh[mask] + confidences = confidences[mask] + class_ids = class_ids[mask] + + # cx,cy,w,h → x1,y1,x2,y2 (letterbox space) + cx, cy, w, h = boxes_xywh.T + x1 = cx - w / 2 + y1 = cy - h / 2 + x2 = cx + w / 2 + y2 = cy + h / 2 + boxes_xyxy = np.stack([x1, y1, x2, y2], axis=1) + + # NMS + kept = _nms(boxes_xyxy, confidences, iou_thr) + + results = [] + for i in kept: + cx1, cy1, cx2, cy2 = _remap( + boxes_xyxy[i], scale, pad_w, pad_h, orig_w, orig_h + ) + cid = int(class_ids[i]) + results.append({ + 'class_id': cid, + 'class_name': COCO_NAMES[cid] if cid < len(COCO_NAMES) else str(cid), + 'conf': float(confidences[i]), + 'x1': cx1, 'y1': cy1, 'x2': cx2, 'y2': cy2, + }) + return results + + +def _nms(boxes: np.ndarray, scores: np.ndarray, iou_thr: float) -> list[int]: + x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] + areas = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1) + order = scores.argsort()[::-1] + keep = [] + while len(order): + i = order[0] + keep.append(int(i)) + if len(order) == 1: + break + ix1 = np.maximum(x1[i], x1[order[1:]]) + iy1 = np.maximum(y1[i], y1[order[1:]]) + ix2 = np.minimum(x2[i], x2[order[1:]]) + iy2 = np.minimum(y2[i], y2[order[1:]]) + inter = np.maximum(0, ix2 - ix1) * np.maximum(0, iy2 - iy1) + iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-6) + order = 
order[1:][iou < iou_thr] + return keep + + +def _remap(box, scale, pad_w, pad_h, orig_w, orig_h): + x1, y1, x2, y2 = box + x1 = float(np.clip((x1 - pad_w) / scale, 0, orig_w)) + y1 = float(np.clip((y1 - pad_h) / scale, 0, orig_h)) + x2 = float(np.clip((x2 - pad_w) / scale, 0, orig_w)) + y2 = float(np.clip((y2 - pad_h) / scale, 0, orig_h)) + return x1, y1, x2, y2 diff --git a/jetson/ros2_ws/src/saltybot_scene/scripts/build_scene_trt.py b/jetson/ros2_ws/src/saltybot_scene/scripts/build_scene_trt.py new file mode 100644 index 0000000..f070c23 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/scripts/build_scene_trt.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +""" +build_scene_trt.py — Build TensorRT FP16 engines for saltybot_scene. + +Builds two engines: + 1. yolov8n_scene.engine — YOLOv8-nano for scene object detection + 2. room_classifier.engine — MobileNetV2 for room classification (--room flag) + +Prerequisites: + pip install ultralytics onnx + # TensorRT + pycuda available on Jetson (JetPack 6) + +Usage: + # Build YOLOv8n engine (downloads weights, exports to ONNX, converts to TRT FP16) + ros2 run saltybot_scene build_scene_trt + + # Also build room classifier engine + ros2 run saltybot_scene build_scene_trt -- --room + + # Custom output directory + ros2 run saltybot_scene build_scene_trt -- --output /config + +Engine performance targets (Jetson Orin Nano Super 8GB): + yolov8n_scene: ~20-25 FPS at FP16 (640×640 input, 8400 anchors) + room_classifier: ~200 FPS at FP16 (224×224 input, 8 classes) +""" + +import argparse +import os +import sys +from pathlib import Path + + +def build_yolov8_engine(output_dir: Path) -> None: + print('[build_scene_trt] Building YOLOv8-nano scene engine...') + + onnx_path = output_dir / 'yolov8n_scene.onnx' + engine_path = output_dir / 'yolov8n_scene.engine' + + if not onnx_path.exists(): + print(f' Exporting YOLOv8n to ONNX → {onnx_path}') + from ultralytics import YOLO + model = YOLO('yolov8n.pt') + model.export(format='onnx', 
def build_yolov8_engine(output_dir: Path) -> None:
    """
    Export YOLOv8-nano to ONNX (if needed) and build a TensorRT FP16 engine.

    FIX: builder.build_serialized_network() returns None on failure; writing
    None previously raised a confusing TypeError after creating an empty file.
    """
    print('[build_scene_trt] Building YOLOv8-nano scene engine...')

    onnx_path = output_dir / 'yolov8n_scene.onnx'
    engine_path = output_dir / 'yolov8n_scene.engine'

    if not onnx_path.exists():
        print(f'  Exporting YOLOv8n to ONNX → {onnx_path}')
        from ultralytics import YOLO
        model = YOLO('yolov8n.pt')
        model.export(format='onnx', imgsz=640, half=False, dynamic=False,
                     simplify=True)
        # ultralytics exports next to the weights file; move to output dir
        exported = Path('yolov8n.onnx')
        if exported.exists():
            exported.rename(onnx_path)
        else:
            print(f'  [ERROR] ONNX export not found at {exported}')
            return

    print(f'  Converting ONNX → TensorRT FP16 → {engine_path}')
    import tensorrt as trt
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, 'rb') as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(f'  [TRT parse error] {parser.get_error(i)}')
            return

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GB
    config.set_flag(trt.BuilderFlag.FP16)

    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        # Build failures (unsupported layer, OOM, …) return None rather than raise.
        print('  [ERROR] TensorRT engine build failed — see TRT log above')
        return
    with open(engine_path, 'wb') as f:
        f.write(serialized)
    print(f'  [OK] Engine saved to {engine_path}')
    _print_latency(engine_path, (1, 3, 640, 640))


def build_room_classifier_engine(output_dir: Path) -> None:
    """
    Build MobileNetV2 room classifier TRT engine.

    The ONNX model is a MobileNetV2 with the classifier head replaced by
    an 8-class linear layer (one per ROOM_* constant). Pre-trained weights
    are loaded from a checkpoint if available, otherwise random init (for
    structure verification only — train before deploying).
    """
    print('[build_scene_trt] Building room classifier engine...')
    onnx_path = output_dir / 'room_classifier.onnx'
    engine_path = output_dir / 'room_classifier.engine'

    if not onnx_path.exists():
        print(f'  Exporting MobileNetV2 room classifier to ONNX → {onnx_path}')
        import torch
        import torch.nn as nn
        from torchvision.models import mobilenet_v2

        N_ROOMS = 8
        model = mobilenet_v2(weights=None)
        model.classifier[1] = nn.Linear(model.last_channel, N_ROOMS)

        # Load pretrained weights if available
        ckpt = output_dir / 'room_classifier.pth'
        if ckpt.exists():
            model.load_state_dict(torch.load(ckpt, map_location='cpu'))
            print(f'  Loaded weights from {ckpt}')
        else:
            print(f'  [WARN] No checkpoint at {ckpt} — using random weights. '
                  f'Train before deploying.')

        model.eval()
        dummy = torch.zeros(1, 3, 224, 224)
        torch.onnx.export(
            model, dummy, str(onnx_path),
            input_names=['input'], output_names=['logits'],
            opset_version=12, dynamic_axes=None,
        )

    print(f'  Converting ONNX → TensorRT FP16 → {engine_path}')
    import tensorrt as trt
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, 'rb') as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(f'  [TRT parse error] {parser.get_error(i)}')
            return

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 512 << 20)
    config.set_flag(trt.BuilderFlag.FP16)

    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        # Same failure mode as the YOLO build: None means the build failed.
        print('  [ERROR] TensorRT engine build failed — see TRT log above')
        return
    with open(engine_path, 'wb') as f:
        f.write(serialized)
    print(f'  [OK] Engine saved to {engine_path}')


def _print_latency(engine_path: Path, input_shape: tuple) -> None:
    """Quick warmup + timing of the built engine (best-effort; skipped on error)."""
    try:
        import tensorrt as trt
        import pycuda.driver as cuda
        import pycuda.autoinit  # noqa: F401
        import numpy as np
        import time

        logger = trt.Logger(trt.Logger.WARNING)
        with open(engine_path, 'rb') as f, trt.Runtime(logger) as rt:
            engine = rt.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        # Allocate buffers
        inputs, outputs, bindings = [], [], []
        stream = cuda.Stream()
        for i in range(engine.num_io_tensors):
            name = engine.get_tensor_name(i)
            dtype = trt.nptype(engine.get_tensor_dtype(name))
            shape = tuple(engine.get_tensor_shape(name))
            nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize
            host = cuda.pagelocked_empty(shape, dtype)
            device = cuda.mem_alloc(nbytes)
            bindings.append(int(device))
            if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                inputs.append({'host': host, 'device': device})
            else:
                outputs.append({'host': host, 'device': device})

        # Warmup
        for _ in range(5):
            cuda.memcpy_htod_async(inputs[0]['device'], inputs[0]['host'], stream)
            context.execute_async_v2(bindings, stream.handle)
            stream.synchronize()

        # Benchmark
        N = 20
        t0 = time.monotonic()
        for _ in range(N):
            cuda.memcpy_htod_async(inputs[0]['device'], inputs[0]['host'], stream)
            context.execute_async_v2(bindings, stream.handle)
            stream.synchronize()
        ms = (time.monotonic() - t0) / N * 1000
        print(f'  [benchmark] {ms:.1f} ms/frame → {1000/ms:.1f} FPS')
    except Exception as exc:
        print(f'  [benchmark skipped] {exc}')


def main():
    """CLI entry point: parse args, ensure output dir, build requested engines."""
    parser = argparse.ArgumentParser(description='Build TRT engines for saltybot_scene')
    parser.add_argument('--output', default='/config',
                        help='Output directory for engine files')
    parser.add_argument('--room', action='store_true',
                        help='Also build room classifier engine')
    args, _ = parser.parse_known_args()

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    build_yolov8_engine(output_dir)
    if args.room:
        build_room_classifier_engine(output_dir)

    print('[build_scene_trt] Done.')


if __name__ == '__main__':
    main()
__name__ == '__main__': + main() diff --git a/jetson/ros2_ws/src/saltybot_scene/setup.py b/jetson/ros2_ws/src/saltybot_scene/setup.py new file mode 100644 index 0000000..fdcd063 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene/setup.py @@ -0,0 +1,27 @@ +from setuptools import setup, find_packages + +package_name = 'saltybot_scene' + +setup( + name=package_name, + version='0.1.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + [f'resource/{package_name}']), + (f'share/{package_name}', ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='seb', + maintainer_email='seb@vayrette.com', + description='Semantic scene understanding for saltybot', + license='MIT', + entry_points={ + 'console_scripts': [ + 'scene_detector = saltybot_scene.scene_detector_node:main', + 'behavior_adapter = saltybot_scene.behavior_adapter_node:main', + 'costmap_publisher = saltybot_scene.costmap_publisher_node:main', + ], + }, +) diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/CMakeLists.txt b/jetson/ros2_ws/src/saltybot_scene_msgs/CMakeLists.txt new file mode 100644 index 0000000..cc12696 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_scene_msgs/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.8) +project(saltybot_scene_msgs) + +find_package(ament_cmake REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(std_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(builtin_interfaces REQUIRED) + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/SceneObject.msg" + "msg/SceneObjectArray.msg" + "msg/RoomClassification.msg" + "msg/BehaviorHint.msg" + DEPENDENCIES std_msgs geometry_msgs vision_msgs builtin_interfaces +) + +ament_export_dependencies(rosidl_default_runtime) +ament_package() diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/msg/BehaviorHint.msg 
b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/BehaviorHint.msg
new file mode 100644
index 0000000..80b7e19
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/BehaviorHint.msg
@@ -0,0 +1,19 @@
# Behavior adaptation hint — published on /social/scene/behavior_hint.
# Consumed by motion controller and social personality nodes.

std_msgs/Header header

float32 speed_limit_mps    # max allowed linear speed (m/s); 0 = stop
float32 turn_limit_rads    # max allowed angular speed (rad/s); 0 = stop

# Personality mode — matches saltybot_social personality constants
string personality_mode    # "efficient"|"careful"|"gentle"|"cautious"|"active"

# Alert flags
bool hazard_alert          # true = immediate hazard ahead → override to slow
bool pet_nearby            # true = slow, announce presence gently
string alert_reason        # human-readable: "stairs ahead", "wet floor", etc.

# Source context
string room_name
uint8 hazard_type          # highest-severity hazard from SceneObject.HAZARD_*
diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/msg/RoomClassification.msg b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/RoomClassification.msg
new file mode 100644
index 0000000..f568347
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/RoomClassification.msg
@@ -0,0 +1,22 @@
# Room/environment type classification — published on /social/scene/room_type.

std_msgs/Header header

uint8 ROOM_UNKNOWN = 0
uint8 ROOM_LIVING_ROOM = 1
uint8 ROOM_KITCHEN = 2
uint8 ROOM_HALLWAY = 3
uint8 ROOM_BEDROOM = 4
uint8 ROOM_GARAGE = 5
uint8 ROOM_OUTDOOR = 6
uint8 ROOM_OFFICE = 7

uint8 room_type
string room_name    # human-readable: "kitchen", "outdoor", etc.
float32 confidence    # 0–1

# Per-class softmax scores (index = ROOM_* constant above)
float32[] class_scores

# Method: "rule_based" (object co-occurrence) or "mobilenet" (neural)
string method
diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObject.msg b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObject.msg
new file mode 100644
index 0000000..8678be9
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObject.msg
@@ -0,0 +1,22 @@
# A single detected scene object — published in SceneObjectArray on /social/scene/objects.

# ── Object identity ─────────────────────────────────────────────────────────
uint16 class_id      # COCO class ID (or custom: 80=door, 81=stairs, 82=wet floor)
string class_name    # human-readable label
float32 confidence   # detection confidence 0–1

# ── 2-D bounding box (pixel coords in source image frame) ─────────────────
vision_msgs/BoundingBox2D bbox

# ── 3-D position (in base_link frame, NaN if depth unavailable) ────────────
geometry_msgs/PointStamped position_3d
float32 distance_m   # straight-line distance from base_link, 0 = unknown

# ── Hazard classification ──────────────────────────────────────────────────
uint8 HAZARD_NONE = 0
uint8 HAZARD_STAIRS = 1      # stair pattern detected ahead
uint8 HAZARD_DROP = 2        # depth cliff (floor drops away)
uint8 HAZARD_WET_FLOOR = 3   # specular/glossy floor reflection
uint8 HAZARD_GLASS_DOOR = 4  # transparent surface, depth=0 with visible structure
uint8 HAZARD_PET = 5         # cat or dog requiring gentle approach
uint8 hazard_type
diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObjectArray.msg b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObjectArray.msg
new file mode 100644
index 0000000..78e025f
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_scene_msgs/msg/SceneObjectArray.msg
@@ -0,0 +1,4 @@
std_msgs/Header header
SceneObject[] objects
uint32 frame_count     # running counter for latency diagnostics
float32 inference_ms   # last YOLOv8 inference time
diff --git a/jetson/ros2_ws/src/saltybot_scene_msgs/package.xml b/jetson/ros2_ws/src/saltybot_scene_msgs/package.xml
new file mode 100644
index 0000000..57316b5
--- /dev/null
+++ b/jetson/ros2_ws/src/saltybot_scene_msgs/package.xml
@@ -0,0 +1,23 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>saltybot_scene_msgs</name>
  <version>0.1.0</version>
  <description>ROS2 messages for saltybot semantic scene understanding — objects, hazards, room classification</description>
  <!-- Maintainer email taken from saltybot_scene/setup.py — TODO confirm. -->
  <maintainer email="seb@vayrette.com">seb</maintainer>
  <license>MIT</license>

  <buildtool_depend>ament_cmake</buildtool_depend>
  <buildtool_depend>rosidl_default_generators</buildtool_depend>
  <exec_depend>rosidl_default_runtime</exec_depend>
  <member_of_group>rosidl_interface_packages</member_of_group>

  <depend>std_msgs</depend>
  <depend>geometry_msgs</depend>
  <depend>vision_msgs</depend>
  <depend>builtin_interfaces</depend>

  <export>
    <build_type>ament_cmake</build_type>
  </export>
</package>
--
2.47.2