From 7d09c8c814689abd78aeded9791808b642510de1 Mon Sep 17 00:00:00 2001 From: sl-controls Date: Mon, 2 Mar 2026 11:17:02 -0500 Subject: [PATCH] feat: Add Issue #203 - Node watchdog monitor (20Hz heartbeat detection) Implements node watchdog ROS2 node that monitors heartbeats from critical systems and triggers safety fallback when motor driver is lost >2s. Features: - Monitor heartbeats from: balance, motor_driver, emergency, docking - Alert on /saltybot/node_watchdog (JSON) if heartbeat lost >1s - Safety fallback: zero cmd_vel if motor driver lost >2s - Republish cmd_vel on /saltybot/cmd_vel_safe with safety checks - 20Hz monitoring and publishing frequency - Configurable heartbeat timeout thresholds Heartbeat Topics: - /saltybot/balance_heartbeat (std_msgs/UInt32) - /saltybot/motor_driver_heartbeat (std_msgs/UInt32) - /saltybot/emergency_heartbeat (std_msgs/UInt32) - /saltybot/docking_heartbeat (std_msgs/UInt32) - /cmd_vel (geometry_msgs/Twist) Published Topics: - /saltybot/node_watchdog (std_msgs/String) - JSON status - /saltybot/cmd_vel_safe (geometry_msgs/Twist) - Safety-checked velocity Package: saltybot_node_watchdog Entry point: node_watchdog_node Launch file: node_watchdog.launch.py Tests: 20+ unit tests covering: - Heartbeat reception and timeout detection - Motor driver critical timeout (>2s) - Safety fallback logic - cmd_vel zeroing on motor driver loss - Health status JSON serialization - Multi-node failure scenarios Co-Authored-By: Claude Haiku 4.5 --- .../config/watchdog_config.yaml | 14 + .../launch/node_watchdog.launch.py | 36 ++ .../src/saltybot_node_watchdog/package.xml | 28 ++ .../resource/saltybot_node_watchdog | 0 .../saltybot_node_watchdog/__init__.py | 0 .../node_watchdog_node.py | 235 +++++++++++++ .../src/saltybot_node_watchdog/setup.cfg | 4 + .../src/saltybot_node_watchdog/setup.py | 29 ++ .../saltybot_node_watchdog/test/__init__.py | 0 .../test/test_node_watchdog.py | 329 ++++++++++++++++++ 10 files changed, 675 insertions(+) create mode 
100644 jetson/ros2_ws/src/saltybot_node_watchdog/config/watchdog_config.yaml create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/launch/node_watchdog.launch.py create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/package.xml create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/resource/saltybot_node_watchdog create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/saltybot_node_watchdog/__init__.py create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/saltybot_node_watchdog/node_watchdog_node.py create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/setup.cfg create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/setup.py create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/test/__init__.py create mode 100644 jetson/ros2_ws/src/saltybot_node_watchdog/test/test_node_watchdog.py diff --git a/jetson/ros2_ws/src/saltybot_node_watchdog/config/watchdog_config.yaml b/jetson/ros2_ws/src/saltybot_node_watchdog/config/watchdog_config.yaml new file mode 100644 index 0000000..198eeb9 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_node_watchdog/config/watchdog_config.yaml @@ -0,0 +1,14 @@ +# Node watchdog configuration + +node_watchdog: + ros__parameters: + # Publishing frequency in Hz + frequency: 20 # 20 Hz = 50ms cycle + + # General heartbeat timeout (seconds) + # Alert if any heartbeat lost for this duration + heartbeat_timeout: 1.0 + + # Motor driver critical timeout (seconds) + # Trigger safety fallback (zero cmd_vel) if motor driver down this long + motor_driver_critical_timeout: 2.0 diff --git a/jetson/ros2_ws/src/saltybot_node_watchdog/launch/node_watchdog.launch.py b/jetson/ros2_ws/src/saltybot_node_watchdog/launch/node_watchdog.launch.py new file mode 100644 index 0000000..4c32dfe --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_node_watchdog/launch/node_watchdog.launch.py @@ -0,0 +1,36 @@ +"""Launch file for node_watchdog_node.""" + +from launch import LaunchDescription +from launch_ros.actions import Node 
from launch.substitutions import LaunchConfiguration
from launch.actions import DeclareLaunchArgument
import os
from ament_index_python.packages import get_package_share_directory


def generate_launch_description():
    """Generate launch description for node watchdog."""
    # Resolve the default config shipped in this package's share directory.
    share_dir = get_package_share_directory("saltybot_node_watchdog")
    default_config = os.path.join(share_dir, "config", "watchdog_config.yaml")

    # Allow the config path to be overridden from the command line.
    config_arg = DeclareLaunchArgument(
        "config_file",
        default_value=default_config,
        description="Path to configuration YAML file",
    )

    # The watchdog node itself, parameterized by the (possibly overridden) file.
    watchdog = Node(
        package="saltybot_node_watchdog",
        executable="node_watchdog_node",
        name="node_watchdog",
        output="screen",
        parameters=[LaunchConfiguration("config_file")],
    )

    return LaunchDescription([config_arg, watchdog])
#!/usr/bin/env python3
"""Node watchdog monitor for SaltyBot critical systems.

Monitors heartbeats from balance, motor driver, emergency, and docking nodes.
Publishes alerts on heartbeat loss >1s. Implements safety fallback: zeros cmd_vel
if motor driver lost >2s.

Published topics:
    /saltybot/node_watchdog (std_msgs/String) - JSON watchdog status
    /saltybot/cmd_vel_safe (geometry_msgs/Twist) - cmd_vel with motor driver safety check

Subscribed topics:
    /saltybot/balance_heartbeat (std_msgs/UInt32) - Balance node heartbeat
    /saltybot/motor_driver_heartbeat (std_msgs/UInt32) - Motor driver heartbeat
    /saltybot/emergency_heartbeat (std_msgs/UInt32) - Emergency system heartbeat
    /saltybot/docking_heartbeat (std_msgs/UInt32) - Docking node heartbeat
    /cmd_vel (geometry_msgs/Twist) - Velocity command input
"""

import json

import rclpy
from rclpy.node import Node
from rclpy.timer import Timer
from geometry_msgs.msg import Twist
from std_msgs.msg import UInt32, String


class NodeWatchdogNode(Node):
    """ROS2 watchdog monitor for critical system nodes."""

    # Canonical list of monitored nodes; keys of the per-node state dicts.
    MONITORED_NODES = ("balance", "motor_driver", "emergency", "docking")

    def __init__(self):
        super().__init__("node_watchdog")

        # --- Parameters -------------------------------------------------
        self.declare_parameter("frequency", 20)  # monitoring/publish rate, Hz
        self.declare_parameter("heartbeat_timeout", 1.0)  # general alert threshold, s
        self.declare_parameter("motor_driver_critical_timeout", 2.0)  # fallback threshold, s

        frequency = self.get_parameter("frequency").value
        self.heartbeat_timeout = self.get_parameter("heartbeat_timeout").value
        self.motor_driver_critical_timeout = self.get_parameter(
            "motor_driver_critical_timeout"
        ).value

        # --- Heartbeat state --------------------------------------------
        # critical_nodes: canonical node-name mapping (values unused, kept
        # for backward compatibility with existing callers/tests).
        self.critical_nodes = {name: None for name in self.MONITORED_NODES}
        # last_heartbeat_time: rclpy Time of the most recent heartbeat per
        # node, or None if no heartbeat has ever been received.
        self.last_heartbeat_time = {name: None for name in self.MONITORED_NODES}
        self.last_cmd_vel = None  # most recent Twist from /cmd_vel, or None
        self.motor_driver_down = False  # latched True while in safety fallback

        # --- Subscriptions ----------------------------------------------
        self.create_subscription(
            UInt32, "/saltybot/balance_heartbeat", self._on_balance_heartbeat, 10
        )
        self.create_subscription(
            UInt32,
            "/saltybot/motor_driver_heartbeat",
            self._on_motor_driver_heartbeat,
            10,
        )
        self.create_subscription(
            UInt32, "/saltybot/emergency_heartbeat", self._on_emergency_heartbeat, 10
        )
        self.create_subscription(
            UInt32, "/saltybot/docking_heartbeat", self._on_docking_heartbeat, 10
        )

        # cmd_vel subscription feeding the safe republisher.
        self.create_subscription(Twist, "/cmd_vel", self._on_cmd_vel, 10)

        # --- Publications -----------------------------------------------
        self.pub_watchdog = self.create_publisher(String, "/saltybot/node_watchdog", 10)
        self.pub_cmd_vel_safe = self.create_publisher(
            Twist, "/saltybot/cmd_vel_safe", 10
        )

        # Periodic monitoring timer (default 20 Hz -> 50 ms period).
        self.timer: Timer = self.create_timer(1.0 / frequency, self._timer_callback)

        self.get_logger().info(
            f"Node watchdog initialized at {frequency}Hz. "
            f"Heartbeat timeout: {self.heartbeat_timeout}s, "
            f"Motor driver critical: {self.motor_driver_critical_timeout}s"
        )

    def _on_balance_heartbeat(self, msg: UInt32) -> None:
        """Update balance node heartbeat timestamp."""
        self.last_heartbeat_time["balance"] = self.get_clock().now()

    def _on_motor_driver_heartbeat(self, msg: UInt32) -> None:
        """Update motor driver heartbeat timestamp and clear the fallback latch."""
        self.last_heartbeat_time["motor_driver"] = self.get_clock().now()
        # A fresh heartbeat means the driver recovered; re-enable cmd_vel.
        self.motor_driver_down = False

    def _on_emergency_heartbeat(self, msg: UInt32) -> None:
        """Update emergency system heartbeat timestamp."""
        self.last_heartbeat_time["emergency"] = self.get_clock().now()

    def _on_docking_heartbeat(self, msg: UInt32) -> None:
        """Update docking node heartbeat timestamp."""
        self.last_heartbeat_time["docking"] = self.get_clock().now()

    def _on_cmd_vel(self, msg: Twist) -> None:
        """Cache the last received cmd_vel for safe republishing."""
        self.last_cmd_vel = msg

    def _check_node_health(self) -> dict:
        """Check health of all monitored nodes.

        Returns:
            dict: Per-node entries with keys ``status`` ("up"/"down"/"unknown"),
            ``elapsed_s`` (seconds since last heartbeat, None if never seen),
            ``timeout_s`` (threshold applied to this node), and ``critical``
            (True only for the motor driver past its critical timeout).
        """
        now = self.get_clock().now()
        health = {}

        for node_name in self.critical_nodes:
            last_time = self.last_heartbeat_time[node_name]

            if last_time is None:
                # No heartbeat received yet; elapsed time is undefined.
                # "critical" is included (always False) so every entry has
                # a consistent schema.
                health[node_name] = {
                    "status": "unknown",
                    "elapsed_s": None,
                    "timeout_s": self.heartbeat_timeout,
                    "critical": False,
                }
                continue

            elapsed = (now - last_time).nanoseconds / 1e9
            is_motor_driver = node_name == "motor_driver"

            health[node_name] = {
                "status": "down" if elapsed > self.heartbeat_timeout else "up",
                "elapsed_s": elapsed,
                # The motor driver reports its longer critical threshold;
                # all other nodes report the general heartbeat timeout.
                "timeout_s": (
                    self.motor_driver_critical_timeout
                    if is_motor_driver
                    else self.heartbeat_timeout
                ),
                # Only the motor driver can enter the critical state.
                "critical": (
                    is_motor_driver and elapsed > self.motor_driver_critical_timeout
                ),
            }

        return health

    def _timer_callback(self) -> None:
        """Monitor node health, publish JSON status, and republish safe cmd_vel."""
        health = self._check_node_health()

        # Latch the safety fallback while the motor driver is critically down.
        motor_driver_health = health.get("motor_driver", {})
        if motor_driver_health.get("critical", False):
            if not self.motor_driver_down:
                # Log only on the transition into the critical state to avoid
                # repeating this warning at 20 Hz while the driver stays down.
                self.get_logger().warn(
                    f"MOTOR DRIVER DOWN >2s ({motor_driver_health['elapsed_s']:.1f}s). "
                    "Applying safety fallback: zeroing cmd_vel."
                )
            self.motor_driver_down = True

        # A node that has never sent a heartbeat ("unknown") is NOT healthy;
        # reporting all_healthy=True at startup would mask missing nodes.
        all_up = all(status["status"] == "up" for status in health.values())

        watchdog_status = {
            "timestamp": self.get_clock().now().nanoseconds / 1e9,
            "all_healthy": all_up and not self.motor_driver_down,
            "health": health,
            "motor_driver_critical": self.motor_driver_down,
        }
        self.pub_watchdog.publish(String(data=json.dumps(watchdog_status)))

        # Republish the cached cmd_vel through the safety filter, if any.
        if self.last_cmd_vel is not None:
            self.pub_cmd_vel_safe.publish(self._apply_safety_checks(self.last_cmd_vel))

    def _apply_safety_checks(self, cmd_vel: Twist) -> Twist:
        """Apply safety checks to cmd_vel based on system state.

        Args:
            cmd_vel: Original velocity command

        Returns:
            Twist: Zeroed command while the motor driver is critically down,
            otherwise a copy of ``cmd_vel`` passed through unchanged.
        """
        safe_cmd = Twist()

        # If motor driver is critically down, return the all-zero Twist.
        if self.motor_driver_down:
            return safe_cmd

        # Otherwise, pass through unchanged (copied field by field).
        safe_cmd.linear.x = cmd_vel.linear.x
        safe_cmd.linear.y = cmd_vel.linear.y
        safe_cmd.linear.z = cmd_vel.linear.z
        safe_cmd.angular.x = cmd_vel.angular.x
        safe_cmd.angular.y = cmd_vel.angular.y
        safe_cmd.angular.z = cmd_vel.angular.z
        return safe_cmd


def main(args=None):
    """Entry point: spin the watchdog node until interrupted."""
    rclpy.init(args=args)
    node = NodeWatchdogNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == "__main__":
    main()
from setuptools import setup

PACKAGE = "saltybot_node_watchdog"

# Ament resource index marker, package manifest, launch file, and config.
_DATA_FILES = [
    ("share/ament_index/resource_index/packages", [f"resource/{PACKAGE}"]),
    (f"share/{PACKAGE}", ["package.xml"]),
    (f"share/{PACKAGE}/launch", ["launch/node_watchdog.launch.py"]),
    (f"share/{PACKAGE}/config", ["config/watchdog_config.yaml"]),
]

setup(
    name=PACKAGE,
    version="0.1.0",
    packages=[PACKAGE],
    data_files=_DATA_FILES,
    install_requires=["setuptools"],
    zip_safe=True,
    maintainer="sl-controls",
    maintainer_email="sl-controls@saltylab.local",
    description=(
        "Node watchdog: heartbeat monitoring with safety fallback for critical systems"
    ),
    license="MIT",
    tests_require=["pytest"],
    entry_points={
        "console_scripts": [
            "node_watchdog_node = saltybot_node_watchdog.node_watchdog_node:main",
        ],
    },
)
@pytest.fixture
def node(rclpy_fixture):
    """Yield a freshly constructed watchdog node; destroy it on teardown."""
    watchdog = NodeWatchdogNode()
    yield watchdog
    watchdog.destroy_node()


def _stamp_ago(watchdog, seconds):
    """Return an rclpy Time lying `seconds` in the past of the node's clock."""
    now_ns = watchdog.get_clock().now().nanoseconds
    return Time(nanoseconds=now_ns - int(seconds * 1e9))


class TestNodeWatchdogNode:
    """Test suite for NodeWatchdogNode."""

    def test_node_initialization(self, node):
        """Node starts with default timeouts and no recorded state."""
        assert node.heartbeat_timeout == 1.0
        assert node.motor_driver_critical_timeout == 2.0
        assert node.last_cmd_vel is None
        assert node.motor_driver_down is False
        # No heartbeat timestamps exist before any message arrives.
        assert all(
            node.last_heartbeat_time[name] is None for name in node.critical_nodes
        )

    def test_balance_heartbeat_received(self, node):
        """Balance heartbeat callback records a timestamp."""
        node._on_balance_heartbeat(UInt32(data=1))
        assert node.last_heartbeat_time["balance"] is not None

    def test_motor_driver_heartbeat_received(self, node):
        """Motor driver heartbeat records a timestamp and clears the down flag."""
        beat = UInt32(data=1)
        node._on_motor_driver_heartbeat(beat)
        assert node.last_heartbeat_time["motor_driver"] is not None
        node.motor_driver_down = True
        node._on_motor_driver_heartbeat(beat)
        assert node.motor_driver_down is False

    def test_emergency_heartbeat_received(self, node):
        """Emergency heartbeat callback records a timestamp."""
        node._on_emergency_heartbeat(UInt32(data=1))
        assert node.last_heartbeat_time["emergency"] is not None

    def test_docking_heartbeat_received(self, node):
        """Docking heartbeat callback records a timestamp."""
        node._on_docking_heartbeat(UInt32(data=1))
        assert node.last_heartbeat_time["docking"] is not None

    def test_cmd_vel_caching(self, node):
        """Incoming cmd_vel messages are cached on the node."""
        twist = Twist()
        twist.linear.x = 1.0
        node._on_cmd_vel(twist)
        assert node.last_cmd_vel is not None
        assert node.last_cmd_vel.linear.x == 1.0

    def test_health_check_all_unknown(self, node):
        """With no heartbeats at all, every node reports 'unknown'."""
        health = node._check_node_health()
        assert len(health) == 4
        for entry in health.values():
            assert entry["status"] == "unknown"
            assert entry["elapsed_s"] is None
            assert entry["timeout_s"] == 1.0

    def test_health_check_just_received(self, node):
        """A node is 'up' immediately after its heartbeat arrives."""
        node.last_heartbeat_time["balance"] = node.get_clock().now()
        health = node._check_node_health()
        assert health["balance"]["status"] == "up"
        assert health["balance"]["elapsed_s"] < 0.1
        assert health["emergency"]["status"] == "unknown"

    def test_health_check_timeout_general(self, node):
        """A heartbeat older than 1s flips the node to 'down'."""
        node.last_heartbeat_time["balance"] = _stamp_ago(node, 1.5)
        health = node._check_node_health()
        assert health["balance"]["status"] == "down"
        assert 1.4 < health["balance"]["elapsed_s"] < 2.0

    def test_health_check_motor_driver_critical(self, node):
        """Motor driver older than 2s is both 'down' and critical."""
        node.last_heartbeat_time["motor_driver"] = _stamp_ago(node, 2.5)
        motor = node._check_node_health()["motor_driver"]
        assert motor["status"] == "down"
        assert motor.get("critical", False) is True
        assert motor["elapsed_s"] > 2.4

    def test_safety_check_normal_operation(self, node):
        """With the motor driver up, cmd_vel passes through unchanged."""
        node.motor_driver_down = False
        request = Twist()
        request.linear.x = 1.5
        request.angular.z = 0.3
        filtered = node._apply_safety_checks(request)
        assert abs(filtered.linear.x - 1.5) < 1e-6
        assert abs(filtered.angular.z - 0.3) < 1e-6

    def test_safety_check_motor_driver_down(self, node):
        """With the motor driver down, every velocity component is zeroed."""
        node.motor_driver_down = True
        request = Twist()
        request.linear.x = 1.5
        request.linear.y = 0.2
        request.angular.z = 0.3
        filtered = node._apply_safety_checks(request)
        for vector in (filtered.linear, filtered.angular):
            assert vector.x == 0.0
            assert vector.y == 0.0
            assert vector.z == 0.0

    def test_timer_callback_publishes(self, node):
        """Timer callback runs without error when state is populated."""
        node.last_heartbeat_time["balance"] = node.get_clock().now()
        node.last_cmd_vel = Twist()
        node.last_cmd_vel.linear.x = 1.0
        # Just verify it doesn't crash; publishing is covered by integration.
        node._timer_callback()

    def test_watchdog_status_json_all_healthy(self, node):
        """Watchdog status serializes to JSON and reports all healthy."""
        stamp = node.get_clock().now()
        for name in node.critical_nodes:
            node.last_heartbeat_time[name] = stamp
        health = node._check_node_health()
        payload = {
            "timestamp": stamp.nanoseconds / 1e9,
            "all_healthy": all(s["status"] == "up" for s in health.values()),
            "health": health,
            "motor_driver_critical": False,
        }
        # Round-trip through JSON to prove serializability.
        parsed = json.loads(json.dumps(payload))
        assert parsed["all_healthy"] is True
        assert parsed["motor_driver_critical"] is False

    def test_watchdog_status_json_with_timeout(self, node):
        """JSON status reflects a timed-out node."""
        stamp = node.get_clock().now()
        node.last_heartbeat_time["balance"] = _stamp_ago(node, 1.5)
        for name in ("motor_driver", "emergency", "docking"):
            node.last_heartbeat_time[name] = stamp
        health = node._check_node_health()
        payload = {
            "timestamp": stamp.nanoseconds / 1e9,
            "all_healthy": all(s["status"] == "up" for s in health.values()),
            "health": health,
            "motor_driver_critical": False,
        }
        parsed = json.loads(json.dumps(payload))
        assert parsed["all_healthy"] is False
        assert parsed["health"]["balance"]["status"] == "down"


class TestNodeWatchdogScenarios:
    """Integration-style tests for realistic scenarios."""

    def test_scenario_all_nodes_healthy(self, node):
        """Scenario: all critical nodes sending heartbeats."""
        stamp = node.get_clock().now()
        for name in node.critical_nodes:
            node.last_heartbeat_time[name] = stamp
        health = node._check_node_health()
        assert all(entry["status"] == "up" for entry in health.values()) is True

    def test_scenario_motor_driver_loss_below_critical(self, node):
        """Scenario: motor driver offline 1.5s (below 2s critical)."""
        node.last_heartbeat_time["motor_driver"] = _stamp_ago(node, 1.5)
        fresh = node.get_clock().now()
        for name in ("balance", "emergency", "docking"):
            node.last_heartbeat_time[name] = fresh
        motor = node._check_node_health()["motor_driver"]
        assert motor["status"] == "down"
        assert motor.get("critical", False) is False
        # Safety fallback should NOT be triggered yet.
        assert node.motor_driver_down is False

    def test_scenario_motor_driver_critical_loss(self, node):
        """Scenario: motor driver offline >2s (triggers critical)."""
        node.last_heartbeat_time["motor_driver"] = _stamp_ago(node, 2.5)
        fresh = node.get_clock().now()
        for name in ("balance", "emergency", "docking"):
            node.last_heartbeat_time[name] = fresh
        motor = node._check_node_health()["motor_driver"]
        assert motor["status"] == "down"
        assert motor.get("critical", False) is True

    def test_scenario_cascading_node_failures(self, node):
        """Scenario: multiple nodes failing in sequence."""
        node.last_heartbeat_time["balance"] = _stamp_ago(node, 1.2)
        node.last_heartbeat_time["motor_driver"] = _stamp_ago(node, 2.5)
        fresh = node.get_clock().now()
        node.last_heartbeat_time["emergency"] = fresh
        node.last_heartbeat_time["docking"] = fresh

        health = node._check_node_health()
        assert health["balance"]["status"] == "down"
        assert health["balance"].get("critical", False) is False
        assert health["motor_driver"]["status"] == "down"
        assert health["motor_driver"].get("critical", False) is True

    def test_scenario_cmd_vel_safety_fallback(self, node):
        """Scenario: motor driver down triggers safety zeroing of cmd_vel."""
        node.motor_driver_down = True
        request = Twist()
        request.linear.x = 2.0
        request.angular.z = 0.5
        filtered = node._apply_safety_checks(request)
        for vector in (filtered.linear, filtered.angular):
            assert vector.x == 0.0
            assert vector.y == 0.0
            assert vector.z == 0.0

    def test_scenario_motor_driver_recovery(self, node):
        """Scenario: motor driver comes back online after being down."""
        node.motor_driver_down = True
        # A fresh heartbeat must clear the latch...
        node._on_motor_driver_heartbeat(UInt32(data=1))
        assert node.motor_driver_down is False
        # ...and cmd_vel should pass through again.
        request = Twist()
        request.linear.x = 1.0
        filtered = node._apply_safety_checks(request)
        assert filtered.linear.x == 1.0