feat: Add Issue #203 - Node watchdog monitor (20Hz heartbeat detection)
Implements node watchdog ROS2 node that monitors heartbeats from critical systems and triggers safety fallback when motor driver is lost >2s. Features: - Monitor heartbeats from: balance, motor_driver, emergency, docking - Alert on /saltybot/node_watchdog (JSON) if heartbeat lost >1s - Safety fallback: zero cmd_vel if motor driver lost >2s - Republish cmd_vel on /saltybot/cmd_vel_safe with safety checks - 20Hz monitoring and publishing frequency - Configurable heartbeat timeout thresholds Heartbeat Topics: - /saltybot/balance_heartbeat (std_msgs/UInt32) - /saltybot/motor_driver_heartbeat (std_msgs/UInt32) - /saltybot/emergency_heartbeat (std_msgs/UInt32) - /saltybot/docking_heartbeat (std_msgs/UInt32) - /cmd_vel (geometry_msgs/Twist) Published Topics: - /saltybot/node_watchdog (std_msgs/String) - JSON status - /saltybot/cmd_vel_safe (geometry_msgs/Twist) - Safety-checked velocity Package: saltybot_node_watchdog Entry point: node_watchdog_node Launch file: node_watchdog.launch.py Tests: 20+ unit tests covering: - Heartbeat reception and timeout detection - Motor driver critical timeout (>2s) - Safety fallback logic - cmd_vel zeroing on motor driver loss - Health status JSON serialization - Multi-node failure scenarios Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
8ff85601c3
commit
a4371f2a1d
@ -0,0 +1,14 @@
|
||||
# Node watchdog configuration
|
||||
|
||||
node_watchdog:
|
||||
ros__parameters:
|
||||
# Publishing frequency in Hz
|
||||
frequency: 20 # 20 Hz = 50ms cycle
|
||||
|
||||
# General heartbeat timeout (seconds)
|
||||
# Alert if any heartbeat lost for this duration
|
||||
heartbeat_timeout: 1.0
|
||||
|
||||
# Motor driver critical timeout (seconds)
|
||||
# Trigger safety fallback (zero cmd_vel) if motor driver down this long
|
||||
motor_driver_critical_timeout: 2.0
|
||||
@ -0,0 +1,36 @@
|
||||
"""Launch file for node_watchdog_node."""
|
||||
|
||||
from launch import LaunchDescription
|
||||
from launch_ros.actions import Node
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
from launch.actions import DeclareLaunchArgument
|
||||
import os
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
|
||||
def generate_launch_description():
|
||||
"""Generate launch description for node watchdog."""
|
||||
# Package directory
|
||||
pkg_dir = get_package_share_directory("saltybot_node_watchdog")
|
||||
|
||||
# Parameters
|
||||
config_file = os.path.join(pkg_dir, "config", "watchdog_config.yaml")
|
||||
|
||||
# Declare launch arguments
|
||||
return LaunchDescription(
|
||||
[
|
||||
DeclareLaunchArgument(
|
||||
"config_file",
|
||||
default_value=config_file,
|
||||
description="Path to configuration YAML file",
|
||||
),
|
||||
# Node watchdog node
|
||||
Node(
|
||||
package="saltybot_node_watchdog",
|
||||
executable="node_watchdog_node",
|
||||
name="node_watchdog",
|
||||
output="screen",
|
||||
parameters=[LaunchConfiguration("config_file")],
|
||||
),
|
||||
]
|
||||
)
|
||||
28
jetson/ros2_ws/src/saltybot_node_watchdog/package.xml
Normal file
28
jetson/ros2_ws/src/saltybot_node_watchdog/package.xml
Normal file
@ -0,0 +1,28 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>saltybot_node_watchdog</name>
|
||||
<version>0.1.0</version>
|
||||
<description>
|
||||
Node watchdog monitor for SaltyBot critical systems. Monitors heartbeats from balance,
|
||||
motor driver, emergency, and docking nodes. Publishes alerts on heartbeat loss >1s.
|
||||
Implements safety fallback: zeros cmd_vel if motor driver lost >2s. Runs at 20Hz.
|
||||
</description>
|
||||
<maintainer email="sl-controls@saltylab.local">sl-controls</maintainer>
|
||||
<license>MIT</license>
|
||||
|
||||
<depend>rclpy</depend>
|
||||
<depend>geometry_msgs</depend>
|
||||
<depend>std_msgs</depend>
|
||||
|
||||
<buildtool_depend>ament_python</buildtool_depend>
|
||||
|
||||
<test_depend>ament_copyright</test_depend>
|
||||
<test_depend>ament_flake8</test_depend>
|
||||
<test_depend>ament_pep257</test_depend>
|
||||
<test_depend>python3-pytest</test_depend>
|
||||
|
||||
<export>
|
||||
<build_type>ament_python</build_type>
|
||||
</export>
|
||||
</package>
|
||||
Binary file not shown.
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Node watchdog monitor for SaltyBot critical systems.
|
||||
|
||||
Monitors heartbeats from balance, motor driver, emergency, and docking nodes.
|
||||
Publishes alerts on heartbeat loss >1s. Implements safety fallback: zeros cmd_vel
|
||||
if motor driver lost >2s.
|
||||
|
||||
Published topics:
|
||||
/saltybot/node_watchdog (std_msgs/String) - JSON watchdog status
|
||||
/saltybot/cmd_vel_safe (geometry_msgs/Twist) - cmd_vel with motor driver safety check
|
||||
|
||||
Subscribed topics:
|
||||
/saltybot/balance_heartbeat (std_msgs/UInt32) - Balance node heartbeat
|
||||
/saltybot/motor_driver_heartbeat (std_msgs/UInt32) - Motor driver heartbeat
|
||||
/saltybot/emergency_heartbeat (std_msgs/UInt32) - Emergency system heartbeat
|
||||
/saltybot/docking_heartbeat (std_msgs/UInt32) - Docking node heartbeat
|
||||
/cmd_vel (geometry_msgs/Twist) - Velocity command input
|
||||
"""
|
||||
|
||||
import json
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from rclpy.timer import Timer
|
||||
from geometry_msgs.msg import Twist
|
||||
from std_msgs.msg import UInt32, String
|
||||
|
||||
|
||||
class NodeWatchdogNode(Node):
|
||||
"""ROS2 watchdog monitor for critical system nodes."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("node_watchdog")
|
||||
|
||||
# Parameters
|
||||
self.declare_parameter("frequency", 20) # Hz
|
||||
self.declare_parameter("heartbeat_timeout", 1.0) # seconds, general timeout
|
||||
self.declare_parameter("motor_driver_critical_timeout", 2.0) # seconds
|
||||
|
||||
frequency = self.get_parameter("frequency").value
|
||||
self.heartbeat_timeout = self.get_parameter("heartbeat_timeout").value
|
||||
self.motor_driver_critical_timeout = self.get_parameter(
|
||||
"motor_driver_critical_timeout"
|
||||
).value
|
||||
|
||||
# Heartbeat tracking
|
||||
self.critical_nodes = {
|
||||
"balance": None,
|
||||
"motor_driver": None,
|
||||
"emergency": None,
|
||||
"docking": None,
|
||||
}
|
||||
self.last_heartbeat_time = {
|
||||
"balance": None,
|
||||
"motor_driver": None,
|
||||
"emergency": None,
|
||||
"docking": None,
|
||||
}
|
||||
self.last_cmd_vel = None
|
||||
self.motor_driver_down = False
|
||||
|
||||
# Subscriptions for heartbeats
|
||||
self.create_subscription(
|
||||
UInt32, "/saltybot/balance_heartbeat", self._on_balance_heartbeat, 10
|
||||
)
|
||||
self.create_subscription(
|
||||
UInt32,
|
||||
"/saltybot/motor_driver_heartbeat",
|
||||
self._on_motor_driver_heartbeat,
|
||||
10,
|
||||
)
|
||||
self.create_subscription(
|
||||
UInt32, "/saltybot/emergency_heartbeat", self._on_emergency_heartbeat, 10
|
||||
)
|
||||
self.create_subscription(
|
||||
UInt32, "/saltybot/docking_heartbeat", self._on_docking_heartbeat, 10
|
||||
)
|
||||
|
||||
# cmd_vel subscription and safe republishing
|
||||
self.create_subscription(Twist, "/cmd_vel", self._on_cmd_vel, 10)
|
||||
|
||||
# Publications
|
||||
self.pub_watchdog = self.create_publisher(String, "/saltybot/node_watchdog", 10)
|
||||
self.pub_cmd_vel_safe = self.create_publisher(
|
||||
Twist, "/saltybot/cmd_vel_safe", 10
|
||||
)
|
||||
|
||||
# Timer for periodic monitoring at 20Hz
|
||||
period = 1.0 / frequency
|
||||
self.timer: Timer = self.create_timer(period, self._timer_callback)
|
||||
|
||||
self.get_logger().info(
|
||||
f"Node watchdog initialized at {frequency}Hz. "
|
||||
f"Heartbeat timeout: {self.heartbeat_timeout}s, "
|
||||
f"Motor driver critical: {self.motor_driver_critical_timeout}s"
|
||||
)
|
||||
|
||||
def _on_balance_heartbeat(self, msg: UInt32) -> None:
|
||||
"""Update balance node heartbeat timestamp."""
|
||||
self.last_heartbeat_time["balance"] = self.get_clock().now()
|
||||
|
||||
def _on_motor_driver_heartbeat(self, msg: UInt32) -> None:
|
||||
"""Update motor driver heartbeat timestamp."""
|
||||
self.last_heartbeat_time["motor_driver"] = self.get_clock().now()
|
||||
self.motor_driver_down = False
|
||||
|
||||
def _on_emergency_heartbeat(self, msg: UInt32) -> None:
|
||||
"""Update emergency system heartbeat timestamp."""
|
||||
self.last_heartbeat_time["emergency"] = self.get_clock().now()
|
||||
|
||||
def _on_docking_heartbeat(self, msg: UInt32) -> None:
|
||||
"""Update docking node heartbeat timestamp."""
|
||||
self.last_heartbeat_time["docking"] = self.get_clock().now()
|
||||
|
||||
def _on_cmd_vel(self, msg: Twist) -> None:
|
||||
"""Cache the last received cmd_vel."""
|
||||
self.last_cmd_vel = msg
|
||||
|
||||
def _check_node_health(self) -> dict:
|
||||
"""Check health of all monitored nodes.
|
||||
|
||||
Returns:
|
||||
dict: Health status of each node with timeout and elapsed time.
|
||||
"""
|
||||
now = self.get_clock().now()
|
||||
health = {}
|
||||
|
||||
for node_name in self.critical_nodes:
|
||||
last_time = self.last_heartbeat_time[node_name]
|
||||
|
||||
if last_time is None:
|
||||
# No heartbeat received yet
|
||||
health[node_name] = {
|
||||
"status": "unknown",
|
||||
"elapsed_s": None,
|
||||
"timeout_s": self.heartbeat_timeout,
|
||||
}
|
||||
else:
|
||||
# Calculate elapsed time since last heartbeat
|
||||
elapsed = (now - last_time).nanoseconds / 1e9
|
||||
is_timeout = elapsed > self.heartbeat_timeout
|
||||
|
||||
# Special case: motor driver has longer critical timeout
|
||||
if node_name == "motor_driver":
|
||||
is_critical = elapsed > self.motor_driver_critical_timeout
|
||||
else:
|
||||
is_critical = False
|
||||
|
||||
health[node_name] = {
|
||||
"status": "down" if is_timeout else "up",
|
||||
"elapsed_s": elapsed,
|
||||
"timeout_s": (
|
||||
self.motor_driver_critical_timeout
|
||||
if node_name == "motor_driver"
|
||||
else self.heartbeat_timeout
|
||||
),
|
||||
"critical": is_critical,
|
||||
}
|
||||
|
||||
return health
|
||||
|
||||
def _timer_callback(self) -> None:
|
||||
"""Monitor node health and publish alerts at 20Hz."""
|
||||
health = self._check_node_health()
|
||||
|
||||
# Detect if motor driver is in critical state (down for >2s)
|
||||
motor_driver_health = health.get("motor_driver", {})
|
||||
if motor_driver_health.get("critical", False):
|
||||
self.motor_driver_down = True
|
||||
self.get_logger().warn(
|
||||
f"MOTOR DRIVER DOWN >2s ({motor_driver_health['elapsed_s']:.1f}s). "
|
||||
"Applying safety fallback: zeroing cmd_vel."
|
||||
)
|
||||
|
||||
# Determine any nodes down for >1s
|
||||
nodes_with_timeout = {
|
||||
name: status
|
||||
for name, status in health.items()
|
||||
if status.get("status") == "down"
|
||||
}
|
||||
|
||||
# Publish watchdog status
|
||||
watchdog_status = {
|
||||
"timestamp": self.get_clock().now().nanoseconds / 1e9,
|
||||
"all_healthy": len(nodes_with_timeout) == 0
|
||||
and not self.motor_driver_down,
|
||||
"health": health,
|
||||
"motor_driver_critical": self.motor_driver_down,
|
||||
}
|
||||
watchdog_msg = String(data=json.dumps(watchdog_status))
|
||||
self.pub_watchdog.publish(watchdog_msg)
|
||||
|
||||
# Publish cmd_vel with safety checks
|
||||
if self.last_cmd_vel is not None:
|
||||
cmd_vel_safe = self._apply_safety_checks(self.last_cmd_vel)
|
||||
self.pub_cmd_vel_safe.publish(cmd_vel_safe)
|
||||
|
||||
def _apply_safety_checks(self, cmd_vel: Twist) -> Twist:
|
||||
"""Apply safety checks to cmd_vel based on system state.
|
||||
|
||||
Args:
|
||||
cmd_vel: Original velocity command
|
||||
|
||||
Returns:
|
||||
Twist: Potentially modified velocity command for safe operation.
|
||||
"""
|
||||
safe_cmd = Twist()
|
||||
|
||||
# If motor driver is critically down, zero all velocities
|
||||
if self.motor_driver_down:
|
||||
return safe_cmd
|
||||
|
||||
# Otherwise, pass through unchanged
|
||||
safe_cmd.linear.x = cmd_vel.linear.x
|
||||
safe_cmd.linear.y = cmd_vel.linear.y
|
||||
safe_cmd.linear.z = cmd_vel.linear.z
|
||||
safe_cmd.angular.x = cmd_vel.angular.x
|
||||
safe_cmd.angular.y = cmd_vel.angular.y
|
||||
safe_cmd.angular.z = cmd_vel.angular.z
|
||||
return safe_cmd
|
||||
|
||||
|
||||
def main(args=None):
|
||||
rclpy.init(args=args)
|
||||
node = NodeWatchdogNode()
|
||||
try:
|
||||
rclpy.spin(node)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
node.destroy_node()
|
||||
rclpy.shutdown()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
4
jetson/ros2_ws/src/saltybot_node_watchdog/setup.cfg
Normal file
4
jetson/ros2_ws/src/saltybot_node_watchdog/setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/saltybot_node_watchdog
|
||||
[egg_info]
|
||||
tag_date = 0
|
||||
29
jetson/ros2_ws/src/saltybot_node_watchdog/setup.py
Normal file
29
jetson/ros2_ws/src/saltybot_node_watchdog/setup.py
Normal file
@ -0,0 +1,29 @@
|
||||
from setuptools import setup
|
||||
|
||||
package_name = "saltybot_node_watchdog"
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version="0.1.0",
|
||||
packages=[package_name],
|
||||
data_files=[
|
||||
("share/ament_index/resource_index/packages", [f"resource/{package_name}"]),
|
||||
(f"share/{package_name}", ["package.xml"]),
|
||||
(f"share/{package_name}/launch", ["launch/node_watchdog.launch.py"]),
|
||||
(f"share/{package_name}/config", ["config/watchdog_config.yaml"]),
|
||||
],
|
||||
install_requires=["setuptools"],
|
||||
zip_safe=True,
|
||||
maintainer="sl-controls",
|
||||
maintainer_email="sl-controls@saltylab.local",
|
||||
description=(
|
||||
"Node watchdog: heartbeat monitoring with safety fallback for critical systems"
|
||||
),
|
||||
license="MIT",
|
||||
tests_require=["pytest"],
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"node_watchdog_node = saltybot_node_watchdog.node_watchdog_node:main",
|
||||
],
|
||||
},
|
||||
)
|
||||
@ -0,0 +1,329 @@
|
||||
"""Unit tests for node_watchdog_node."""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
import time
|
||||
from geometry_msgs.msg import Twist
|
||||
from std_msgs.msg import UInt32, String
|
||||
|
||||
import rclpy
|
||||
from rclpy.time import Time
|
||||
|
||||
# Import the node under test
|
||||
from saltybot_node_watchdog.node_watchdog_node import NodeWatchdogNode
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def rclpy_fixture():
|
||||
"""Initialize and cleanup rclpy."""
|
||||
rclpy.init()
|
||||
yield
|
||||
rclpy.shutdown()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def node(rclpy_fixture):
|
||||
"""Create a watchdog node instance."""
|
||||
node = NodeWatchdogNode()
|
||||
yield node
|
||||
node.destroy_node()
|
||||
|
||||
|
||||
class TestNodeWatchdogNode:
|
||||
"""Test suite for NodeWatchdogNode."""
|
||||
|
||||
def test_node_initialization(self, node):
|
||||
"""Test that node initializes with correct defaults."""
|
||||
assert node.heartbeat_timeout == 1.0
|
||||
assert node.motor_driver_critical_timeout == 2.0
|
||||
assert node.last_cmd_vel is None
|
||||
assert node.motor_driver_down is False
|
||||
|
||||
# All heartbeat times should be None initially
|
||||
for node_name in node.critical_nodes:
|
||||
assert node.last_heartbeat_time[node_name] is None
|
||||
|
||||
def test_balance_heartbeat_received(self, node):
|
||||
"""Test balance node heartbeat recording."""
|
||||
msg = UInt32(data=1)
|
||||
node._on_balance_heartbeat(msg)
|
||||
assert node.last_heartbeat_time["balance"] is not None
|
||||
|
||||
def test_motor_driver_heartbeat_received(self, node):
|
||||
"""Test motor driver heartbeat recording."""
|
||||
msg = UInt32(data=1)
|
||||
node._on_motor_driver_heartbeat(msg)
|
||||
assert node.last_heartbeat_time["motor_driver"] is not None
|
||||
# Motor driver heartbeat should clear the down flag
|
||||
node.motor_driver_down = True
|
||||
node._on_motor_driver_heartbeat(msg)
|
||||
assert node.motor_driver_down is False
|
||||
|
||||
def test_emergency_heartbeat_received(self, node):
|
||||
"""Test emergency system heartbeat recording."""
|
||||
msg = UInt32(data=1)
|
||||
node._on_emergency_heartbeat(msg)
|
||||
assert node.last_heartbeat_time["emergency"] is not None
|
||||
|
||||
def test_docking_heartbeat_received(self, node):
|
||||
"""Test docking node heartbeat recording."""
|
||||
msg = UInt32(data=1)
|
||||
node._on_docking_heartbeat(msg)
|
||||
assert node.last_heartbeat_time["docking"] is not None
|
||||
|
||||
def test_cmd_vel_caching(self, node):
|
||||
"""Test that cmd_vel messages are cached."""
|
||||
msg = Twist()
|
||||
msg.linear.x = 1.0
|
||||
node._on_cmd_vel(msg)
|
||||
assert node.last_cmd_vel is not None
|
||||
assert node.last_cmd_vel.linear.x == 1.0
|
||||
|
||||
def test_health_check_all_unknown(self, node):
|
||||
"""Test health check when no heartbeats received."""
|
||||
health = node._check_node_health()
|
||||
|
||||
assert len(health) == 4
|
||||
for node_name in node.critical_nodes:
|
||||
assert health[node_name]["status"] == "unknown"
|
||||
assert health[node_name]["elapsed_s"] is None
|
||||
assert health[node_name]["timeout_s"] == 1.0
|
||||
|
||||
def test_health_check_just_received(self, node):
|
||||
"""Test health check just after heartbeat received."""
|
||||
# Record a heartbeat for balance node
|
||||
node.last_heartbeat_time["balance"] = node.get_clock().now()
|
||||
|
||||
health = node._check_node_health()
|
||||
|
||||
# Balance should be up, others unknown
|
||||
assert health["balance"]["status"] == "up"
|
||||
assert health["balance"]["elapsed_s"] < 0.1
|
||||
assert health["emergency"]["status"] == "unknown"
|
||||
|
||||
def test_health_check_timeout_general(self, node):
|
||||
"""Test that heartbeat timeout is detected (>1s)."""
|
||||
# Simulate a heartbeat that arrived >1s ago
|
||||
now = node.get_clock().now()
|
||||
old_time = Time(
|
||||
nanoseconds=now.nanoseconds - int(1.5 * 1e9)
|
||||
) # 1.5 seconds ago
|
||||
node.last_heartbeat_time["balance"] = old_time
|
||||
|
||||
health = node._check_node_health()
|
||||
|
||||
assert health["balance"]["status"] == "down"
|
||||
assert health["balance"]["elapsed_s"] > 1.4
|
||||
assert health["balance"]["elapsed_s"] < 2.0
|
||||
|
||||
def test_health_check_motor_driver_critical(self, node):
|
||||
"""Test motor driver critical timeout (>2s)."""
|
||||
# Simulate motor driver heartbeat >2s ago
|
||||
now = node.get_clock().now()
|
||||
old_time = Time(nanoseconds=now.nanoseconds - int(2.5 * 1e9)) # 2.5 seconds
|
||||
node.last_heartbeat_time["motor_driver"] = old_time
|
||||
|
||||
health = node._check_node_health()
|
||||
|
||||
motor_health = health["motor_driver"]
|
||||
assert motor_health["status"] == "down"
|
||||
assert motor_health.get("critical", False) is True
|
||||
assert motor_health["elapsed_s"] > 2.4
|
||||
|
||||
def test_safety_check_normal_operation(self, node):
|
||||
"""Test safety check passes through cmd_vel normally."""
|
||||
node.motor_driver_down = False
|
||||
cmd = Twist()
|
||||
cmd.linear.x = 1.5
|
||||
cmd.angular.z = 0.3
|
||||
|
||||
safe_cmd = node._apply_safety_checks(cmd)
|
||||
|
||||
assert abs(safe_cmd.linear.x - 1.5) < 1e-6
|
||||
assert abs(safe_cmd.angular.z - 0.3) < 1e-6
|
||||
|
||||
def test_safety_check_motor_driver_down(self, node):
|
||||
"""Test safety check zeros cmd_vel when motor driver is down."""
|
||||
node.motor_driver_down = True
|
||||
cmd = Twist()
|
||||
cmd.linear.x = 1.5
|
||||
cmd.linear.y = 0.2
|
||||
cmd.angular.z = 0.3
|
||||
|
||||
safe_cmd = node._apply_safety_checks(cmd)
|
||||
|
||||
# All velocities should be zero
|
||||
assert safe_cmd.linear.x == 0.0
|
||||
assert safe_cmd.linear.y == 0.0
|
||||
assert safe_cmd.linear.z == 0.0
|
||||
assert safe_cmd.angular.x == 0.0
|
||||
assert safe_cmd.angular.y == 0.0
|
||||
assert safe_cmd.angular.z == 0.0
|
||||
|
||||
def test_timer_callback_publishes(self, node):
|
||||
"""Test that timer callback publishes watchdog status."""
|
||||
# Record a heartbeat
|
||||
node.last_heartbeat_time["balance"] = node.get_clock().now()
|
||||
node.last_cmd_vel = Twist()
|
||||
node.last_cmd_vel.linear.x = 1.0
|
||||
|
||||
# Call timer callback
|
||||
node._timer_callback()
|
||||
# Just verify it doesn't crash; actual publishing is tested via integration
|
||||
|
||||
def test_watchdog_status_json_all_healthy(self, node):
|
||||
"""Test watchdog status JSON when all nodes healthy."""
|
||||
# Record all heartbeats
|
||||
now = node.get_clock().now()
|
||||
for node_name in node.critical_nodes:
|
||||
node.last_heartbeat_time[node_name] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
watchdog_status = {
|
||||
"timestamp": now.nanoseconds / 1e9,
|
||||
"all_healthy": all(
|
||||
s["status"] == "up" for s in health.values()
|
||||
),
|
||||
"health": health,
|
||||
"motor_driver_critical": False,
|
||||
}
|
||||
|
||||
# Verify it's valid JSON
|
||||
json_str = json.dumps(watchdog_status)
|
||||
parsed = json.loads(json_str)
|
||||
|
||||
assert parsed["all_healthy"] is True
|
||||
assert parsed["motor_driver_critical"] is False
|
||||
|
||||
def test_watchdog_status_json_with_timeout(self, node):
|
||||
"""Test watchdog status JSON when node has timed out."""
|
||||
# Balance heartbeat >1s ago
|
||||
now = node.get_clock().now()
|
||||
old_time = Time(nanoseconds=now.nanoseconds - int(1.5 * 1e9))
|
||||
node.last_heartbeat_time["balance"] = old_time
|
||||
|
||||
# Others are current
|
||||
for name in ["motor_driver", "emergency", "docking"]:
|
||||
node.last_heartbeat_time[name] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
watchdog_status = {
|
||||
"timestamp": now.nanoseconds / 1e9,
|
||||
"all_healthy": all(s["status"] == "up" for s in health.values()),
|
||||
"health": health,
|
||||
"motor_driver_critical": False,
|
||||
}
|
||||
|
||||
json_str = json.dumps(watchdog_status)
|
||||
parsed = json.loads(json_str)
|
||||
|
||||
assert parsed["all_healthy"] is False
|
||||
assert parsed["health"]["balance"]["status"] == "down"
|
||||
|
||||
|
||||
class TestNodeWatchdogScenarios:
|
||||
"""Integration-style tests for realistic scenarios."""
|
||||
|
||||
def test_scenario_all_nodes_healthy(self, node):
|
||||
"""Scenario: all critical nodes sending heartbeats."""
|
||||
now = node.get_clock().now()
|
||||
|
||||
# All nodes sending heartbeats
|
||||
for name in node.critical_nodes:
|
||||
node.last_heartbeat_time[name] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
|
||||
all_up = all(h["status"] == "up" for h in health.values())
|
||||
assert all_up is True
|
||||
|
||||
def test_scenario_motor_driver_loss_below_critical(self, node):
|
||||
"""Scenario: motor driver offline 1.5s (below 2s critical)."""
|
||||
now = node.get_clock().now()
|
||||
old_time = Time(nanoseconds=now.nanoseconds - int(1.5 * 1e9))
|
||||
|
||||
# Motor driver down 1.5s, others healthy
|
||||
node.last_heartbeat_time["motor_driver"] = old_time
|
||||
for name in ["balance", "emergency", "docking"]:
|
||||
node.last_heartbeat_time[name] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
motor = health["motor_driver"]
|
||||
|
||||
assert motor["status"] == "down"
|
||||
assert motor.get("critical", False) is False
|
||||
# Safety fallback should NOT be triggered yet
|
||||
assert node.motor_driver_down is False
|
||||
|
||||
def test_scenario_motor_driver_critical_loss(self, node):
|
||||
"""Scenario: motor driver offline >2s (triggers critical)."""
|
||||
now = node.get_clock().now()
|
||||
old_time = Time(nanoseconds=now.nanoseconds - int(2.5 * 1e9))
|
||||
|
||||
node.last_heartbeat_time["motor_driver"] = old_time
|
||||
node.last_heartbeat_time["balance"] = now
|
||||
node.last_heartbeat_time["emergency"] = now
|
||||
node.last_heartbeat_time["docking"] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
motor = health["motor_driver"]
|
||||
|
||||
assert motor["status"] == "down"
|
||||
assert motor.get("critical", False) is True
|
||||
|
||||
def test_scenario_cascading_node_failures(self, node):
|
||||
"""Scenario: multiple nodes failing in sequence."""
|
||||
now = node.get_clock().now()
|
||||
old1s = Time(nanoseconds=now.nanoseconds - int(1.2 * 1e9))
|
||||
old2s = Time(nanoseconds=now.nanoseconds - int(2.5 * 1e9))
|
||||
|
||||
# Balance down 1.2s, motor driver down 2.5s, others healthy
|
||||
node.last_heartbeat_time["balance"] = old1s
|
||||
node.last_heartbeat_time["motor_driver"] = old2s
|
||||
node.last_heartbeat_time["emergency"] = now
|
||||
node.last_heartbeat_time["docking"] = now
|
||||
|
||||
health = node._check_node_health()
|
||||
|
||||
assert health["balance"]["status"] == "down"
|
||||
assert health["balance"].get("critical", False) is False
|
||||
assert health["motor_driver"]["status"] == "down"
|
||||
assert health["motor_driver"].get("critical", False) is True
|
||||
|
||||
def test_scenario_cmd_vel_safety_fallback(self, node):
|
||||
"""Scenario: motor driver down triggers safety zeroing of cmd_vel."""
|
||||
# Motor driver is critically down
|
||||
node.motor_driver_down = True
|
||||
|
||||
cmd = Twist()
|
||||
cmd.linear.x = 2.0
|
||||
cmd.angular.z = 0.5
|
||||
|
||||
safe_cmd = node._apply_safety_checks(cmd)
|
||||
|
||||
# All should be zeroed
|
||||
assert safe_cmd.linear.x == 0.0
|
||||
assert safe_cmd.linear.y == 0.0
|
||||
assert safe_cmd.linear.z == 0.0
|
||||
assert safe_cmd.angular.x == 0.0
|
||||
assert safe_cmd.angular.y == 0.0
|
||||
assert safe_cmd.angular.z == 0.0
|
||||
|
||||
def test_scenario_motor_driver_recovery(self, node):
|
||||
"""Scenario: motor driver comes back online after being down."""
|
||||
now = node.get_clock().now()
|
||||
|
||||
# Motor driver was down
|
||||
node.motor_driver_down = True
|
||||
|
||||
# Motor driver sends heartbeat
|
||||
node._on_motor_driver_heartbeat(UInt32(data=1))
|
||||
|
||||
# Should clear the down flag
|
||||
assert node.motor_driver_down is False
|
||||
|
||||
# cmd_vel should pass through
|
||||
cmd = Twist()
|
||||
cmd.linear.x = 1.0
|
||||
safe_cmd = node._apply_safety_checks(cmd)
|
||||
assert safe_cmd.linear.x == 1.0
|
||||
Loading…
x
Reference in New Issue
Block a user