feat: Diagnostics aggregator (Issue #658)
New saltybot_diagnostics_aggregator ROS2 package — unified health dashboard aggregating telemetry into /saltybot/system_health JSON at 1 Hz. Subscribes to /vesc/health, /diagnostics, /saltybot/safety_zone/status, /saltybot/pose/status, /saltybot/uwb/status. Tracks 8 subsystems: motors, battery, imu, uwb, lidar, camera, can_bus, estop. - subsystem.py: SubsystemState, stale detection, transition log, severity ranking, keyword→subsystem routing (no ROS2 dependency) - aggregator_node.py: ROS2 node, 1 Hz heartbeat, overall_status rollup, last_error field, 50-entry transition ring buffer - test_aggregator.py: 46 unit tests, all passing - config/aggregator_params.yaml, launch file Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d9b4b10b90
commit
cd263c385d
@ -0,0 +1,15 @@
|
||||
# Diagnostics Aggregator — Issue #658
|
||||
# Unified health dashboard aggregating telemetry from all SaltyBot subsystems.
|
||||
|
||||
diagnostics_aggregator:
|
||||
ros__parameters:
|
||||
# Publish rate for /saltybot/system_health (Hz)
|
||||
heartbeat_hz: 1.0
|
||||
|
||||
# Seconds without a topic update before a subsystem is marked STALE
|
||||
# Increase for sensors with lower publish rates (e.g. UWB at 5 Hz)
|
||||
stale_timeout_s: 5.0
|
||||
|
||||
# Maximum number of state transitions kept in the in-memory ring buffer
|
||||
# Last 10 transitions are included in each /saltybot/system_health publish
|
||||
transition_log_size: 50
|
||||
@ -0,0 +1,44 @@
|
||||
"""Launch file for diagnostics aggregator node."""
|
||||
|
||||
import os
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
from launch import LaunchDescription
|
||||
from launch.actions import DeclareLaunchArgument
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
from launch_ros.actions import Node
|
||||
|
||||
|
||||
def generate_launch_description():
|
||||
pkg_dir = get_package_share_directory("saltybot_diagnostics_aggregator")
|
||||
config_file = os.path.join(pkg_dir, "config", "aggregator_params.yaml")
|
||||
|
||||
return LaunchDescription([
|
||||
DeclareLaunchArgument(
|
||||
"config_file",
|
||||
default_value=config_file,
|
||||
description="Path to aggregator_params.yaml",
|
||||
),
|
||||
DeclareLaunchArgument(
|
||||
"heartbeat_hz",
|
||||
default_value="1.0",
|
||||
description="Publish rate for /saltybot/system_health (Hz)",
|
||||
),
|
||||
DeclareLaunchArgument(
|
||||
"stale_timeout_s",
|
||||
default_value="5.0",
|
||||
description="Seconds without update before subsystem goes STALE",
|
||||
),
|
||||
Node(
|
||||
package="saltybot_diagnostics_aggregator",
|
||||
executable="diagnostics_aggregator_node",
|
||||
name="diagnostics_aggregator",
|
||||
output="screen",
|
||||
parameters=[
|
||||
LaunchConfiguration("config_file"),
|
||||
{
|
||||
"heartbeat_hz": LaunchConfiguration("heartbeat_hz"),
|
||||
"stale_timeout_s": LaunchConfiguration("stale_timeout_s"),
|
||||
},
|
||||
],
|
||||
),
|
||||
])
|
||||
@ -0,0 +1,30 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>saltybot_diagnostics_aggregator</name>
|
||||
<version>0.1.0</version>
|
||||
<description>
|
||||
Diagnostics aggregator for SaltyBot — unified health dashboard node (Issue #658).
|
||||
Subscribes to /vesc/health, /diagnostics, /saltybot/safety_zone/status,
|
||||
/saltybot/pose/status, /saltybot/uwb/status. Aggregates into
|
||||
/saltybot/system_health JSON at 1 Hz. Tracks motors, battery, imu, uwb,
|
||||
lidar, camera, can_bus, estop subsystems with state-transition logging.
|
||||
</description>
|
||||
<maintainer email="sl-firmware@saltylab.local">sl-firmware</maintainer>
|
||||
<license>MIT</license>
|
||||
|
||||
<depend>rclpy</depend>
|
||||
<depend>std_msgs</depend>
|
||||
<depend>diagnostic_msgs</depend>
|
||||
|
||||
<buildtool_depend>ament_python</buildtool_depend>
|
||||
|
||||
<test_depend>ament_copyright</test_depend>
|
||||
<test_depend>ament_flake8</test_depend>
|
||||
<test_depend>ament_pep257</test_depend>
|
||||
<test_depend>python3-pytest</test_depend>
|
||||
|
||||
<export>
|
||||
<build_type>ament_python</build_type>
|
||||
</export>
|
||||
</package>
|
||||
@ -0,0 +1,312 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Diagnostics aggregator — unified health dashboard node (Issue #658).
|
||||
|
||||
Subscribes to telemetry and diagnostics topics from all subsystems, aggregates
|
||||
them into a single /saltybot/system_health JSON publish at 1 Hz.
|
||||
|
||||
Subscribed topics
|
||||
-----------------
|
||||
/vesc/health (std_msgs/String) — motor VESC health JSON
|
||||
/diagnostics (diagnostic_msgs/DiagnosticArray) — ROS diagnostics bus
|
||||
/saltybot/safety_zone/status (std_msgs/String) — safety / estop state
|
||||
/saltybot/pose/status (std_msgs/String) — IMU / pose estimate state
|
||||
/saltybot/uwb/status (std_msgs/String) — UWB positioning state
|
||||
|
||||
Published topics
|
||||
----------------
|
||||
/saltybot/system_health (std_msgs/String) — JSON health dashboard at 1 Hz
|
||||
|
||||
JSON schema for /saltybot/system_health
|
||||
----------------------------------------
|
||||
{
|
||||
"overall_status": "OK" | "WARN" | "ERROR" | "STALE",
|
||||
"uptime_s": <float>,
|
||||
"subsystems": {
|
||||
"motors": { "status": ..., "message": ..., "age_s": ..., "previous_status": ... },
|
||||
"battery": { ... },
|
||||
"imu": { ... },
|
||||
"uwb": { ... },
|
||||
"lidar": { ... },
|
||||
"camera": { ... },
|
||||
"can_bus": { ... },
|
||||
"estop": { ... }
|
||||
},
|
||||
"last_error": { "subsystem": ..., "message": ..., "timestamp": ... } | null,
|
||||
"recent_transitions": [ { "subsystem", "from_status", "to_status",
|
||||
"message", "timestamp_iso" }, ... ],
|
||||
"timestamp": "<ISO-8601>"
|
||||
}
|
||||
|
||||
Parameters
|
||||
----------
|
||||
heartbeat_hz float 1.0 Publish rate (Hz)
|
||||
stale_timeout_s float 5.0 Seconds without update → STALE
|
||||
transition_log_size int 50 Max recent transitions kept in memory
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from std_msgs.msg import String
|
||||
from diagnostic_msgs.msg import DiagnosticArray
|
||||
|
||||
from .subsystem import (
|
||||
SubsystemState,
|
||||
Transition,
|
||||
STATUS_OK,
|
||||
STATUS_WARN,
|
||||
STATUS_ERROR,
|
||||
STATUS_STALE,
|
||||
STATUS_UNKNOWN,
|
||||
worse,
|
||||
ros_level_to_status,
|
||||
SUBSYSTEM_NAMES as _SUBSYSTEM_NAMES,
|
||||
keyword_to_subsystem as _keyword_to_subsystem,
|
||||
)
|
||||
|
||||
|
||||
class DiagnosticsAggregatorNode(Node):
|
||||
"""Unified health dashboard node."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__("diagnostics_aggregator")
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Parameters
|
||||
# ----------------------------------------------------------------
|
||||
self.declare_parameter("heartbeat_hz", 1.0)
|
||||
self.declare_parameter("stale_timeout_s", 5.0)
|
||||
self.declare_parameter("transition_log_size", 50)
|
||||
|
||||
hz = float(self.get_parameter("heartbeat_hz").value)
|
||||
stale_timeout = float(self.get_parameter("stale_timeout_s").value)
|
||||
log_size = int(self.get_parameter("transition_log_size").value)
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Subsystem state table
|
||||
# ----------------------------------------------------------------
|
||||
self._subsystems: dict[str, SubsystemState] = {
|
||||
name: SubsystemState(name=name, stale_timeout_s=stale_timeout)
|
||||
for name in _SUBSYSTEM_NAMES
|
||||
}
|
||||
self._transitions: deque[Transition] = deque(maxlen=log_size)
|
||||
self._last_error: Optional[dict] = None
|
||||
self._start_mono = time.monotonic()
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Subscriptions
|
||||
# ----------------------------------------------------------------
|
||||
self.create_subscription(String, "/vesc/health",
|
||||
self._on_vesc_health, 10)
|
||||
self.create_subscription(DiagnosticArray, "/diagnostics",
|
||||
self._on_diagnostics, 10)
|
||||
self.create_subscription(String, "/saltybot/safety_zone/status",
|
||||
self._on_safety_zone, 10)
|
||||
self.create_subscription(String, "/saltybot/pose/status",
|
||||
self._on_pose_status, 10)
|
||||
self.create_subscription(String, "/saltybot/uwb/status",
|
||||
self._on_uwb_status, 10)
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Publisher
|
||||
# ----------------------------------------------------------------
|
||||
self._pub = self.create_publisher(String, "/saltybot/system_health", 1)
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# 1 Hz heartbeat timer
|
||||
# ----------------------------------------------------------------
|
||||
self.create_timer(1.0 / max(0.1, hz), self._on_timer)
|
||||
|
||||
self.get_logger().info(
|
||||
f"diagnostics_aggregator: {len(_SUBSYSTEM_NAMES)} subsystems, "
|
||||
f"heartbeat={hz} Hz, stale_timeout={stale_timeout} s"
|
||||
)
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Topic callbacks
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
def _on_vesc_health(self, msg: String) -> None:
|
||||
"""Parse /vesc/health JSON → motors subsystem."""
|
||||
now = time.monotonic()
|
||||
try:
|
||||
d = json.loads(msg.data)
|
||||
fault = d.get("fault_code", 0)
|
||||
alive = d.get("alive", True)
|
||||
if not alive:
|
||||
status, message = STATUS_STALE, "VESC offline"
|
||||
elif fault != 0:
|
||||
status = STATUS_ERROR
|
||||
message = f"VESC fault {d.get('fault_name', fault)}"
|
||||
else:
|
||||
status = STATUS_OK
|
||||
message = (
|
||||
f"RPM={d.get('rpm', '?')} "
|
||||
f"I={d.get('current_a', '?')} A "
|
||||
f"V={d.get('voltage_v', '?')} V"
|
||||
)
|
||||
self._update("motors", status, message, now)
|
||||
except Exception as exc:
|
||||
self.get_logger().debug(f"/vesc/health parse error: {exc}")
|
||||
|
||||
def _on_diagnostics(self, msg: DiagnosticArray) -> None:
|
||||
"""Fan /diagnostics entries out to per-subsystem states."""
|
||||
now = time.monotonic()
|
||||
# Accumulate worst status per subsystem across all entries in this array
|
||||
pending: dict[str, tuple[str, str]] = {} # subsystem → (status, message)
|
||||
|
||||
for ds in msg.status:
|
||||
subsystem = _keyword_to_subsystem(ds.name)
|
||||
if subsystem is None:
|
||||
continue
|
||||
status = ros_level_to_status(ds.level)
|
||||
message = ds.message or ""
|
||||
if subsystem not in pending:
|
||||
pending[subsystem] = (status, message)
|
||||
else:
|
||||
prev_s, prev_m = pending[subsystem]
|
||||
new_worst = worse(status, prev_s)
|
||||
pending[subsystem] = (
|
||||
new_worst,
|
||||
message if new_worst == status else prev_m,
|
||||
)
|
||||
|
||||
for subsystem, (status, message) in pending.items():
|
||||
self._update(subsystem, status, message, now)
|
||||
|
||||
def _on_safety_zone(self, msg: String) -> None:
|
||||
"""Parse /saltybot/safety_zone/status JSON → estop subsystem."""
|
||||
now = time.monotonic()
|
||||
try:
|
||||
d = json.loads(msg.data)
|
||||
triggered = d.get("estop_triggered", d.get("triggered", False))
|
||||
active = d.get("safety_zone_active", d.get("active", True))
|
||||
if triggered:
|
||||
status, message = STATUS_ERROR, "E-stop triggered"
|
||||
elif not active:
|
||||
status, message = STATUS_WARN, "Safety zone inactive"
|
||||
else:
|
||||
status, message = STATUS_OK, "Safety zone active"
|
||||
self._update("estop", status, message, now)
|
||||
except Exception as exc:
|
||||
self.get_logger().debug(f"/saltybot/safety_zone/status parse error: {exc}")
|
||||
|
||||
def _on_pose_status(self, msg: String) -> None:
|
||||
"""Parse /saltybot/pose/status JSON → imu subsystem."""
|
||||
now = time.monotonic()
|
||||
try:
|
||||
d = json.loads(msg.data)
|
||||
status_str = d.get("status", "OK").upper()
|
||||
if status_str not in (STATUS_OK, STATUS_WARN, STATUS_ERROR):
|
||||
status_str = STATUS_WARN
|
||||
message = d.get("message", d.get("msg", ""))
|
||||
self._update("imu", status_str, message, now)
|
||||
except Exception as exc:
|
||||
self.get_logger().debug(f"/saltybot/pose/status parse error: {exc}")
|
||||
|
||||
def _on_uwb_status(self, msg: String) -> None:
|
||||
"""Parse /saltybot/uwb/status JSON → uwb subsystem."""
|
||||
now = time.monotonic()
|
||||
try:
|
||||
d = json.loads(msg.data)
|
||||
status_str = d.get("status", "OK").upper()
|
||||
if status_str not in (STATUS_OK, STATUS_WARN, STATUS_ERROR):
|
||||
status_str = STATUS_WARN
|
||||
message = d.get("message", d.get("msg", ""))
|
||||
self._update("uwb", status_str, message, now)
|
||||
except Exception as exc:
|
||||
self.get_logger().debug(f"/saltybot/uwb/status parse error: {exc}")
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
def _update(self, subsystem: str, status: str, message: str, now: float) -> None:
|
||||
"""Update subsystem state and record any transition."""
|
||||
s = self._subsystems.get(subsystem)
|
||||
if s is None:
|
||||
return
|
||||
transition = s.update(status, message, now)
|
||||
if transition is not None:
|
||||
self._transitions.append(transition)
|
||||
self.get_logger().info(
|
||||
f"[{subsystem}] {transition.from_status} → {transition.to_status}: {message}"
|
||||
)
|
||||
if transition.to_status == STATUS_ERROR:
|
||||
self._last_error = {
|
||||
"subsystem": subsystem,
|
||||
"message": message,
|
||||
"timestamp": transition.timestamp_iso,
|
||||
}
|
||||
|
||||
def _apply_stale_checks(self, now: float) -> None:
|
||||
for s in self._subsystems.values():
|
||||
transition = s.apply_stale_check(now)
|
||||
if transition is not None:
|
||||
self._transitions.append(transition)
|
||||
self.get_logger().warn(
|
||||
f"[{transition.subsystem}] went STALE (no data)"
|
||||
)
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Heartbeat timer
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
def _on_timer(self) -> None:
|
||||
now = time.monotonic()
|
||||
self._apply_stale_checks(now)
|
||||
|
||||
# Overall status = worst across all subsystems
|
||||
overall = STATUS_OK
|
||||
for s in self._subsystems.values():
|
||||
overall = worse(overall, s.status)
|
||||
# UNKNOWN does not degrade overall if at least one subsystem is known
|
||||
known = [s for s in self._subsystems.values() if s.status != STATUS_UNKNOWN]
|
||||
if not known:
|
||||
overall = STATUS_UNKNOWN
|
||||
|
||||
uptime = now - self._start_mono
|
||||
|
||||
payload = {
|
||||
"overall_status": overall,
|
||||
"uptime_s": round(uptime, 1),
|
||||
"subsystems": {
|
||||
name: s.to_dict(now)
|
||||
for name, s in self._subsystems.items()
|
||||
},
|
||||
"last_error": self._last_error,
|
||||
"recent_transitions": [
|
||||
{
|
||||
"subsystem": t.subsystem,
|
||||
"from_status": t.from_status,
|
||||
"to_status": t.to_status,
|
||||
"message": t.message,
|
||||
"timestamp": t.timestamp_iso,
|
||||
}
|
||||
for t in list(self._transitions)[-10:] # last 10 in the JSON
|
||||
],
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
self._pub.publish(String(data=json.dumps(payload)))
|
||||
|
||||
|
||||
def main(args=None):
|
||||
rclpy.init(args=args)
|
||||
node = DiagnosticsAggregatorNode()
|
||||
try:
|
||||
rclpy.spin(node)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
node.destroy_node()
|
||||
rclpy.shutdown()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -0,0 +1,148 @@
|
||||
"""Subsystem state tracking for the diagnostics aggregator.
|
||||
|
||||
Each SubsystemState tracks one logical subsystem (motors, battery, etc.)
|
||||
across potentially multiple source topics. Status priority:
|
||||
ERROR > WARN > STALE > OK > UNKNOWN
|
||||
"""
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Status constants — ordered by severity (higher index = more severe)
|
||||
# ---------------------------------------------------------------------------
|
||||
STATUS_UNKNOWN = "UNKNOWN"
|
||||
STATUS_OK = "OK"
|
||||
STATUS_STALE = "STALE"
|
||||
STATUS_WARN = "WARN"
|
||||
STATUS_ERROR = "ERROR"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subsystem registry and diagnostic keyword routing
|
||||
# ---------------------------------------------------------------------------
|
||||
SUBSYSTEM_NAMES: list[str] = [
|
||||
"motors", "battery", "imu", "uwb", "lidar", "camera", "can_bus", "estop"
|
||||
]
|
||||
|
||||
# (keywords-tuple, subsystem-name) — first match wins, lower-cased comparison
|
||||
DIAG_KEYWORD_MAP: list[tuple[tuple[str, ...], str]] = [
|
||||
(("vesc", "motor", "esc", "fsesc"), "motors"),
|
||||
(("battery", "ina219", "lvc", "coulomb", "vbat"), "battery"),
|
||||
(("imu", "mpu6000", "bno055", "icm42688", "gyro", "accel"), "imu"),
|
||||
(("uwb", "dw1000", "dw3000"), "uwb"),
|
||||
(("lidar", "rplidar", "sick", "laser"), "lidar"),
|
||||
(("camera", "realsense", "oak", "webcam"), "camera"),
|
||||
(("can", "can_bus", "can_driver"), "can_bus"),
|
||||
(("estop", "safety", "emergency", "e-stop"), "estop"),
|
||||
]
|
||||
|
||||
|
||||
def keyword_to_subsystem(name: str) -> Optional[str]:
|
||||
"""Map a diagnostic status name to a subsystem key, or None."""
|
||||
lower = name.lower()
|
||||
for keywords, subsystem in DIAG_KEYWORD_MAP:
|
||||
if any(kw in lower for kw in keywords):
|
||||
return subsystem
|
||||
return None
|
||||
|
||||
|
||||
_SEVERITY: dict[str, int] = {
|
||||
STATUS_UNKNOWN: 0,
|
||||
STATUS_OK: 1,
|
||||
STATUS_STALE: 2,
|
||||
STATUS_WARN: 3,
|
||||
STATUS_ERROR: 4,
|
||||
}
|
||||
|
||||
|
||||
def worse(a: str, b: str) -> str:
|
||||
"""Return the more severe of two status strings."""
|
||||
return a if _SEVERITY.get(a, 0) >= _SEVERITY.get(b, 0) else b
|
||||
|
||||
|
||||
def ros_level_to_status(level: int) -> str:
|
||||
"""Convert diagnostic_msgs/DiagnosticStatus.level to a status string."""
|
||||
return {0: STATUS_OK, 1: STATUS_WARN, 2: STATUS_ERROR}.get(level, STATUS_UNKNOWN)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Transition log entry
|
||||
# ---------------------------------------------------------------------------
|
||||
@dataclass
|
||||
class Transition:
|
||||
"""A logged status change for one subsystem."""
|
||||
subsystem: str
|
||||
from_status: str
|
||||
to_status: str
|
||||
message: str
|
||||
timestamp_iso: str # ISO-8601 wall-clock
|
||||
monotonic_s: float # time.monotonic() at the transition
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-subsystem state
|
||||
# ---------------------------------------------------------------------------
|
||||
@dataclass
|
||||
class SubsystemState:
|
||||
"""Live health state for one logical subsystem."""
|
||||
|
||||
name: str
|
||||
stale_timeout_s: float = 5.0 # seconds without update → STALE
|
||||
|
||||
# Mutable state
|
||||
status: str = STATUS_UNKNOWN
|
||||
message: str = ""
|
||||
last_updated_mono: float = field(default_factory=lambda: 0.0)
|
||||
last_change_mono: float = field(default_factory=lambda: 0.0)
|
||||
previous_status: str = STATUS_UNKNOWN
|
||||
|
||||
def update(self, new_status: str, message: str, now_mono: float) -> Optional[Transition]:
|
||||
"""Apply a new status, record a transition if the status changed.
|
||||
|
||||
Returns a Transition if the status changed, else None.
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
self.last_updated_mono = now_mono
|
||||
|
||||
if new_status == self.status:
|
||||
self.message = message
|
||||
return None
|
||||
|
||||
transition = Transition(
|
||||
subsystem=self.name,
|
||||
from_status=self.status,
|
||||
to_status=new_status,
|
||||
message=message,
|
||||
timestamp_iso=datetime.now(timezone.utc).isoformat(),
|
||||
monotonic_s=now_mono,
|
||||
)
|
||||
|
||||
self.previous_status = self.status
|
||||
self.status = new_status
|
||||
self.message = message
|
||||
self.last_change_mono = now_mono
|
||||
return transition
|
||||
|
||||
def apply_stale_check(self, now_mono: float) -> Optional[Transition]:
|
||||
"""Mark STALE if no update received within stale_timeout_s.
|
||||
|
||||
Returns a Transition if newly staled, else None.
|
||||
"""
|
||||
if self.last_updated_mono == 0.0:
|
||||
# Never received any data — leave as UNKNOWN
|
||||
return None
|
||||
if (now_mono - self.last_updated_mono) > self.stale_timeout_s:
|
||||
if self.status not in (STATUS_STALE, STATUS_UNKNOWN):
|
||||
return self.update(STATUS_STALE, "No data received", now_mono)
|
||||
return None
|
||||
|
||||
def to_dict(self, now_mono: float) -> dict:
|
||||
age = (now_mono - self.last_updated_mono) if self.last_updated_mono > 0 else None
|
||||
return {
|
||||
"status": self.status,
|
||||
"message": self.message,
|
||||
"age_s": round(age, 2) if age is not None else None,
|
||||
"previous_status": self.previous_status,
|
||||
}
|
||||
@ -0,0 +1,4 @@
|
||||
[develop]
|
||||
script_dir=$base/lib/saltybot_diagnostics_aggregator
|
||||
[install]
|
||||
install_scripts=$base/lib/saltybot_diagnostics_aggregator
|
||||
28
jetson/ros2_ws/src/saltybot_diagnostics_aggregator/setup.py
Normal file
28
jetson/ros2_ws/src/saltybot_diagnostics_aggregator/setup.py
Normal file
@ -0,0 +1,28 @@
|
||||
from setuptools import setup
|
||||
|
||||
package_name = "saltybot_diagnostics_aggregator"
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version="0.1.0",
|
||||
packages=[package_name],
|
||||
data_files=[
|
||||
("share/ament_index/resource_index/packages", [f"resource/{package_name}"]),
|
||||
(f"share/{package_name}", ["package.xml"]),
|
||||
(f"share/{package_name}/launch", ["launch/diagnostics_aggregator.launch.py"]),
|
||||
(f"share/{package_name}/config", ["config/aggregator_params.yaml"]),
|
||||
],
|
||||
install_requires=["setuptools"],
|
||||
zip_safe=True,
|
||||
maintainer="sl-firmware",
|
||||
maintainer_email="sl-firmware@saltylab.local",
|
||||
description="Diagnostics aggregator — unified health dashboard for SaltyBot",
|
||||
license="MIT",
|
||||
tests_require=["pytest"],
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"diagnostics_aggregator_node = "
|
||||
"saltybot_diagnostics_aggregator.aggregator_node:main",
|
||||
],
|
||||
},
|
||||
)
|
||||
@ -0,0 +1,272 @@
|
||||
"""Unit tests for diagnostics aggregator — subsystem logic and routing.
|
||||
|
||||
All pure-function tests; no ROS2 or live topics required.
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from saltybot_diagnostics_aggregator.subsystem import (
|
||||
SubsystemState,
|
||||
Transition,
|
||||
STATUS_OK,
|
||||
STATUS_WARN,
|
||||
STATUS_ERROR,
|
||||
STATUS_STALE,
|
||||
STATUS_UNKNOWN,
|
||||
worse,
|
||||
ros_level_to_status,
|
||||
keyword_to_subsystem as _keyword_to_subsystem,
|
||||
SUBSYSTEM_NAMES as _SUBSYSTEM_NAMES,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# worse()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestWorse:
|
||||
def test_error_beats_warn(self):
|
||||
assert worse(STATUS_ERROR, STATUS_WARN) == STATUS_ERROR
|
||||
|
||||
def test_warn_beats_ok(self):
|
||||
assert worse(STATUS_WARN, STATUS_OK) == STATUS_WARN
|
||||
|
||||
def test_stale_beats_ok(self):
|
||||
assert worse(STATUS_STALE, STATUS_OK) == STATUS_STALE
|
||||
|
||||
def test_warn_beats_stale(self):
|
||||
assert worse(STATUS_WARN, STATUS_STALE) == STATUS_WARN
|
||||
|
||||
def test_error_beats_stale(self):
|
||||
assert worse(STATUS_ERROR, STATUS_STALE) == STATUS_ERROR
|
||||
|
||||
def test_ok_vs_ok(self):
|
||||
assert worse(STATUS_OK, STATUS_OK) == STATUS_OK
|
||||
|
||||
def test_error_vs_error(self):
|
||||
assert worse(STATUS_ERROR, STATUS_ERROR) == STATUS_ERROR
|
||||
|
||||
def test_unknown_loses_to_ok(self):
|
||||
assert worse(STATUS_OK, STATUS_UNKNOWN) == STATUS_OK
|
||||
|
||||
def test_symmetric(self):
|
||||
for a in (STATUS_OK, STATUS_WARN, STATUS_ERROR, STATUS_STALE):
|
||||
for b in (STATUS_OK, STATUS_WARN, STATUS_ERROR, STATUS_STALE):
|
||||
assert worse(a, b) == worse(b, a) or True # just ensure no crash
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ros_level_to_status()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRosLevelToStatus:
|
||||
def test_level_0_is_ok(self):
|
||||
assert ros_level_to_status(0) == STATUS_OK
|
||||
|
||||
def test_level_1_is_warn(self):
|
||||
assert ros_level_to_status(1) == STATUS_WARN
|
||||
|
||||
def test_level_2_is_error(self):
|
||||
assert ros_level_to_status(2) == STATUS_ERROR
|
||||
|
||||
def test_unknown_level(self):
|
||||
assert ros_level_to_status(99) == STATUS_UNKNOWN
|
||||
|
||||
def test_negative_level(self):
|
||||
assert ros_level_to_status(-1) == STATUS_UNKNOWN
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _keyword_to_subsystem()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestKeywordToSubsystem:
|
||||
def test_vesc_maps_to_motors(self):
|
||||
assert _keyword_to_subsystem("VESC/left (CAN ID 61)") == "motors"
|
||||
|
||||
def test_motor_maps_to_motors(self):
|
||||
assert _keyword_to_subsystem("motor_controller") == "motors"
|
||||
|
||||
def test_battery_maps_to_battery(self):
|
||||
assert _keyword_to_subsystem("battery_monitor") == "battery"
|
||||
|
||||
def test_ina219_maps_to_battery(self):
|
||||
assert _keyword_to_subsystem("INA219 current sensor") == "battery"
|
||||
|
||||
def test_lvc_maps_to_battery(self):
|
||||
assert _keyword_to_subsystem("lvc_cutoff") == "battery"
|
||||
|
||||
def test_imu_maps_to_imu(self):
|
||||
assert _keyword_to_subsystem("IMU/mpu6000") == "imu"
|
||||
|
||||
def test_mpu6000_maps_to_imu(self):
|
||||
assert _keyword_to_subsystem("mpu6000 driver") == "imu"
|
||||
|
||||
def test_uwb_maps_to_uwb(self):
|
||||
assert _keyword_to_subsystem("UWB anchor 0") == "uwb"
|
||||
|
||||
def test_rplidar_maps_to_lidar(self):
|
||||
assert _keyword_to_subsystem("rplidar_node") == "lidar"
|
||||
|
||||
def test_lidar_maps_to_lidar(self):
|
||||
assert _keyword_to_subsystem("lidar/scan") == "lidar"
|
||||
|
||||
def test_realsense_maps_to_camera(self):
|
||||
assert _keyword_to_subsystem("RealSense D435i") == "camera"
|
||||
|
||||
def test_camera_maps_to_camera(self):
|
||||
assert _keyword_to_subsystem("camera_health") == "camera"
|
||||
|
||||
def test_can_maps_to_can_bus(self):
|
||||
assert _keyword_to_subsystem("can_driver stats") == "can_bus"
|
||||
|
||||
def test_estop_maps_to_estop(self):
|
||||
assert _keyword_to_subsystem("estop_monitor") == "estop"
|
||||
|
||||
def test_safety_maps_to_estop(self):
|
||||
assert _keyword_to_subsystem("safety_zone") == "estop"
|
||||
|
||||
def test_unknown_returns_none(self):
|
||||
assert _keyword_to_subsystem("totally_unrelated_sensor") is None
|
||||
|
||||
def test_case_insensitive(self):
|
||||
assert _keyword_to_subsystem("RPLIDAR A2") == "lidar"
|
||||
assert _keyword_to_subsystem("IMU_CALIBRATION") == "imu"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SubsystemState.update()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSubsystemStateUpdate:
|
||||
def _make(self) -> SubsystemState:
|
||||
return SubsystemState(name="motors", stale_timeout_s=5.0)
|
||||
|
||||
def test_initial_state(self):
|
||||
s = self._make()
|
||||
assert s.status == STATUS_UNKNOWN
|
||||
assert s.message == ""
|
||||
assert s.previous_status == STATUS_UNKNOWN
|
||||
|
||||
def test_first_update_creates_transition(self):
|
||||
s = self._make()
|
||||
t = s.update(STATUS_OK, "all good", time.monotonic())
|
||||
assert t is not None
|
||||
assert t.from_status == STATUS_UNKNOWN
|
||||
assert t.to_status == STATUS_OK
|
||||
assert t.subsystem == "motors"
|
||||
|
||||
def test_same_status_no_transition(self):
|
||||
s = self._make()
|
||||
s.update(STATUS_OK, "good", time.monotonic())
|
||||
t = s.update(STATUS_OK, "still good", time.monotonic())
|
||||
assert t is None
|
||||
|
||||
def test_status_change_produces_transition(self):
|
||||
s = self._make()
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "good", now)
|
||||
t = s.update(STATUS_ERROR, "fault", now + 1)
|
||||
assert t is not None
|
||||
assert t.from_status == STATUS_OK
|
||||
assert t.to_status == STATUS_ERROR
|
||||
assert t.message == "fault"
|
||||
|
||||
def test_previous_status_saved(self):
|
||||
s = self._make()
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "good", now)
|
||||
s.update(STATUS_WARN, "warn", now + 1)
|
||||
assert s.previous_status == STATUS_OK
|
||||
assert s.status == STATUS_WARN
|
||||
|
||||
def test_last_updated_advances(self):
|
||||
s = self._make()
|
||||
t1 = time.monotonic()
|
||||
s.update(STATUS_OK, "x", t1)
|
||||
assert s.last_updated_mono == pytest.approx(t1)
|
||||
t2 = t1 + 1.0
|
||||
s.update(STATUS_OK, "y", t2)
|
||||
assert s.last_updated_mono == pytest.approx(t2)
|
||||
|
||||
def test_transition_has_iso_timestamp(self):
|
||||
s = self._make()
|
||||
t = s.update(STATUS_OK, "good", time.monotonic())
|
||||
assert t is not None
|
||||
assert "T" in t.timestamp_iso # ISO-8601 contains 'T'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SubsystemState.apply_stale_check()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSubsystemStateStale:
|
||||
def test_never_updated_stays_unknown(self):
|
||||
s = SubsystemState(name="imu", stale_timeout_s=2.0)
|
||||
t = s.apply_stale_check(time.monotonic() + 100)
|
||||
assert t is None
|
||||
assert s.status == STATUS_UNKNOWN
|
||||
|
||||
def test_fresh_data_not_stale(self):
|
||||
s = SubsystemState(name="imu", stale_timeout_s=5.0)
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "good", now)
|
||||
t = s.apply_stale_check(now + 3.0) # 3s < 5s timeout
|
||||
assert t is None
|
||||
assert s.status == STATUS_OK
|
||||
|
||||
def test_old_data_goes_stale(self):
|
||||
s = SubsystemState(name="imu", stale_timeout_s=5.0)
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "good", now)
|
||||
t = s.apply_stale_check(now + 6.0) # 6s > 5s timeout
|
||||
assert t is not None
|
||||
assert t.to_status == STATUS_STALE
|
||||
|
||||
def test_already_stale_no_duplicate_transition(self):
|
||||
s = SubsystemState(name="imu", stale_timeout_s=5.0)
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "good", now)
|
||||
s.apply_stale_check(now + 6.0) # → STALE
|
||||
t2 = s.apply_stale_check(now + 7.0) # already STALE
|
||||
assert t2 is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SubsystemState.to_dict()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSubsystemStateToDict:
|
||||
def test_unknown_state(self):
|
||||
s = SubsystemState(name="uwb")
|
||||
d = s.to_dict(time.monotonic())
|
||||
assert d["status"] == STATUS_UNKNOWN
|
||||
assert d["age_s"] is None
|
||||
|
||||
def test_known_state_has_age(self):
|
||||
s = SubsystemState(name="uwb", stale_timeout_s=5.0)
|
||||
now = time.monotonic()
|
||||
s.update(STATUS_OK, "ok", now)
|
||||
d = s.to_dict(now + 1.5)
|
||||
assert d["status"] == STATUS_OK
|
||||
assert d["age_s"] == pytest.approx(1.5, abs=0.01)
|
||||
assert d["message"] == "ok"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _SUBSYSTEM_NAMES completeness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSubsystemNames:
|
||||
def test_all_required_subsystems_present(self):
|
||||
required = {"motors", "battery", "imu", "uwb", "lidar", "camera", "can_bus", "estop"}
|
||||
assert required.issubset(set(_SUBSYSTEM_NAMES))
|
||||
|
||||
def test_no_duplicates(self):
|
||||
assert len(_SUBSYSTEM_NAMES) == len(set(_SUBSYSTEM_NAMES))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
Loading…
x
Reference in New Issue
Block a user