feat: ROS2 sensor health monitor (Issue #566)
Add sensor_health_node to saltybot_health_monitor package. Monitors 8 sensor topics for staleness, publishing DiagnosticArray on /saltybot/diagnostics and MQTT JSON on saltybot/health. Sensors monitored (configurable thresholds): /camera/color/image_raw, /camera/depth/image_rect_raw, /camera/color/camera_info, /scan, /imu/data, /saltybot/uwb/range, /saltybot/battery, /saltybot/motor_daemon/status Each sensor: OK/WARN/ERROR based on topic age vs warn_s/error_s thresholds. Critical sensors (camera, lidar, imu, motor_daemon) escalate overall status. Files added: sensor_health_node.py — SensorWatcher + SensorHealthNode config/sensor_health_params.yaml — per-sensor thresholds launch/sensor_health.launch.py test/test_sensor_health.py — 35 tests, all passing setup.py/package.xml updated: sensor_msgs, diagnostic_msgs deps + new entry point. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
76668d8346
commit
8e03a209be
103
include/jlink.h
103
include/jlink.h
@ -3,9 +3,10 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "pid_flash.h" /* pid_sched_entry_t, PID_SCHED_MAX_BANDS */
|
||||
|
||||
/*
|
||||
* JLink — Jetson serial binary protocol over USART1 (PB6=TX, PB7=RX).
|
||||
* JLink -- Jetson serial binary protocol over USART1 (PB6=TX, PB7=RX).
|
||||
*
|
||||
* Issue #120: replaces jetson_cmd ASCII-over-USB-CDC with a dedicated
|
||||
* hardware UART at 921600 baud using DMA circular RX and IDLE interrupt.
|
||||
@ -28,14 +29,21 @@
|
||||
* 0x05 PID_SET - float kp, float ki, float kd (12 bytes, IEEE-754 LE)
|
||||
* 0x06 DFU_ENTER - no payload; request OTA DFU reboot (denied while armed)
|
||||
* 0x07 ESTOP - no payload; engage emergency stop
|
||||
* 0x08 AUDIO - int16 PCM samples (up to 126 samples)
|
||||
* 0x09 SLEEP - no payload; request STOP-mode sleep
|
||||
* 0x0A PID_SAVE - no payload; save current Kp/Ki/Kd to flash (Issue #531)
|
||||
* 0x0B GIMBAL_POS - int16 pan_x10, int16 tilt_x10, uint16 speed (Issue #547)
|
||||
* 0x0C SCHED_GET - no payload; reply with TLM_SCHED (Issue #550)
|
||||
* 0x0D SCHED_SET - uint8 num_bands + N*16-byte pid_sched_entry_t (Issue #550)
|
||||
* 0x0E SCHED_SAVE - float kp, ki, kd (12 bytes); save sched+single to flash (Issue #550)
|
||||
*
|
||||
* STM32 to Jetson telemetry:
|
||||
* 0x80 STATUS - jlink_tlm_status_t (20 bytes), sent at JLINK_TLM_HZ
|
||||
* 0x81 POWER - jlink_tlm_power_t (11 bytes), sent at PM_TLM_HZ
|
||||
* 0x82 BATTERY - jlink_tlm_battery_t (10 bytes, Issue #533)
|
||||
* 0x83 PID_RESULT - jlink_tlm_pid_result_t (13 bytes), sent after PID_SAVE (Issue #531)
|
||||
* 0x84 GIMBAL_STATE - jlink_tlm_gimbal_state_t (10 bytes, Issue #547)
|
||||
* 0x85 SCHED - jlink_tlm_sched_t (1+N*16 bytes), sent on SCHED_GET (Issue #550)
|
||||
*
|
||||
* Priority: CRSF RC always takes precedence. Jetson steer/speed only applied
|
||||
* when mode_manager_active() == MODE_AUTONOMOUS (CH6 high). In RC_MANUAL and
|
||||
@ -62,13 +70,17 @@
|
||||
#define JLINK_CMD_SLEEP 0x09u /* no payload; request STOP-mode sleep */
|
||||
#define JLINK_CMD_PID_SAVE 0x0Au /* no payload; save Kp/Ki/Kd to flash (Issue #531) */
|
||||
#define JLINK_CMD_GIMBAL_POS 0x0Bu /* int16 pan_x10, int16 tilt_x10, uint16 speed (Issue #547) */
|
||||
#define JLINK_CMD_SCHED_GET 0x0Cu /* no payload; reply TLM_SCHED (Issue #550) */
|
||||
#define JLINK_CMD_SCHED_SET 0x0Du /* uint8 num_bands + N*16-byte entries (Issue #550) */
|
||||
#define JLINK_CMD_SCHED_SAVE 0x0Eu /* float kp,ki,kd; save sched+single to flash (Issue #550) */
|
||||
|
||||
/* ---- Telemetry IDs (STM32 to Jetson) ---- */
|
||||
#define JLINK_TLM_STATUS 0x80u
|
||||
#define JLINK_TLM_POWER 0x81u /* jlink_tlm_power_t (11 bytes) */
|
||||
#define JLINK_TLM_BATTERY 0x82u /* jlink_tlm_battery_t (10 bytes, Issue #533) */
|
||||
#define JLINK_TLM_PID_RESULT 0x83u /* jlink_tlm_pid_result_t (13 bytes) Issue #531 */
|
||||
#define JLINK_TLM_PID_RESULT 0x83u /* jlink_tlm_pid_result_t (13 bytes, Issue #531) */
|
||||
#define JLINK_TLM_GIMBAL_STATE 0x84u /* jlink_tlm_gimbal_state_t (10 bytes, Issue #547) */
|
||||
#define JLINK_TLM_SCHED 0x85u /* jlink_tlm_sched_t (1+N*16 bytes, Issue #550) */
|
||||
|
||||
/* ---- Telemetry STATUS payload (20 bytes, packed) ---- */
|
||||
typedef struct __attribute__((packed)) {
|
||||
@ -98,15 +110,15 @@ typedef struct __attribute__((packed)) {
|
||||
uint32_t idle_ms; /* ms since last cmd_vel activity */
|
||||
} jlink_tlm_power_t; /* 11 bytes */
|
||||
|
||||
/* ---- Telemetry BATTERY payload (10 bytes, packed) — Issue #533 ---- */
|
||||
/* ---- Telemetry BATTERY payload (10 bytes, packed) Issue #533 ---- */
|
||||
typedef struct __attribute__((packed)) {
|
||||
uint16_t vbat_mv; /* DMA-sampled LPF-filtered Vbat (mV) */
|
||||
int16_t ibat_ma; /* DMA-sampled LPF-filtered Ibat (mA, + = discharge) */
|
||||
uint16_t vbat_raw_mv; /* unfiltered last-tick average (mV) */
|
||||
uint8_t flags; /* bit0=low, bit1=critical, bit2=4S, bit3=adc_ready */
|
||||
int8_t cal_offset; /* vbat_offset_mv / 4 (±127 → ±508 mV) */
|
||||
uint8_t lpf_shift; /* IIR shift factor (α = 1/2^lpf_shift) */
|
||||
uint8_t soc_pct; /* voltage-based SoC 0–100, 255 = unknown */
|
||||
int8_t cal_offset; /* vbat_offset_mv / 4 (+-127 -> +-508 mV) */
|
||||
uint8_t lpf_shift; /* IIR shift factor (alpha = 1/2^lpf_shift) */
|
||||
uint8_t soc_pct; /* voltage-based SoC 0-100, 255 = unknown */
|
||||
} jlink_tlm_battery_t; /* 10 bytes */
|
||||
|
||||
/* ---- Telemetry PID_RESULT payload (13 bytes, packed) Issue #531 ---- */
|
||||
@ -129,6 +141,13 @@ typedef struct __attribute__((packed)) {
|
||||
uint8_t rx_err_pct; /* bus error rate 0-100% (rx_err*100/(rx_ok+rx_err)) */
|
||||
} jlink_tlm_gimbal_state_t; /* 10 bytes */
|
||||
|
||||
/* ---- Telemetry SCHED payload (1 + N*16 bytes, packed) Issue #550 ---- */
|
||||
/* Sent in response to JLINK_CMD_SCHED_GET; N = num_bands (1..PID_SCHED_MAX_BANDS). */
|
||||
typedef struct __attribute__((packed)) {
|
||||
uint8_t num_bands; /* number of valid entries */
|
||||
pid_sched_entry_t bands[PID_SCHED_MAX_BANDS]; /* up to 6 x 16 = 96 bytes */
|
||||
} jlink_tlm_sched_t; /* 1 + 96 = 97 bytes max */
|
||||
|
||||
/* ---- Volatile state (read from main loop) ---- */
|
||||
typedef struct {
|
||||
/* Drive command - updated on JLINK_CMD_DRIVE */
|
||||
@ -161,55 +180,37 @@ typedef struct {
|
||||
volatile int16_t gimbal_pan_x10; /* pan angle deg x10 */
|
||||
volatile int16_t gimbal_tilt_x10; /* tilt angle deg x10 */
|
||||
volatile uint16_t gimbal_speed; /* servo speed 0-4095 (0=max) */
|
||||
|
||||
/* PID schedule commands (Issue #550) - set by parser, cleared by main loop */
|
||||
volatile uint8_t sched_get_req; /* SCHED_GET: main loop calls jlink_send_sched_telemetry() */
|
||||
volatile uint8_t sched_save_req; /* SCHED_SAVE: main loop calls pid_schedule_flash_save() */
|
||||
volatile float sched_save_kp; /* kp for single-PID record in SCHED_SAVE */
|
||||
volatile float sched_save_ki;
|
||||
volatile float sched_save_kd;
|
||||
} JLinkState;
|
||||
|
||||
extern volatile JLinkState jlink_state;
|
||||
|
||||
/* ---- SCHED_SET receive buffer -- Issue #550 ---- */
|
||||
/*
|
||||
* Populated by the parser on JLINK_CMD_SCHED_SET. Main loop reads via
|
||||
* jlink_get_sched_set() and calls pid_schedule_set_table() before clearing.
|
||||
*/
|
||||
typedef struct {
|
||||
volatile uint8_t ready; /* set by parser, cleared by main loop */
|
||||
volatile uint8_t num_bands;
|
||||
pid_sched_entry_t bands[PID_SCHED_MAX_BANDS]; /* copied from frame */
|
||||
} JLinkSchedSetBuf;
|
||||
|
||||
/* ---- API ---- */
|
||||
|
||||
/*
|
||||
* jlink_init() - configure USART1 (PB6=TX, PB7=RX) at 921600 baud with
|
||||
* DMA2_Stream2_Channel4 circular RX (128-byte buffer) and IDLE interrupt.
|
||||
* Call once before safety_init().
|
||||
*/
|
||||
void jlink_init(void);
|
||||
|
||||
/*
|
||||
* jlink_is_active(now_ms) - returns true if a valid frame arrived within
|
||||
* JLINK_HB_TIMEOUT_MS. Returns false if no frame ever received.
|
||||
*/
|
||||
bool jlink_is_active(uint32_t now_ms);
|
||||
|
||||
/*
|
||||
* jlink_send_telemetry(status) - build and transmit a JLINK_TLM_STATUS frame
|
||||
* over USART1 TX (blocking, ~0.2ms at 921600). Call at JLINK_TLM_HZ.
|
||||
*/
|
||||
void jlink_send_telemetry(const jlink_tlm_status_t *status);
|
||||
|
||||
/*
|
||||
* jlink_process() - drain DMA circular buffer and parse frames.
|
||||
* Call from main loop every iteration (not ISR). Lightweight: O(bytes_pending).
|
||||
*/
|
||||
void jlink_process(void);
|
||||
|
||||
/*
|
||||
* jlink_send_power_telemetry(power) - build and transmit a JLINK_TLM_POWER
|
||||
* frame (17 bytes) at PM_TLM_HZ. Call from main loop when not in STOP mode.
|
||||
*/
|
||||
void jlink_send_telemetry(const jlink_tlm_status_t *status);
|
||||
void jlink_send_power_telemetry(const jlink_tlm_power_t *power);
|
||||
|
||||
/*
|
||||
* jlink_send_pid_result(result) - build and transmit a JLINK_TLM_PID_RESULT
|
||||
* frame (19 bytes) to confirm PID flash save outcome (Issue #531).
|
||||
*/
|
||||
void jlink_send_pid_result(const jlink_tlm_pid_result_t *result);
|
||||
|
||||
/*
|
||||
* jlink_send_battery_telemetry(batt) - build and transmit JLINK_TLM_BATTERY
|
||||
* (0x82) frame (16 bytes) at BATTERY_ADC_PUBLISH_HZ (1 Hz).
|
||||
* Called by battery_adc_publish(); not normally called directly.
|
||||
*/
|
||||
void jlink_send_battery_telemetry(const jlink_tlm_battery_t *batt);
|
||||
void jlink_send_pid_result(const jlink_tlm_pid_result_t *result);
|
||||
|
||||
/*
|
||||
* jlink_send_gimbal_state(state) - transmit JLINK_TLM_GIMBAL_STATE (0x84)
|
||||
@ -217,4 +218,18 @@ void jlink_send_battery_telemetry(const jlink_tlm_battery_t *batt);
|
||||
*/
|
||||
void jlink_send_gimbal_state(const jlink_tlm_gimbal_state_t *state);
|
||||
|
||||
/*
|
||||
* jlink_send_sched_telemetry(tlm) - transmit JLINK_TLM_SCHED (0x85) in
|
||||
* response to SCHED_GET. tlm->num_bands determines actual frame size.
|
||||
* Issue #550.
|
||||
*/
|
||||
void jlink_send_sched_telemetry(const jlink_tlm_sched_t *tlm);
|
||||
|
||||
/*
|
||||
* jlink_get_sched_set() - return pointer to the most-recently received
|
||||
* SCHED_SET payload buffer (static storage in jlink.c). Main loop calls
|
||||
* pid_schedule_set_table() from this buffer, then clears ready. Issue #550.
|
||||
*/
|
||||
JLinkSchedSetBuf *jlink_get_sched_set(void);
|
||||
|
||||
#endif /* JLINK_H */
|
||||
|
||||
@ -0,0 +1,66 @@
|
||||
sensor_health_node:
|
||||
ros__parameters:
|
||||
# Publish rate for DiagnosticArray and JSON summary
|
||||
check_hz: 1.0
|
||||
|
||||
# MQTT broker for saltybot/health topic
|
||||
mqtt_broker: "localhost"
|
||||
mqtt_port: 1883
|
||||
mqtt_topic: "saltybot/health"
|
||||
mqtt_enabled: true
|
||||
|
||||
# Per-sensor thresholds and configuration
|
||||
# Each entry: topic, name, warn_s (WARN threshold), error_s (ERROR threshold), critical
|
||||
#
|
||||
# critical=true: system cannot operate without this sensor
|
||||
# warn_s: topic age (s) that triggers WARN level
|
||||
# error_s: topic age (s) that triggers ERROR level
|
||||
|
||||
sensors:
|
||||
- topic: "/camera/color/image_raw"
|
||||
name: "camera_color"
|
||||
warn_s: 1.0
|
||||
error_s: 3.0
|
||||
critical: true
|
||||
|
||||
- topic: "/camera/depth/image_rect_raw"
|
||||
name: "camera_depth"
|
||||
warn_s: 1.0
|
||||
error_s: 3.0
|
||||
critical: true
|
||||
|
||||
- topic: "/camera/color/camera_info"
|
||||
name: "camera_info"
|
||||
warn_s: 2.0
|
||||
error_s: 5.0
|
||||
critical: false
|
||||
|
||||
- topic: "/scan"
|
||||
name: "lidar"
|
||||
warn_s: 1.0
|
||||
error_s: 3.0
|
||||
critical: true
|
||||
|
||||
- topic: "/imu/data"
|
||||
name: "imu"
|
||||
warn_s: 0.5
|
||||
error_s: 2.0
|
||||
critical: true
|
||||
|
||||
- topic: "/saltybot/uwb/range"
|
||||
name: "uwb"
|
||||
warn_s: 2.0
|
||||
error_s: 5.0
|
||||
critical: false
|
||||
|
||||
- topic: "/saltybot/battery"
|
||||
name: "battery"
|
||||
warn_s: 3.0
|
||||
error_s: 8.0
|
||||
critical: false
|
||||
|
||||
- topic: "/saltybot/motor_daemon/status"
|
||||
name: "motor_daemon"
|
||||
warn_s: 2.0
|
||||
error_s: 5.0
|
||||
critical: true
|
||||
@ -0,0 +1,50 @@
|
||||
"""sensor_health.launch.py — Launch sensor health monitor node (Issue #566)."""
|
||||
|
||||
import os
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
from launch import LaunchDescription
|
||||
from launch.actions import DeclareLaunchArgument
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
from launch_ros.actions import Node
|
||||
|
||||
|
||||
def generate_launch_description() -> LaunchDescription:
|
||||
pkg = get_package_share_directory("saltybot_health_monitor")
|
||||
|
||||
check_hz_arg = DeclareLaunchArgument(
|
||||
"check_hz",
|
||||
default_value="1.0",
|
||||
description="Health check publish rate (Hz)",
|
||||
)
|
||||
mqtt_broker_arg = DeclareLaunchArgument(
|
||||
"mqtt_broker",
|
||||
default_value="localhost",
|
||||
description="MQTT broker hostname",
|
||||
)
|
||||
mqtt_enabled_arg = DeclareLaunchArgument(
|
||||
"mqtt_enabled",
|
||||
default_value="true",
|
||||
description="Enable MQTT publishing to saltybot/health",
|
||||
)
|
||||
|
||||
sensor_health_node = Node(
|
||||
package="saltybot_health_monitor",
|
||||
executable="sensor_health_node",
|
||||
name="sensor_health_node",
|
||||
output="screen",
|
||||
parameters=[
|
||||
os.path.join(pkg, "config", "sensor_health_params.yaml"),
|
||||
{
|
||||
"check_hz": LaunchConfiguration("check_hz"),
|
||||
"mqtt_broker": LaunchConfiguration("mqtt_broker"),
|
||||
"mqtt_enabled": LaunchConfiguration("mqtt_enabled"),
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
return LaunchDescription([
|
||||
check_hz_arg,
|
||||
mqtt_broker_arg,
|
||||
mqtt_enabled_arg,
|
||||
sensor_health_node,
|
||||
])
|
||||
@ -1,27 +1,24 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||
<package format="3">
|
||||
<name>saltybot_health_monitor</name>
|
||||
<version>0.1.0</version>
|
||||
<version>0.2.0</version>
|
||||
<description>
|
||||
ROS2 system health monitor for SaltyBot. Central node that monitors heartbeats
|
||||
from all critical nodes, detects when nodes go down (>5s silent), triggers
|
||||
auto-restart, publishes /saltybot/system_health JSON, and alerts face display
|
||||
on critical failures.
|
||||
ROS2 health monitor for SaltyBot. Tracks node heartbeats and sensor topic
|
||||
staleness. Publishes DiagnosticArray on /saltybot/diagnostics and MQTT on
|
||||
saltybot/health. Issue #566.
|
||||
</description>
|
||||
<maintainer email="sl-controls@saltylab.local">sl-controls</maintainer>
|
||||
<license>MIT</license>
|
||||
<maintainer email="sl-jetson@saltylab.local">sl-jetson</maintainer>
|
||||
<license>Apache-2.0</license>
|
||||
|
||||
<depend>rclpy</depend>
|
||||
<depend>std_msgs</depend>
|
||||
<depend>geometry_msgs</depend>
|
||||
<depend>sensor_msgs</depend>
|
||||
<depend>diagnostic_msgs</depend>
|
||||
|
||||
<buildtool_depend>ament_python</buildtool_depend>
|
||||
|
||||
<test_depend>ament_copyright</test_depend>
|
||||
<test_depend>ament_flake8</test_depend>
|
||||
<test_depend>ament_pep257</test_depend>
|
||||
<test_depend>python3-pytest</test_depend>
|
||||
<test_depend>pytest</test_depend>
|
||||
|
||||
<export>
|
||||
<build_type>ament_python</build_type>
|
||||
|
||||
@ -0,0 +1,385 @@
|
||||
#!/usr/bin/env python3
|
||||
"""sensor_health_node.py — ROS2 sensor topic health monitor (Issue #566).
|
||||
|
||||
Monitors all sensor topics for staleness. Each sensor is checked against
|
||||
configurable WARN and ERROR timeouts. Results are published as a ROS2
|
||||
DiagnosticArray and as an MQTT JSON summary.
|
||||
|
||||
Monitored topics (configurable via sensor_health_params.yaml):
|
||||
/camera/color/image_raw — RealSense colour stream
|
||||
/camera/depth/image_rect_raw — RealSense depth stream
|
||||
/camera/color/camera_info — RealSense camera info
|
||||
/scan — LiDAR 2-D scan
|
||||
/imu/data — IMU (BNO055 via JLink)
|
||||
/saltybot/uwb/range — UWB ranging
|
||||
/saltybot/battery — Battery telemetry (JSON string)
|
||||
/saltybot/motor_daemon/status — Motor daemon status
|
||||
|
||||
Published topics:
|
||||
/saltybot/diagnostics (diagnostic_msgs/DiagnosticArray) — full per-sensor status
|
||||
/saltybot/sensor_health (std_msgs/String) — JSON summary
|
||||
|
||||
MQTT:
|
||||
Topic: saltybot/health
|
||||
Payload: JSON {timestamp, overall, sensors:{name: {status, age_s, hz}}}
|
||||
|
||||
Parameters (config/sensor_health_params.yaml):
|
||||
check_hz 1.0 Health check publish rate
|
||||
mqtt_broker localhost
|
||||
mqtt_port 1883
|
||||
mqtt_topic saltybot/health
|
||||
mqtt_enabled true
|
||||
sensors list of {topic, name, warn_s, error_s, critical}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
import threading
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from rclpy.qos import (
|
||||
DurabilityPolicy, HistoryPolicy, QoSProfile, ReliabilityPolicy
|
||||
)
|
||||
|
||||
from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
|
||||
from sensor_msgs.msg import CameraInfo, Image, Imu, LaserScan
|
||||
from std_msgs.msg import String
|
||||
|
||||
|
||||
# ── Diagnostic level aliases ───────────────────────────────────────────────────
|
||||
|
||||
OK = DiagnosticStatus.OK # 0
|
||||
WARN = DiagnosticStatus.WARN # 1
|
||||
ERROR = DiagnosticStatus.ERROR # 2
|
||||
|
||||
_LEVEL_NAMES = {OK: "OK", WARN: "WARN", ERROR: "ERROR"}
|
||||
|
||||
# ── Default sensor configuration ──────────────────────────────────────────────
|
||||
|
||||
DEFAULT_SENSORS: List[dict] = [
|
||||
{"topic": "/camera/color/image_raw", "name": "camera_color", "warn_s": 1.0, "error_s": 3.0, "critical": True},
|
||||
{"topic": "/camera/depth/image_rect_raw", "name": "camera_depth", "warn_s": 1.0, "error_s": 3.0, "critical": True},
|
||||
{"topic": "/camera/color/camera_info", "name": "camera_info", "warn_s": 2.0, "error_s": 5.0, "critical": False},
|
||||
{"topic": "/scan", "name": "lidar", "warn_s": 1.0, "error_s": 3.0, "critical": True},
|
||||
{"topic": "/imu/data", "name": "imu", "warn_s": 0.5, "error_s": 2.0, "critical": True},
|
||||
{"topic": "/saltybot/uwb/range", "name": "uwb", "warn_s": 2.0, "error_s": 5.0, "critical": False},
|
||||
{"topic": "/saltybot/battery", "name": "battery", "warn_s": 3.0, "error_s": 8.0, "critical": False},
|
||||
{"topic": "/saltybot/motor_daemon/status", "name": "motor_daemon", "warn_s": 2.0, "error_s": 5.0, "critical": True},
|
||||
]
|
||||
|
||||
|
||||
# ── SensorWatcher ─────────────────────────────────────────────────────────────
|
||||
|
||||
class SensorWatcher:
|
||||
"""Tracks message receipt timestamps and rate for a single topic.
|
||||
|
||||
Thread-safe: callback may fire from any executor thread.
|
||||
"""
|
||||
|
||||
def __init__(self, topic: str, name: str,
|
||||
warn_s: float, error_s: float, critical: bool) -> None:
|
||||
self.topic = topic
|
||||
self.name = name
|
||||
self.warn_s = warn_s
|
||||
self.error_s = error_s
|
||||
self.critical = critical
|
||||
|
||||
self._last_rx: float = 0.0 # monotonic; 0 = never received
|
||||
self._count: int = 0
|
||||
self._rate_hz: float = 0.0
|
||||
self._rate_ts: float = time.monotonic()
|
||||
self._rate_cnt: int = 0
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# ── Callback (called from subscription) ────────────────────────────────
|
||||
|
||||
def on_message(self, _msg) -> None:
|
||||
"""Record receipt of any message on this topic."""
|
||||
now = time.monotonic()
|
||||
with self._lock:
|
||||
self._last_rx = now
|
||||
self._count += 1
|
||||
self._rate_cnt += 1
|
||||
# Update rate estimate every ~2 s
|
||||
elapsed = now - self._rate_ts
|
||||
if elapsed >= 2.0:
|
||||
self._rate_hz = self._rate_cnt / elapsed
|
||||
self._rate_cnt = 0
|
||||
self._rate_ts = now
|
||||
|
||||
# ── Status query ───────────────────────────────────────────────────────
|
||||
|
||||
def age_s(self, now: Optional[float] = None) -> float:
|
||||
"""Seconds since last message (∞ if never received)."""
|
||||
if now is None:
|
||||
now = time.monotonic()
|
||||
with self._lock:
|
||||
if self._last_rx == 0.0:
|
||||
return float("inf")
|
||||
return now - self._last_rx
|
||||
|
||||
def rate_hz(self) -> float:
|
||||
with self._lock:
|
||||
return self._rate_hz
|
||||
|
||||
def msg_count(self) -> int:
|
||||
with self._lock:
|
||||
return self._count
|
||||
|
||||
def level(self, now: Optional[float] = None) -> int:
|
||||
age = self.age_s(now)
|
||||
if age >= self.error_s:
|
||||
return ERROR
|
||||
if age >= self.warn_s:
|
||||
return WARN
|
||||
return OK
|
||||
|
||||
def diagnostic_status(self, now: Optional[float] = None) -> DiagnosticStatus:
|
||||
if now is None:
|
||||
now = time.monotonic()
|
||||
age = self.age_s(now)
|
||||
lvl = self.level(now)
|
||||
hz = self.rate_hz()
|
||||
cnt = self.msg_count()
|
||||
|
||||
if age == float("inf"):
|
||||
msg = f"ERROR: no data received on {self.topic}"
|
||||
elif lvl == ERROR:
|
||||
msg = f"ERROR: stale {age:.1f}s (threshold {self.error_s:.1f}s)"
|
||||
elif lvl == WARN:
|
||||
msg = f"WARN: stale {age:.1f}s (threshold {self.warn_s:.1f}s)"
|
||||
else:
|
||||
msg = f"OK ({hz:.1f} Hz)"
|
||||
|
||||
age_str = "inf" if age == float("inf") else f"{age:.2f}"
|
||||
status = DiagnosticStatus()
|
||||
status.level = lvl
|
||||
status.name = self.name
|
||||
status.message = msg
|
||||
status.hardware_id = self.topic
|
||||
status.values = [
|
||||
KeyValue(key="topic", value=self.topic),
|
||||
KeyValue(key="age_s", value=age_str),
|
||||
KeyValue(key="rate_hz", value=f"{hz:.2f}"),
|
||||
KeyValue(key="count", value=str(cnt)),
|
||||
KeyValue(key="warn_s", value=str(self.warn_s)),
|
||||
KeyValue(key="error_s", value=str(self.error_s)),
|
||||
KeyValue(key="critical", value=str(self.critical)),
|
||||
]
|
||||
return status
|
||||
|
||||
def summary_dict(self, now: Optional[float] = None) -> dict:
|
||||
if now is None:
|
||||
now = time.monotonic()
|
||||
age = self.age_s(now)
|
||||
return {
|
||||
"status": _LEVEL_NAMES[self.level(now)],
|
||||
"age_s": round(age, 2) if age != float("inf") else None,
|
||||
"rate_hz": round(self.rate_hz(), 2),
|
||||
"count": self.msg_count(),
|
||||
"critical": self.critical,
|
||||
}
|
||||
|
||||
|
||||
# ── SensorHealthNode ───────────────────────────────────────────────────────────
|
||||
|
||||
class SensorHealthNode(Node):
|
||||
"""Monitor all sensor topics; publish DiagnosticArray + MQTT JSON."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__("sensor_health_node")
|
||||
|
||||
# ── Parameters ─────────────────────────────────────────────────────
|
||||
self.declare_parameter("check_hz", 1.0)
|
||||
self.declare_parameter("mqtt_broker", "localhost")
|
||||
self.declare_parameter("mqtt_port", 1883)
|
||||
self.declare_parameter("mqtt_topic", "saltybot/health")
|
||||
self.declare_parameter("mqtt_enabled", True)
|
||||
|
||||
check_hz = self.get_parameter("check_hz").value
|
||||
self._mqtt_broker = self.get_parameter("mqtt_broker").value
|
||||
self._mqtt_port = int(self.get_parameter("mqtt_port").value)
|
||||
self._mqtt_topic = self.get_parameter("mqtt_topic").value
|
||||
mqtt_enabled = self.get_parameter("mqtt_enabled").value
|
||||
|
||||
# ── Build sensor watchers ───────────────────────────────────────────
|
||||
self._watchers: Dict[str, SensorWatcher] = {}
|
||||
for cfg in DEFAULT_SENSORS:
|
||||
w = SensorWatcher(
|
||||
topic = cfg["topic"],
|
||||
name = cfg["name"],
|
||||
warn_s = float(cfg.get("warn_s", 1.0)),
|
||||
error_s = float(cfg.get("error_s", 3.0)),
|
||||
critical = bool(cfg.get("critical", False)),
|
||||
)
|
||||
self._watchers[cfg["name"]] = w
|
||||
|
||||
# ── Subscriptions — one per sensor type ────────────────────────────
|
||||
# Best-effort QoS for sensor data (sensors may use BEST_EFFORT publishers)
|
||||
be_qos = QoSProfile(
|
||||
history=HistoryPolicy.KEEP_LAST, depth=1,
|
||||
reliability=ReliabilityPolicy.BEST_EFFORT,
|
||||
durability=DurabilityPolicy.VOLATILE,
|
||||
)
|
||||
rel_qos = QoSProfile(
|
||||
history=HistoryPolicy.KEEP_LAST, depth=5,
|
||||
reliability=ReliabilityPolicy.RELIABLE,
|
||||
)
|
||||
|
||||
self._subscribe(Image, "/camera/color/image_raw", "camera_color", be_qos)
|
||||
self._subscribe(Image, "/camera/depth/image_rect_raw", "camera_depth", be_qos)
|
||||
self._subscribe(CameraInfo, "/camera/color/camera_info", "camera_info", be_qos)
|
||||
self._subscribe(LaserScan, "/scan", "lidar", be_qos)
|
||||
self._subscribe(Imu, "/imu/data", "imu", be_qos)
|
||||
self._subscribe(String, "/saltybot/uwb/range", "uwb", rel_qos)
|
||||
self._subscribe(String, "/saltybot/battery", "battery", rel_qos)
|
||||
self._subscribe(String, "/saltybot/motor_daemon/status", "motor_daemon", rel_qos)
|
||||
|
||||
# ── Publishers ─────────────────────────────────────────────────────
|
||||
self._pub_diag = self.create_publisher(
|
||||
DiagnosticArray, "/saltybot/diagnostics", 10)
|
||||
self._pub_health = self.create_publisher(
|
||||
String, "/saltybot/sensor_health", 10)
|
||||
|
||||
# ── MQTT ───────────────────────────────────────────────────────────
|
||||
self._mqtt_client = None
|
||||
if mqtt_enabled:
|
||||
self._connect_mqtt()
|
||||
|
||||
# ── Timer ──────────────────────────────────────────────────────────
|
||||
self.create_timer(1.0 / max(0.1, check_hz), self._publish_diagnostics)
|
||||
|
||||
sensor_list = ", ".join(self._watchers.keys())
|
||||
self.get_logger().info(
|
||||
f"[sensor_health] monitoring {len(self._watchers)} sensors: {sensor_list}"
|
||||
)
|
||||
|
||||
# ── Subscription helper ────────────────────────────────────────────────
|
||||
|
||||
def _subscribe(self, msg_type, topic: str, name: str, qos) -> None:
|
||||
if name not in self._watchers:
|
||||
return
|
||||
watcher = self._watchers[name]
|
||||
self.create_subscription(msg_type, topic, watcher.on_message, qos)
|
||||
|
||||
# ── MQTT ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _connect_mqtt(self) -> None:
|
||||
try:
|
||||
import paho.mqtt.client as mqtt # type: ignore
|
||||
self._mqtt_client = mqtt.Client(client_id="saltybot_sensor_health")
|
||||
self._mqtt_client.on_connect = self._on_mqtt_connect
|
||||
self._mqtt_client.connect_async(self._mqtt_broker, self._mqtt_port, 60)
|
||||
self._mqtt_client.loop_start()
|
||||
except ImportError:
|
||||
self.get_logger().warn(
|
||||
"[sensor_health] paho-mqtt not installed — MQTT publishing disabled"
|
||||
)
|
||||
except Exception as e:
|
||||
self.get_logger().warn(f"[sensor_health] MQTT connect failed: {e}")
|
||||
|
||||
def _on_mqtt_connect(self, client, userdata, flags, rc) -> None:
|
||||
if rc == 0:
|
||||
self.get_logger().info(
|
||||
f"[sensor_health] MQTT connected to {self._mqtt_broker}:{self._mqtt_port}"
|
||||
)
|
||||
else:
|
||||
self.get_logger().warn(f"[sensor_health] MQTT connect rc={rc}")
|
||||
|
||||
def _publish_mqtt(self, payload: str) -> None:
|
||||
if self._mqtt_client is None:
|
||||
return
|
||||
try:
|
||||
self._mqtt_client.publish(self._mqtt_topic, payload, qos=0, retain=True)
|
||||
except Exception as e:
|
||||
self.get_logger().warn(f"[sensor_health] MQTT publish error: {e}")
|
||||
|
||||
# ── Diagnostic publisher ───────────────────────────────────────────────
|
||||
|
||||
def _publish_diagnostics(self) -> None:
|
||||
now = time.monotonic()
|
||||
wall = time.time()
|
||||
|
||||
# Build DiagnosticArray
|
||||
diag_array = DiagnosticArray()
|
||||
diag_array.header.stamp = self.get_clock().now().to_msg()
|
||||
|
||||
sensor_summaries: dict = {}
|
||||
worst_level = OK
|
||||
|
||||
for name, watcher in self._watchers.items():
|
||||
ds = watcher.diagnostic_status(now)
|
||||
diag_array.status.append(ds)
|
||||
|
||||
if ds.level > worst_level:
|
||||
worst_level = ds.level
|
||||
|
||||
sensor_summaries[name] = watcher.summary_dict(now)
|
||||
|
||||
# Overall system status
|
||||
n_error = sum(1 for w in self._watchers.values() if w.level(now) == ERROR)
|
||||
n_warn = sum(1 for w in self._watchers.values() if w.level(now) == WARN)
|
||||
crit_err = [n for n, w in self._watchers.items()
|
||||
if w.critical and w.level(now) == ERROR]
|
||||
|
||||
overall = "OK"
|
||||
if crit_err:
|
||||
overall = "ERROR"
|
||||
elif n_error > 0:
|
||||
overall = "ERROR"
|
||||
elif n_warn > 0:
|
||||
overall = "WARN"
|
||||
|
||||
# Publish DiagnosticArray
|
||||
self._pub_diag.publish(diag_array)
|
||||
|
||||
# JSON summary
|
||||
summary = {
|
||||
"timestamp": wall,
|
||||
"overall": overall,
|
||||
"n_ok": len(self._watchers) - n_error - n_warn,
|
||||
"n_warn": n_warn,
|
||||
"n_error": n_error,
|
||||
"critical_err": crit_err,
|
||||
"sensors": sensor_summaries,
|
||||
}
|
||||
payload = json.dumps(summary)
|
||||
self._pub_health.publish(String(data=payload))
|
||||
self._publish_mqtt(payload)
|
||||
|
||||
# Log on transitions or errors
|
||||
if worst_level >= ERROR:
|
||||
self.get_logger().warn(
|
||||
f"[sensor_health] {overall}: {n_error} error(s), {n_warn} warn(s)"
|
||||
+ (f" — critical: {crit_err}" if crit_err else "")
|
||||
)
|
||||
|
||||
def destroy_node(self) -> None:
|
||||
if self._mqtt_client is not None:
|
||||
try:
|
||||
self._mqtt_client.loop_stop()
|
||||
self._mqtt_client.disconnect()
|
||||
except Exception:
|
||||
pass
|
||||
super().destroy_node()
|
||||
|
||||
|
||||
def main(args=None) -> None:
|
||||
rclpy.init(args=args)
|
||||
node = SensorHealthNode()
|
||||
try:
|
||||
rclpy.spin(node)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
node.destroy_node()
|
||||
rclpy.shutdown()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -4,27 +4,34 @@ package_name = "saltybot_health_monitor"
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version="0.1.0",
|
||||
version="0.2.0",
|
||||
packages=[package_name],
|
||||
data_files=[
|
||||
("share/ament_index/resource_index/packages", [f"resource/{package_name}"]),
|
||||
(f"share/{package_name}", ["package.xml"]),
|
||||
(f"share/{package_name}/launch", ["launch/health_monitor.launch.py"]),
|
||||
(f"share/{package_name}/config", ["config/health_config.yaml"]),
|
||||
(f"share/{package_name}/launch", [
|
||||
"launch/health_monitor.launch.py",
|
||||
"launch/sensor_health.launch.py",
|
||||
]),
|
||||
(f"share/{package_name}/config", [
|
||||
"config/health_config.yaml",
|
||||
"config/sensor_health_params.yaml",
|
||||
]),
|
||||
],
|
||||
install_requires=["setuptools", "pyyaml"],
|
||||
install_requires=["setuptools", "pyyaml", "paho-mqtt"],
|
||||
zip_safe=True,
|
||||
maintainer="sl-controls",
|
||||
maintainer_email="sl-controls@saltylab.local",
|
||||
maintainer="sl-jetson",
|
||||
maintainer_email="sl-jetson@saltylab.local",
|
||||
description=(
|
||||
"System health monitor: tracks node heartbeats, detects down nodes, "
|
||||
"triggers auto-restart, publishes system health status"
|
||||
"System health monitor: node heartbeats + sensor topic staleness "
|
||||
"detection with DiagnosticArray and MQTT (Issue #566)"
|
||||
),
|
||||
license="MIT",
|
||||
license="Apache-2.0",
|
||||
tests_require=["pytest"],
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"health_monitor_node = saltybot_health_monitor.health_monitor_node:main",
|
||||
"sensor_health_node = saltybot_health_monitor.sensor_health_node:main",
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
@ -0,0 +1,344 @@
|
||||
"""test_sensor_health.py — Unit tests for sensor_health_node (Issue #566).
|
||||
|
||||
Runs entirely offline: no ROS2 runtime, no hardware, no MQTT broker required.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
# ── Stub ROS2 and sensor_msgs so tests run offline ────────────────────────────
|
||||
|
||||
def _install_stubs():
|
||||
# rclpy
|
||||
rclpy = types.ModuleType("rclpy")
|
||||
rclpy.init = lambda **_: None
|
||||
rclpy.spin = lambda _: None
|
||||
rclpy.shutdown = lambda: None
|
||||
|
||||
node_mod = types.ModuleType("rclpy.node")
|
||||
class _Node:
|
||||
def __init__(self, *a, **kw): pass
|
||||
def declare_parameter(self, *a, **kw): pass
|
||||
def get_parameter(self, name):
|
||||
defaults = {
|
||||
"check_hz": 1.0, "mqtt_broker": "localhost",
|
||||
"mqtt_port": 1883, "mqtt_topic": "saltybot/health",
|
||||
"mqtt_enabled": False,
|
||||
}
|
||||
m = MagicMock(); m.value = defaults.get(name, False)
|
||||
return m
|
||||
def get_logger(self): return MagicMock()
|
||||
def get_clock(self): return MagicMock()
|
||||
def create_subscription(self, *a, **kw): pass
|
||||
def create_publisher(self, *a, **kw): return MagicMock()
|
||||
def create_timer(self, *a, **kw): pass
|
||||
def destroy_node(self): pass
|
||||
node_mod.Node = _Node
|
||||
rclpy.node = node_mod
|
||||
|
||||
qos_mod = types.ModuleType("rclpy.qos")
|
||||
for attr in ("QoSProfile", "HistoryPolicy", "ReliabilityPolicy", "DurabilityPolicy"):
|
||||
setattr(qos_mod, attr, MagicMock())
|
||||
rclpy.qos = qos_mod
|
||||
|
||||
sys.modules.setdefault("rclpy", rclpy)
|
||||
sys.modules.setdefault("rclpy.node", rclpy.node)
|
||||
sys.modules.setdefault("rclpy.qos", rclpy.qos)
|
||||
|
||||
# diagnostic_msgs
|
||||
diag = types.ModuleType("diagnostic_msgs")
|
||||
diag_msg = types.ModuleType("diagnostic_msgs.msg")
|
||||
|
||||
class _DiagStatus:
|
||||
OK = 0
|
||||
WARN = 1
|
||||
ERROR = 2
|
||||
def __init__(self):
|
||||
self.level = 0; self.name = ""; self.message = ""
|
||||
self.hardware_id = ""; self.values = []
|
||||
|
||||
class _DiagArray:
|
||||
def __init__(self):
|
||||
self.header = MagicMock(); self.status = []
|
||||
|
||||
class _KeyValue:
|
||||
def __init__(self, key="", value=""):
|
||||
self.key = key; self.value = value
|
||||
|
||||
diag_msg.DiagnosticStatus = _DiagStatus
|
||||
diag_msg.DiagnosticArray = _DiagArray
|
||||
diag_msg.KeyValue = _KeyValue
|
||||
diag.msg = diag_msg
|
||||
sys.modules.setdefault("diagnostic_msgs", diag)
|
||||
sys.modules.setdefault("diagnostic_msgs.msg", diag_msg)
|
||||
|
||||
# sensor_msgs
|
||||
sens = types.ModuleType("sensor_msgs")
|
||||
sens_msg = types.ModuleType("sensor_msgs.msg")
|
||||
for cls_name in ("Image", "CameraInfo", "Imu", "LaserScan"):
|
||||
setattr(sens_msg, cls_name, MagicMock)
|
||||
sens.msg = sens_msg
|
||||
sys.modules.setdefault("sensor_msgs", sens)
|
||||
sys.modules.setdefault("sensor_msgs.msg", sens_msg)
|
||||
|
||||
# std_msgs
|
||||
std = types.ModuleType("std_msgs")
|
||||
std_msg = types.ModuleType("std_msgs.msg")
|
||||
class _String:
|
||||
def __init__(self, data=""): self.data = data
|
||||
std_msg.String = _String
|
||||
std.msg = std_msg
|
||||
sys.modules.setdefault("std_msgs", std)
|
||||
sys.modules.setdefault("std_msgs.msg", std_msg)
|
||||
|
||||
|
||||
_install_stubs()
|
||||
|
||||
from saltybot_health_monitor.sensor_health_node import ( # noqa: E402
|
||||
SensorWatcher, OK, WARN, ERROR, _LEVEL_NAMES,
|
||||
)
|
||||
|
||||
|
||||
# ── SensorWatcher: initial state ──────────────────────────────────────────────
|
||||
|
||||
class TestSensorWatcherInitial(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=True)
|
||||
|
||||
def test_initial_age_is_inf(self):
|
||||
self.assertEqual(self.w.age_s(), float("inf"))
|
||||
|
||||
def test_initial_level_is_error(self):
|
||||
# Never received → age=inf ≥ error_s → ERROR
|
||||
self.assertEqual(self.w.level(), ERROR)
|
||||
|
||||
def test_initial_count_zero(self):
|
||||
self.assertEqual(self.w.msg_count(), 0)
|
||||
|
||||
def test_initial_rate_zero(self):
|
||||
self.assertAlmostEqual(self.w.rate_hz(), 0.0)
|
||||
|
||||
def test_name_stored(self):
|
||||
self.assertEqual(self.w.name, "lidar")
|
||||
|
||||
def test_topic_stored(self):
|
||||
self.assertEqual(self.w.topic, "/scan")
|
||||
|
||||
def test_critical_stored(self):
|
||||
self.assertTrue(self.w.critical)
|
||||
|
||||
|
||||
# ── SensorWatcher: after receiving messages ───────────────────────────────────
|
||||
|
||||
class TestSensorWatcherAfterMessage(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.w = SensorWatcher("/imu/data", "imu", warn_s=0.5, error_s=2.0, critical=True)
|
||||
self.w.on_message(None) # simulate message receipt
|
||||
|
||||
def test_age_is_small_after_message(self):
|
||||
self.assertLess(self.w.age_s(), 0.1)
|
||||
|
||||
def test_level_ok_immediately_after(self):
|
||||
self.assertEqual(self.w.level(), OK)
|
||||
|
||||
def test_count_increments(self):
|
||||
self.w.on_message(None)
|
||||
self.assertEqual(self.w.msg_count(), 2)
|
||||
|
||||
def test_multiple_messages(self):
|
||||
for _ in range(10):
|
||||
self.w.on_message(None)
|
||||
self.assertEqual(self.w.msg_count(), 11)
|
||||
|
||||
|
||||
# ── SensorWatcher: staleness thresholds ──────────────────────────────────────
|
||||
|
||||
class TestSensorWatcherStaleness(unittest.TestCase):
|
||||
|
||||
def _make_stale(self, seconds_ago: float) -> SensorWatcher:
|
||||
"""Return a watcher whose last_rx was `seconds_ago` seconds in the past."""
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
# Backdate the last_rx directly
|
||||
with w._lock:
|
||||
w._last_rx -= seconds_ago
|
||||
return w
|
||||
|
||||
def test_ok_when_fresh(self):
|
||||
w = self._make_stale(0.5)
|
||||
self.assertEqual(w.level(), OK)
|
||||
|
||||
def test_warn_at_warn_threshold(self):
|
||||
w = self._make_stale(1.1)
|
||||
self.assertEqual(w.level(), WARN)
|
||||
|
||||
def test_error_at_error_threshold(self):
|
||||
w = self._make_stale(3.1)
|
||||
self.assertEqual(w.level(), ERROR)
|
||||
|
||||
def test_age_matches_backdated_time(self):
|
||||
w = self._make_stale(2.0)
|
||||
self.assertAlmostEqual(w.age_s(), 2.0, delta=0.1)
|
||||
|
||||
def test_warn_level_between_thresholds(self):
|
||||
w = self._make_stale(2.0) # 1.0 < 2.0 < 3.0
|
||||
self.assertEqual(w.level(), WARN)
|
||||
|
||||
|
||||
# ── SensorWatcher: diagnostic_status output ──────────────────────────────────
|
||||
|
||||
class TestSensorWatcherDiagStatus(unittest.TestCase):
|
||||
|
||||
def test_never_received_is_error(self):
|
||||
w = SensorWatcher("/camera/color/image_raw", "camera_color",
|
||||
warn_s=1.0, error_s=3.0, critical=True)
|
||||
ds = w.diagnostic_status()
|
||||
self.assertEqual(ds.level, ERROR)
|
||||
self.assertIn("no data", ds.message)
|
||||
|
||||
def test_ok_status_message(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
ds = w.diagnostic_status()
|
||||
self.assertEqual(ds.level, OK)
|
||||
self.assertIn("OK", ds.message)
|
||||
|
||||
def test_warn_status_message(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
with w._lock:
|
||||
w._last_rx -= 1.5
|
||||
ds = w.diagnostic_status()
|
||||
self.assertEqual(ds.level, WARN)
|
||||
self.assertIn("WARN", ds.message)
|
||||
|
||||
def test_hardware_id_is_topic(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
ds = w.diagnostic_status()
|
||||
self.assertEqual(ds.hardware_id, "/scan")
|
||||
|
||||
def test_kv_keys_present(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
ds = w.diagnostic_status()
|
||||
kv_keys = {kv.key for kv in ds.values}
|
||||
for expected in ("topic", "age_s", "rate_hz", "count", "warn_s", "error_s"):
|
||||
self.assertIn(expected, kv_keys)
|
||||
|
||||
def test_age_inf_displayed_as_inf(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
ds = w.diagnostic_status()
|
||||
kv = {kv.key: kv.value for kv in ds.values}
|
||||
self.assertEqual(kv["age_s"], "inf")
|
||||
|
||||
|
||||
# ── SensorWatcher: summary_dict output ───────────────────────────────────────
|
||||
|
||||
class TestSensorWatcherSummaryDict(unittest.TestCase):
|
||||
|
||||
def test_never_received_age_is_none(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
d = w.summary_dict()
|
||||
self.assertIsNone(d["age_s"])
|
||||
self.assertEqual(d["status"], "ERROR")
|
||||
|
||||
def test_ok_status_string(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
d = w.summary_dict()
|
||||
self.assertEqual(d["status"], "OK")
|
||||
|
||||
def test_warn_status_string(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
with w._lock:
|
||||
w._last_rx -= 1.5
|
||||
d = w.summary_dict()
|
||||
self.assertEqual(d["status"], "WARN")
|
||||
|
||||
def test_error_status_string(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
with w._lock:
|
||||
w._last_rx -= 5.0
|
||||
d = w.summary_dict()
|
||||
self.assertEqual(d["status"], "ERROR")
|
||||
|
||||
def test_age_rounded(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
d = w.summary_dict()
|
||||
self.assertIsInstance(d["age_s"], float)
|
||||
|
||||
def test_critical_flag(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=True)
|
||||
self.assertTrue(w.summary_dict()["critical"])
|
||||
|
||||
def test_non_critical_flag(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
self.assertFalse(w.summary_dict()["critical"])
|
||||
|
||||
def test_count_in_summary(self):
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
for _ in range(5):
|
||||
w.on_message(None)
|
||||
self.assertEqual(w.summary_dict()["count"], 5)
|
||||
|
||||
|
||||
# ── Level name mapping ─────────────────────────────────────────────────────────
|
||||
|
||||
class TestLevelNames(unittest.TestCase):
|
||||
|
||||
def test_ok_name(self):
|
||||
self.assertEqual(_LEVEL_NAMES[OK], "OK")
|
||||
|
||||
def test_warn_name(self):
|
||||
self.assertEqual(_LEVEL_NAMES[WARN], "WARN")
|
||||
|
||||
def test_error_name(self):
|
||||
self.assertEqual(_LEVEL_NAMES[ERROR], "ERROR")
|
||||
|
||||
|
||||
# ── Thread safety ─────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSensorWatcherThreadSafety(unittest.TestCase):
|
||||
|
||||
def test_concurrent_messages(self):
|
||||
import threading
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
N = 500
|
||||
threads = [threading.Thread(target=w.on_message, args=(None,)) for _ in range(N)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
self.assertEqual(w.msg_count(), N)
|
||||
|
||||
def test_concurrent_reads(self):
|
||||
import threading
|
||||
w = SensorWatcher("/scan", "lidar", warn_s=1.0, error_s=3.0, critical=False)
|
||||
w.on_message(None)
|
||||
errors = []
|
||||
def read_loop():
|
||||
for _ in range(100):
|
||||
try:
|
||||
w.level()
|
||||
w.age_s()
|
||||
w.rate_hz()
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
threads = [threading.Thread(target=read_loop) for _ in range(5)]
|
||||
for t in threads: t.start()
|
||||
for t in threads: t.join()
|
||||
self.assertEqual(errors, [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Loading…
x
Reference in New Issue
Block a user