feat: STM32 watchdog and fault recovery handler (Issue #565)
- New src/fault_handler.c + include/fault_handler.h:
- HardFault/MemManage/BusFault/UsageFault naked ISR stubs with
Cortex-M7 stack-frame capture (R0-R3, LR, PC, xPSR, CFSR, HFSR,
MMFAR, BFAR, SP) and NVIC_SystemReset()
- .noinit SRAM capture ring survives soft reset; persisted to flash
sector 7 (0x08060000, 8x64-byte slots) on subsequent boot
- MPU Region 0 stack guard (32 B at __stack_end, no-access) ->
MemManage fault detected as FAULT_STACK_OVF
- Brownout detect via RCC_CSR_BORRSTF on boot -> FAULT_BROWNOUT
- Watchdog reset detection delegates to existing watchdog.c
- LED blink codes on LED2 (PC14, active-low) for 10 s post-recovery:
HARDFAULT=3, WATCHDOG=2, BROWNOUT=1, STACK_OVF=4 fast blinks
- fault_led_tick(), fault_log_read(), fault_log_get_count(),
fault_get_last_type(), fault_log_clear(), FAULT_ASSERT() macro
- jlink.h: add JLINK_CMD_FAULT_LOG_GET (0x0F), JLINK_TLM_FAULT_LOG
(0x86), jlink_tlm_fault_log_t (20 bytes), fault_log_req in JLinkState,
jlink_send_fault_log() declaration
- jlink.c: dispatch JLINK_CMD_FAULT_LOG_GET; implement
jlink_send_fault_log() (26-byte CRC16-XModem framed response)
- main.c: call fault_handler_init() first in main(); send fault log
TLM on boot if prior fault recorded; fault_led_tick() in main loop;
handle fault_log_req flag to respond to Jetson queries
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
35440b7463
commit
13dd30c44c
140
include/fault_handler.h
Normal file
140
include/fault_handler.h
Normal file
@ -0,0 +1,140 @@
|
||||
#ifndef FAULT_HANDLER_H
|
||||
#define FAULT_HANDLER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* fault_handler.h — STM32F7 fault detection and recovery (Issue #565)
|
||||
*
|
||||
* Features:
|
||||
* - HardFault / BusFault / UsageFault / MemManage vector hooks with full
|
||||
* Cortex-M7 register dump (R0-R3, LR, PC, xPSR, CFSR, HFSR, MMFAR, BFAR)
|
||||
* - .noinit SRAM ring: fault frame captured and magic-tagged, survives
|
||||
* NVIC_SystemReset(); persisted to flash on the subsequent boot
|
||||
* - MPU Region 0 stack-guard (32 bytes at __stack_end, no-access) → MemManage
|
||||
* fault detected as FAULT_STACK_OVF
|
||||
* - Brownout detect via RCC_CSR_BORRSTF on boot → FAULT_BROWNOUT
|
||||
* - Persistent fault log: last 8 entries × 64 bytes in flash sector 7
|
||||
* at 0x08060000 (below the PID store at 0x0807FFC0)
|
||||
* - JLINK_TLM_FAULT_LOG (0x86): 24-byte summary sent via JLink on boot
|
||||
* and on JLINK_CMD_FAULT_LOG_GET (0x0F) request
|
||||
* - LED blink codes on LED2 (PC14, active-low) for 10 s after recovery:
|
||||
* HARDFAULT = 3 fast blinks (100 ms)
|
||||
* WATCHDOG = 2 slow blinks (300 ms)
|
||||
* BROWNOUT = 1 long blink (500 ms)
|
||||
* STACK_OVF = 4 fast blinks (100 ms)
|
||||
* BUS_FAULT = alternating 3+1
|
||||
* USAGE_FAULT = 2 fast blinks
|
||||
* - Auto-recovery: fault → .noinit capture → NVIC_SystemReset()
|
||||
* On next boot fault_handler_init() re-runs safely: persists, prints, blinks
|
||||
*
|
||||
* Flash layout within sector 7 (0x08060000, 128 KB):
|
||||
* Slot 0-7: 0x08060000 – 0x080601FF (8 × 64 bytes = 512 bytes fault log)
|
||||
* PID store: 0x0807FFC0 – 0x0807FFFF (64 bytes, managed by pid_flash.c)
|
||||
*/
|
||||
|
||||
/* ---- Fault types ---- */
|
||||
/*
 * Fault classification stored in fault_log_entry_t.fault_type and used as
 * the index into the LED blink pattern table. The numeric values are written
 * to flash and sent over JLink telemetry, so they must not be renumbered.
 *
 * NOTE: the enumerator FAULT_ASSERT shares its spelling with the
 * function-like macro FAULT_ASSERT(cond); the macro only expands when the
 * name is followed by '(', so the bare enumerator remains usable.
 */
typedef enum {
    FAULT_NONE        = 0,  /* no fault recorded                     */
    FAULT_HARDFAULT   = 1,  /* HardFault escalation                  */
    FAULT_WATCHDOG    = 2,  /* IWDG timeout reset                    */
    FAULT_BROWNOUT    = 3,  /* brown-out reset (BOR)                 */
    FAULT_STACK_OVF   = 4,  /* MemManage hit on the MPU stack guard  */
    FAULT_BUS_FAULT   = 5,  /* BusFault                              */
    FAULT_USAGE_FAULT = 6,  /* UsageFault                            */
    FAULT_MEM_FAULT   = 7,  /* MemManage fault outside the guard     */
    FAULT_ASSERT      = 8,  /* software assertion                    */
} FaultType;
|
||||
|
||||
/* ---- Flash fault log constants ---- */
#define FAULT_LOG_MAX_ENTRIES 8u
#define FAULT_LOG_MAGIC       0xFADE5A01u
#define FAULT_LOG_BASE_ADDR   0x08060000UL  /* start of flash sector 7 */
#define FAULT_LOG_ENTRY_SIZE  64u           /* bytes per entry (flash slot stride) */

/*
 * Flash fault log entry — exactly FAULT_LOG_ENTRY_SIZE (64) bytes, packed.
 *
 * The same layout is used for the .noinit SRAM capture and for each flash
 * slot. The named fields occupy 56 bytes; _pad1 supplies the remaining 8 so
 * that the 16-word flash write covers the whole object and nothing beyond it.
 * (The pad was previously 4 bytes, which made the struct 60 bytes while the
 * writer still copied 64.) Field offsets are unchanged, so entries already
 * stored in flash remain readable.
 */
typedef struct __attribute__((packed)) {
    uint32_t magic;          /* FAULT_LOG_MAGIC when valid                */
    uint8_t  fault_type;     /* FaultType                                 */
    uint8_t  reset_count;    /* reset counter, truncated to 8 bits        */
    uint16_t _pad0;
    uint32_t timestamp_ms;   /* HAL_GetTick() at reset (0 if pre-tick)    */
    uint32_t pc;             /* faulting instruction address              */
    uint32_t lr;             /* link register at fault                    */
    uint32_t r0;
    uint32_t r1;
    uint32_t r2;
    uint32_t r3;
    uint32_t cfsr;           /* SCB->CFSR: combined fault status register */
    uint32_t hfsr;           /* SCB->HFSR: hard fault status register     */
    uint32_t mmfar;          /* SCB->MMFAR: memory manage fault address   */
    uint32_t bfar;           /* SCB->BFAR: bus fault address              */
    uint32_t sp;             /* stack pointer value at fault              */
    uint8_t  _pad1[8];       /* pad 56 bytes of fields up to 64 bytes     */
} fault_log_entry_t;         /* 64 bytes */

/* Compile-time guard: the build fails (negative array size) if the entry
 * ever stops matching the flash slot size. */
typedef char fault_log_entry_size_check_t[
    (sizeof(fault_log_entry_t) == FAULT_LOG_ENTRY_SIZE) ? 1 : -1];
|
||||
|
||||
/*
|
||||
* fault_handler_init() — call early in main(), before safety_init().
|
||||
* 1. Increments reset counter (.noinit SRAM).
|
||||
* 2. Checks .noinit SRAM for a pending fault capture; if found: persists to
|
||||
* flash, prints CDC register dump, starts LED blink code.
|
||||
* 3. Detects brownout via RCC_CSR_BORRSTF; logs if detected.
|
||||
* 4. Clears RCC reset flags.
|
||||
* 5. Installs MPU Region 0 stack guard.
|
||||
* 6. Enables MemManage, BusFault, UsageFault (SCB->SHCSR).
|
||||
*/
|
||||
void fault_handler_init(void);
|
||||
|
||||
/*
|
||||
* fault_mpu_guard_init() — configure MPU Region 0 as a 32-byte no-access
|
||||
* guard at __stack_end (bottom of main stack). Generates MemManage on
|
||||
* stack overflow. Called automatically by fault_handler_init().
|
||||
*/
|
||||
void fault_mpu_guard_init(void);
|
||||
|
||||
/*
|
||||
* fault_get_last_type() — most recent fault type from flash log, or FAULT_NONE.
|
||||
*/
|
||||
FaultType fault_get_last_type(void);
|
||||
|
||||
/*
|
||||
* fault_log_read(idx, out) — read flash slot 0..7.
|
||||
* Returns false if slot empty or idx out of range.
|
||||
*/
|
||||
bool fault_log_read(uint8_t idx, fault_log_entry_t *out);
|
||||
|
||||
/*
|
||||
* fault_log_get_count() — number of valid (occupied) log slots, 0-8.
|
||||
*/
|
||||
uint8_t fault_log_get_count(void);
|
||||
|
||||
/*
|
||||
* fault_log_clear() — erase fault log, restore PID if previously saved.
|
||||
* Erases all of sector 7 (~1 s stall). Do not call while armed.
|
||||
*/
|
||||
void fault_log_clear(void);
|
||||
|
||||
/*
|
||||
* fault_assert_impl(file, line) — software fault at runtime; captures return
|
||||
* address, writes SRAM magic, triggers NVIC_SystemReset().
|
||||
* Use via FAULT_ASSERT(cond) macro below.
|
||||
*/
|
||||
void fault_assert_impl(const char *file, int line);
|
||||
|
||||
/*
 * FAULT_ASSERT(cond) — evaluate cond exactly once; when it is false, call
 * fault_assert_impl() with the current file and line. Expands to a single
 * void expression, so it is safe as a statement in unbraced if/else bodies.
 */
#define FAULT_ASSERT(cond) \
    ((cond) ? (void)0 : fault_assert_impl(__FILE__, __LINE__))
|
||||
|
||||
/*
|
||||
* fault_led_tick(now_ms) — drive LED2 blink code from main loop (1 ms).
|
||||
* Self-disables after 10 s so it doesn't interfere with normal LED state.
|
||||
*/
|
||||
void fault_led_tick(uint32_t now_ms);
|
||||
|
||||
/* C-level fault dispatch (called from naked asm stubs; not for direct use) */
|
||||
void fault_hard_c(uint32_t *frame);
|
||||
void fault_mem_c(uint32_t *frame);
|
||||
void fault_bus_c(uint32_t *frame);
|
||||
void fault_usage_c(uint32_t *frame);
|
||||
|
||||
#endif /* FAULT_HANDLER_H */
|
||||
@ -22,20 +22,21 @@
|
||||
* ETX : frame end sentinel (0x03)
|
||||
*
|
||||
* Jetson to STM32 commands:
|
||||
* 0x01 HEARTBEAT - no payload; refreshes heartbeat timer
|
||||
* 0x02 DRIVE - int16 speed (-1000..+1000), int16 steer (-1000..+1000)
|
||||
* 0x03 ARM - no payload; request arm (same interlock as CDC 'A')
|
||||
* 0x04 DISARM - no payload; disarm immediately
|
||||
* 0x05 PID_SET - float kp, float ki, float kd (12 bytes, IEEE-754 LE)
|
||||
* 0x06 DFU_ENTER - no payload; request OTA DFU reboot (denied while armed)
|
||||
* 0x07 ESTOP - no payload; engage emergency stop
|
||||
* 0x08 AUDIO - int16 PCM samples (up to 126 samples)
|
||||
* 0x09 SLEEP - no payload; request STOP-mode sleep
|
||||
* 0x0A PID_SAVE - no payload; save current Kp/Ki/Kd to flash (Issue #531)
|
||||
* 0x0B GIMBAL_POS - int16 pan_x10, int16 tilt_x10, uint16 speed (Issue #547)
|
||||
* 0x0C SCHED_GET - no payload; reply with TLM_SCHED (Issue #550)
|
||||
* 0x0D SCHED_SET - uint8 num_bands + N*16-byte pid_sched_entry_t (Issue #550)
|
||||
* 0x0E SCHED_SAVE - float kp, ki, kd (12 bytes); save sched+single to flash (Issue #550)
|
||||
* 0x01 HEARTBEAT - no payload; refreshes heartbeat timer
|
||||
* 0x02 DRIVE - int16 speed (-1000..+1000), int16 steer (-1000..+1000)
|
||||
* 0x03 ARM - no payload; request arm (same interlock as CDC 'A')
|
||||
* 0x04 DISARM - no payload; disarm immediately
|
||||
* 0x05 PID_SET - float kp, float ki, float kd (12 bytes, IEEE-754 LE)
|
||||
* 0x06 DFU_ENTER - no payload; request OTA DFU reboot (denied while armed)
|
||||
* 0x07 ESTOP - no payload; engage emergency stop
|
||||
* 0x08 AUDIO - int16 PCM samples (up to 126 samples)
|
||||
* 0x09 SLEEP - no payload; request STOP-mode sleep
|
||||
* 0x0A PID_SAVE - no payload; save current Kp/Ki/Kd to flash (Issue #531)
|
||||
* 0x0B GIMBAL_POS - int16 pan_x10, int16 tilt_x10, uint16 speed (Issue #547)
|
||||
* 0x0C SCHED_GET - no payload; reply with TLM_SCHED (Issue #550)
|
||||
* 0x0D SCHED_SET - uint8 num_bands + N*16-byte pid_sched_entry_t (Issue #550)
|
||||
* 0x0E SCHED_SAVE - float kp, ki, kd (12 bytes); save sched+single to flash (Issue #550)
|
||||
* 0x0F FAULT_LOG_GET - no payload; reply with TLM_FAULT_LOG (Issue #565)
|
||||
*
|
||||
* STM32 to Jetson telemetry:
|
||||
* 0x80 STATUS - jlink_tlm_status_t (20 bytes), sent at JLINK_TLM_HZ
|
||||
@ -44,6 +45,7 @@
|
||||
* 0x83 PID_RESULT - jlink_tlm_pid_result_t (13 bytes), sent after PID_SAVE (Issue #531)
|
||||
* 0x84 GIMBAL_STATE - jlink_tlm_gimbal_state_t (10 bytes, Issue #547)
|
||||
* 0x85 SCHED - jlink_tlm_sched_t (1+N*16 bytes), sent on SCHED_GET (Issue #550)
|
||||
* 0x86 FAULT_LOG - jlink_tlm_fault_log_t (24 bytes), sent on boot + FAULT_LOG_GET (Issue #565)
|
||||
*
|
||||
* Priority: CRSF RC always takes precedence. Jetson steer/speed only applied
|
||||
* when mode_manager_active() == MODE_AUTONOMOUS (CH6 high). In RC_MANUAL and
|
||||
@ -73,6 +75,7 @@
|
||||
#define JLINK_CMD_SCHED_GET 0x0Cu /* no payload; reply TLM_SCHED (Issue #550) */
|
||||
#define JLINK_CMD_SCHED_SET 0x0Du /* uint8 num_bands + N*16-byte entries (Issue #550) */
|
||||
#define JLINK_CMD_SCHED_SAVE 0x0Eu /* float kp,ki,kd; save sched+single to flash (Issue #550) */
|
||||
#define JLINK_CMD_FAULT_LOG_GET 0x0Fu /* no payload; reply TLM_FAULT_LOG (Issue #565) */
|
||||
|
||||
/* ---- Telemetry IDs (STM32 to Jetson) ---- */
|
||||
#define JLINK_TLM_STATUS 0x80u
|
||||
@ -81,6 +84,7 @@
|
||||
#define JLINK_TLM_PID_RESULT 0x83u /* jlink_tlm_pid_result_t (13 bytes, Issue #531) */
|
||||
#define JLINK_TLM_GIMBAL_STATE 0x84u /* jlink_tlm_gimbal_state_t (10 bytes, Issue #547) */
|
||||
#define JLINK_TLM_SCHED 0x85u /* jlink_tlm_sched_t (1+N*16 bytes, Issue #550) */
|
||||
#define JLINK_TLM_FAULT_LOG 0x86u /* jlink_tlm_fault_log_t (24 bytes, Issue #565) */
|
||||
|
||||
/* ---- Telemetry STATUS payload (20 bytes, packed) ---- */
|
||||
typedef struct __attribute__((packed)) {
|
||||
@ -148,6 +152,20 @@ typedef struct __attribute__((packed)) {
|
||||
pid_sched_entry_t bands[PID_SCHED_MAX_BANDS]; /* up to 6 x 16 = 96 bytes */
|
||||
} jlink_tlm_sched_t; /* 1 + 96 = 97 bytes max */
|
||||
|
||||
/* ---- Telemetry FAULT_LOG payload (24 bytes, packed) Issue #565 ---- */
/* Sent on boot (if last fault != NONE) and in response to FAULT_LOG_GET.
 * Size: 4 single-byte fields + 5 32-bit words = 24 bytes. Earlier comments
 * said 20; any buffer sized from that figure is too small. */
typedef struct __attribute__((packed)) {
    uint8_t  fault_type;    /* FaultType of most recent entry              */
    uint8_t  entry_count;   /* number of valid entries in flash log (0-8)  */
    uint8_t  reset_count;   /* lifetime reset counter                      */
    uint8_t  _pad;
    uint32_t timestamp_ms;  /* HAL_GetTick() at fault                      */
    uint32_t pc;            /* faulting PC                                 */
    uint32_t lr;            /* link register at fault                      */
    uint32_t cfsr;          /* SCB->CFSR                                   */
    uint32_t hfsr;          /* SCB->HFSR                                   */
} jlink_tlm_fault_log_t;    /* 24 bytes */

/* Compile-time guard on the wire size (negative array size on mismatch). */
typedef char jlink_tlm_fault_log_size_check_t[
    (sizeof(jlink_tlm_fault_log_t) == 24u) ? 1 : -1];
|
||||
|
||||
/* ---- Volatile state (read from main loop) ---- */
|
||||
typedef struct {
|
||||
/* Drive command - updated on JLINK_CMD_DRIVE */
|
||||
@ -187,6 +205,9 @@ typedef struct {
|
||||
volatile float sched_save_kp; /* kp for single-PID record in SCHED_SAVE */
|
||||
volatile float sched_save_ki;
|
||||
volatile float sched_save_kd;
|
||||
|
||||
/* Fault log request (Issue #565) - set by JLINK_CMD_FAULT_LOG_GET, cleared by main loop */
|
||||
volatile uint8_t fault_log_req;
|
||||
} JLinkState;
|
||||
|
||||
extern volatile JLinkState jlink_state;
|
||||
@ -232,4 +253,11 @@ void jlink_send_sched_telemetry(const jlink_tlm_sched_t *tlm);
|
||||
*/
|
||||
JLinkSchedSetBuf *jlink_get_sched_set(void);
|
||||
|
||||
/*
|
||||
* jlink_send_fault_log(fl) - transmit JLINK_TLM_FAULT_LOG (0x86) frame
|
||||
* (30 bytes) on boot (if fault log non-empty) and in response to
|
||||
* FAULT_LOG_GET. Issue #565.
|
||||
*/
|
||||
void jlink_send_fault_log(const jlink_tlm_fault_log_t *fl);
|
||||
|
||||
#endif /* JLINK_H */
|
||||
|
||||
457
src/fault_handler.c
Normal file
457
src/fault_handler.c
Normal file
@ -0,0 +1,457 @@
|
||||
#include "fault_handler.h"
|
||||
#include "config.h"
|
||||
#include "pid_flash.h"
|
||||
#include "stm32f7xx_hal.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
|
||||
* fault_handler.c — STM32F7 fault detection and recovery (Issue #565)
|
||||
*
|
||||
* Recovery flow:
|
||||
* Fault ISR (naked) → _capture_and_reset() captures registers into .noinit
|
||||
* SRAM → sets FAULT_SRAM_MAGIC → NVIC_SystemReset().
|
||||
* On next boot: fault_handler_init() sees FAULT_SRAM_MAGIC → persists to
|
||||
* flash log → prints CDC dump → starts LED blink code.
|
||||
*
|
||||
* No flash writes occur inside fault ISRs. All flash operations happen safely
|
||||
* in the normal boot context, well before safety_init() / IWDG start.
|
||||
*/
|
||||
|
||||
/* ---- .noinit SRAM (preserved across NVIC_SystemReset) ---- */
|
||||
/*
|
||||
* GCC startup code only zeroes .bss and initialises .data. Variables in
|
||||
* .noinit are left untouched. The magic word guards against cold-boot garbage.
|
||||
*/
|
||||
#define FAULT_SRAM_MAGIC 0xFADE5A01u
|
||||
#define RESET_COUNT_MAGIC 0x1234ABCDu
|
||||
|
||||
static __attribute__((section(".noinit"))) volatile uint32_t s_fault_magic;
|
||||
static __attribute__((section(".noinit"))) volatile fault_log_entry_t s_fault_sram;
|
||||
static __attribute__((section(".noinit"))) volatile uint32_t s_reset_count_magic;
|
||||
static __attribute__((section(".noinit"))) volatile uint32_t s_reset_count;
|
||||
|
||||
/* ---- LED blink sequencer ---- */
|
||||
/*
|
||||
* Each pattern is a 16-bit bitmask; bit 15 = first step.
|
||||
* One step = period_ms milliseconds. LED2 (PC14) is active-low.
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t pattern; /* bitmask: 1 = LED on */
|
||||
uint8_t steps; /* number of valid bits to cycle */
|
||||
uint16_t period_ms; /* ms per step */
|
||||
} LedBlink;
|
||||
|
||||
/*
|
||||
* Pattern table indexed by FaultType (0..8).
|
||||
* NONE = silent
|
||||
* HARDFAULT = 1010 1010 1010 1010 (3 fast blinks, 100 ms)
|
||||
* WATCHDOG = 1111 0000 1111 0000 (2 slow pulses, 150 ms × 8 steps = 1.2 s)
|
||||
* BROWNOUT = 1111 1111 0000 0000 (1 long pulse, 100 ms × 16 = 1.6 s)
|
||||
* STACK_OVF = 1110 1110 1110 1110 (4 short bursts, 100 ms)
|
||||
* BUS_FAULT = 1010 1111 1100 0000 (3+1 pattern)
|
||||
* USAGE_FAULT = 1010 0000 0000 0000 (2 fast blinks)
|
||||
* MEM_FAULT = 1010 1010 1000 0000 (3 blinks, slower tail)
|
||||
* ASSERT = 1101 1011 0000 0000 (SOS-like)
|
||||
*/
|
||||
static const LedBlink s_blink_table[] = {
|
||||
/* FAULT_NONE */ { 0x0000u, 16, 100 },
|
||||
/* FAULT_HARDFAULT */ { 0xAAAAu, 16, 100 },
|
||||
/* FAULT_WATCHDOG */ { 0xF0F0u, 16, 150 },
|
||||
/* FAULT_BROWNOUT */ { 0xFF00u, 16, 100 },
|
||||
/* FAULT_STACK_OVF */ { 0xEEEEu, 16, 100 },
|
||||
/* FAULT_BUS_FAULT */ { 0xAFC0u, 16, 100 },
|
||||
/* FAULT_USAGE_FAULT */ { 0xA000u, 16, 100 },
|
||||
/* FAULT_MEM_FAULT */ { 0xAA80u, 16, 100 },
|
||||
/* FAULT_ASSERT */ { 0xDB00u, 16, 100 },
|
||||
};
|
||||
#define BLINK_TABLE_SIZE (sizeof(s_blink_table) / sizeof(s_blink_table[0]))
|
||||
|
||||
static FaultType s_led_fault = FAULT_NONE;
|
||||
static uint32_t s_led_start = 0;
|
||||
static uint32_t s_led_last = 0;
|
||||
static uint8_t s_led_step = 0;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Flash helpers */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
static uint32_t _slot_addr(uint8_t idx)
|
||||
{
|
||||
return FAULT_LOG_BASE_ADDR + (uint32_t)idx * FAULT_LOG_ENTRY_SIZE;
|
||||
}
|
||||
|
||||
/* A slot counts as empty when its first word (the magic field) still reads
 * as erased flash, 0xFFFFFFFF. */
static bool _slot_empty(uint8_t idx)
{
    const uint32_t first_word = *(const uint32_t *)_slot_addr(idx);

    return first_word == 0xFFFFFFFFu;
}
|
||||
|
||||
static int _free_slot(void)
|
||||
{
|
||||
for (uint8_t i = 0; i < FAULT_LOG_MAX_ENTRIES; i++) {
|
||||
if (_slot_empty(i)) return (int)i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static bool _erase_sector7(void)
|
||||
{
|
||||
FLASH_EraseInitTypeDef er = {0};
|
||||
er.TypeErase = FLASH_TYPEERASE_SECTORS;
|
||||
er.Sector = FLASH_SECTOR_7;
|
||||
er.NbSectors = 1;
|
||||
er.VoltageRange = FLASH_VOLTAGE_RANGE_3;
|
||||
uint32_t err = 0;
|
||||
return HAL_FLASHEx_Erase(&er, &err) == HAL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write fault entry to the next free flash slot.
|
||||
* When all 8 slots are occupied: erase sector 7, restore PID if valid,
|
||||
* then write entry at slot 0. Sector 7 erase stalls CPU ~1 s — only
|
||||
* called from fault_handler_init() before IWDG is started.
|
||||
*/
|
||||
static bool _fault_log_write(const fault_log_entry_t *entry)
|
||||
{
|
||||
int slot = _free_slot();
|
||||
|
||||
/* ---- Handle full log: erase sector 7 ---- */
|
||||
if (slot < 0) {
|
||||
float kp, ki, kd;
|
||||
bool pid_ok = pid_flash_load(&kp, &ki, &kd);
|
||||
|
||||
HAL_FLASH_Unlock();
|
||||
bool erased = _erase_sector7();
|
||||
HAL_FLASH_Lock();
|
||||
|
||||
if (!erased) return false;
|
||||
|
||||
if (pid_ok) {
|
||||
/* pid_flash_save() manages its own unlock/lock */
|
||||
pid_flash_save(kp, ki, kd);
|
||||
}
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
/* ---- Write 64 bytes (16 × 32-bit words) to chosen slot ---- */
|
||||
uint32_t addr = _slot_addr((uint8_t)slot);
|
||||
const uint32_t *words = (const uint32_t *)entry;
|
||||
|
||||
HAL_FLASH_Unlock();
|
||||
bool ok = true;
|
||||
for (uint8_t w = 0; w < FAULT_LOG_ENTRY_SIZE / 4u; w++) {
|
||||
if (HAL_FLASH_Program(FLASH_TYPEPROGRAM_WORD,
|
||||
addr + (uint32_t)w * 4u, words[w]) != HAL_OK) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
HAL_FLASH_Lock();
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* LED blink */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* Arm the blink sequencer for the given fault type, starting at step 0 with
 * both the start time and the last-step time set to the current tick. */
static void _led_start(FaultType type)
{
    const uint32_t now = HAL_GetTick();

    s_led_fault = type;
    s_led_start = now;
    s_led_last  = now;
    s_led_step  = 0;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Public API */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
void fault_mpu_guard_init(void)
|
||||
{
|
||||
/*
|
||||
* Configure MPU Region 0 as a 32-byte no-access guard page at
|
||||
* __stack_end (lowest address of the main stack). The stack grows
|
||||
* downward; when it overflows into this region a MemManage fault fires.
|
||||
*
|
||||
* MPU RASR SIZE field = log2(region_bytes) - 1 = log2(32) - 1 = 4.
|
||||
* AP = 0b000 → no access in any mode.
|
||||
*/
|
||||
extern uint32_t __stack_end; /* defined in linker script */
|
||||
|
||||
HAL_MPU_Disable();
|
||||
|
||||
MPU_Region_InitTypeDef r = {0};
|
||||
r.Enable = MPU_REGION_ENABLE;
|
||||
r.Number = MPU_REGION_NUMBER0;
|
||||
r.BaseAddress = (uint32_t)&__stack_end;
|
||||
r.Size = MPU_REGION_SIZE_32B;
|
||||
r.SubRegionDisable = 0x00u;
|
||||
r.TypeExtField = MPU_TEX_LEVEL0;
|
||||
r.AccessPermission = MPU_REGION_NO_ACCESS;
|
||||
r.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE;
|
||||
r.IsShareable = MPU_ACCESS_NOT_SHAREABLE;
|
||||
r.IsCacheable = MPU_ACCESS_NOT_CACHEABLE;
|
||||
r.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE;
|
||||
HAL_MPU_ConfigRegion(&r);
|
||||
|
||||
/* Enable MPU with default memory map for privileged access */
|
||||
HAL_MPU_Enable(MPU_PRIVILEGED_DEFAULT);
|
||||
|
||||
/* Enable configurable fault handlers */
|
||||
SCB->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk
|
||||
| SCB_SHCSR_BUSFAULTENA_Msk
|
||||
| SCB_SHCSR_USGFAULTENA_Msk;
|
||||
}
|
||||
|
||||
void fault_handler_init(void)
|
||||
{
|
||||
/* ---- Maintain lifetime reset counter ---- */
|
||||
if (s_reset_count_magic != RESET_COUNT_MAGIC) {
|
||||
s_reset_count_magic = RESET_COUNT_MAGIC;
|
||||
s_reset_count = 0u;
|
||||
}
|
||||
s_reset_count++;
|
||||
|
||||
/* ---- Detect brownout via RCC_CSR ---- */
|
||||
bool brownout = (RCC->CSR & RCC_CSR_BORRSTF) != 0u;
|
||||
if (brownout) {
|
||||
printf("[FAULT] Brownout reset detected (reset_count=%lu)\n",
|
||||
(unsigned long)s_reset_count);
|
||||
fault_log_entry_t e;
|
||||
memset(&e, 0, sizeof(e));
|
||||
e.magic = FAULT_LOG_MAGIC;
|
||||
e.fault_type = (uint8_t)FAULT_BROWNOUT;
|
||||
e.reset_count = (uint8_t)(s_reset_count & 0xFFu);
|
||||
_fault_log_write(&e);
|
||||
_led_start(FAULT_BROWNOUT);
|
||||
}
|
||||
|
||||
/* ---- Clear all RCC reset source flags ---- */
|
||||
RCC->CSR |= RCC_CSR_RMVF;
|
||||
|
||||
/* ---- Check for pending .noinit fault capture ---- */
|
||||
if (s_fault_magic == FAULT_SRAM_MAGIC) {
|
||||
s_fault_magic = 0u; /* consume once */
|
||||
|
||||
fault_log_entry_t e;
|
||||
memcpy(&e, (const void *)&s_fault_sram, sizeof(e));
|
||||
e.reset_count = (uint8_t)(s_reset_count & 0xFFu);
|
||||
|
||||
/* Print register dump over CDC/UART */
|
||||
printf("[FAULT] *** FAULT RECOVERED ***\n");
|
||||
printf("[FAULT] type=%u reset_count=%u ts=%lu ms\n",
|
||||
e.fault_type, e.reset_count, (unsigned long)e.timestamp_ms);
|
||||
printf("[FAULT] PC=0x%08lX LR=0x%08lX SP=0x%08lX\n",
|
||||
(unsigned long)e.pc, (unsigned long)e.lr, (unsigned long)e.sp);
|
||||
printf("[FAULT] R0=0x%08lX R1=0x%08lX R2=0x%08lX R3=0x%08lX\n",
|
||||
(unsigned long)e.r0, (unsigned long)e.r1,
|
||||
(unsigned long)e.r2, (unsigned long)e.r3);
|
||||
printf("[FAULT] CFSR=0x%08lX HFSR=0x%08lX MMFAR=0x%08lX BFAR=0x%08lX\n",
|
||||
(unsigned long)e.cfsr, (unsigned long)e.hfsr,
|
||||
(unsigned long)e.mmfar, (unsigned long)e.bfar);
|
||||
|
||||
_fault_log_write(&e);
|
||||
|
||||
FaultType ft = (e.fault_type < (uint8_t)BLINK_TABLE_SIZE)
|
||||
? (FaultType)e.fault_type : FAULT_HARDFAULT;
|
||||
_led_start(ft);
|
||||
}
|
||||
|
||||
/* ---- Install MPU stack guard & enable fault handlers ---- */
|
||||
fault_mpu_guard_init();
|
||||
}
|
||||
|
||||
FaultType fault_get_last_type(void)
|
||||
{
|
||||
for (int i = (int)FAULT_LOG_MAX_ENTRIES - 1; i >= 0; i--) {
|
||||
if (_slot_empty((uint8_t)i)) continue;
|
||||
const fault_log_entry_t *e =
|
||||
(const fault_log_entry_t *)_slot_addr((uint8_t)i);
|
||||
if (e->magic == FAULT_LOG_MAGIC)
|
||||
return (FaultType)e->fault_type;
|
||||
}
|
||||
return FAULT_NONE;
|
||||
}
|
||||
|
||||
/* Copy flash slot idx into *out. Returns false, leaving *out untouched, when
 * idx is out of range, the slot is erased, or its magic word is wrong. */
bool fault_log_read(uint8_t idx, fault_log_entry_t *out)
{
    const bool in_range = idx < FAULT_LOG_MAX_ENTRIES;

    /* Short-circuit keeps out-of-range indices from touching flash. */
    if (!in_range || _slot_empty(idx)) {
        return false;
    }

    const fault_log_entry_t *stored =
        (const fault_log_entry_t *)_slot_addr(idx);
    if (stored->magic == FAULT_LOG_MAGIC) {
        memcpy(out, stored, sizeof(*out));
        return true;
    }
    return false;
}
|
||||
|
||||
uint8_t fault_log_get_count(void)
|
||||
{
|
||||
uint8_t n = 0;
|
||||
for (uint8_t i = 0; i < FAULT_LOG_MAX_ENTRIES; i++) {
|
||||
if (!_slot_empty(i)) n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Erase sector 7 to clear the fault log. The PID record is read before the
 * erase and, if it loaded successfully, written back afterwards. The erase
 * result is not checked. */
void fault_log_clear(void)
{
    float saved_kp, saved_ki, saved_kd;
    const bool have_pid = pid_flash_load(&saved_kp, &saved_ki, &saved_kd);

    HAL_FLASH_Unlock();
    (void)_erase_sector7();
    HAL_FLASH_Lock();

    if (!have_pid) {
        return;
    }
    pid_flash_save(saved_kp, saved_ki, saved_kd);
}
|
||||
|
||||
void fault_assert_impl(const char *file, int line)
|
||||
{
|
||||
(void)file; (void)line;
|
||||
s_fault_sram.magic = FAULT_LOG_MAGIC;
|
||||
s_fault_sram.fault_type = (uint8_t)FAULT_ASSERT;
|
||||
s_fault_sram.timestamp_ms = HAL_GetTick();
|
||||
s_fault_sram.pc = (uint32_t)__builtin_return_address(0);
|
||||
s_fault_sram.lr = 0u;
|
||||
s_fault_sram.r0 = (uint32_t)(uintptr_t)file;
|
||||
s_fault_sram.r1 = (uint32_t)line;
|
||||
s_fault_sram.cfsr = SCB->CFSR;
|
||||
s_fault_sram.hfsr = 0u;
|
||||
s_fault_sram.mmfar = 0u;
|
||||
s_fault_sram.bfar = 0u;
|
||||
s_fault_sram.sp = 0u;
|
||||
s_fault_magic = FAULT_SRAM_MAGIC;
|
||||
NVIC_SystemReset();
|
||||
}
|
||||
|
||||
void fault_led_tick(uint32_t now_ms)
|
||||
{
|
||||
if (s_led_fault == FAULT_NONE) return;
|
||||
|
||||
/* Auto-disable after 10 s */
|
||||
if ((now_ms - s_led_start) > 10000u) {
|
||||
s_led_fault = FAULT_NONE;
|
||||
HAL_GPIO_WritePin(LED2_PORT, LED2_PIN, GPIO_PIN_SET); /* off */
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t fi = (uint8_t)s_led_fault;
|
||||
if (fi >= BLINK_TABLE_SIZE) return;
|
||||
|
||||
const LedBlink *b = &s_blink_table[fi];
|
||||
if ((now_ms - s_led_last) >= b->period_ms) {
|
||||
s_led_last = now_ms;
|
||||
bool on = ((b->pattern >> (15u - s_led_step)) & 1u) != 0u;
|
||||
/* LED2 is active-low (GPIO_PIN_RESET = lit) */
|
||||
HAL_GPIO_WritePin(LED2_PORT, LED2_PIN,
|
||||
on ? GPIO_PIN_RESET : GPIO_PIN_SET);
|
||||
s_led_step = (uint8_t)((s_led_step + 1u) % b->steps);
|
||||
}
|
||||
}
|
||||
|
||||
/* ================================================================
|
||||
* Fault vector hooks
|
||||
* ================================================================
|
||||
*
|
||||
* Naked entry stubs determine whether the auto-saved stack frame is on
|
||||
* MSP or PSP (bit 2 of EXC_RETURN in LR), then tail-call the C handler
|
||||
* with the frame pointer in R0.
|
||||
*
|
||||
* Cortex-M auto-pushed stack frame layout (from [SP]):
|
||||
* [0] R0 [1] R1 [2] R2 [3] R3
|
||||
* [4] R12 [5] LR [6] PC [7] xPSR
|
||||
*/
|
||||
|
||||
/*
 * Record the fault into the .noinit capture and reset the MCU.
 *
 * frame points at the hardware-stacked exception frame
 * (R0, R1, R2, R3, R12, LR, PC, xPSR). R12 and xPSR are not logged. The
 * SRAM magic is written only after every field has been stored.
 */
static void _capture_and_reset(FaultType type, uint32_t *frame)
{
    /* Word offsets inside the stacked exception frame. */
    enum {
        FRAME_R0 = 0, FRAME_R1, FRAME_R2, FRAME_R3,
        FRAME_R12, FRAME_LR, FRAME_PC, FRAME_XPSR,
        FRAME_WORDS
    };
    volatile fault_log_entry_t *const cap = &s_fault_sram;

    cap->magic        = FAULT_LOG_MAGIC;
    cap->fault_type   = (uint8_t)type;
    cap->timestamp_ms = HAL_GetTick();
    cap->r0           = frame[FRAME_R0];
    cap->r1           = frame[FRAME_R1];
    cap->r2           = frame[FRAME_R2];
    cap->r3           = frame[FRAME_R3];
    cap->lr           = frame[FRAME_LR];
    cap->pc           = frame[FRAME_PC];
    /* Stack pointer as it was before the 8-word frame was pushed. */
    cap->sp           = (uint32_t)(uintptr_t)(frame + FRAME_WORDS);
    cap->cfsr         = SCB->CFSR;
    cap->hfsr         = SCB->HFSR;
    cap->mmfar        = SCB->MMFAR;
    cap->bfar         = SCB->BFAR;
    s_fault_magic     = FAULT_SRAM_MAGIC;

    /* Light LED1 and busy-wait briefly so a scope can catch the event.
     * The original estimate was about 50 ms at 216 MHz; the real duration
     * depends on the cycles each loop iteration takes. */
    HAL_GPIO_WritePin(LED1_PORT, LED1_PIN, GPIO_PIN_RESET);  /* on */
    volatile uint32_t spin = 0u;
    while (spin < 10800000u) {
        __NOP();
        spin++;
    }

    NVIC_SystemReset();
}
|
||||
|
||||
/* Determine if a MemManage is from stack overflow vs other memory fault */
|
||||
static FaultType _mem_fault_type(void)
|
||||
{
|
||||
if ((SCB->CFSR & SCB_CFSR_MMARVALID_Msk) != 0u) {
|
||||
extern uint32_t __stack_end;
|
||||
uint32_t guard = (uint32_t)&__stack_end;
|
||||
if (SCB->MMFAR >= guard && SCB->MMFAR < guard + 32u)
|
||||
return FAULT_STACK_OVF;
|
||||
}
|
||||
return FAULT_MEM_FAULT;
|
||||
}
|
||||
|
||||
/* C-level handlers — called from naked asm stubs */
|
||||
void fault_hard_c(uint32_t *frame) { _capture_and_reset(FAULT_HARDFAULT, frame); }
|
||||
void fault_mem_c(uint32_t *frame) { _capture_and_reset(_mem_fault_type(), frame); }
|
||||
void fault_bus_c(uint32_t *frame) { _capture_and_reset(FAULT_BUS_FAULT, frame); }
|
||||
void fault_usage_c(uint32_t *frame) { _capture_and_reset(FAULT_USAGE_FAULT, frame); }
|
||||
|
||||
/* ---- Naked asm entry stubs ---- */

/*
 * All four vectors share one entry sequence: test bit 2 of EXC_RETURN (in
 * LR) to pick the stack that holds the exception frame (0 = MSP, 1 = PSP),
 * load that stack pointer into R0, then branch to the C handler, which
 * receives it as its frame argument.
 */
#define FAULT_ENTRY_STUB(vector_name, c_handler)                     \
    __attribute__((naked)) void vector_name(void)                    \
    {                                                                \
        __asm volatile (                                             \
            "tst lr, #4 \n"    /* EXC_RETURN[2]: 0=MSP, 1=PSP */     \
            "ite eq \n"                                              \
            "mrseq r0, msp \n"                                       \
            "mrsne r0, psp \n"                                       \
            "b " #c_handler " \n"                                    \
        );                                                           \
    }

FAULT_ENTRY_STUB(HardFault_Handler,  fault_hard_c)
FAULT_ENTRY_STUB(MemManage_Handler,  fault_mem_c)
FAULT_ENTRY_STUB(BusFault_Handler,   fault_bus_c)
FAULT_ENTRY_STUB(UsageFault_Handler, fault_usage_c)
|
||||
28
src/jlink.c
28
src/jlink.c
@ -267,6 +267,10 @@ static void dispatch(const uint8_t *payload, uint8_t cmd, uint8_t plen)
|
||||
}
|
||||
break;
|
||||
|
||||
case JLINK_CMD_FAULT_LOG_GET: /* Issue #565: request fault log telemetry */
|
||||
jlink_state.fault_log_req = 1u;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -511,3 +515,27 @@ void jlink_send_sched_telemetry(const jlink_tlm_sched_t *tlm)
|
||||
|
||||
jlink_tx_locked(frame, (uint16_t)(3u + plen + 3u));
|
||||
}
|
||||
|
||||
/* ---- jlink_send_fault_log() -- Issue #565 ---- */
|
||||
void jlink_send_fault_log(const jlink_tlm_fault_log_t *fl)
|
||||
{
|
||||
/*
|
||||
* Frame: [STX][LEN][0x86][20 bytes fault_log][CRC_hi][CRC_lo][ETX]
|
||||
* Total: 1+1+1+20+2+1 = 26 bytes
|
||||
*/
|
||||
static uint8_t frame[26];
|
||||
const uint8_t plen = (uint8_t)sizeof(jlink_tlm_fault_log_t); /* 20 */
|
||||
const uint8_t len = 1u + plen; /* CMD byte + payload */
|
||||
|
||||
frame[0] = JLINK_STX;
|
||||
frame[1] = len;
|
||||
frame[2] = JLINK_TLM_FAULT_LOG;
|
||||
memcpy(&frame[3], fl, plen);
|
||||
|
||||
uint16_t crc = crc16_xmodem(&frame[2], len);
|
||||
frame[3 + plen] = (uint8_t)(crc >> 8);
|
||||
frame[3 + plen + 1] = (uint8_t)(crc & 0xFFu);
|
||||
frame[3 + plen + 2] = JLINK_ETX;
|
||||
|
||||
jlink_tx_locked(frame, sizeof(frame));
|
||||
}
|
||||
|
||||
49
src/main.c
49
src/main.c
@ -31,6 +31,7 @@
|
||||
#include "coulomb_counter.h"
|
||||
#include "watchdog.h"
|
||||
#include "pid_flash.h"
|
||||
#include "fault_handler.h"
|
||||
#include "servo_bus.h"
|
||||
#include "gimbal.h"
|
||||
#include <math.h>
|
||||
@ -132,6 +133,9 @@ int main(void) {
|
||||
HAL_Init();
|
||||
SystemClock_Config();
|
||||
|
||||
/* Fault recovery handler (Issue #565) — must be first, before safety_init() */
|
||||
fault_handler_init();
|
||||
|
||||
/* Detect watchdog reset (Issue #300) — must be before safety_init() */
|
||||
g_watchdog_reset_detected = watchdog_was_reset_by_watchdog();
|
||||
|
||||
@ -188,6 +192,28 @@ int main(void) {
|
||||
/* Init Jetson serial binary protocol on USART1 (PB6/PB7) at 921600 baud */
|
||||
jlink_init();
|
||||
|
||||
/* Send fault log summary on boot if a prior fault was recorded (Issue #565) */
|
||||
if (fault_get_last_type() != FAULT_NONE) {
|
||||
fault_log_entry_t fle;
|
||||
memset(&fle, 0, sizeof(fle));
|
||||
jlink_tlm_fault_log_t ftlm;
|
||||
memset(&ftlm, 0, sizeof(ftlm));
|
||||
ftlm.entry_count = fault_log_get_count();
|
||||
if (fault_log_read(0u, &fle)) {
|
||||
ftlm.fault_type = fle.fault_type;
|
||||
ftlm.reset_count = fle.reset_count;
|
||||
ftlm.timestamp_ms = fle.timestamp_ms;
|
||||
ftlm.pc = fle.pc;
|
||||
ftlm.lr = fle.lr;
|
||||
ftlm.cfsr = fle.cfsr;
|
||||
ftlm.hfsr = fle.hfsr;
|
||||
}
|
||||
jlink_send_fault_log(&ftlm);
|
||||
printf("[FAULT] Prior fault type=%u count=%u PC=0x%08lX\n",
|
||||
(unsigned)ftlm.fault_type, (unsigned)ftlm.entry_count,
|
||||
(unsigned long)ftlm.pc);
|
||||
}
|
||||
|
||||
/* Init Jetson UART command interface on USART6 (PC6/PC7) at 921600 baud.
|
||||
* Mirrors CDC command protocol over hardware UART (fixes USB CDC TX bug). */
|
||||
jetson_uart_init();
|
||||
@ -276,6 +302,9 @@ int main(void) {
|
||||
/* Advance LED animation sequencer (non-blocking, call every tick) */
|
||||
led_tick(now);
|
||||
|
||||
/* Fault recovery LED blink code (Issue #565; self-disables after 10 s) */
|
||||
fault_led_tick(now);
|
||||
|
||||
/* Servo pan-tilt animation tick — updates smooth sweeps */
|
||||
servo_tick(now);
|
||||
|
||||
@ -384,6 +413,26 @@ int main(void) {
|
||||
(double)bal.kp, (double)bal.ki, (double)bal.kd);
|
||||
}
|
||||
|
||||
/* FAULT_LOG_GET: send fault log telemetry to Jetson (Issue #565) */
|
||||
if (jlink_state.fault_log_req) {
|
||||
jlink_state.fault_log_req = 0u;
|
||||
fault_log_entry_t fle;
|
||||
memset(&fle, 0, sizeof(fle));
|
||||
jlink_tlm_fault_log_t ftlm;
|
||||
memset(&ftlm, 0, sizeof(ftlm));
|
||||
ftlm.entry_count = fault_log_get_count();
|
||||
if (fault_log_read(0u, &fle)) {
|
||||
ftlm.fault_type = fle.fault_type;
|
||||
ftlm.reset_count = fle.reset_count;
|
||||
ftlm.timestamp_ms = fle.timestamp_ms;
|
||||
ftlm.pc = fle.pc;
|
||||
ftlm.lr = fle.lr;
|
||||
ftlm.cfsr = fle.cfsr;
|
||||
ftlm.hfsr = fle.hfsr;
|
||||
}
|
||||
jlink_send_fault_log(&ftlm);
|
||||
}
|
||||
|
||||
/* Power management: CRSF/JLink activity or armed state resets idle timer */
|
||||
if ((crsf_state.last_rx_ms != 0 && (now - crsf_state.last_rx_ms) < 500) ||
|
||||
jlink_is_active(now) ||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user