#include "fault_handler.h"
#include "config.h"
#include "pid_flash.h"
#include "stm32f7xx_hal.h"
#include <stdio.h>   /* printf */
#include <string.h>  /* memcpy, memset */

/*
 * fault_handler.c — STM32F7 fault detection and recovery (Issue #565)
 *
 * Recovery flow:
 *   Fault ISR (naked) → _capture_and_reset() captures registers into .noinit
 *   SRAM → sets FAULT_SRAM_MAGIC → NVIC_SystemReset().
 *   On next boot: fault_handler_init() sees FAULT_SRAM_MAGIC → persists to
 *   flash log → prints CDC dump → starts LED blink code.
 *
 * No flash writes occur inside fault ISRs.  All flash operations happen safely
 * in the normal boot context, well before safety_init() / IWDG start.
 */

/* ---- .noinit SRAM (preserved across NVIC_SystemReset) ---- */
/*
 * GCC startup code only zeroes .bss and initialises .data.  Variables in
 * .noinit are left untouched.  The magic word guards against cold-boot garbage.
 */
#define FAULT_SRAM_MAGIC   0xFADE5A01u
#define RESET_COUNT_MAGIC  0x1234ABCDu

/* Equals FAULT_SRAM_MAGIC while s_fault_sram holds a capture not yet logged. */
static __attribute__((section(".noinit"))) volatile uint32_t s_fault_magic;
/* Register snapshot written by the fault/assert path, consumed on next boot. */
static __attribute__((section(".noinit"))) volatile fault_log_entry_t s_fault_sram;
/* Equals RESET_COUNT_MAGIC once s_reset_count has been initialised. */
static __attribute__((section(".noinit"))) volatile uint32_t s_reset_count_magic;
/* Incremented by fault_handler_init() on every boot. */
static __attribute__((section(".noinit"))) volatile uint32_t s_reset_count;

/* ---- LED blink sequencer ---- */
/*
 * Each pattern is a 16-bit bitmask; bit 15 = first step.
 * One step = period_ms milliseconds.  LED2 (PC14) is active-low.
 */
typedef struct {
    uint16_t pattern;    /* bitmask: 1 = LED on */
    uint8_t  steps;      /* number of valid bits to cycle */
    uint16_t period_ms;  /* ms per step */
} LedBlink;

/*
 * Pattern table indexed by FaultType (0..8).
* NONE = silent * HARDFAULT = 1010 1010 1010 1010 (3 fast blinks, 100 ms) * WATCHDOG = 1111 0000 1111 0000 (2 slow pulses, 150 ms × 8 steps = 1.2 s) * BROWNOUT = 1111 1111 0000 0000 (1 long pulse, 100 ms × 16 = 1.6 s) * STACK_OVF = 1110 1110 1110 1110 (4 short bursts, 100 ms) * BUS_FAULT = 1010 1111 1100 0000 (3+1 pattern) * USAGE_FAULT = 1010 0000 0000 0000 (2 fast blinks) * MEM_FAULT = 1010 1010 1000 0000 (3 blinks, slower tail) * ASSERT = 1101 1011 0000 0000 (SOS-like) */ static const LedBlink s_blink_table[] = { /* FAULT_NONE */ { 0x0000u, 16, 100 }, /* FAULT_HARDFAULT */ { 0xAAAAu, 16, 100 }, /* FAULT_WATCHDOG */ { 0xF0F0u, 16, 150 }, /* FAULT_BROWNOUT */ { 0xFF00u, 16, 100 }, /* FAULT_STACK_OVF */ { 0xEEEEu, 16, 100 }, /* FAULT_BUS_FAULT */ { 0xAFC0u, 16, 100 }, /* FAULT_USAGE_FAULT */ { 0xA000u, 16, 100 }, /* FAULT_MEM_FAULT */ { 0xAA80u, 16, 100 }, /* FAULT_ASSERT */ { 0xDB00u, 16, 100 }, }; #define BLINK_TABLE_SIZE (sizeof(s_blink_table) / sizeof(s_blink_table[0])) static FaultType s_led_fault = FAULT_NONE; static uint32_t s_led_start = 0; static uint32_t s_led_last = 0; static uint8_t s_led_step = 0; /* ------------------------------------------------------------------ */ /* Flash helpers */ /* ------------------------------------------------------------------ */ static uint32_t _slot_addr(uint8_t idx) { return FAULT_LOG_BASE_ADDR + (uint32_t)idx * FAULT_LOG_ENTRY_SIZE; } static bool _slot_empty(uint8_t idx) { /* An erased 32-bit word reads as 0xFFFFFFFF */ const uint32_t *p = (const uint32_t *)_slot_addr(idx); return (*p == 0xFFFFFFFFu); } static int _free_slot(void) { for (uint8_t i = 0; i < FAULT_LOG_MAX_ENTRIES; i++) { if (_slot_empty(i)) return (int)i; } return -1; } static bool _erase_sector7(void) { FLASH_EraseInitTypeDef er = {0}; er.TypeErase = FLASH_TYPEERASE_SECTORS; er.Sector = FLASH_SECTOR_7; er.NbSectors = 1; er.VoltageRange = FLASH_VOLTAGE_RANGE_3; uint32_t err = 0; return HAL_FLASHEx_Erase(&er, &err) == HAL_OK; } /* * Write fault entry to 
the next free flash slot. * When all 8 slots are occupied: erase sector 7, restore PID if valid, * then write entry at slot 0. Sector 7 erase stalls CPU ~1 s — only * called from fault_handler_init() before IWDG is started. */ static bool _fault_log_write(const fault_log_entry_t *entry) { int slot = _free_slot(); /* ---- Handle full log: erase sector 7 ---- */ if (slot < 0) { float kp, ki, kd; bool pid_ok = pid_flash_load(&kp, &ki, &kd); HAL_FLASH_Unlock(); bool erased = _erase_sector7(); HAL_FLASH_Lock(); if (!erased) return false; if (pid_ok) { /* pid_flash_save() manages its own unlock/lock */ pid_flash_save(kp, ki, kd); } slot = 0; } /* ---- Write 64 bytes (16 × 32-bit words) to chosen slot ---- */ uint32_t addr = _slot_addr((uint8_t)slot); const uint32_t *words = (const uint32_t *)entry; HAL_FLASH_Unlock(); bool ok = true; for (uint8_t w = 0; w < FAULT_LOG_ENTRY_SIZE / 4u; w++) { if (HAL_FLASH_Program(FLASH_TYPEPROGRAM_WORD, addr + (uint32_t)w * 4u, words[w]) != HAL_OK) { ok = false; break; } } HAL_FLASH_Lock(); return ok; } /* ------------------------------------------------------------------ */ /* LED blink */ /* ------------------------------------------------------------------ */ static void _led_start(FaultType type) { s_led_fault = type; s_led_start = HAL_GetTick(); s_led_last = s_led_start; s_led_step = 0; } /* ------------------------------------------------------------------ */ /* Public API */ /* ------------------------------------------------------------------ */ void fault_mpu_guard_init(void) { /* * Configure MPU Region 0 as a 32-byte no-access guard page at * __stack_end (lowest address of the main stack). The stack grows * downward; when it overflows into this region a MemManage fault fires. * * MPU RASR SIZE field = log2(region_bytes) - 1 = log2(32) - 1 = 4. * AP = 0b000 → no access in any mode. 
*/ extern uint32_t __stack_end; /* defined in linker script */ HAL_MPU_Disable(); MPU_Region_InitTypeDef r = {0}; r.Enable = MPU_REGION_ENABLE; r.Number = MPU_REGION_NUMBER0; r.BaseAddress = (uint32_t)&__stack_end; r.Size = MPU_REGION_SIZE_32B; r.SubRegionDisable = 0x00u; r.TypeExtField = MPU_TEX_LEVEL0; r.AccessPermission = MPU_REGION_NO_ACCESS; r.DisableExec = MPU_INSTRUCTION_ACCESS_DISABLE; r.IsShareable = MPU_ACCESS_NOT_SHAREABLE; r.IsCacheable = MPU_ACCESS_NOT_CACHEABLE; r.IsBufferable = MPU_ACCESS_NOT_BUFFERABLE; HAL_MPU_ConfigRegion(&r); /* Enable MPU with default memory map for privileged access */ HAL_MPU_Enable(MPU_PRIVILEGED_DEFAULT); /* Enable configurable fault handlers */ SCB->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk | SCB_SHCSR_BUSFAULTENA_Msk | SCB_SHCSR_USGFAULTENA_Msk; } void fault_handler_init(void) { /* ---- Maintain lifetime reset counter ---- */ if (s_reset_count_magic != RESET_COUNT_MAGIC) { s_reset_count_magic = RESET_COUNT_MAGIC; s_reset_count = 0u; } s_reset_count++; /* ---- Detect brownout via RCC_CSR ---- */ bool brownout = (RCC->CSR & RCC_CSR_BORRSTF) != 0u; if (brownout) { printf("[FAULT] Brownout reset detected (reset_count=%lu)\n", (unsigned long)s_reset_count); fault_log_entry_t e; memset(&e, 0, sizeof(e)); e.magic = FAULT_LOG_MAGIC; e.fault_type = (uint8_t)FAULT_BROWNOUT; e.reset_count = (uint8_t)(s_reset_count & 0xFFu); _fault_log_write(&e); _led_start(FAULT_BROWNOUT); } /* ---- Clear all RCC reset source flags ---- */ RCC->CSR |= RCC_CSR_RMVF; /* ---- Check for pending .noinit fault capture ---- */ if (s_fault_magic == FAULT_SRAM_MAGIC) { s_fault_magic = 0u; /* consume once */ fault_log_entry_t e; memcpy(&e, (const void *)&s_fault_sram, sizeof(e)); e.reset_count = (uint8_t)(s_reset_count & 0xFFu); /* Print register dump over CDC/UART */ printf("[FAULT] *** FAULT RECOVERED ***\n"); printf("[FAULT] type=%u reset_count=%u ts=%lu ms\n", e.fault_type, e.reset_count, (unsigned long)e.timestamp_ms); printf("[FAULT] PC=0x%08lX LR=0x%08lX 
SP=0x%08lX\n", (unsigned long)e.pc, (unsigned long)e.lr, (unsigned long)e.sp); printf("[FAULT] R0=0x%08lX R1=0x%08lX R2=0x%08lX R3=0x%08lX\n", (unsigned long)e.r0, (unsigned long)e.r1, (unsigned long)e.r2, (unsigned long)e.r3); printf("[FAULT] CFSR=0x%08lX HFSR=0x%08lX MMFAR=0x%08lX BFAR=0x%08lX\n", (unsigned long)e.cfsr, (unsigned long)e.hfsr, (unsigned long)e.mmfar, (unsigned long)e.bfar); _fault_log_write(&e); FaultType ft = (e.fault_type < (uint8_t)BLINK_TABLE_SIZE) ? (FaultType)e.fault_type : FAULT_HARDFAULT; _led_start(ft); } /* ---- Install MPU stack guard & enable fault handlers ---- */ fault_mpu_guard_init(); } FaultType fault_get_last_type(void) { for (int i = (int)FAULT_LOG_MAX_ENTRIES - 1; i >= 0; i--) { if (_slot_empty((uint8_t)i)) continue; const fault_log_entry_t *e = (const fault_log_entry_t *)_slot_addr((uint8_t)i); if (e->magic == FAULT_LOG_MAGIC) return (FaultType)e->fault_type; } return FAULT_NONE; } bool fault_log_read(uint8_t idx, fault_log_entry_t *out) { if (idx >= FAULT_LOG_MAX_ENTRIES) return false; if (_slot_empty(idx)) return false; const fault_log_entry_t *e = (const fault_log_entry_t *)_slot_addr(idx); if (e->magic != FAULT_LOG_MAGIC) return false; memcpy(out, e, sizeof(*out)); return true; } uint8_t fault_log_get_count(void) { uint8_t n = 0; for (uint8_t i = 0; i < FAULT_LOG_MAX_ENTRIES; i++) { if (!_slot_empty(i)) n++; } return n; } void fault_log_clear(void) { float kp, ki, kd; bool pid_ok = pid_flash_load(&kp, &ki, &kd); HAL_FLASH_Unlock(); _erase_sector7(); HAL_FLASH_Lock(); if (pid_ok) { pid_flash_save(kp, ki, kd); } } void fault_assert_impl(const char *file, int line) { (void)file; (void)line; s_fault_sram.magic = FAULT_LOG_MAGIC; s_fault_sram.fault_type = (uint8_t)FAULT_ASSERT; s_fault_sram.timestamp_ms = HAL_GetTick(); s_fault_sram.pc = (uint32_t)__builtin_return_address(0); s_fault_sram.lr = 0u; s_fault_sram.r0 = (uint32_t)(uintptr_t)file; s_fault_sram.r1 = (uint32_t)line; s_fault_sram.cfsr = SCB->CFSR; s_fault_sram.hfsr = 
0u; s_fault_sram.mmfar = 0u; s_fault_sram.bfar = 0u; s_fault_sram.sp = 0u; s_fault_magic = FAULT_SRAM_MAGIC; NVIC_SystemReset(); } void fault_led_tick(uint32_t now_ms) { if (s_led_fault == FAULT_NONE) return; /* Auto-disable after 10 s */ if ((now_ms - s_led_start) > 10000u) { s_led_fault = FAULT_NONE; HAL_GPIO_WritePin(LED2_PORT, LED2_PIN, GPIO_PIN_SET); /* off */ return; } uint8_t fi = (uint8_t)s_led_fault; if (fi >= BLINK_TABLE_SIZE) return; const LedBlink *b = &s_blink_table[fi]; if ((now_ms - s_led_last) >= b->period_ms) { s_led_last = now_ms; bool on = ((b->pattern >> (15u - s_led_step)) & 1u) != 0u; /* LED2 is active-low (GPIO_PIN_RESET = lit) */ HAL_GPIO_WritePin(LED2_PORT, LED2_PIN, on ? GPIO_PIN_RESET : GPIO_PIN_SET); s_led_step = (uint8_t)((s_led_step + 1u) % b->steps); } } /* ================================================================ * Fault vector hooks * ================================================================ * * Naked entry stubs determine whether the auto-saved stack frame is on * MSP or PSP (bit 2 of EXC_RETURN in LR), then tail-call the C handler * with the frame pointer in R0. 
* * Cortex-M auto-pushed stack frame layout (from [SP]): * [0] R0 [1] R1 [2] R2 [3] R3 * [4] R12 [5] LR [6] PC [7] xPSR */ static void _capture_and_reset(FaultType type, uint32_t *frame) { s_fault_sram.magic = FAULT_LOG_MAGIC; s_fault_sram.fault_type = (uint8_t)type; s_fault_sram.timestamp_ms = HAL_GetTick(); s_fault_sram.r0 = frame[0]; s_fault_sram.r1 = frame[1]; s_fault_sram.r2 = frame[2]; s_fault_sram.r3 = frame[3]; /* frame[4] = R12 (unused in log), frame[5] = LR, frame[6] = PC */ s_fault_sram.lr = frame[5]; s_fault_sram.pc = frame[6]; s_fault_sram.sp = (uint32_t)(uintptr_t)(frame + 8); /* SP after push */ s_fault_sram.cfsr = SCB->CFSR; s_fault_sram.hfsr = SCB->HFSR; s_fault_sram.mmfar = SCB->MMFAR; s_fault_sram.bfar = SCB->BFAR; s_fault_magic = FAULT_SRAM_MAGIC; /* Brief LED flash so a scope can catch it (≈50 ms at 216 MHz) */ HAL_GPIO_WritePin(LED1_PORT, LED1_PIN, GPIO_PIN_RESET); /* on */ for (volatile uint32_t i = 0u; i < 10800000u; i++) __NOP(); NVIC_SystemReset(); } /* Determine if a MemManage is from stack overflow vs other memory fault */ static FaultType _mem_fault_type(void) { if ((SCB->CFSR & SCB_CFSR_MMARVALID_Msk) != 0u) { extern uint32_t __stack_end; uint32_t guard = (uint32_t)&__stack_end; if (SCB->MMFAR >= guard && SCB->MMFAR < guard + 32u) return FAULT_STACK_OVF; } return FAULT_MEM_FAULT; } /* C-level handlers — called from naked asm stubs */ void fault_hard_c(uint32_t *frame) { _capture_and_reset(FAULT_HARDFAULT, frame); } void fault_mem_c(uint32_t *frame) { _capture_and_reset(_mem_fault_type(), frame); } void fault_bus_c(uint32_t *frame) { _capture_and_reset(FAULT_BUS_FAULT, frame); } void fault_usage_c(uint32_t *frame) { _capture_and_reset(FAULT_USAGE_FAULT, frame); } /* ---- Naked asm entry stubs ---- */ __attribute__((naked)) void HardFault_Handler(void) { __asm volatile ( "tst lr, #4 \n" /* EXC_RETURN[2]: 0=MSP, 1=PSP */ "ite eq \n" "mrseq r0, msp \n" "mrsne r0, psp \n" "b fault_hard_c \n" ); } __attribute__((naked)) void 
MemManage_Handler(void) { __asm volatile ( "tst lr, #4 \n" "ite eq \n" "mrseq r0, msp \n" "mrsne r0, psp \n" "b fault_mem_c \n" ); } __attribute__((naked)) void BusFault_Handler(void) { __asm volatile ( "tst lr, #4 \n" "ite eq \n" "mrseq r0, msp \n" "mrsne r0, psp \n" "b fault_bus_c \n" ); } __attribute__((naked)) void UsageFault_Handler(void) { __asm volatile ( "tst lr, #4 \n" "ite eq \n" "mrseq r0, msp \n" "mrsne r0, psp \n" "b fault_usage_c \n" ); }