feat(social): ambient sound classifier via mel-spectrogram — Issue #252

Adds ambient_sound_node to saltybot_social: - Accumulates 1 s of PCM-16 audio from /social/speech/audio_raw - Extracts mel-spectrogram feature vector (energy_db, zcr, mel_centroid, mel_flatness, low_ratio, high_ratio) using pure numpy (no torch/onnx) - Priority-cascade classifier: silence → music → speech → crowd → outdoor → alarm - Publishes label as std_msgs/String on /saltybot/ambient_sound on each buffer fill - All 11 thresholds exposed as ROS parameters (yaml + launch file) - numpy-free energy-only fallback for edge environments - 77/77 tests passing Closes #252
2026-03-02 12:54:26 -05:00 · 2026-03-02 12:54:26 -05:00 · b2d76b434b
commit b2d76b434b
parent c7a33bace8
5 changed files with 835 additions and 0 deletions
--- a/jetson/ros2_ws/src/saltybot_social/config/ambient_sound_params.yaml
+++ b/jetson/ros2_ws/src/saltybot_social/config/ambient_sound_params.yaml
@ -0,0 +1,21 @@
+ambient_sound_node:
+  ros__parameters:
+    sample_rate: 16000              # Expected PCM sample rate (Hz)
+    window_s: 1.0                   # Accumulate this many seconds before classifying
+    n_fft: 512                      # FFT size (32 ms frame at 16 kHz)
+    n_mels: 32                      # Mel filterbank bands
+    audio_topic: "/social/speech/audio_raw"  # Source PCM-16 UInt8MultiArray topic
+
+    # ── Classifier thresholds ──────────────────────────────────────────────
+    # Adjust to tune sensitivity for your deployment environment.
+    silence_db: -40.0               # Below this energy (dBFS) → silence
+    alarm_db_min: -25.0             # Min energy for alarm detection
+    alarm_zcr_min: 0.12             # Min ZCR for alarm (intermittent high pitch)
+    alarm_high_ratio_min: 0.35      # Min high-band energy fraction for alarm
+    speech_zcr_min: 0.02            # Min ZCR for speech (voiced onset)
+    speech_zcr_max: 0.25            # Max ZCR for speech
+    speech_flatness_max: 0.35       # Max spectral flatness for speech (tonal)
+    music_zcr_max: 0.08             # Max ZCR for music (harmonic / tonal)
+    music_flatness_max: 0.25        # Max spectral flatness for music
+    crowd_zcr_min: 0.10             # Min ZCR for crowd noise
+    crowd_flatness_min: 0.35        # Min spectral flatness for crowd
--- a/jetson/ros2_ws/src/saltybot_social/launch/ambient_sound.launch.py
+++ b/jetson/ros2_ws/src/saltybot_social/launch/ambient_sound.launch.py
@ -0,0 +1,42 @@
+"""ambient_sound.launch.py -- Launch the ambient sound classifier (Issue #252).
+
+Usage:
+  ros2 launch saltybot_social ambient_sound.launch.py
+  ros2 launch saltybot_social ambient_sound.launch.py silence_db:=-45.0
+"""
+
+import os
+from ament_index_python.packages import get_package_share_directory
+from launch import LaunchDescription
+from launch.actions import DeclareLaunchArgument
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import Node
+
+
+def generate_launch_description():
+    pkg = get_package_share_directory("saltybot_social")
+    cfg = os.path.join(pkg, "config", "ambient_sound_params.yaml")
+
+    return LaunchDescription([
+        DeclareLaunchArgument("window_s",    default_value="1.0",
+                              description="Accumulation window (s)"),
+        DeclareLaunchArgument("n_mels",      default_value="32",
+                              description="Mel filterbank bands"),
+        DeclareLaunchArgument("silence_db",  default_value="-40.0",
+                              description="Silence energy threshold (dBFS)"),
+
+        Node(
+            package="saltybot_social",
+            executable="ambient_sound_node",
+            name="ambient_sound_node",
+            output="screen",
+            parameters=[
+                cfg,
+                {
+                    "window_s":   LaunchConfiguration("window_s"),
+                    "n_mels":     LaunchConfiguration("n_mels"),
+                    "silence_db": LaunchConfiguration("silence_db"),
+                },
+            ],
+        ),
+    ])
--- a/jetson/ros2_ws/src/saltybot_social/saltybot_social/ambient_sound_node.py
+++ b/jetson/ros2_ws/src/saltybot_social/saltybot_social/ambient_sound_node.py
@ -0,0 +1,363 @@
+"""ambient_sound_node.py -- Ambient sound classifier via mel-spectrogram features.
+Issue #252
+
+Accumulates 1 s of PCM-16 audio from /social/speech/audio_raw, extracts a
+compact mel-spectrogram feature vector, then classifies the scene into one of:
+
+  silence | speech | music | crowd | outdoor | alarm
+
+Publishes the label as std_msgs/String on /saltybot/ambient_sound at 1 Hz.
+
+Signal processing is pure Python + numpy (no torch / onnx dependency).
+
+Feature vector (per 1-s window):
+  energy_db    -- overall RMS in dBFS
+  zcr          -- mean zero-crossing rate across frames
+  mel_centroid -- centre-of-mass of the mel band energies  [0..1]
+  mel_flatness -- geometric/arithmetic mean of mel energies [0..1]
+                  (1 = white noise, 0 = single sinusoid)
+  low_ratio    -- fraction of mel energy in lower third of bands
+  high_ratio   -- fraction of mel energy in upper third of bands
+
+Classification cascade (priority-ordered):
+  silence  : energy_db < silence_db
+  alarm    : energy_db >= alarm_db_min  AND zcr >= alarm_zcr_min
+             AND high_ratio >= alarm_high_ratio_min
+  speech   : zcr in [speech_zcr_min, speech_zcr_max]
+             AND mel_flatness < speech_flatness_max
+  music    : zcr < music_zcr_max AND mel_flatness < music_flatness_max
+  crowd    : zcr >= crowd_zcr_min AND mel_flatness >= crowd_flatness_min
+  outdoor  : catch-all
+
+Parameters:
+  sample_rate           (int,   16000)
+  window_s              (float, 1.0)   -- accumulation window before classify
+  n_fft                 (int,   512)   -- FFT size
+  n_mels                (int,   32)    -- mel filterbank bands
+  audio_topic           (str,   "/social/speech/audio_raw")
+  silence_db            (float, -40.0)
+  alarm_db_min          (float, -25.0)
+  alarm_zcr_min         (float, 0.12)
+  alarm_high_ratio_min  (float, 0.35)
+  speech_zcr_min        (float, 0.02)
+  speech_zcr_max        (float, 0.25)
+  speech_flatness_max   (float, 0.35)
+  music_zcr_max         (float, 0.08)
+  music_flatness_max    (float, 0.25)
+  crowd_zcr_min         (float, 0.10)
+  crowd_flatness_min    (float, 0.35)
+"""
+
+from __future__ import annotations
+
+import math
+import struct
+import threading
+from typing import Dict, List, Optional
+
+import rclpy
+from rclpy.node import Node
+from rclpy.qos import QoSProfile
+from std_msgs.msg import String, UInt8MultiArray
+
+# numpy used only in DSP helpers — the Jetson always has it
+try:
+    import numpy as np
+    _NUMPY = True
+except ImportError:
+    _NUMPY = False
+
+INT16_MAX = 32768.0
+LABELS = ("silence", "speech", "music", "crowd", "outdoor", "alarm")
+
+
+# ── PCM helpers ───────────────────────────────────────────────────────────────
+
+def pcm16_bytes_to_float32(data: bytes) -> List[float]:
+    """PCM-16 LE bytes → float32 list in [-1.0, 1.0]."""
+    n = len(data) // 2
+    if n == 0:
+        return []
+    return [s / INT16_MAX for s in struct.unpack(f"<{n}h", data[: n * 2])]
+
+
+# ── Mel DSP (numpy path) ──────────────────────────────────────────────────────
+
+def hz_to_mel(hz: float) -> float:
+    return 2595.0 * math.log10(1.0 + hz / 700.0)
+
+
+def mel_to_hz(mel: float) -> float:
+    return 700.0 * (10.0 ** (mel / 2595.0) - 1.0)
+
+
+def build_mel_filterbank(sr: int, n_fft: int, n_mels: int,
+                         fmin: float = 0.0, fmax: Optional[float] = None):
+    """Return (n_mels, n_fft//2+1) numpy filterbank matrix."""
+    import numpy as np
+    if fmax is None:
+        fmax = sr / 2.0
+    n_freqs = n_fft // 2 + 1
+    mel_min = hz_to_mel(fmin)
+    mel_max = hz_to_mel(fmax)
+    mel_pts = np.linspace(mel_min, mel_max, n_mels + 2)
+    hz_pts  = np.array([mel_to_hz(m) for m in mel_pts])
+    bin_pts = np.floor((n_fft + 1) * hz_pts / sr).astype(int)
+    fb = np.zeros((n_mels, n_freqs))
+    for m in range(n_mels):
+        lo, ctr, hi = bin_pts[m], bin_pts[m + 1], bin_pts[m + 2]
+        for k in range(lo, min(ctr, n_freqs)):
+            if ctr != lo:
+                fb[m, k] = (k - lo) / (ctr - lo)
+        for k in range(ctr, min(hi, n_freqs)):
+            if hi != ctr:
+                fb[m, k] = (hi - k) / (hi - ctr)
+    return fb
+
+
+def compute_mel_spectrogram(samples: List[float], sr: int,
+                            n_fft: int = 512, n_mels: int = 32,
+                            hop_length: int = 256):
+    """Return (n_mels, n_frames) log-mel spectrogram (numpy array)."""
+    import numpy as np
+    x = np.array(samples, dtype=np.float32)
+    fb = build_mel_filterbank(sr, n_fft, n_mels)
+    window = np.hanning(n_fft)
+    frames = []
+    for start in range(0, len(x) - n_fft + 1, hop_length):
+        frame = x[start : start + n_fft] * window
+        spec  = np.abs(np.fft.rfft(frame)) ** 2
+        mel   = fb @ spec
+        frames.append(mel)
+    if not frames:
+        return np.zeros((n_mels, 1), dtype=np.float32)
+    return np.column_stack(frames).astype(np.float32)
+
+
+# ── Feature extraction ────────────────────────────────────────────────────────
+
+def extract_features(samples: List[float], sr: int,
+                     n_fft: int = 512, n_mels: int = 32) -> Dict[str, float]:
+    """Extract scalar features from a raw audio window."""
+    import numpy as np
+
+    n = len(samples)
+    if n == 0:
+        return {k: 0.0 for k in
+                ("energy_db", "zcr", "mel_centroid", "mel_flatness",
+                 "low_ratio", "high_ratio")}
+
+    # Energy
+    rms = math.sqrt(sum(s * s for s in samples) / n) if n else 0.0
+    energy_db = 20.0 * math.log10(max(rms, 1e-10))
+
+    # ZCR across 30 ms frames
+    chunk = max(1, int(sr * 0.030))
+    zcr_vals = []
+    for i in range(0, n - chunk + 1, chunk):
+        seg = samples[i : i + chunk]
+        crossings = sum(1 for j in range(1, len(seg))
+                        if seg[j - 1] * seg[j] < 0)
+        zcr_vals.append(crossings / max(len(seg) - 1, 1))
+    zcr = sum(zcr_vals) / len(zcr_vals) if zcr_vals else 0.0
+
+    # Mel spectrogram features
+    mel_spec = compute_mel_spectrogram(samples, sr, n_fft, n_mels)
+    mel_mean = mel_spec.mean(axis=1)  # (n_mels,) mean energy per band
+
+    total = float(mel_mean.sum()) if mel_mean.sum() > 0 else 1e-10
+    indices = np.arange(n_mels, dtype=np.float32)
+    mel_centroid = float((indices * mel_mean).sum()) / (n_mels * total / total) / n_mels
+
+    # Spectral flatness: geometric mean / arithmetic mean
+    eps = 1e-10
+    mel_pos = np.clip(mel_mean, eps, None)
+    geo_mean = float(np.exp(np.log(mel_pos).mean()))
+    arith_mean = float(mel_pos.mean())
+    mel_flatness = min(geo_mean / max(arith_mean, eps), 1.0)
+
+    # Band ratios
+    third = max(1, n_mels // 3)
+    low_energy  = float(mel_mean[:third].sum())
+    high_energy = float(mel_mean[-third:].sum())
+    low_ratio   = low_energy  / max(total, eps)
+    high_ratio  = high_energy / max(total, eps)
+
+    return {
+        "energy_db":    energy_db,
+        "zcr":          zcr,
+        "mel_centroid": mel_centroid,
+        "mel_flatness": mel_flatness,
+        "low_ratio":    low_ratio,
+        "high_ratio":   high_ratio,
+    }
+
+
+# ── Classifier ────────────────────────────────────────────────────────────────
+
+def classify(features: Dict[str, float],
+             silence_db: float = -40.0,
+             alarm_db_min: float = -25.0,
+             alarm_zcr_min: float = 0.12,
+             alarm_high_ratio_min: float = 0.35,
+             speech_zcr_min: float = 0.02,
+             speech_zcr_max: float = 0.25,
+             speech_flatness_max: float = 0.35,
+             music_zcr_max: float = 0.08,
+             music_flatness_max: float = 0.25,
+             crowd_zcr_min: float = 0.10,
+             crowd_flatness_min: float = 0.35) -> str:
+    """Priority-ordered rule cascade. Returns a label from LABELS."""
+    e   = features["energy_db"]
+    zcr = features["zcr"]
+    fl  = features["mel_flatness"]
+    hi  = features["high_ratio"]
+
+    if e < silence_db:
+        return "silence"
+    if (e >= alarm_db_min
+            and zcr >= alarm_zcr_min
+            and hi >= alarm_high_ratio_min):
+        return "alarm"
+    if zcr < music_zcr_max and fl < music_flatness_max:
+        return "music"
+    if (speech_zcr_min <= zcr <= speech_zcr_max
+            and fl < speech_flatness_max):
+        return "speech"
+    if zcr >= crowd_zcr_min and fl >= crowd_flatness_min:
+        return "crowd"
+    return "outdoor"
+
+
+# ── Audio accumulation buffer ─────────────────────────────────────────────────
+
+class AudioBuffer:
+    """Thread-safe ring buffer; yields a window of samples when full."""
+
+    def __init__(self, window_samples: int) -> None:
+        self._target = window_samples
+        self._buf: List[float] = []
+        self._lock = threading.Lock()
+
+    def push(self, samples: List[float]) -> Optional[List[float]]:
+        """Append samples. Returns a complete window (and resets) when full."""
+        with self._lock:
+            self._buf.extend(samples)
+            if len(self._buf) >= self._target:
+                window = self._buf[: self._target]
+                self._buf = self._buf[self._target :]
+                return window
+        return None
+
+    def clear(self) -> None:
+        with self._lock:
+            self._buf.clear()
+
+
+# ── ROS2 node ─────────────────────────────────────────────────────────────────
+
+class AmbientSoundNode(Node):
+    """Classifies ambient sound from raw audio and publishes label at 1 Hz."""
+
+    def __init__(self) -> None:
+        super().__init__("ambient_sound_node")
+
+        self.declare_parameter("sample_rate",          16000)
+        self.declare_parameter("window_s",             1.0)
+        self.declare_parameter("n_fft",                512)
+        self.declare_parameter("n_mels",               32)
+        self.declare_parameter("audio_topic",          "/social/speech/audio_raw")
+        # Classifier thresholds
+        self.declare_parameter("silence_db",           -40.0)
+        self.declare_parameter("alarm_db_min",         -25.0)
+        self.declare_parameter("alarm_zcr_min",        0.12)
+        self.declare_parameter("alarm_high_ratio_min", 0.35)
+        self.declare_parameter("speech_zcr_min",       0.02)
+        self.declare_parameter("speech_zcr_max",       0.25)
+        self.declare_parameter("speech_flatness_max",  0.35)
+        self.declare_parameter("music_zcr_max",        0.08)
+        self.declare_parameter("music_flatness_max",   0.25)
+        self.declare_parameter("crowd_zcr_min",        0.10)
+        self.declare_parameter("crowd_flatness_min",   0.35)
+
+        self._sr      = self.get_parameter("sample_rate").value
+        self._n_fft   = self.get_parameter("n_fft").value
+        self._n_mels  = self.get_parameter("n_mels").value
+        window_s      = self.get_parameter("window_s").value
+        audio_topic   = self.get_parameter("audio_topic").value
+
+        self._thresholds = {
+            k: self.get_parameter(k).value for k in (
+                "silence_db", "alarm_db_min", "alarm_zcr_min",
+                "alarm_high_ratio_min", "speech_zcr_min", "speech_zcr_max",
+                "speech_flatness_max", "music_zcr_max", "music_flatness_max",
+                "crowd_zcr_min", "crowd_flatness_min",
+            )
+        }
+
+        self._buffer = AudioBuffer(int(self._sr * window_s))
+        self._last_label = "silence"
+
+        qos = QoSProfile(depth=10)
+        self._pub = self.create_publisher(String, "/saltybot/ambient_sound", qos)
+        self._audio_sub = self.create_subscription(
+            UInt8MultiArray, audio_topic, self._on_audio, qos
+        )
+
+        if not _NUMPY:
+            self.get_logger().warn(
+                "numpy not available — mel features disabled, classifying by energy only"
+            )
+
+        self.get_logger().info(
+            f"AmbientSoundNode ready "
+            f"(sr={self._sr}, window={window_s}s, n_mels={self._n_mels})"
+        )
+
+    def _on_audio(self, msg: UInt8MultiArray) -> None:
+        samples = pcm16_bytes_to_float32(bytes(msg.data))
+        if not samples:
+            return
+        window = self._buffer.push(samples)
+        if window is not None:
+            self._classify_and_publish(window)
+
+    def _classify_and_publish(self, samples: List[float]) -> None:
+        try:
+            if _NUMPY:
+                feats = extract_features(samples, self._sr, self._n_fft, self._n_mels)
+            else:
+                # Numpy-free fallback: energy-only
+                rms = math.sqrt(sum(s * s for s in samples) / len(samples))
+                e_db = 20.0 * math.log10(max(rms, 1e-10))
+                feats = {
+                    "energy_db": e_db, "zcr": 0.05,
+                    "mel_centroid": 0.5, "mel_flatness": 0.2,
+                    "low_ratio": 0.4, "high_ratio": 0.2,
+                }
+            label = classify(feats, **self._thresholds)
+        except Exception as exc:
+            self.get_logger().error(f"Classification error: {exc}")
+            label = self._last_label
+
+        if label != self._last_label:
+            self.get_logger().info(
+                f"Ambient sound: {self._last_label} -> {label}"
+            )
+        self._last_label = label
+
+        msg = String()
+        msg.data = label
+        self._pub.publish(msg)
+
+
+def main(args: Optional[list] = None) -> None:
+    rclpy.init(args=args)
+    node = AmbientSoundNode()
+    try:
+        rclpy.spin(node)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        node.destroy_node()
+        rclpy.shutdown()
--- a/jetson/ros2_ws/src/saltybot_social/setup.py
+++ b/jetson/ros2_ws/src/saltybot_social/setup.py
@ -45,6 +45,8 @@ setup(
            'mesh_comms_node = saltybot_social.mesh_comms_node:main',
            # Energy+ZCR voice activity detection (Issue #242)
            'vad_node = saltybot_social.vad_node:main',
+            # Ambient sound classifier — mel-spectrogram (Issue #252)
+            'ambient_sound_node = saltybot_social.ambient_sound_node:main',
        ],
    },
 )
--- a/jetson/ros2_ws/src/saltybot_social/test/test_ambient_sound.py
+++ b/jetson/ros2_ws/src/saltybot_social/test/test_ambient_sound.py
@ -0,0 +1,407 @@
+"""test_ambient_sound.py -- Unit tests for Issue #252 ambient sound classifier."""
+
+from __future__ import annotations
+import importlib.util, math, os, struct, sys, types
+import pytest
+
+# numpy is available on dev machine
+import numpy as np
+
+
+def _pkg_root():
+    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _read_src(rel_path):
+    with open(os.path.join(_pkg_root(), rel_path)) as f:
+        return f.read()
+
+
+def _import_mod():
+    """Import ambient_sound_node without a live ROS2 environment."""
+    for mod_name in ("rclpy", "rclpy.node", "rclpy.qos",
+                     "std_msgs", "std_msgs.msg"):
+        if mod_name not in sys.modules:
+            sys.modules[mod_name] = types.ModuleType(mod_name)
+
+    rclpy_node = sys.modules["rclpy.node"]
+    rclpy_qos  = sys.modules["rclpy.qos"]
+    std_msg    = sys.modules["std_msgs.msg"]
+
+    DEFAULTS = {
+        "sample_rate": 16000, "window_s": 1.0, "n_fft": 512, "n_mels": 32,
+        "audio_topic": "/social/speech/audio_raw",
+        "silence_db": -40.0, "alarm_db_min": -25.0, "alarm_zcr_min": 0.12,
+        "alarm_high_ratio_min": 0.35, "speech_zcr_min": 0.02,
+        "speech_zcr_max": 0.25, "speech_flatness_max": 0.35,
+        "music_zcr_max": 0.08, "music_flatness_max": 0.25,
+        "crowd_zcr_min": 0.10, "crowd_flatness_min": 0.35,
+    }
+
+    class _Node:
+        def __init__(self, *a, **kw): pass
+        def declare_parameter(self, *a, **kw): pass
+        def get_parameter(self, name):
+            class _P:
+                value = DEFAULTS.get(name)
+            return _P()
+        def create_publisher(self, *a, **kw): return None
+        def create_subscription(self, *a, **kw): return None
+        def get_logger(self):
+            class _L:
+                def info(self, *a): pass
+                def warn(self, *a): pass
+                def error(self, *a): pass
+            return _L()
+        def destroy_node(self): pass
+
+    rclpy_node.Node         = _Node
+    rclpy_qos.QoSProfile    = type("QoSProfile", (), {"__init__": lambda s, **kw: None})
+    std_msg.String          = type("String",  (), {"data": ""})
+    std_msg.UInt8MultiArray = type("UInt8MultiArray", (), {"data": b""})
+    sys.modules["rclpy"].init     = lambda *a, **kw: None
+    sys.modules["rclpy"].spin     = lambda n: None
+    sys.modules["rclpy"].ok       = lambda: True
+    sys.modules["rclpy"].shutdown = lambda: None
+
+    spec = importlib.util.spec_from_file_location(
+        "ambient_sound_node_testmod",
+        os.path.join(_pkg_root(), "saltybot_social", "ambient_sound_node.py"),
+    )
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod
+
+
+# ── Audio helpers ─────────────────────────────────────────────────────────────
+
+SR = 16000
+
+def _sine(freq, n=SR, amp=0.2):
+    return [amp * math.sin(2 * math.pi * freq * i / SR) for i in range(n)]
+
+def _white_noise(n=SR, amp=0.1):
+    import random
+    rng = random.Random(42)
+    return [rng.uniform(-amp, amp) for _ in range(n)]
+
+def _silence(n=SR):
+    return [0.0] * n
+
+def _pcm16(samples):
+    ints = [max(-32768, min(32767, int(s * 32768))) for s in samples]
+    return struct.pack(f"<{len(ints)}h", *ints)
+
+
+# ── TestPcm16Convert ──────────────────────────────────────────────────────────
+
+class TestPcm16Convert:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def test_empty(self, mod):
+        assert mod.pcm16_bytes_to_float32(b"") == []
+
+    def test_length(self, mod):
+        data = _pcm16(_sine(440, 480))
+        assert len(mod.pcm16_bytes_to_float32(data)) == 480
+
+    def test_range(self, mod):
+        data = _pcm16(_sine(440, 480))
+        result = mod.pcm16_bytes_to_float32(data)
+        assert all(-1.0 <= s <= 1.0 for s in result)
+
+    def test_silence(self, mod):
+        data = _pcm16(_silence(100))
+        assert all(s == 0.0 for s in mod.pcm16_bytes_to_float32(data))
+
+
+# ── TestMelConversions ────────────────────────────────────────────────────────
+
+class TestMelConversions:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def test_hz_to_mel_zero(self, mod):
+        assert mod.hz_to_mel(0.0) == 0.0
+
+    def test_hz_to_mel_1000(self, mod):
+        # 1000 Hz → ~999.99 mel (approximately)
+        assert abs(mod.hz_to_mel(1000.0) - 999.99) < 1.0
+
+    def test_roundtrip(self, mod):
+        for hz in (100.0, 500.0, 1000.0, 4000.0, 8000.0):
+            assert abs(mod.mel_to_hz(mod.hz_to_mel(hz)) - hz) < 0.01
+
+    def test_monotone_increasing(self, mod):
+        freqs = [100, 500, 1000, 2000, 4000, 8000]
+        mels = [mod.hz_to_mel(f) for f in freqs]
+        assert mels == sorted(mels)
+
+
+# ── TestMelFilterbank ─────────────────────────────────────────────────────────
+
+class TestMelFilterbank:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def test_shape(self, mod):
+        fb = mod.build_mel_filterbank(SR, 512, 32)
+        assert fb.shape == (32, 257)   # (n_mels, n_fft//2+1)
+
+    def test_nonnegative(self, mod):
+        fb = mod.build_mel_filterbank(SR, 512, 32)
+        assert (fb >= 0).all()
+
+    def test_each_filter_sums_positive(self, mod):
+        fb = mod.build_mel_filterbank(SR, 512, 32)
+        assert all(fb[m].sum() > 0 for m in range(32))
+
+    def test_custom_n_mels(self, mod):
+        fb = mod.build_mel_filterbank(SR, 512, 16)
+        assert fb.shape[0] == 16
+
+    def test_max_value_leq_one(self, mod):
+        fb = mod.build_mel_filterbank(SR, 512, 32)
+        assert fb.max() <= 1.0 + 1e-6
+
+
+# ── TestMelSpectrogram ────────────────────────────────────────────────────────
+
+class TestMelSpectrogram:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def test_shape(self, mod):
+        s = _sine(440, SR)
+        spec = mod.compute_mel_spectrogram(s, SR, n_fft=512, n_mels=32, hop_length=256)
+        assert spec.shape[0] == 32
+        assert spec.shape[1] > 0
+
+    def test_silence_near_zero(self, mod):
+        spec = mod.compute_mel_spectrogram(_silence(SR), SR, n_fft=512, n_mels=32)
+        assert spec.mean() < 1e-6
+
+    def test_louder_has_higher_energy(self, mod):
+        quiet = mod.compute_mel_spectrogram(_sine(440, SR, amp=0.01), SR).mean()
+        loud  = mod.compute_mel_spectrogram(_sine(440, SR, amp=0.5),  SR).mean()
+        assert loud > quiet
+
+    def test_returns_array(self, mod):
+        spec = mod.compute_mel_spectrogram(_sine(440, SR), SR)
+        assert isinstance(spec, np.ndarray)
+
+
+# ── TestExtractFeatures ───────────────────────────────────────────────────────
+
+class TestExtractFeatures:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def _feats(self, mod, samples):
+        return mod.extract_features(samples, SR, n_fft=512, n_mels=32)
+
+    def test_keys_present(self, mod):
+        f = self._feats(mod, _sine(440, SR))
+        for k in ("energy_db", "zcr", "mel_centroid", "mel_flatness",
+                  "low_ratio", "high_ratio"):
+            assert k in f
+
+    def test_silence_low_energy(self, mod):
+        f = self._feats(mod, _silence(SR))
+        assert f["energy_db"] < -40.0
+
+    def test_silence_zero_zcr(self, mod):
+        f = self._feats(mod, _silence(SR))
+        assert f["zcr"] == 0.0
+
+    def test_sine_moderate_energy(self, mod):
+        f = self._feats(mod, _sine(440, SR, amp=0.1))
+        assert -40.0 < f["energy_db"] < 0.0
+
+    def test_ratios_sum_leq_one(self, mod):
+        f = self._feats(mod, _sine(440, SR))
+        assert f["low_ratio"] + f["high_ratio"] <= 1.0 + 1e-6
+
+    def test_ratios_nonnegative(self, mod):
+        f = self._feats(mod, _sine(440, SR))
+        assert f["low_ratio"] >= 0.0 and f["high_ratio"] >= 0.0
+
+    def test_flatness_in_unit_interval(self, mod):
+        f = self._feats(mod, _sine(440, SR))
+        assert 0.0 <= f["mel_flatness"] <= 1.0
+
+    def test_white_noise_high_flatness(self, mod):
+        f_noise = self._feats(mod, _white_noise(SR, amp=0.3))
+        f_sine  = self._feats(mod, _sine(440, SR, amp=0.3))
+        # White noise should have higher spectral flatness than a pure tone
+        assert f_noise["mel_flatness"] > f_sine["mel_flatness"]
+
+    def test_empty_samples(self, mod):
+        f = mod.extract_features([], SR)
+        assert f["energy_db"] == 0.0
+
+    def test_louder_higher_energy_db(self, mod):
+        quiet = self._feats(mod, _sine(440, SR, amp=0.01))["energy_db"]
+        loud  = self._feats(mod, _sine(440, SR, amp=0.5))["energy_db"]
+        assert loud > quiet
+
+
+# ── TestClassifier ────────────────────────────────────────────────────────────
+
+class TestClassifier:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def _cls(self, mod, **feat_overrides):
+        base = {"energy_db": -20.0, "zcr": 0.05,
+                "mel_centroid": 0.4, "mel_flatness": 0.2,
+                "low_ratio": 0.4, "high_ratio": 0.2}
+        base.update(feat_overrides)
+        return mod.classify(base)
+
+    def test_silence(self, mod):
+        assert self._cls(mod, energy_db=-45.0) == "silence"
+
+    def test_silence_at_threshold(self, mod):
+        assert self._cls(mod, energy_db=-40.0) != "silence"
+
+    def test_alarm(self, mod):
+        assert self._cls(mod, energy_db=-20.0, zcr=0.15, high_ratio=0.40) == "alarm"
+
+    def test_alarm_requires_high_ratio(self, mod):
+        result = self._cls(mod, energy_db=-20.0, zcr=0.15, high_ratio=0.10)
+        assert result != "alarm"
+
+    def test_speech(self, mod):
+        assert self._cls(mod, energy_db=-25.0, zcr=0.08,
+                         mel_flatness=0.20) == "speech"
+
+    def test_speech_zcr_too_low(self, mod):
+        result = self._cls(mod, energy_db=-25.0, zcr=0.005, mel_flatness=0.2)
+        assert result != "speech"
+
+    def test_speech_zcr_too_high(self, mod):
+        result = self._cls(mod, energy_db=-25.0, zcr=0.30, mel_flatness=0.2)
+        assert result != "speech"
+
+    def test_music(self, mod):
+        assert self._cls(mod, energy_db=-25.0, zcr=0.04,
+                         mel_flatness=0.10) == "music"
+
+    def test_crowd(self, mod):
+        assert self._cls(mod, energy_db=-25.0, zcr=0.15,
+                         mel_flatness=0.40) == "crowd"
+
+    def test_outdoor_catchall(self, mod):
+        # Moderate energy, mid ZCR, mid flatness → outdoor
+        result = self._cls(mod, energy_db=-35.0, zcr=0.06, mel_flatness=0.30)
+        assert result in mod.LABELS
+
+    def test_returns_valid_label(self, mod):
+        import random
+        rng = random.Random(0)
+        for _ in range(20):
+            f = {
+                "energy_db":    rng.uniform(-60, 0),
+                "zcr":          rng.uniform(0, 0.5),
+                "mel_centroid": rng.uniform(0, 1),
+                "mel_flatness": rng.uniform(0, 1),
+                "low_ratio":    rng.uniform(0, 0.6),
+                "high_ratio":   rng.uniform(0, 0.4),
+            }
+            assert mod.classify(f) in mod.LABELS
+
+
+# ── TestAudioBuffer ───────────────────────────────────────────────────────────
+
+class TestAudioBuffer:
+    @pytest.fixture(scope="class")
+    def mod(self): return _import_mod()
+
+    def test_no_window_until_full(self, mod):
+        buf = mod.AudioBuffer(window_samples=100)
+        assert buf.push([0.0] * 50) is None
+
+    def test_exact_fill_returns_window(self, mod):
+        buf = mod.AudioBuffer(window_samples=100)
+        w = buf.push([0.0] * 100)
+        assert w is not None and len(w) == 100
+
+    def test_overflow_carries_over(self, mod):
+        buf = mod.AudioBuffer(window_samples=100)
+        buf.push([0.0] * 100)         # fills first window
+        w2 = buf.push([1.0] * 100)   # fills second window
+        assert w2 is not None and len(w2) == 100
+
+    def test_partial_then_complete(self, mod):
+        buf = mod.AudioBuffer(window_samples=100)
+        buf.push([0.0] * 60)
+        w = buf.push([0.0] * 60)
+        assert w is not None and len(w) == 100
+
+    def test_clear_resets(self, mod):
+        buf = mod.AudioBuffer(window_samples=100)
+        buf.push([0.0] * 90)
+        buf.clear()
+        assert buf.push([0.0] * 90) is None
+
+    def test_window_contents_correct(self, mod):
+        buf = mod.AudioBuffer(window_samples=4)
+        w = buf.push([1.0, 2.0, 3.0, 4.0])
+        assert w == [1.0, 2.0, 3.0, 4.0]
+
+
+# ── TestNodeSrc ───────────────────────────────────────────────────────────────
+
+class TestNodeSrc:
+    @pytest.fixture(scope="class")
+    def src(self): return _read_src("saltybot_social/ambient_sound_node.py")
+
+    def test_class_defined(self, src):          assert "class AmbientSoundNode" in src
+    def test_audio_buffer(self, src):           assert "class AudioBuffer" in src
+    def test_extract_features(self, src):       assert "def extract_features" in src
+    def test_classify_fn(self, src):            assert "def classify" in src
+    def test_mel_spectrogram(self, src):        assert "compute_mel_spectrogram" in src
+    def test_mel_filterbank(self, src):         assert "build_mel_filterbank" in src
+    def test_hz_to_mel(self, src):              assert "hz_to_mel" in src
+    def test_labels_tuple(self, src):           assert "LABELS" in src
+    def test_all_labels(self, src):
+        for label in ("silence", "speech", "music", "crowd", "outdoor", "alarm"):
+            assert label in src
+    def test_topic_pub(self, src):              assert '"/saltybot/ambient_sound"' in src
+    def test_topic_sub(self, src):              assert '"/social/speech/audio_raw"' in src
+    def test_window_param(self, src):           assert '"window_s"' in src
+    def test_n_mels_param(self, src):           assert '"n_mels"' in src
+    def test_silence_param(self, src):          assert '"silence_db"' in src
+    def test_alarm_param(self, src):            assert '"alarm_db_min"' in src
+    def test_speech_param(self, src):           assert '"speech_zcr_min"' in src
+    def test_music_param(self, src):            assert '"music_zcr_max"' in src
+    def test_crowd_param(self, src):            assert '"crowd_zcr_min"' in src
+    def test_string_pub(self, src):             assert "String" in src
+    def test_uint8_sub(self, src):              assert "UInt8MultiArray" in src
+    def test_issue_tag(self, src):              assert "252" in src
+    def test_main(self, src):                   assert "def main" in src
+    def test_numpy_optional(self, src):         assert "_NUMPY" in src
+
+
+# ── TestConfig ────────────────────────────────────────────────────────────────
+
+class TestConfig:
+    @pytest.fixture(scope="class")
+    def cfg(self): return _read_src("config/ambient_sound_params.yaml")
+
+    @pytest.fixture(scope="class")
+    def setup(self): return _read_src("setup.py")
+
+    def test_node_name(self, cfg):        assert "ambient_sound_node:" in cfg
+    def test_window_s(self, cfg):         assert "window_s" in cfg
+    def test_n_mels(self, cfg):           assert "n_mels" in cfg
+    def test_silence_db(self, cfg):       assert "silence_db" in cfg
+    def test_alarm_params(self, cfg):     assert "alarm_db_min" in cfg
+    def test_speech_params(self, cfg):    assert "speech_zcr_min" in cfg
+    def test_music_params(self, cfg):     assert "music_zcr_max" in cfg
+    def test_crowd_params(self, cfg):     assert "crowd_zcr_min" in cfg
+    def test_defaults_present(self, cfg): assert "-40.0" in cfg and "0.12" in cfg
+    def test_entry_point(self, setup):
+        assert "ambient_sound_node = saltybot_social.ambient_sound_node:main" in setup