Compare commits


3 Commits

65ec1151f8 Merge pull request 'feat: face display bridge (Issue #394)' (#399) from sl-controls/issue-394-face-bridge into main
Some checks failed
social-bot integration tests / Lint (flake8 + pep257) (push) Failing after 4s
social-bot integration tests / Core integration tests (mock sensors, no GPU) (push) Has been skipped
social-bot integration tests / Latency profiling (GPU, Orin) (push) Has been cancelled
2026-03-04 13:11:01 -05:00
39258f465b feat: hey salty wake word template (Issue #393)
Some checks failed
social-bot integration tests / Lint (flake8 + pep257) (pull_request) Failing after 24s
social-bot integration tests / Core integration tests (mock sensors, no GPU) (pull_request) Has been skipped
social-bot integration tests / Latency profiling (GPU, Orin) (pull_request) Has been cancelled
Creates log-mel spectrogram template for 'hey salty' wake word detection
using synthetic speech generation. Template generated from 5 synthetic
audio samples with varying pitch to improve robustness.

- generate_wake_word_template.py: Script to synthesize and generate template
- hey_salty.npy: 40-band log-mel template (40, 61) shape
- wake_word_params.yaml: Updated template_path
- README.md: Documentation for template usage and retraining procedures

The template is used by wake_word_node.py via cosine similarity matching
against incoming audio. Configurable sensitivity via match_threshold.

Future work: Collect real training recordings to improve accuracy.
2026-03-04 12:44:36 -05:00
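The pitch-varied synthetic samples this commit describes can be sketched roughly as follows. This is an illustrative approximation only, not the actual `generate_wake_word_template.py`; the real script additionally runs each sample through the node's log-mel pipeline before averaging, and `synth_phrase` is a hypothetical name.

```python
import numpy as np

SR = 16000  # node expects 16 kHz mono PCM


def synth_phrase(f0: float, duration_s: float = 1.0) -> np.ndarray:
    """Crude sine-wave 'speech': a fundamental plus two decaying harmonics."""
    t = np.arange(int(SR * duration_s)) / SR
    sig = (np.sin(2 * np.pi * f0 * t)
           + 0.5 * np.sin(2 * np.pi * 2 * f0 * t)
           + 0.25 * np.sin(2 * np.pi * 3 * f0 * t))
    envelope = np.hanning(sig.size)           # soft attack/decay
    return (sig * envelope / np.abs(sig).max()).astype(np.float32)


# Vary the fundamental pitch across the five samples for robustness,
# as the commit message describes.
samples = [synth_phrase(f0) for f0 in (110.0, 130.0, 155.0, 185.0, 220.0)]
```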
82cb2bde79 feat: face display bridge node (Issue #394)
Implement ROS2 node bridging orchestrator state to face display expressions.
Maps /social/orchestrator/state and /saltybot/wake_word_detected to HTTP API.

Features:
- Subscribes to /social/orchestrator/state (JSON: IDLE, LISTENING, THINKING, SPEAKING, THROTTLED)
- Subscribes to /saltybot/wake_word_detected for immediate Alert response
- HTTP GET requests to face display server (configurable localhost:3000/face/{id})
- State to expression mapping:
  * IDLE → 0 (Tracking)
  * LISTENING → 1 (Alert)
  * THINKING → 3 (Searching)
  * SPEAKING → 4 (Social)
  * Wake word → 1 (Alert, immediate override)
- Publishes /face/state with JSON: {face_id, orchestrator_state, timestamp}
- Configurable face_server_url parameter
- Fallback to urllib if requests library unavailable

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-04 12:44:33 -05:00
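The state-to-expression mapping and the requests→urllib fallback this commit describes can be sketched as below. This is a minimal illustration, not the node's actual source; names such as `face_id_for_state` and `set_expression` are hypothetical.

```python
# Fallback to urllib if the requests library is unavailable, as in the commit.
try:
    import requests

    def http_get(url: str, timeout: float = 2.0) -> int:
        """Issue a GET via requests; return the HTTP status code."""
        return requests.get(url, timeout=timeout).status_code
except ImportError:
    import urllib.request

    def http_get(url: str, timeout: float = 2.0) -> int:
        """Standard-library fallback; return the HTTP status code."""
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return resp.status

# Orchestrator state -> face expression ID (mapping from the commit message).
STATE_TO_FACE_ID = {
    "IDLE": 0,        # Tracking
    "THROTTLED": 0,   # Tracking
    "LISTENING": 1,   # Alert (wake word also forces 1 immediately)
    "THINKING": 3,    # Searching
    "SPEAKING": 4,    # Social
}


def face_id_for_state(state: str) -> int:
    """Resolve an orchestrator state to a face ID; default to Tracking."""
    return STATE_TO_FACE_ID.get(state, 0)


def set_expression(state: str,
                   url_template: str = "http://localhost:3000/face/{id}") -> int:
    """Map a state to a face ID and notify the face server via HTTP GET."""
    return http_get(url_template.format(id=face_id_for_state(state)))
```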
9 changed files with 229 additions and 1 deletion

View File: config/face_bridge_params.yaml

@@ -0,0 +1,13 @@
face_bridge:
  # HTTP server endpoint for face display
  face_server_url: "http://localhost:3000/face/{id}"  # {id} replaced with expression ID
  # HTTP request settings
  http_timeout: 2.0      # Request timeout in seconds
  update_interval: 0.1   # Update check interval in seconds
  # State to expression mapping:
  #   0 = Tracking (IDLE, THROTTLED)
  #   1 = Alert (LISTENING, wake word)
  #   3 = Searching (THINKING)
  #   4 = Social (SPEAKING)

View File: launch/face_bridge.launch.py

@@ -0,0 +1,40 @@
"""Launch file for face display bridge node."""
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.substitutions import LaunchConfiguration
from launch.actions import DeclareLaunchArgument


def generate_launch_description():
    """Generate launch description."""
    # Declare arguments
    url_arg = DeclareLaunchArgument(
        "face_server_url",
        default_value="http://localhost:3000/face/{id}",
        description="Face display server HTTP endpoint",
    )
    timeout_arg = DeclareLaunchArgument(
        "http_timeout",
        default_value="2.0",
        description="HTTP request timeout in seconds",
    )
    # Create node
    face_bridge_node = Node(
        package="saltybot_face_bridge",
        executable="face_bridge_node",
        name="face_bridge",
        parameters=[
            {"face_server_url": LaunchConfiguration("face_server_url")},
            {"http_timeout": LaunchConfiguration("http_timeout")},
            {"update_interval": 0.1},
        ],
        output="screen",
    )
    return LaunchDescription([
        url_arg,
        timeout_arg,
        face_bridge_node,
    ])

View File: package.xml

@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>saltybot_face_bridge</name>
  <version>0.1.0</version>
  <description>
    Face display bridge node for orchestrator state to face expression mapping.
    Maps social/orchestrator state to the face display HTTP API.
  </description>
  <maintainer email="sl-controls@saltylab.local">sl-controls</maintainer>
  <license>MIT</license>

  <depend>rclpy</depend>
  <depend>std_msgs</depend>

  <buildtool_depend>ament_python</buildtool_depend>

  <test_depend>ament_copyright</test_depend>
  <test_depend>ament_flake8</test_depend>
  <test_depend>ament_pep257</test_depend>
  <test_depend>python3-pytest</test_depend>

  <export>
    <build_type>ament_python</build_type>
  </export>
</package>

View File: setup.cfg

@@ -0,0 +1,4 @@
[develop]
script-dir=$base/lib/saltybot_face_bridge
[egg_info]
tag_date = 0

View File: setup.py

@@ -0,0 +1,27 @@
from setuptools import setup

package_name = "saltybot_face_bridge"

setup(
    name=package_name,
    version="0.1.0",
    packages=[package_name],
    data_files=[
        ("share/ament_index/resource_index/packages",
         [f"resource/{package_name}"]),
        (f"share/{package_name}", ["package.xml"]),
        (f"share/{package_name}/launch", ["launch/face_bridge.launch.py"]),
        (f"share/{package_name}/config", ["config/face_bridge_params.yaml"]),
    ],
    install_requires=["setuptools"],
    zip_safe=True,
    maintainer="sl-controls",
    maintainer_email="sl-controls@saltylab.local",
    description="Face display bridge for orchestrator state mapping",
    license="MIT",
    tests_require=["pytest"],
    entry_points={
        "console_scripts": [
            "face_bridge_node = saltybot_face_bridge.face_bridge_node:main",
        ],
    },
)

View File: wake_word_params.yaml

@@ -13,7 +13,7 @@ wake_word_node:
   # Path to .npy template file (log-mel features of 'hey salty' recording).
   # Leave empty for passive mode (no detections fired).
-  template_path: ""  # e.g. "/opt/saltybot/models/hey_salty.npy"
+  template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy"  # Issue #393
   n_fft: 512   # FFT size for mel spectrogram
   n_mels: 40   # mel filterbank bands

View File: models/README.md

@@ -0,0 +1,118 @@
# SaltyBot Wake Word Models
## Current Model: hey_salty.npy
**Issue #393** — Custom OpenWakeWord model for "hey salty" wake phrase detection.
### Model Details
- **File**: `hey_salty.npy`
- **Type**: Log-mel spectrogram template (numpy array)
- **Shape**: `(40, 61)` — 40 mel bands, ~61 time frames
- **Generation Method**: Synthetic speech using sine-wave approximation
- **Integration**: Used by `wake_word_node.py` via cosine similarity matching
### How It Works
The `wake_word_node` subscribes to raw PCM-16 audio at 16 kHz mono and:
1. Maintains a sliding window of the last 1.5 seconds of audio
2. Extracts log-mel spectrogram features every 100 ms
3. Compares the log-mel features to this template via cosine similarity
4. Fires a detection event (`/saltybot/wake_word_detected → True`) when:
- **Energy gate**: RMS amplitude > threshold (default 0.02)
- **Match gate**: Cosine similarity > threshold (default 0.82)
5. Applies cooldown (default 2.0 s) to prevent rapid re-fires
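The two gates above can be sketched as follows. This is a simplified illustration that assumes the window and template log-mels share a shape; the node's actual implementation lives in `wake_word_node.py`.

```python
import numpy as np


def cosine_match(window_logmel: np.ndarray, template: np.ndarray) -> float:
    """Cosine similarity between flattened log-mel features and the template."""
    a = window_logmel.ravel()
    b = template.ravel()
    n = min(a.size, b.size)  # assumption: equal shapes in the real node
    a, b = a[:n], b[:n]
    denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
    return float(np.dot(a, b)) / denom if denom > 0 else 0.0


def should_fire(window_pcm: np.ndarray,
                window_logmel: np.ndarray,
                template: np.ndarray,
                energy_threshold: float = 0.02,
                match_threshold: float = 0.82) -> bool:
    """Apply both gates: RMS energy first, then template match."""
    rms = float(np.sqrt(np.mean(np.square(window_pcm))))
    if rms <= energy_threshold:  # energy gate
        return False
    return cosine_match(window_logmel, template) > match_threshold  # match gate
```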
### Configuration (wake_word_params.yaml)
```yaml
template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy"
energy_threshold: 0.02 # RMS gate
match_threshold: 0.82 # cosine-similarity threshold
cooldown_s: 2.0 # minimum gap between detections (s)
```
Adjust `match_threshold` to control sensitivity:
- **Lower** (e.g., 0.75) → more sensitive, higher false-positive rate
- **Higher** (e.g., 0.90) → less sensitive, more robust to noise
## Retraining with Real Recordings (Future)
To improve accuracy, follow these steps on a development machine:
### 1. Collect Training Data
Record 10–20 natural utterances of "hey salty" in varied conditions:
- Different speakers (male, female, child)
- Different background noise (quiet room, kitchen, outdoors)
- Different distances from microphone
```bash
# Using arecord (ALSA) on Jetson or Linux:
for i in {1..20}; do
  echo "Recording sample $i. Say 'hey salty'..."
  arecord -d 2 -r 16000 -f S16_LE -c 1 "hey_salty_${i}.wav"  # 2-second clip
done
```
### 2. Extract Templates from Training Data
Use the same DSP pipeline as `wake_word_node.py`:
```python
from glob import glob

import numpy as np
import scipy.io.wavfile

from wake_word_node import compute_log_mel

samples = []
for wav_file in glob("hey_salty_*.wav"):
    sr, data = scipy.io.wavfile.read(wav_file)
    # Resample to 16 kHz first if sr != 16000
    float_data = data / 32768.0  # convert PCM-16 to [-1, 1]
    log_mel = compute_log_mel(float_data, sr=16000, n_fft=512, n_mels=40)
    samples.append(log_mel)

# Pad to the same length, then average
max_len = max(m.shape[1] for m in samples)
padded = [np.pad(m, ((0, 0), (0, max_len - m.shape[1])), mode="edge")
          for m in samples]
template = np.mean(padded, axis=0).astype(np.float32)
np.save("hey_salty.npy", template)
```
### 3. Test and Tune
1. Replace the current template with your new one
2. Test with `wake_word_node` in real environment
3. Adjust `match_threshold` in `wake_word_params.yaml` to find the sweet spot
4. Collect false-positive and false-negative cases; add them to the training set
5. Retrain
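For step 3, one way to find that sweet spot is to sweep candidate thresholds over cosine-similarity scores collected from labeled positive and negative clips. `sweep_thresholds` below is a hypothetical helper, not part of the repo.

```python
import numpy as np


def sweep_thresholds(scores_positive, scores_negative, thresholds):
    """Return (threshold, true-positive rate, false-positive rate) rows.

    scores_* are cosine similarities measured on labeled clips; pick the
    threshold with the best TPR/FPR trade-off for your environment.
    """
    rows = []
    for t in thresholds:
        tpr = float(np.mean([s > t for s in scores_positive]))
        fpr = float(np.mean([s > t for s in scores_negative]))
        rows.append((t, tpr, fpr))
    return rows
```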
### 4. Version Control
Once satisfied, replace `models/hey_salty.npy` and commit:
```bash
git add jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy
git commit -m "refactor: hey salty template with real training data (v2)"
```
## Files
- `generate_wake_word_template.py` — Script to synthesize and generate template
- `hey_salty.npy` — Current template (generated from synthetic speech)
- `README.md` — This file
## References
- `wake_word_node.py` — Wake word detection node (cosine similarity, energy gating)
- `wake_word_params.yaml` — Detection parameters
- `test_wake_word.py` — Unit tests for DSP pipeline
## Future Improvements
- [ ] Collect real user recordings
- [ ] Fine-tune with multiple speakers/environments
- [ ] Evaluate false-positive rate
- [ ] Consider speaker-adaptive templates (per user)
- [ ] Explore end-to-end learned models (TinyWakeWord, etc.)