diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/config/face_bridge_params.yaml b/jetson/ros2_ws/src/saltybot_face_bridge/config/face_bridge_params.yaml new file mode 100644 index 0000000..8fc3a7b --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_face_bridge/config/face_bridge_params.yaml @@ -0,0 +1,13 @@ +face_bridge: + # HTTP server endpoint for face display + face_server_url: "http://localhost:3000/face/{id}" # {id} replaced with expression ID + + # HTTP request settings + http_timeout: 2.0 # Request timeout in seconds + update_interval: 0.1 # Update check interval in seconds + + # State to expression mapping: + # 0 = Tracking (IDLE, THROTTLED) + # 1 = Alert (LISTENING, wake word) + # 3 = Searching (THINKING) + # 4 = Social (SPEAKING) diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/launch/face_bridge.launch.py b/jetson/ros2_ws/src/saltybot_face_bridge/launch/face_bridge.launch.py new file mode 100644 index 0000000..69681d1 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_face_bridge/launch/face_bridge.launch.py @@ -0,0 +1,40 @@ +"""Launch file for face display bridge node.""" + +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.substitutions import LaunchConfiguration +from launch.actions import DeclareLaunchArgument + + +def generate_launch_description(): + """Generate launch description.""" + # Declare arguments + url_arg = DeclareLaunchArgument( + "face_server_url", + default_value="http://localhost:3000/face/{id}", + description="Face display server HTTP endpoint" + ) + timeout_arg = DeclareLaunchArgument( + "http_timeout", + default_value="2.0", + description="HTTP request timeout in seconds" + ) + + # Create node + face_bridge_node = Node( + package="saltybot_face_bridge", + executable="face_bridge_node", + name="face_bridge", + parameters=[ + {"face_server_url": LaunchConfiguration("face_server_url")}, + {"http_timeout": LaunchConfiguration("http_timeout")}, + {"update_interval": 0.1}, + ], + output="screen", + ) 
+ + return LaunchDescription([ + url_arg, + timeout_arg, + face_bridge_node, + ]) diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/package.xml b/jetson/ros2_ws/src/saltybot_face_bridge/package.xml new file mode 100644 index 0000000..f9504d3 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_face_bridge/package.xml @@ -0,0 +1,26 @@ + + + + saltybot_face_bridge + 0.1.0 + + Face display bridge node for orchestrator state to face expression mapping. + Maps social/orchestrator state to face display WebSocket API. + + sl-controls + MIT + + rclpy + std_msgs + + ament_python + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/resource/saltybot_face_bridge b/jetson/ros2_ws/src/saltybot_face_bridge/resource/saltybot_face_bridge new file mode 100644 index 0000000..e69de29 diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/saltybot_face_bridge/__init__.py b/jetson/ros2_ws/src/saltybot_face_bridge/saltybot_face_bridge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/setup.cfg b/jetson/ros2_ws/src/saltybot_face_bridge/setup.cfg new file mode 100644 index 0000000..2923e00 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_face_bridge/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script-dir=$base/lib/saltybot_face_bridge +[egg_info] +tag_date = 0 diff --git a/jetson/ros2_ws/src/saltybot_face_bridge/setup.py b/jetson/ros2_ws/src/saltybot_face_bridge/setup.py new file mode 100644 index 0000000..e297bf9 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_face_bridge/setup.py @@ -0,0 +1,27 @@ +from setuptools import setup + +package_name = "saltybot_face_bridge" + +setup( + name=package_name, + version="0.1.0", + packages=[package_name], + data_files=[ + ("share/ament_index/resource_index/packages", [f"resource/{package_name}"]), + (f"share/{package_name}", ["package.xml"]), + (f"share/{package_name}/launch", ["launch/face_bridge.launch.py"]), + 
(f"share/{package_name}/config", ["config/face_bridge_params.yaml"]), + ], + install_requires=["setuptools"], + zip_safe=True, + maintainer="sl-controls", + maintainer_email="sl-controls@saltylab.local", + description="Face display bridge for orchestrator state mapping", + license="MIT", + tests_require=["pytest"], + entry_points={ + "console_scripts": [ + "face_bridge_node = saltybot_face_bridge.face_bridge_node:main", + ], + }, +) diff --git a/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml b/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml index d3c5032..c49644f 100644 --- a/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml +++ b/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml @@ -13,7 +13,7 @@ wake_word_node: # Path to .npy template file (log-mel features of 'hey salty' recording). # Leave empty for passive mode (no detections fired). - template_path: "" # e.g. "/opt/saltybot/models/hey_salty.npy" + template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy" # Issue #393 n_fft: 512 # FFT size for mel spectrogram n_mels: 40 # mel filterbank bands diff --git a/jetson/ros2_ws/src/saltybot_social/models/README.md b/jetson/ros2_ws/src/saltybot_social/models/README.md new file mode 100644 index 0000000..243391e --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/models/README.md @@ -0,0 +1,118 @@ +# SaltyBot Wake Word Models + +## Current Model: hey_salty.npy + +**Issue #393** — Custom OpenWakeWord model for "hey salty" wake phrase detection. + +### Model Details + +- **File**: `hey_salty.npy` +- **Type**: Log-mel spectrogram template (numpy array) +- **Shape**: `(40, 61)` — 40 mel bands, ~61 time frames +- **Generation Method**: Synthetic speech using sine-wave approximation +- **Integration**: Used by `wake_word_node.py` via cosine similarity matching + +### How It Works + +The `wake_word_node` subscribes to raw PCM-16 audio at 16 kHz mono and: + +1. 
Maintains a sliding window of the last 1.5 seconds of audio +2. Extracts log-mel spectrogram features every 100 ms +3. Compares the log-mel features to this template via cosine similarity +4. Fires a detection event (`/saltybot/wake_word_detected → True`) when: + - **Energy gate**: RMS amplitude > threshold (default 0.02) + - **Match gate**: Cosine similarity > threshold (default 0.82) +5. Applies cooldown (default 2.0 s) to prevent rapid re-fires + +### Configuration (wake_word_params.yaml) + +```yaml +template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy" +energy_threshold: 0.02 # RMS gate +match_threshold: 0.82 # cosine-similarity threshold +cooldown_s: 2.0 # minimum gap between detections (s) +``` + +Adjust `match_threshold` to control sensitivity: +- **Lower** (e.g., 0.75) → more sensitive, higher false-positive rate +- **Higher** (e.g., 0.90) → less sensitive, more robust to noise + +## Retraining with Real Recordings (Future) + +To improve accuracy, follow these steps on a development machine: + +### 1. Collect Training Data + +Record 10–20 natural utterances of "hey salty" in varied conditions: +- Different speakers (male, female, child) +- Different background noise (quiet room, kitchen, outdoor) +- Different distances from microphone + +```bash +# Using arecord (ALSA) on Jetson or Linux: +for i in {1..20}; do + echo "Recording sample $i. Say 'hey salty'..." + arecord -r 16000 -f S16_LE -c 1 -d 3 "hey_salty_${i}.wav" +done +``` + +### 2.
Extract Templates from Training Data + +Use the same DSP pipeline as `wake_word_node.py`: + +```python +import glob, numpy as np, scipy.io.wavfile +from wake_word_node import compute_log_mel + +samples = [] +for wav_file in glob.glob("hey_salty_*.wav"): + sr, data = scipy.io.wavfile.read(wav_file) + # Resample to 16kHz if needed + float_data = data / 32768.0 # convert PCM-16 to [-1, 1] + log_mel = compute_log_mel(float_data, sr=16000, n_fft=512, n_mels=40) + samples.append(log_mel) + +# Pad to same length, average +max_len = max(m.shape[1] for m in samples) +padded = [np.pad(m, ((0, 0), (0, max_len - m.shape[1])), mode='edge') + for m in samples] +template = np.mean(padded, axis=0).astype(np.float32) +np.save("hey_salty.npy", template) +``` + +### 3. Test and Tune + +1. Replace the current template with your new one +2. Test with `wake_word_node` in real environment +3. Adjust `match_threshold` in `wake_word_params.yaml` to find the sweet spot +4. Collect false-positive and false-negative cases; add them to training set +5. Retrain + +### 4. Version Control + +Once satisfied, replace `models/hey_salty.npy` and commit: + +```bash +git add jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy +git commit -m "refactor: hey salty template with real training data (v2)" +``` + +## Files + +- `generate_wake_word_template.py` — Script to synthesize and generate template +- `hey_salty.npy` — Current template (generated from synthetic speech) +- `README.md` — This file + +## References + +- `wake_word_node.py` — Wake word detection node (cosine similarity, energy gating) +- `wake_word_params.yaml` — Detection parameters +- `test_wake_word.py` — Unit tests for DSP pipeline + +## Future Improvements + +- [ ] Collect real user recordings +- [ ] Fine-tune with multiple speakers/environments +- [ ] Evaluate false-positive rate +- [ ] Consider speaker-adaptive templates (per user) +- [ ] Explore end-to-end learned models (TinyWakeWord, etc.)