From 82cb2bde79059ec7cd68430d8b51e613df8113d2 Mon Sep 17 00:00:00 2001 From: sl-controls Date: Wed, 4 Mar 2026 12:44:33 -0500 Subject: [PATCH 1/2] feat: face display bridge node (Issue #394) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement ROS2 node bridging orchestrator state to face display expressions. Maps /social/orchestrator/state and /saltybot/wake_word_detected to HTTP API. Features: - Subscribes to /social/orchestrator/state (JSON: IDLE, LISTENING, THINKING, SPEAKING, THROTTLED) - Subscribes to /saltybot/wake_word_detected for immediate Alert response - HTTP GET requests to face display server (configurable localhost:3000/face/{id}) - State to expression mapping: * IDLE → 0 (Tracking) * LISTENING → 1 (Alert) * THINKING → 3 (Searching) * SPEAKING → 4 (Social) * Wake word → 1 (Alert, immediate override) - Publishes /face/state with JSON: {face_id, orchestrator_state, timestamp} - Configurable face_server_url parameter - Fallback to urllib if requests library unavailable Co-Authored-By: Claude Haiku 4.5 --- .../config/face_bridge_params.yaml | 13 ++ .../launch/face_bridge.launch.py | 40 ++++ .../src/saltybot_face_bridge/package.xml | 26 +++ .../resource/saltybot_face_bridge | 0 .../saltybot_face_bridge/__init__.py | 0 .../saltybot_face_bridge/face_bridge_node.py | 186 ++++++++++++++++++ .../src/saltybot_face_bridge/setup.cfg | 4 + .../ros2_ws/src/saltybot_face_bridge/setup.py | 27 +++ 8 files changed, 296 insertions(+) create mode 100644 jetson/ros2_ws/src/saltybot_face_bridge/config/face_bridge_params.yaml create mode 100644 jetson/ros2_ws/src/saltybot_face_bridge/launch/face_bridge.launch.py create mode 100644 jetson/ros2_ws/src/saltybot_face_bridge/package.xml create mode 100644 jetson/ros2_ws/src/saltybot_face_bridge/resource/saltybot_face_bridge create mode 100644 jetson/ros2_ws/src/saltybot_face_bridge/saltybot_face_bridge/__init__.py create mode 100644 
def generate_launch_description():
    """Build the launch description for the face display bridge node.

    Launch arguments (all optional):
        face_server_url: HTTP endpoint of the face display server; the
            ``{id}`` placeholder is replaced by the node with the expression ID.
        http_timeout: HTTP request timeout in seconds.
        update_interval: Period of the node's update timer in seconds.

    Returns:
        A ``LaunchDescription`` holding the argument declarations and the
        ``face_bridge`` node.
    """
    # Local import: launch_ros is already a dependency of this launch file.
    from launch_ros.parameter_descriptions import ParameterValue

    # Declare arguments
    url_arg = DeclareLaunchArgument(
        "face_server_url",
        default_value="http://localhost:3000/face/{id}",
        description="Face display server HTTP endpoint",
    )
    timeout_arg = DeclareLaunchArgument(
        "http_timeout",
        default_value="2.0",
        description="HTTP request timeout in seconds",
    )
    interval_arg = DeclareLaunchArgument(
        "update_interval",
        default_value="0.1",
        description="Face update check interval in seconds",
    )

    # Launch arguments arrive as strings. Pinning the parameter type keeps a
    # value such as "2" from being inferred as an integer, which would not
    # match the floating-point default the node declares for it.
    face_bridge_node = Node(
        package="saltybot_face_bridge",
        executable="face_bridge_node",
        name="face_bridge",
        parameters=[
            {
                "face_server_url": ParameterValue(
                    LaunchConfiguration("face_server_url"), value_type=str
                ),
            },
            {
                "http_timeout": ParameterValue(
                    LaunchConfiguration("http_timeout"), value_type=float
                ),
            },
            {
                "update_interval": ParameterValue(
                    LaunchConfiguration("update_interval"), value_type=float
                ),
            },
        ],
        output="screen",
    )

    return LaunchDescription([
        url_arg,
        timeout_arg,
        interval_arg,
        face_bridge_node,
    ])
class FaceDisplayBridge(Node):
    """Bridge orchestrator state to face display expressions.

    Subscribes to ``/social/orchestrator/state`` (a JSON string carrying a
    ``state`` key) and ``/saltybot/wake_word_detected`` (Bool). A timer firing
    every ``update_interval`` seconds resolves the target expression ID and,
    when it differs from the last ID that was sent successfully, issues an
    HTTP GET to ``face_server_url`` with ``{id}`` replaced by that ID. Each
    successful change is published as JSON on ``/face/state``.
    """

    # State to face expression ID mapping
    STATE_TO_FACE_ID = {
        "IDLE": 0,       # Tracking
        "LISTENING": 1,  # Alert
        "THINKING": 3,   # Searching
        "SPEAKING": 4,   # Social
        "THROTTLED": 0,  # Fallback to Tracking
    }

    # Expression shown for one update cycle after a wake word (Alert).
    WAKE_WORD_FACE_ID = 1
    # Expression used when the current state has no mapping (Tracking).
    DEFAULT_FACE_ID = 0
    # Minimum gap between repeated "failed to update" log lines. A failed
    # send is retried on every timer tick, so an unthrottled log would be
    # emitted at the timer rate for as long as the server is unreachable.
    ERROR_LOG_THROTTLE_SEC = 5.0

    def __init__(self):
        """Declare parameters, pick an HTTP client and wire up ROS entities."""
        super().__init__("face_bridge")

        # Parameters. The default URL must contain the "{id}" placeholder:
        # _send_face_command() substitutes the expression ID into it, and a
        # URL without the placeholder would request the same face every time.
        self.declare_parameter("face_server_url", "http://localhost:3000/face/{id}")
        self.declare_parameter("http_timeout", 2.0)
        self.declare_parameter("update_interval", 0.1)

        self.face_server_url = self.get_parameter("face_server_url").value
        self.http_timeout = self.get_parameter("http_timeout").value
        self.update_interval = self.get_parameter("update_interval").value

        if "{id}" not in str(self.face_server_url):
            self.get_logger().warning(
                "face_server_url has no '{id}' placeholder; every expression "
                f"will request the same URL: {self.face_server_url}"
            )

        # Try to import requests, fallback to urllib if unavailable
        try:
            import requests
            self.requests = requests
            self.use_requests = True
        except ImportError:
            import urllib.request
            import urllib.error
            self.urllib = urllib.request
            self.urllib_error = urllib.error
            self.use_requests = False

        # State
        self.current_state = "IDLE"
        self.current_face_id = 0
        self.wake_word_active = False
        self.last_update_time = time.time()
        # Guards current_state and wake_word_active.
        self.state_lock = threading.Lock()

        # Subscriptions
        self.create_subscription(
            String, "/social/orchestrator/state", self._on_state_update, 10)
        self.create_subscription(
            Bool, "/saltybot/wake_word_detected", self._on_wake_word, 10)

        # Publishers
        self.pub_state = self.create_publisher(String, "/face/state", 10)

        # Timer for update loop
        self.create_timer(self.update_interval, self._update_face)

        self.get_logger().info(
            f"Face bridge initialized: face_server_url={self.face_server_url}"
        )

    def _on_state_update(self, msg: String) -> None:
        """Handle an orchestrator state update.

        Expects ``msg.data`` to be a JSON object with a string ``state`` key
        (defaulting to ``"IDLE"`` when the key is absent). Malformed payloads
        and unknown states are logged and ignored; the stored state is only
        replaced by a state present in ``STATE_TO_FACE_ID``.
        """
        try:
            data = json.loads(msg.data)
        except json.JSONDecodeError:
            self.get_logger().error(f"Invalid JSON in state update: {msg.data}")
            return

        # Valid JSON is not necessarily an object ("IDLE", 3, [..] all parse);
        # reject those here instead of failing on .get() below.
        if not isinstance(data, dict):
            self.get_logger().error(
                f"State update is not a JSON object: {msg.data}")
            return

        raw_state = data.get("state", "IDLE")
        if not isinstance(raw_state, str):
            self.get_logger().warning(f"Unknown state: {raw_state}")
            return

        new_state = raw_state.upper()
        if new_state not in self.STATE_TO_FACE_ID:
            self.get_logger().warning(f"Unknown state: {new_state}")
            return

        with self.state_lock:
            self.current_state = new_state
        self.get_logger().debug(f"State updated: {new_state}")

    def _on_wake_word(self, msg: Bool) -> None:
        """Handle wake word detection - arm a one-shot switch to Alert."""
        if msg.data:
            with self.state_lock:
                self.wake_word_active = True
            self.get_logger().info("Wake word detected - switching to Alert")

    def _get_face_id(self) -> int:
        """Return the expression ID to show now.

        A pending wake word takes priority over the orchestrator state and is
        consumed by this call, so it influences exactly one update.
        """
        with self.state_lock:
            if self.wake_word_active:
                face_id = self.WAKE_WORD_FACE_ID
                # Clear wake word after one update
                self.wake_word_active = False
            else:
                face_id = self.STATE_TO_FACE_ID.get(
                    self.current_state, self.DEFAULT_FACE_ID)

        return face_id

    def _send_face_command(self, face_id: int) -> bool:
        """Send a face expression command to the display server.

        Args:
            face_id: Expression ID substituted for ``{id}`` in the URL.

        Returns:
            True if the server answered with HTTP 200, False on any other
            status or on any error (which is logged, throttled).
        """
        try:
            url = self.face_server_url.format(id=face_id)
            if self.use_requests:
                response = self.requests.get(url, timeout=self.http_timeout)
                return response.status_code == 200

            req = self.urllib.Request(url)
            with self.urllib.urlopen(req, timeout=self.http_timeout) as response:
                return response.status == 200
        except Exception as e:
            # Best effort: the display being unreachable must not stop the
            # node; the caller retries on the next timer tick.
            self.get_logger().error(
                f"Failed to update face display: {e}",
                throttle_duration_sec=self.ERROR_LOG_THROTTLE_SEC,
            )
            return False

    def _update_face(self) -> None:
        """Timer callback: push the expression to the server when it changed."""
        face_id = self._get_face_id()

        # Only send if changed
        if face_id == self.current_face_id:
            return
        if not self._send_face_command(face_id):
            return

        self.current_face_id = face_id
        self.last_update_time = time.time()

        # Snapshot the state under the lock, publish outside of it.
        with self.state_lock:
            orchestrator_state = self.current_state

        state_msg = String(
            data=json.dumps({
                "face_id": face_id,
                "orchestrator_state": orchestrator_state,
                "timestamp": self.last_update_time,
            })
        )
        self.pub_state.publish(state_msg)
        self.get_logger().debug(f"Face updated: {face_id}")


def main(args=None):
    """Initialise rclpy, spin the bridge node until interrupted, clean up."""
    rclpy.init(args=args)
    node = FaceDisplayBridge()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        # The context may already have been shut down (e.g. by the signal
        # handler on Ctrl-C); shutting it down twice raises.
        if rclpy.ok():
            rclpy.shutdown()
"saltybot_face_bridge" + +setup( + name=package_name, + version="0.1.0", + packages=[package_name], + data_files=[ + ("share/ament_index/resource_index/packages", [f"resource/{package_name}"]), + (f"share/{package_name}", ["package.xml"]), + (f"share/{package_name}/launch", ["launch/face_bridge.launch.py"]), + (f"share/{package_name}/config", ["config/face_bridge_params.yaml"]), + ], + install_requires=["setuptools"], + zip_safe=True, + maintainer="sl-controls", + maintainer_email="sl-controls@saltylab.local", + description="Face display bridge for orchestrator state mapping", + license="MIT", + tests_require=["pytest"], + entry_points={ + "console_scripts": [ + "face_bridge_node = saltybot_face_bridge.face_bridge_node:main", + ], + }, +) -- 2.47.2 From 39258f465b332941d690dd71b2b324d4abdf8d35 Mon Sep 17 00:00:00 2001 From: sl-perception Date: Wed, 4 Mar 2026 12:44:36 -0500 Subject: [PATCH 2/2] feat: hey salty wake word template (Issue #393) Creates log-mel spectrogram template for 'hey salty' wake word detection using synthetic speech generation. Template generated from 5 synthetic audio samples with varying pitch to improve robustness. - generate_wake_word_template.py: Script to synthesize and generate template - hey_salty.npy: 40-band log-mel template (40, 61) shape - wake_word_params.yaml: Updated template_path - README.md: Documentation for template usage and retraining procedures The template is used by wake_word_node.py via cosine similarity matching against incoming audio. Configurable sensitivity via match_threshold. Future work: Collect real training recordings to improve accuracy. 
--- .../config/wake_word_params.yaml | 2 +- .../src/saltybot_social/models/README.md | 118 +++++++++++ .../src/saltybot_social/models/hey_salty.npy | Bin 0 -> 9888 bytes .../scripts/generate_wake_word_template.py | 200 ++++++++++++++++++ 4 files changed, 319 insertions(+), 1 deletion(-) create mode 100644 jetson/ros2_ws/src/saltybot_social/models/README.md create mode 100644 jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy create mode 100644 jetson/ros2_ws/src/saltybot_social/scripts/generate_wake_word_template.py diff --git a/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml b/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml index d3c5032..c49644f 100644 --- a/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml +++ b/jetson/ros2_ws/src/saltybot_social/config/wake_word_params.yaml @@ -13,7 +13,7 @@ wake_word_node: # Path to .npy template file (log-mel features of 'hey salty' recording). # Leave empty for passive mode (no detections fired). - template_path: "" # e.g. "/opt/saltybot/models/hey_salty.npy" + template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy" # Issue #393 n_fft: 512 # FFT size for mel spectrogram n_mels: 40 # mel filterbank bands diff --git a/jetson/ros2_ws/src/saltybot_social/models/README.md b/jetson/ros2_ws/src/saltybot_social/models/README.md new file mode 100644 index 0000000..243391e --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/models/README.md @@ -0,0 +1,118 @@ +# SaltyBot Wake Word Models + +## Current Model: hey_salty.npy + +**Issue #393** — Custom wake word template for "hey salty" wake phrase detection (a log-mel spectrogram template matched by cosine similarity, not an OpenWakeWord neural model). 
+ +### Model Details + +- **File**: `hey_salty.npy` +- **Type**: Log-mel spectrogram template (numpy array) +- **Shape**: `(40, 61)` — 40 mel bands, ~61 time frames +- **Generation Method**: Synthetic speech using sine-wave approximation +- **Integration**: Used by `wake_word_node.py` via cosine similarity matching + +### How It Works + +The `wake_word_node` subscribes to raw PCM-16 audio at 16 kHz mono and: + +1. Maintains a sliding window of the last 1.5 seconds of audio +2. Extracts log-mel spectrogram features every 100 ms +3. Compares the log-mel features to this template via cosine similarity +4. Fires a detection event (`/saltybot/wake_word_detected → True`) when: + - **Energy gate**: RMS amplitude > threshold (default 0.02) + - **Match gate**: Cosine similarity > threshold (default 0.82) +5. Applies cooldown (default 2.0 s) to prevent rapid re-fires + +### Configuration (wake_word_params.yaml) + +```yaml +template_path: "jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy" +energy_threshold: 0.02 # RMS gate +match_threshold: 0.82 # cosine-similarity threshold +cooldown_s: 2.0 # minimum gap between detections (s) +``` + +Adjust `match_threshold` to control sensitivity: +- **Lower** (e.g., 0.75) → more sensitive, higher false-positive rate +- **Higher** (e.g., 0.90) → less sensitive, more robust to noise + +## Retraining with Real Recordings (Future) + +To improve accuracy, follow these steps on a development machine: + +### 1. Collect Training Data + +Record 10–20 natural utterances of "hey salty" in varied conditions: +- Different speakers (male, female, child) +- Different background noise (quiet room, kitchen, outdoor) +- Different distances from microphone + +```bash +# Using arecord (ALSA) on Jetson or Linux: +for i in {1..20}; do + echo "Recording sample $i. Say 'hey salty'..." + arecord -r 16000 -f S16_LE -c 1 "hey_salty_${i}.wav" +done +``` + +### 2. 
Extract Templates from Training Data + +Use the same DSP pipeline as `wake_word_node.py`: + +```python +from glob import glob + +import numpy as np +import scipy.io.wavfile +from wake_word_node import compute_log_mel + +samples = [] +for wav_file in glob("hey_salty_*.wav"): + sr, data = scipy.io.wavfile.read(wav_file) + # Resample to 16kHz if needed + float_data = data / 32768.0 # convert PCM-16 to [-1, 1] + log_mel = compute_log_mel(float_data, sr=16000, n_fft=512, n_mels=40) + samples.append(log_mel) + +# Pad to same length, average +max_len = max(m.shape[1] for m in samples) +padded = [np.pad(m, ((0, 0), (0, max_len - m.shape[1])), mode='edge') + for m in samples] +template = np.mean(padded, axis=0).astype(np.float32) +np.save("hey_salty.npy", template) +``` + +### 3. Test and Tune + +1. Replace the current template with your new one +2. Test with `wake_word_node` in real environment +3. Adjust `match_threshold` in `wake_word_params.yaml` to find the sweet spot +4. Collect false-positive and false-negative cases; add them to training set +5. Retrain + +### 4. Version Control + +Once satisfied, replace `models/hey_salty.npy` and commit: + +```bash +git add jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy +git commit -m "refactor: hey salty template with real training data (v2)" +``` + +## Files + +- `generate_wake_word_template.py` — Script to synthesize and generate template +- `hey_salty.npy` — Current template (generated from synthetic speech) +- `README.md` — This file + +## References + +- `wake_word_node.py` — Wake word detection node (cosine similarity, energy gating) +- `wake_word_params.yaml` — Detection parameters +- `test_wake_word.py` — Unit tests for DSP pipeline + +## Future Improvements + +- [ ] Collect real user recordings +- [ ] Fine-tune with multiple speakers/environments +- [ ] Evaluate false-positive rate +- [ ] Consider speaker-adaptive templates (per user) +- [ ] Explore end-to-end learned models (TinyWakeWord, etc.) 
diff --git a/jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy b/jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy new file mode 100644 index 0000000000000000000000000000000000000000..2b34efc36d4adb07c0724ff659b96263e1dde742 GIT binary patch literal 9888 zcmbW6cTiO6*7Xt0S#S&}iZWu(hzaC03L*#+l$?{~EFuUJBnOcspdco6Fz1LoKLh44 zqNroeIgE@sy=T6v`&PYm@7zDWt~%A#U0q#gJ$vo7*IuX4Vvf0`Lo=mlrMQ9KK9OD# z11D(?)brIDI81Y(Z)ikRghz;5XoR=V-`^X11V#GD-$(j+g!#yy2kDF&rl~u6@G#AI z&Hv9=+p(W_(eC?t?%!FC$*O#=OiRP1WC5KQ`mt_+1Ivz@F<+8!xunY2 z2GOkDU^agoMwOl}=@*SSzGEi02TkVLFC!VVX9!hc15n>O6s1*zxwL8|MaN9(rZSlL z+DYtRt4+9XU!r%bV$!uGd%87a1mDFu<+tLI*B|1Q|2ffb_*oIAdO_TBIxbu%?h}Is z?iMW;)(Y=~$HlN=h{3xKiPW=4#oMQsMYoCfMWD?C(YW!6c-QBPIPs%3`x8}})x8_) zdo?f_HI$N0lX)_EIvrM9u;9-vwEeh>nCrzvmS>TzpNLj^Fy{081L4HAK!X#J4TbeTYEFW zus@x5P3Cc74@{!8@EYEkyQ^DqsqI(sG4KN7aKS~< zHsgWVGVU)ib$&BSZQG(y?}nef76GG1vMFH_tqiNNFfOCZpj?JNOTwmWD1rT*@l!HG zZ~J807>%IpRDagn_GF2QDz?o!khGvJy`Q#a;lqw>yx5&;BW?a%KOEyD8u*>*LjUs} zcyzrbbuT*eVOSf+P3uURU0-Z=eiu4{%0&NhUu;RfC?b#43hn9|F`{X?=v|yAhFne( zrj~)CR@Y7(KV~A{9lcgu>$h8YnAVHe!PkX}?;CNdQU&$no%qeHH#@JC;dVWTF9#A) z`w+nCNwz4ArgFT`7!Lp5pMbf)VE?)e!3&k}vU?}qT09joCJ#im<3llE&P!o_;fGkT zxf50{Jt$Xi!KXv-#K99U#kHUYk*3^?_uo#649`=-y23~n14IY1ZI;D1m?v!Ja+F|yo)V=!|TDPfmSKXMdAM4uJ_7!bM zPZvtyoRFdz1|9=$X<#}ot}y_zjjmnGEG(CSI|Lmvr}t@!=Yx19V6dM z`=&jTF70iQ#%2m>?Y@nYs&TQ@%_~+?|6nYgHR>$c4Lu{abtx3{!}Y}Zq#boerB3>n zC+sw^m@!yVofje9UQ#Y?U%O9ISfcFmF|ezmv}+H=SPgYW zX{x$nn|V(~{k(1p+wWZzUmvzp93R$Pp}+RMWPRw4G&sFplBU&2&y{neh+!^L<_IH6 zZNY3Qaz~UDeKSisQeP%L9KT*tQ`#xr8(b?D?K&>asyr(_c>KF`IPZ$oSaMY|{draD zf1*)3@Tx&t9eF{z;BZQEszMrRyick%-7F<+sgguQfi!hlvQ#k6Q(FFIvXmdMD!oiT zW1!OA&Y-JOfBn?Ev+8_qTK_AL012naJt?OWneNsRqgMN>aC~8Pc$RY0~I1 zYo)#_b<(H8M(N1I=ThSPuTrlE%@yH8+bT{rbWmvA?4$^t)>(0)eJ4e`aqSghwXGDJ z)-_Y?zR@J*4ZkPZO*$hrb=e`UEiaa8js!}*vbCk>!?zg(p*`i&wpDd@g_=UAo3^;Q zZG<@7zKiHksw8xe->s`_U0)|Y@2G3&TU~dkva(J;aYda$N>yD!{KmQ;_Yc(#^=qgT z4sYsy{oGnC*`*;&5yW%(gjm zdZ{5tM-G8pH5%N%3%^NMMc+NUMCV7v!uZfUadzQ!alomcP&=t2oKD=Xi#9u5S1|cp 
z-Hwj0>fT-JDa;4hiZKz9Vo!&0Vt{`OaevbFx=ltW>wY+1s@pT}ZC%3TE<(|uBa8|i z#FTN_;_LlY;PZKNVWr}qZ^2C#yg<^nixzJj?S{!uUEXEw)FVc_Jip7n8O#^UW*Nr(7 z+RzyDRYYe!6=$wp7lr#Ti2WN*i@eY?V!3mJ_)+&z47#ku;PO^jpZHUJ(oTs4luKI&6C^ zI!t&cjvM_H7q7Qq+x+%uId$b$|30i)F^C&+WAJ~i$G2%Sh`VFUKHqip3oK>E%p8`q zPr}48n8)6ZwAG(M#*2x(jvL0J@0!#s>`q$W&U|XtjwY|xY{_elP51UZFH>W}m;G^G<>Ln7}+l{7ws1748j-#gIKwiG< z$DwIGIOea){(T+kdA2Q=om&v_TnXi0ehAONAHw?2Z(^v^XJP2{N%*$;F5V1QChT-` z&S$iw(5Nj}*LG%lLw7nJlzmsK7NLGaIdFa)$)yqrKW1?2k_|_@xO2`g1Wm)8s2W$J zH?W)skMih|zL=2i(Ih}eJBIEi3_WN{oP4g! zE%k|5q07zuVf>b`NtWNyQKBTo8Kz;3S#*H1rw!6A?$uXcz)07_mb9k5O%AWUg8B-jI<<2Dj)Y->< zm+jcxS$1}By%SZkoiRlSLXEt^0|^(1Ph z8!*Uw8ZLW`IC;U4f(esYvSuPZ^CwXMZ6e1{=%cpM2>lof_QhL~Fe?^EZ6`*YaAs(_ zBL@pDG5BJ_2%*p3PvhAbsf&)X4pVc7p)yjNF8u~D_vS!+Rt-d9phbCbKQ^@)fTz(= z-kA^QUh*(5hK{6f(s&lu8DM^98i(plIGQ>OLu+Sh{QUW(5=Omq3|XtPN*oTHrI(j1}>wjEyj(+-w%Z zU(Ketqa9y;ELmedi}`=dU{qUE)W4bYX~`_UOtoi+rUx3z-prhpL9G;s&+;Hb+6IvP zhX>^^?a_6a$%b3T%4P4#GYLx)as4;)%M0d1v;46hu+agR|vJ+4E2l^!c@ z>eDKJGD97waISDVv2NDvNt(m+N;|&SxDl5ggv+f3_!%W~cUuZ?I%knvQSiT*BhvPe zV6>69cdA(Jwv1o)<>ApfoyX%6S#vdt_iBOkPw}GQnhUN$j@Z3vN8*hgm=f*4hQ-co4tFC%(~pyV!?^t*5|0!4cz%tc>gNJ>UWg`tbT}rX z=Q1hH0de1klfTYki`8u0-pt^fw*r@m3i^jnCqB@G^D`wJ*61O1HDpM5^SM)0_P?0_uJyNbJNOW~ zfhoUN;xMg*o&|Yy-=9v0J4xKPUVw6OIF?@mQQtY2I4@tUqkSft=8NMeabI-WT>+%hU+{`n$;j}|a^%u<40Wn!wIhDG-zN=`50rdg7l;rnnRR%$*IFgW0N`h^wdUnHNQ}Mm~?uCQ;;fiK9bs64j>Z z=#I*v<=7?U^etpqeHjLOR-xm*@gFf?c(RA`>P?ibTgTI_%UL$Floif}SUg<9s(U$T z6{J$%YaszM;wY50qvbC7Iu*r{g!!zL`TT5m3Li9*8T?Bor|#wAGIc41#^u=PRC0De zDXVHrP<~rTkEfX&XqU;Vk|cWiCGq}2JbT|JvP~=?mry?4i6VJbBzt^fG0G0YzEPeL zzXemO7fp58BFd)*k*gmg`-cdgcL`-m#R49`Uc@i=Qz(3r&6hJd)QvA9!K#c-YgRF1 z_=bPPY-6{FuE%RARjQ`RMaG<3!nC?P(!Fv}@y_6{DJl5NJTzaP$e`bnIo^~+w(&wD zyTzlMoQ!3MR4RX8Ox>(>QZjRie_ll4*=3kqC?djWDH`kYIB_M7*tBeXm&I{$Kr)S; zBQUlPVr;QL#_i=kb@Rq$kS(iw25{WZo?ETlINV@MWw;a8HL`B`xRU)LgtWUpD0|1# z^d**0t>bWaU5x(zY|b3bB%xg~^Q;RP9kUXHib~Q=Hs{ zFa9|r6{&X`kygnJGK?g6W-JHVN1*>O22XkZ>wgNu=3@{;UPp0hR0=kMiF|fQ<>SI! 
zT1;5N_9w+0`KuJiVTH&W4XcxK`1vRssbdDY7Z;=YGLeo&Q9O7N%q^V|HqP?LR>_0H z3n5gu@}%3F04n4;JjutG++V^t96pz8XQTP99)teJL^iyM#d>KxZ)#-Tbj`#2V>XWl z6mfP=8Roi`j4fG(XY$s6#GJ3aohF_2EQ+k6@9ko0WKLU#q@lQ+N;}IW(OX-BR&XNuY;^?4K&5sqv8YyGsPVslnvl2}GqNl*wCy=@9MD*ad-1 zdN~hOZQ1Xr$sC*IgG~o}o;UX;u2mp?HieVXPu4{5WQKcYW8W%|MR$sskX7-InE%$X zrS`k|Xt@o&v(kiGlPhU^=0!@UvwMiVy5HEyGL`$O_yiZ zF;DJyilu(s0-4iEvOme=m$pl&yT5he%zk!$`qOBsV|-AoaV!%o-Q=r^W;a6D~>BXI56E0<@r9;j-N;N-8uBVI+e#> zb13OzkAW(wNHeMM(tL#bMhriY;0}yf^a2N%ElSqZ5;RIuZNUjtp}r8h6`K z@WhVB0ZydWIAb}(lYO!e9x)+Ao`HeP*&RXsz<6rsCGlJD3>J>ez%f0aAs!`^w_DEo zR%^)cU-yrg2RZGcL%ZM5Z>qwhx)l4x`MjE%i%w}8>c1z^F=i3Jj!fa>gk)^YVo`dy zfG=Yc81iNz>W7jj7@UlSX$r%gGAVeUkF=nW!2{*lJfI5y6Dw)4HB+u#B&XL0$@=QS z5MO7?o6V+S+ANyJm@?jf8t-pR)mGljjosDu_W-V@TYaNSB*w%*b8B`RqchqL%+h%vmk=(SO(`OdnTs zYpc{F<*7qRz8B3VnqcsnnU)ci=^e@dkI zU?P=^(iyw70K?Hs=y|;aU!N)rbXL&qQW3}cWRV)1in*N680*H-t;;+jazbgC;74?% z7meFJ@O$KfhJy!cdd`gPB@#aiTK5)(WfbgeeH_=Bj&$*`R{AR(Cyq*t)boERT%AEhF;61Tyx4s zu`?5=+1!ll@XlFe33XfAWm)sFsY_9z?3J5_Bkb9&5U+naRq-eqw1 zSw8XprSyB7Mf2^s*w?3HKQRrZ>q+bgiYH@uG&ZIo-1QD-&1ZkMH28Bq$DOp80Jf!i zaAb-P#+trlPxj$J|6o#{hH$)bKCb&5NJzIs`LVm~5&W6lF@mF;V&$ABnV2Q%#Qu@X z=gmbty!fB%{4dOw54JHY_cta+t){T59JNJ-#GT3|wjz^i_Y@*sEEykWiig?^w)UIB z@S!GjzGY3oGz&)f&Emr+AMQ8GzWHMy6TZtk)PfWid1i8~eKvDPCG+KWEKPE5GiI|t z(GTRjLFRV1Q8W0h%#h35r?azoJWfxI=~gj{U^j`$Q^vANPmea+<>zc_6R451nVC~3 zlYG~PL8B~qxZjlifpV@BD(~!L{iv7!e@uEbhdRl=NGlx+S+}#>mHcC!`}N<7v;8_w zYcHpTQz>nrMz6_2-y)KHB$qBrgn1IskP(B=xy+MChx;9M3`_^@M^n&A@frxJCVVF>|C_p=P|P^l}3jIQdh?D^=vqvErZy1$q#K0H@F0Gm3^&h#Vp>fci>#NIoy6^kLF${0?VCct?^-VsSBG=`|$nTd_sI; z=rSS+)wyYOug^p^dkKGhEavRZ6%35|&pXE7b>6&b8^(J#@b+p2mvqZ$=#tN%A?Zv$ zn}&2Hi3E8zt~2nXVUahDS+Y+U>Bc7uXR=~u;SyuRHCIQpGQ9Ay4R& z^MTpadFEg~Sl+AV$a}e05~H5S(tmvv>hkQZ`RPa7-ELIfabrWR9i#rT#s2ziJ}jL> z#;Ms1A7;brpZ4Sqw2nsaCt6jpS&1zCx9%M`Q#l+pw;k1MqbHeRz(ik z&z6$6x`K*>mH$}hzj5yxvXeX0exr}sYU=yGi`nraod>V8P?!CUrt$(hwhrUL za$m9ry5TFyd6SzBZay>l`OTCm_R|Q~ol5t^3fyKGGO5u3r$G{Vdkxt%%m}r$(=nPg 
def compute_log_mel(samples: np.ndarray, sr: int,
                    n_fft: int = 512, n_mels: int = 40,
                    hop: int = 256) -> np.ndarray:
    """Return log-mel spectrogram [n_mels, T] of *samples* (float32 [-1,1]).

    Frames of ``n_fft`` samples are taken every ``hop`` samples, Hann-windowed
    and turned into a power spectrum; the spectra are projected through the
    mel filterbank and the natural log is taken after flooring at 1e-10.

    Args:
        samples: 1-D mono audio.
        sr: Sample rate in Hz, forwarded to ``mel_filterbank``.
        n_fft: Frame length / FFT size.
        n_mels: Number of mel bands.
        hop: Step between frame starts, in samples.

    Returns:
        Array of shape ``[n_mels, T]``. An input shorter than ``n_fft`` is
        zero-padded into a single frame; only frames that start at or before
        ``len(samples) - n_fft`` are produced otherwise.
    """
    n = len(samples)
    window = np.hanning(n_fft).astype(np.float32)
    frames = []
    # max(..., 1) guarantees one frame even for very short (or empty) input.
    for start in range(0, max(n - n_fft + 1, 1), hop):
        chunk = samples[start:start + n_fft]
        if len(chunk) < n_fft:
            chunk = np.pad(chunk, (0, n_fft - len(chunk)))
        power = np.abs(np.fft.rfft(chunk * window)) ** 2
        frames.append(power)
    frames_arr = np.array(frames, dtype=np.float32).T  # [bins, T]
    fb = mel_filterbank(sr, n_fft, n_mels)
    mel = fb @ frames_arr  # [n_mels, T]
    # Floor before the log so silent bands give a finite value.
    mel = np.where(mel > 1e-10, mel, 1e-10)
    return np.log(mel)


# ── TTS + Template Generation ──────────────────────────────────────────────────

def _sine_wave_samples(num_samples: int, sr: int = 16000,
                       duration: float = 1.0) -> list:
    """Return *num_samples* copies of a two-burst sine stand-in for the phrase."""
    t = np.linspace(0, duration, int(sr * duration), dtype=np.float32)
    # Two "peaks" to mimic syllables "hey" and "salty"
    sig = np.sin(2 * np.pi * 500 * t) * (np.exp(-((t - 0.3) ** 2) / 0.01))
    sig += np.sin(2 * np.pi * 400 * t) * (np.exp(-((t - 0.7) ** 2) / 0.02))
    sig = (sig / (np.max(np.abs(sig)) + 1e-6)).astype(np.float32)
    return [sig.copy() for _ in range(num_samples)]


def _load_wav_mono_16k(wav_path: str, target_sr: int = 16000) -> np.ndarray:
    """Load a WAV file as peak-normalized float32 mono audio at *target_sr*."""
    import scipy.io.wavfile as wavfile

    sr, data = wavfile.read(wav_path)
    # Convert to float first: np.abs() on int16 overflows at -32768.
    data = np.asarray(data, dtype=np.float32)
    if data.ndim > 1:
        # Multi-channel file: mix down so the 1-D interpolation below works.
        data = data.mean(axis=1)
    if sr != target_sr:
        # Simple resampling via linear interpolation (no anti-alias filter)
        ratio = float(target_sr) / sr
        new_len = int(len(data) * ratio)
        indices = np.linspace(0, len(data) - 1, new_len)
        data = np.interp(indices, np.arange(len(data)), data)
    # Normalize to [-1, 1]
    peak = np.max(np.abs(data))
    if peak > 0:
        data = data / (peak + 1e-6)
    return data.astype(np.float32)


def generate_synthetic_speech(text: str, num_samples: int = 5) -> list:
    """
    Generate synthetic speech samples of `text` using pyttsx3 or fallback.

    When pyttsx3 is importable, each sample is synthesized to a temporary WAV
    file with a slightly different pitch setting and loaded back. When
    pyttsx3 is missing, fails, or yields no loadable sample, a sine-wave
    approximation is returned instead so the caller always gets usable data.

    Args:
        text: Phrase to synthesize (ignored by the sine-wave fallback).
        num_samples: Number of samples to produce.

    Returns list of float32 numpy arrays (mono, 16kHz).
    """
    try:
        import pyttsx3
    except ImportError:
        print(" pyttsx3 not available; generating synthetic sine-wave approximation...")
        return _sine_wave_samples(num_samples)

    import os
    import tempfile

    samples_list = []
    try:
        engine = pyttsx3.init()
        engine.setProperty('rate', 150)  # slower speech

        # Private temp directory: portable and removed on exit.
        with tempfile.TemporaryDirectory(prefix="hey_salty_") as tmp_dir:
            for i in range(num_samples):
                # Generate unique variation by adjusting pitch slightly
                # (spans -10%..+10% for the default five samples).
                pitch = 1.0 + (i * 0.05 - 0.1)
                engine.setProperty('pitch', max(0.5, min(2.0, pitch)))

                wav_path = os.path.join(tmp_dir, f"hey_salty_{i}.wav")
                engine.save_to_file(text, wav_path)
                engine.runAndWait()

                try:
                    samples_list.append(_load_wav_mono_16k(wav_path))
                except Exception as e:
                    print(f" Warning: could not load {wav_path}: {e}")
    except Exception as e:
        # pyttsx3 is installed but unusable here (e.g. no speech driver).
        print(f" Warning: pyttsx3 synthesis failed: {e}")

    if samples_list:
        return samples_list

    print(" No TTS samples generated; using synthetic sine-wave approximation...")
    return _sine_wave_samples(num_samples)


def main():
    """Generate the 'hey salty' template and save it as ``hey_salty.npy``.

    Synthesizes ``--num-samples`` utterances, computes a log-mel spectrogram
    for each, edge-pads them to a common length, averages them and writes the
    float32 result into ``--output-dir`` (created if missing; the default is
    relative to the current working directory). Exits with status 1 when no
    samples could be generated.
    """
    parser = argparse.ArgumentParser(
        description="Generate 'hey salty' wake word template for wake_word_node")
    parser.add_argument("--output-dir", default="jetson/ros2_ws/src/saltybot_social/models/",
                        help="Directory to save hey_salty.npy")
    parser.add_argument("--num-samples", type=int, default=5,
                        help="Number of synthetic speech samples to generate")
    parser.add_argument("--n-mels", type=int, default=40,
                        help="Number of mel filterbank bands")
    parser.add_argument("--n-fft", type=int, default=512,
                        help="FFT size for mel spectrogram")

    args = parser.parse_args()

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {args.num_samples} synthetic 'hey salty' samples...")
    samples_list = generate_synthetic_speech("hey salty", args.num_samples)

    if not samples_list:
        print("ERROR: Failed to generate samples")
        sys.exit(1)

    print(f" Generated {len(samples_list)} samples")

    # Extract log-mel features for each sample
    print("Extracting log-mel spectrograms...")
    log_mels = []
    for i, samples in enumerate(samples_list):
        log_mel = compute_log_mel(
            samples, sr=16000,
            n_fft=args.n_fft, n_mels=args.n_mels, hop=256
        )
        log_mels.append(log_mel)
        print(f" Sample {i}: shape {log_mel.shape}")

    # Average spectrograms to create template
    print("Averaging spectrograms into template...")
    # Pad to same length (repeat the last frame) so the arrays can be stacked
    max_len = max(m.shape[1] for m in log_mels)
    padded = []
    for log_mel in log_mels:
        if log_mel.shape[1] < max_len:
            pad_width = ((0, 0), (0, max_len - log_mel.shape[1]))
            log_mel = np.pad(log_mel, pad_width, mode='edge')
        padded.append(log_mel)

    template = np.mean(padded, axis=0).astype(np.float32)
    print(f" Template shape: {template.shape}")

    # Save template
    output_path = output_dir / "hey_salty.npy"
    np.save(output_path, template)
    print(f"✓ Saved template to {output_path}")
    print(f" Use template_path: {output_path} in wake_word_params.yaml")


if __name__ == "__main__":
    main()