2026-03-04 13:10:51 -05:00
5 changed files with 388 additions and 2 deletions
--- a/jetson/ros2_ws/src/saltybot_face_bridge/saltybot_face_bridge/face_bridge_node.py
+++ b/jetson/ros2_ws/src/saltybot_face_bridge/saltybot_face_bridge/face_bridge_node.py
@ -0,0 +1,186 @@
 #!/usr/bin/env python3
 """Face display bridge node.
 Maps orchestrator state to face expressions via HTTP WebSocket API.
 Bridges /social/orchestrator/state and /saltybot/wake_word_detected to
 face display server (localhost:3000/face/{id}).
 State mapping:
  IDLE → 0 (Tracking)
  LISTENING → 1 (Alert)
  THINKING → 3 (Searching)
  SPEAKING → 4 (Social)
  Wake word → 1 (Alert) [immediate override]
 Subscribed topics:
  /social/orchestrator/state (String) - JSON: {"state": "IDLE|LISTENING|THINKING|SPEAKING|THROTTLED"}
  /saltybot/wake_word_detected (Bool) - Wake word detection trigger
 Published topics:
  /face/state (String) - Current face expression ID and status
 """
 import json
 import threading
 import time
 from typing import Optional
 import rclpy
 from rclpy.node import Node
 from std_msgs.msg import String, Bool
 class FaceDisplayBridge(Node):
    """Bridge orchestrator state to face display expressions."""
    # State to face expression ID mapping
    STATE_TO_FACE_ID = {
        "IDLE": 0,         # Tracking
        "LISTENING": 1,    # Alert
        "THINKING": 3,     # Searching
        "SPEAKING": 4,     # Social
        "THROTTLED": 0,    # Fallback to Tracking
    }
    def __init__(self):
        super().__init__("face_bridge")
        # Parameters
        self.declare_parameter("face_server_url", "http://localhost:3000/face/1")
        self.declare_parameter("http_timeout", 2.0)
        self.declare_parameter("update_interval", 0.1)
        self.face_server_url = self.get_parameter("face_server_url").value
        self.http_timeout = self.get_parameter("http_timeout").value
        self.update_interval = self.get_parameter("update_interval").value
        # Try to import requests, fallback to urllib if unavailable
        try:
            import requests
            self.requests = requests
            self.use_requests = True
        except ImportError:
            import urllib.request
            import urllib.error
            self.urllib = urllib.request
            self.urllib_error = urllib.error
            self.use_requests = False
        # State
        self.current_state = "IDLE"
        self.current_face_id = 0
        self.wake_word_active = False
        self.last_update_time = time.time()
        self.state_lock = threading.Lock()
        # Subscriptions
        self.create_subscription(String, "/social/orchestrator/state", self._on_state_update, 10)
        self.create_subscription(Bool, "/saltybot/wake_word_detected", self._on_wake_word, 10)
        # Publishers
        self.pub_state = self.create_publisher(String, "/face/state", 10)
        # Timer for update loop
        self.create_timer(self.update_interval, self._update_face)
        self.get_logger().info(
            f"Face bridge initialized: face_server_url={self.face_server_url}"
        )
    def _on_state_update(self, msg: String) -> None:
        """Handle orchestrator state update."""
        try:
            data = json.loads(msg.data)
            new_state = data.get("state", "IDLE").upper()
            # Validate state
            if new_state in self.STATE_TO_FACE_ID:
                with self.state_lock:
                    self.current_state = new_state
                self.get_logger().debug(f"State updated: {new_state}")
            else:
                self.get_logger().warn(f"Unknown state: {new_state}")
        except json.JSONDecodeError:
            self.get_logger().error(f"Invalid JSON in state update: {msg.data}")
    def _on_wake_word(self, msg: Bool) -> None:
        """Handle wake word detection - immediate switch to Alert."""
        if msg.data:
            with self.state_lock:
                self.wake_word_active = True
            self.get_logger().info("Wake word detected - switching to Alert")
    def _get_face_id(self) -> int:
        """Get current face expression ID based on state."""
        with self.state_lock:
            if self.wake_word_active:
                face_id = 1  # Alert
                # Clear wake word after one update
                self.wake_word_active = False
            else:
                face_id = self.STATE_TO_FACE_ID.get(self.current_state, 0)
        return face_id
    def _send_face_command(self, face_id: int) -> bool:
        """Send face expression command to display server.
        Args:
            face_id: Expression ID (0-4)
        Returns:
            True if successful
        """
        try:
            if self.use_requests:
                response = self.requests.get(
                    self.face_server_url.format(id=face_id),
                    timeout=self.http_timeout
                )
                return response.status_code == 200
            else:
                url = self.face_server_url.format(id=face_id)
                req = self.urllib.Request(url)
                with self.urllib.urlopen(req, timeout=self.http_timeout) as response:
                    return response.status == 200
        except Exception as e:
            self.get_logger().error(f"Failed to update face display: {e}")
            return False
    def _update_face(self) -> None:
        """Update face expression based on current state."""
        face_id = self._get_face_id()
        # Only send if changed
        if face_id != self.current_face_id:
            if self._send_face_command(face_id):
                self.current_face_id = face_id
                self.last_update_time = time.time()
                # Publish state
                with self.state_lock:
                    state_msg = String(
                        data=json.dumps({
                            "face_id": face_id,
                            "orchestrator_state": self.current_state,
                            "timestamp": self.last_update_time
                        })
                    )
                self.pub_state.publish(state_msg)
                self.get_logger().debug(f"Face updated: {face_id}")
 def main(args=None):
    rclpy.init(args=args)
    node = FaceDisplayBridge()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()
 if __name__ == "__main__":
    main()
--- a/jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy
+++ b/jetson/ros2_ws/src/saltybot_social/models/hey_salty.npy
--- a/jetson/ros2_ws/src/saltybot_social/saltybot_social/mesh_comms_node.py
+++ b/jetson/ros2_ws/src/saltybot_social/saltybot_social/mesh_comms_node.py
@ -178,7 +178,7 @@ class MeshCommsNode(Node):
        msg = MeshPeer()
        msg.header.stamp          = self.get_clock().now().to_msg()
        msg.robot_id              = self._robot_id
-        msg.namespace             = self._ns
+        msg.ros_namespace         = self._ns
        msg.social_state          = state
        msg.active_person_ids     = active_ids
        msg.greeted_person_names  = greeted_names
--- a/jetson/ros2_ws/src/saltybot_social/scripts/generate_wake_word_template.py
+++ b/jetson/ros2_ws/src/saltybot_social/scripts/generate_wake_word_template.py
@ -0,0 +1,200 @@
 #!/usr/bin/env python3
 """
 generate_wake_word_template.py — Generate 'hey salty' wake word template for Issue #393.
 Creates synthetic audio samples of "hey salty" using text-to-speech, extracts
 log-mel spectrograms, and averages them into a single template file.
 Usage:
  python3 generate_wake_word_template.py --output-dir path/to/models/
 The template is saved as hey_salty.npy (log-mel [n_mels, T] array).
 """
 import argparse
 import sys
 from pathlib import Path
 try:
    import numpy as np
 except ImportError:
    print("ERROR: numpy not found. Install: pip install numpy")
    sys.exit(1)
 # ── Copy of DSP functions from wake_word_node.py ────────────────────────────────
 def mel_filterbank(sr: int, n_fft: int, n_mels: int,
                   fmin: float = 80.0, fmax = None) -> np.ndarray:
    """Build a triangular mel filterbank matrix [n_mels, n_fft//2+1]."""
    import math
    if fmax is None:
        fmax = sr / 2.0
    def hz_to_mel(hz: float) -> float:
        return 2595.0 * math.log10(1.0 + hz / 700.0)
    def mel_to_hz(mel: float) -> float:
        return 700.0 * (10.0 ** (mel / 2595.0) - 1.0)
    mel_lo = hz_to_mel(fmin)
    mel_hi = hz_to_mel(fmax)
    mel_pts = np.linspace(mel_lo, mel_hi, n_mels + 2)
    hz_pts  = np.array([mel_to_hz(m) for m in mel_pts])
    freqs   = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    fb = np.zeros((n_mels, len(freqs)), dtype=np.float32)
    for m in range(n_mels):
        lo, center, hi = hz_pts[m], hz_pts[m + 1], hz_pts[m + 2]
        for k, f in enumerate(freqs):
            if lo <= f < center and center > lo:
                fb[m, k] = (f - lo) / (center - lo)
            elif center <= f <= hi and hi > center:
                fb[m, k] = (hi - f) / (hi - center)
    return fb
 def compute_log_mel(samples: np.ndarray, sr: int,
                    n_fft: int = 512, n_mels: int = 40,
                    hop: int = 256) -> np.ndarray:
    """Return log-mel spectrogram [n_mels, T] of *samples* (float32 [-1,1])."""
    n = len(samples)
    window = np.hanning(n_fft).astype(np.float32)
    frames = []
    for start in range(0, max(n - n_fft + 1, 1), hop):
        chunk = samples[start:start + n_fft]
        if len(chunk) < n_fft:
            chunk = np.pad(chunk, (0, n_fft - len(chunk)))
        power = np.abs(np.fft.rfft(chunk * window)) ** 2
        frames.append(power)
    frames_arr = np.array(frames, dtype=np.float32).T   # [bins, T]
    fb  = mel_filterbank(sr, n_fft, n_mels)
    mel = fb @ frames_arr                               # [n_mels, T]
    mel = np.where(mel > 1e-10, mel, 1e-10)
    return np.log(mel)
 # ── TTS + Template Generation ──────────────────────────────────────────────────
 def generate_synthetic_speech(text: str, num_samples: int = 5) -> list:
    """
    Generate synthetic speech samples of `text` using pyttsx3 or fallback.
    Returns list of float32 numpy arrays (mono, 16kHz).
    """
    try:
        import pyttsx3
        engine = pyttsx3.init()
        engine.setProperty('rate', 150)  # slower speech
        samples_list = []
        for i in range(num_samples):
            # Generate unique variation by adjusting pitch/rate slightly
            pitch = 1.0 + (i * 0.05 - 0.1)  # ±10% pitch variation
            engine.setProperty('pitch', max(0.5, min(2.0, pitch)))
            # Save to temporary WAV
            wav_path = f"/tmp/hey_salty_{i}.wav"
            engine.save_to_file(text, wav_path)
            engine.runAndWait()
            # Load WAV and convert to 16kHz if needed
            try:
                import scipy.io.wavfile as wavfile
                sr, data = wavfile.read(wav_path)
                if sr != 16000:
                    # Simple resampling via zero-padding/decimation
                    ratio = 16000.0 / sr
                    new_len = int(len(data) * ratio)
                    indices = np.linspace(0, len(data) - 1, new_len)
                    data = np.interp(indices, np.arange(len(data)), data.astype(np.float32))
                # Normalize to [-1, 1]
                if np.max(np.abs(data)) > 0:
                    data = data / (np.max(np.abs(data)) + 1e-6)
                samples_list.append(data.astype(np.float32))
            except Exception as e:
                print(f"  Warning: could not load {wav_path}: {e}")
        if samples_list:
            return samples_list
        else:
            raise Exception("No samples generated")
    except ImportError:
        print("  pyttsx3 not available; generating synthetic sine-wave approximation...")
        # Fallback: generate silence + short bursts to simulate "hey salty" energy pattern
        sr = 16000
        duration = 1.0  # 1 second per sample
        samples_list = []
        for _ in range(num_samples):
            # Create a simple synthetic pattern: silence → burst → silence
            t = np.linspace(0, duration, int(sr * duration), dtype=np.float32)
            # Two "peaks" to mimic syllables "hey" and "salty"
            sig = np.sin(2 * np.pi * 500 * t) * (np.exp(-((t - 0.3) ** 2) / 0.01))
            sig += np.sin(2 * np.pi * 400 * t) * (np.exp(-((t - 0.7) ** 2) / 0.02))
            sig = sig / (np.max(np.abs(sig)) + 1e-6)
            samples_list.append(sig)
        return samples_list
 def main():
    parser = argparse.ArgumentParser(
        description="Generate 'hey salty' wake word template for wake_word_node")
    parser.add_argument("--output-dir", default="jetson/ros2_ws/src/saltybot_social/models/",
                        help="Directory to save hey_salty.npy")
    parser.add_argument("--num-samples", type=int, default=5,
                        help="Number of synthetic speech samples to generate")
    parser.add_argument("--n-mels", type=int, default=40,
                        help="Number of mel filterbank bands")
    parser.add_argument("--n-fft", type=int, default=512,
                        help="FFT size for mel spectrogram")
    args = parser.parse_args()
    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Generating {args.num_samples} synthetic 'hey salty' samples...")
    samples_list = generate_synthetic_speech("hey salty", args.num_samples)
    if not samples_list:
        print("ERROR: Failed to generate samples")
        sys.exit(1)
    print(f"  Generated {len(samples_list)} samples")
    # Extract log-mel features for each sample
    print("Extracting log-mel spectrograms...")
    log_mels = []
    for i, samples in enumerate(samples_list):
        log_mel = compute_log_mel(
            samples, sr=16000,
            n_fft=args.n_fft, n_mels=args.n_mels, hop=256
        )
        log_mels.append(log_mel)
        print(f"  Sample {i}: shape {log_mel.shape}")
    # Average spectrograms to create template
    print("Averaging spectrograms into template...")
    # Pad to same length
    max_len = max(m.shape[1] for m in log_mels)
    padded = []
    for log_mel in log_mels:
        if log_mel.shape[1] < max_len:
            pad_width = ((0, 0), (0, max_len - log_mel.shape[1]))
            log_mel = np.pad(log_mel, pad_width, mode='edge')
        padded.append(log_mel)
    template = np.mean(padded, axis=0).astype(np.float32)
    print(f"  Template shape: {template.shape}")
    # Save template
    output_path = output_dir / "hey_salty.npy"
    np.save(output_path, template)
    print(f"✓ Saved template to {output_path}")
    print(f"  Use template_path: {output_path} in wake_word_params.yaml")
 if __name__ == "__main__":
    main()
--- a/jetson/ros2_ws/src/saltybot_social_msgs/msg/MeshPeer.msg
+++ b/jetson/ros2_ws/src/saltybot_social_msgs/msg/MeshPeer.msg
@ -6,7 +6,7 @@
 std_msgs/Header header
 string  robot_id                # e.g. "saltybot_1"
-string  namespace               # ROS2 namespace, e.g. "/saltybot_1" (empty = default)
+string  ros_namespace           # ROS2 namespace, e.g. "/saltybot_1" (empty = default)
 # Current social pipeline state (mirrors orchestrator PipelineState)
 string  social_state            # "idle" | "listening" | "thinking" | "speaking" | "throttled"