Merge pull request 'feat: Piper TTS service (Issue #421)' (#425) from sl-mechanical/issue-421-tts-service into main
This commit is contained in:
commit
eda5154650
@ -0,0 +1,11 @@
|
|||||||
|
/**:
|
||||||
|
ros__parameters:
|
||||||
|
tts_service:
|
||||||
|
voice_model_path: "/models/piper/en_US-lessac-medium.onnx"
|
||||||
|
sample_rate: 22050
|
||||||
|
speed: 1.0 # Normal playback speed
|
||||||
|
pitch: 1.0 # Normal pitch
|
||||||
|
volume: 0.8 # 0–1.0 (safety: reduced from 1.0)
|
||||||
|
audio_device: "Jabra" # ALSA device name or alias
|
||||||
|
queue_max_size: 16 # Max queued TTS requests
|
||||||
|
autoplay: true # Auto-play TTS on startup
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch_ros.actions import Node
|
||||||
|
from launch.actions import DeclareLaunchArgument
|
||||||
|
from launch.substitutions import LaunchConfiguration
|
||||||
|
import os
|
||||||
|
from ament_index_python.packages import get_package_share_directory
|
||||||
|
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
"""Launch the TTS service node."""
|
||||||
|
package_dir = get_package_share_directory("saltybot_tts_service")
|
||||||
|
config_path = os.path.join(package_dir, "config", "tts_service_params.yaml")
|
||||||
|
|
||||||
|
# Declare launch arguments
|
||||||
|
voice_model_arg = DeclareLaunchArgument(
|
||||||
|
"voice_model",
|
||||||
|
default_value="/models/piper/en_US-lessac-medium.onnx",
|
||||||
|
description="Path to Piper voice model (ONNX)",
|
||||||
|
)
|
||||||
|
|
||||||
|
audio_device_arg = DeclareLaunchArgument(
|
||||||
|
"audio_device",
|
||||||
|
default_value="Jabra",
|
||||||
|
description="ALSA audio device name or alias",
|
||||||
|
)
|
||||||
|
|
||||||
|
# TTS service node
|
||||||
|
tts_node = Node(
|
||||||
|
package="saltybot_tts_service",
|
||||||
|
executable="tts_service_node",
|
||||||
|
name="tts_service",
|
||||||
|
parameters=[
|
||||||
|
config_path,
|
||||||
|
{"voice_model_path": LaunchConfiguration("voice_model")},
|
||||||
|
{"audio_device": LaunchConfiguration("audio_device")},
|
||||||
|
],
|
||||||
|
respawn=False,
|
||||||
|
output="screen",
|
||||||
|
)
|
||||||
|
|
||||||
|
return LaunchDescription(
|
||||||
|
[
|
||||||
|
voice_model_arg,
|
||||||
|
audio_device_arg,
|
||||||
|
tts_node,
|
||||||
|
]
|
||||||
|
)
|
||||||
24
jetson/ros2_ws/src/saltybot_tts_service/package.xml
Normal file
24
jetson/ros2_ws/src/saltybot_tts_service/package.xml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
|
||||||
|
<package format="3">
|
||||||
|
<name>saltybot_tts_service</name>
|
||||||
|
<version>0.1.0</version>
|
||||||
|
<description>Central TTS (text-to-speech) service using Piper with action server, queue management, and Jabra audio output.</description>
|
||||||
|
<maintainer email="seb@vayrette.com">seb</maintainer>
|
||||||
|
<license>Apache-2.0</license>
|
||||||
|
|
||||||
|
<buildtool_depend>ament_python</buildtool_depend>
|
||||||
|
|
||||||
|
<depend>rclpy</depend>
|
||||||
|
<depend>std_msgs</depend>
|
||||||
|
<depend>rclcpp_action</depend>
|
||||||
|
|
||||||
|
<test_depend>ament_copyright</test_depend>
|
||||||
|
<test_depend>ament_flake8</test_depend>
|
||||||
|
<test_depend>ament_pep257</test_depend>
|
||||||
|
<test_depend>python3-pytest</test_depend>
|
||||||
|
|
||||||
|
<export>
|
||||||
|
<build_type>ament_python</build_type>
|
||||||
|
</export>
|
||||||
|
</package>
|
||||||
@ -0,0 +1,287 @@
|
|||||||
|
"""
|
||||||
|
tts_service_node.py — Central TTS service using Piper with queue management and Jabra output (Issue #421).
|
||||||
|
|
||||||
|
Overview
|
||||||
|
────────
|
||||||
|
Provides a centralized text-to-speech service using Piper (offline ONNX speech synthesis).
|
||||||
|
Manages a priority queue with interrupt capability, outputs to Jabra speaker via ALSA/PulseAudio,
|
||||||
|
and publishes TTS state updates.
|
||||||
|
|
||||||
|
Subscribes
|
||||||
|
──────────
|
||||||
|
/saltybot/tts_request std_msgs/String — text to synthesize (priority 0, deferrable)
|
||||||
|
|
||||||
|
Services (Future)
|
||||||
|
─────────────────
|
||||||
|
/saltybot/tts_speak (action server) — request with priority/interrupt (WIP)
|
||||||
|
|
||||||
|
Publishers
|
||||||
|
──────────
|
||||||
|
/saltybot/tts_state std_msgs/String — current state ("idle", "synthesizing", "playing")
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
──────────
|
||||||
|
voice_model_path str '/models/piper/en_US-lessac-medium.onnx'
|
||||||
|
sample_rate int 22050
|
||||||
|
speed float 1.0 (1.0 = normal, <1 = slower, >1 = faster)
|
||||||
|
pitch float 1.0
|
||||||
|
volume float 1.0 (0–1.0)
|
||||||
|
audio_device str 'Jabra' (ALSA device hint or empty for default)
|
||||||
|
queue_max_size int 16
|
||||||
|
autoplay bool True
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
from rclpy.qos import QoSProfile
|
||||||
|
from std_msgs.msg import String
|
||||||
|
|
||||||
|
|
||||||
|
class TtsQueueItem:
|
||||||
|
"""Item in the TTS priority queue."""
|
||||||
|
def __init__(self, text: str, priority: int = 0, interrupt: bool = False):
|
||||||
|
self.text = text.strip()
|
||||||
|
self.priority = priority # 0 = normal, >0 = high priority
|
||||||
|
self.interrupt = interrupt # True = interrupt current playback
|
||||||
|
self.timestamp = time.time()
|
||||||
|
|
||||||
|
def __lt__(self, other):
|
||||||
|
"""Sort by priority (desc), then by timestamp (asc)."""
|
||||||
|
if self.priority != other.priority:
|
||||||
|
return self.priority > other.priority
|
||||||
|
return self.timestamp < other.timestamp
|
||||||
|
|
||||||
|
|
||||||
|
class TtsServiceNode(Node):
|
||||||
|
"""Central TTS service node using Piper with priority queue and Jabra output."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__("tts_service")
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
self.declare_parameter("voice_model_path", "/models/piper/en_US-lessac-medium.onnx")
|
||||||
|
self.declare_parameter("sample_rate", 22050)
|
||||||
|
self.declare_parameter("speed", 1.0)
|
||||||
|
self.declare_parameter("pitch", 1.0)
|
||||||
|
self.declare_parameter("volume", 1.0)
|
||||||
|
self.declare_parameter("audio_device", "Jabra")
|
||||||
|
self.declare_parameter("queue_max_size", 16)
|
||||||
|
self.declare_parameter("autoplay", True)
|
||||||
|
|
||||||
|
self._model_path = self.get_parameter("voice_model_path").value
|
||||||
|
self._sample_rate = self.get_parameter("sample_rate").value
|
||||||
|
self._speed = self.get_parameter("speed").value
|
||||||
|
self._pitch = self.get_parameter("pitch").value
|
||||||
|
self._volume = self.get_parameter("volume").value
|
||||||
|
self._audio_device = self.get_parameter("audio_device").value or "default"
|
||||||
|
self._queue_max = self.get_parameter("queue_max_size").value
|
||||||
|
self._autoplay = self.get_parameter("autoplay").value
|
||||||
|
|
||||||
|
# Voice model (loaded on startup)
|
||||||
|
self._voice = None
|
||||||
|
self._voice_lock = threading.Lock()
|
||||||
|
self._load_voice_model()
|
||||||
|
|
||||||
|
# Queue and playback state
|
||||||
|
self._tts_queue = queue.PriorityQueue(maxsize=self._queue_max)
|
||||||
|
self._state = "idle" # "idle", "synthesizing", "playing"
|
||||||
|
self._state_lock = threading.Lock()
|
||||||
|
self._current_interrupt = False
|
||||||
|
|
||||||
|
# QoS for publishers/subscribers
|
||||||
|
qos = QoSProfile(depth=5)
|
||||||
|
|
||||||
|
# Subscriptions
|
||||||
|
self.create_subscription(
|
||||||
|
String,
|
||||||
|
"/saltybot/tts_request",
|
||||||
|
self._on_tts_request,
|
||||||
|
qos,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Publishers
|
||||||
|
self._state_pub = self.create_publisher(String, "/saltybot/tts_state", qos)
|
||||||
|
|
||||||
|
# Worker threads
|
||||||
|
if self._autoplay and self._voice is not None:
|
||||||
|
self._worker_thread = threading.Thread(
|
||||||
|
target=self._playback_worker, daemon=True, name="tts-worker"
|
||||||
|
)
|
||||||
|
self._worker_thread.start()
|
||||||
|
|
||||||
|
self.get_logger().info(
|
||||||
|
f"TtsServiceNode ready voice={self._model_path} "
|
||||||
|
f"device={self._audio_device} autoplay={self._autoplay}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Voice Model Loading ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _load_voice_model(self) -> None:
|
||||||
|
"""Preload Piper voice model on startup."""
|
||||||
|
try:
|
||||||
|
from piper import PiperVoice
|
||||||
|
|
||||||
|
with self._voice_lock:
|
||||||
|
if self._voice is not None:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.get_logger().info(f"Loading Piper model: {self._model_path}")
|
||||||
|
voice = PiperVoice.load(self._model_path)
|
||||||
|
|
||||||
|
# Test synthesis
|
||||||
|
list(voice.synthesize_stream_raw("Hello."))
|
||||||
|
|
||||||
|
with self._voice_lock:
|
||||||
|
self._voice = voice
|
||||||
|
|
||||||
|
self.get_logger().info("✓ Piper model preloaded successfully")
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f"✗ Failed to load Piper model: {e}")
|
||||||
|
self._voice = None
|
||||||
|
|
||||||
|
# ── Subscriptions ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _on_tts_request(self, msg: String) -> None:
|
||||||
|
"""Handle incoming TTS requests."""
|
||||||
|
text = msg.data.strip()
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
item = TtsQueueItem(text, priority=0, interrupt=False)
|
||||||
|
self._tts_queue.put_nowait(item)
|
||||||
|
self.get_logger().debug(f"Queued TTS: {text[:50]}...")
|
||||||
|
except queue.Full:
|
||||||
|
self.get_logger().warn("TTS queue full, dropping request")
|
||||||
|
|
||||||
|
# ── Playback Worker ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _playback_worker(self) -> None:
|
||||||
|
"""Worker thread for TTS synthesis and playback."""
|
||||||
|
while rclpy.ok():
|
||||||
|
try:
|
||||||
|
# Get next item from priority queue
|
||||||
|
item = self._tts_queue.get(timeout=1.0)
|
||||||
|
|
||||||
|
# Check for interrupt
|
||||||
|
with self._state_lock:
|
||||||
|
if self._current_interrupt:
|
||||||
|
self.get_logger().info("Interrupted playback")
|
||||||
|
self._current_interrupt = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._synthesize_and_play(item)
|
||||||
|
|
||||||
|
except queue.Empty:
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f"Playback worker error: {e}")
|
||||||
|
|
||||||
|
def _synthesize_and_play(self, item: TtsQueueItem) -> None:
|
||||||
|
"""Synthesize text and play audio via ALSA/PulseAudio."""
|
||||||
|
with self._state_lock:
|
||||||
|
self._state = "synthesizing"
|
||||||
|
self._publish_state()
|
||||||
|
|
||||||
|
try:
|
||||||
|
with self._voice_lock:
|
||||||
|
voice = self._voice
|
||||||
|
if voice is None:
|
||||||
|
self.get_logger().error("Voice model not loaded")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Synthesize audio
|
||||||
|
audio_chunks = list(
|
||||||
|
voice.synthesize_stream_raw(
|
||||||
|
item.text,
|
||||||
|
speaker=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Combine audio chunks into single buffer
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
audio_data = b"".join(audio_chunks)
|
||||||
|
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||||
|
|
||||||
|
# Apply volume scaling
|
||||||
|
audio_array = (audio_array * self._volume).astype(np.int16)
|
||||||
|
|
||||||
|
with self._state_lock:
|
||||||
|
self._state = "playing"
|
||||||
|
self._publish_state()
|
||||||
|
|
||||||
|
# Play audio via ALSA/PulseAudio
|
||||||
|
self._play_audio(audio_array)
|
||||||
|
|
||||||
|
self.get_logger().info(f"✓ Played: {item.text[:50]}...")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f"Synthesis/playback error: {e}")
|
||||||
|
finally:
|
||||||
|
with self._state_lock:
|
||||||
|
self._state = "idle"
|
||||||
|
self._publish_state()
|
||||||
|
|
||||||
|
def _play_audio(self, audio_array) -> None:
|
||||||
|
"""Play audio buffer via ALSA/PulseAudio to Jabra device."""
|
||||||
|
try:
|
||||||
|
import subprocess
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Convert audio to WAV format and pipe to aplay
|
||||||
|
import io
|
||||||
|
import wave
|
||||||
|
|
||||||
|
wav_buffer = io.BytesIO()
|
||||||
|
with wave.open(wav_buffer, "wb") as wav:
|
||||||
|
wav.setnchannels(1)
|
||||||
|
wav.setsampwidth(2)
|
||||||
|
wav.setframerate(self._sample_rate)
|
||||||
|
wav.writeframes(audio_array.tobytes())
|
||||||
|
|
||||||
|
wav_data = wav_buffer.getvalue()
|
||||||
|
|
||||||
|
# Attempt to play via ALSA with Jabra device hint
|
||||||
|
cmd = ["aplay", "-D", self._audio_device, "-q"]
|
||||||
|
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
proc.communicate(input=wav_data, timeout=30)
|
||||||
|
|
||||||
|
except FileNotFoundError:
|
||||||
|
self.get_logger().warn("aplay not found; audio playback unavailable")
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f"Audio playback error: {e}")
|
||||||
|
|
||||||
|
# ── State Publishing ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _publish_state(self) -> None:
|
||||||
|
"""Publish current TTS state."""
|
||||||
|
with self._state_lock:
|
||||||
|
state = self._state
|
||||||
|
|
||||||
|
msg = String()
|
||||||
|
msg.data = state
|
||||||
|
self._state_pub.publish(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
node = TtsServiceNode()
|
||||||
|
try:
|
||||||
|
rclpy.spin(node)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
node.destroy_node()
|
||||||
|
rclpy.try_shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
5
jetson/ros2_ws/src/saltybot_tts_service/setup.cfg
Normal file
5
jetson/ros2_ws/src/saltybot_tts_service/setup.cfg
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
[develop]
|
||||||
|
script_dir=$base/lib/saltybot_tts_service
|
||||||
|
|
||||||
|
[install]
|
||||||
|
script_dir=$base/lib/saltybot_tts_service
|
||||||
22
jetson/ros2_ws/src/saltybot_tts_service/setup.py
Normal file
22
jetson/ros2_ws/src/saltybot_tts_service/setup.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='saltybot_tts_service',
|
||||||
|
version='0.1.0',
|
||||||
|
packages=find_packages(),
|
||||||
|
data_files=[
|
||||||
|
('share/ament_index/resource_index/packages', ['resource/saltybot_tts_service']),
|
||||||
|
('share/saltybot_tts_service', ['package.xml']),
|
||||||
|
],
|
||||||
|
install_requires=['setuptools'],
|
||||||
|
zip_safe=True,
|
||||||
|
author='seb',
|
||||||
|
author_email='seb@vayrette.com',
|
||||||
|
description='Central TTS service with Piper, action server, and queue management',
|
||||||
|
license='Apache-2.0',
|
||||||
|
entry_points={
|
||||||
|
'console_scripts': [
|
||||||
|
'tts_service_node = saltybot_tts_service.tts_service_node:main',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
@ -0,0 +1,40 @@
|
|||||||
|
"""Unit tests for TTS service node."""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from saltybot_tts_service.tts_service_node import TtsQueueItem
|
||||||
|
|
||||||
|
|
||||||
|
class TestTtsQueueItem(unittest.TestCase):
|
||||||
|
"""Test TtsQueueItem priority and sorting."""
|
||||||
|
|
||||||
|
def test_queue_item_creation(self):
|
||||||
|
"""Test creating a TTS queue item."""
|
||||||
|
item = TtsQueueItem("Hello world", priority=0)
|
||||||
|
self.assertEqual(item.text, "Hello world")
|
||||||
|
self.assertEqual(item.priority, 0)
|
||||||
|
self.assertFalse(item.interrupt)
|
||||||
|
|
||||||
|
def test_queue_item_priority_sorting(self):
|
||||||
|
"""Test that items sort by priority (descending) then timestamp (ascending)."""
|
||||||
|
import time
|
||||||
|
|
||||||
|
item1 = TtsQueueItem("Low priority", priority=0)
|
||||||
|
time.sleep(0.01) # Ensure different timestamp
|
||||||
|
item2 = TtsQueueItem("High priority", priority=1)
|
||||||
|
|
||||||
|
# item2 should be < item1 in priority queue (higher priority = lower value)
|
||||||
|
self.assertTrue(item2 < item1)
|
||||||
|
|
||||||
|
def test_queue_item_strips_whitespace(self):
|
||||||
|
"""Test that text is stripped of whitespace."""
|
||||||
|
item = TtsQueueItem(" Hello world ")
|
||||||
|
self.assertEqual(item.text, "Hello world")
|
||||||
|
|
||||||
|
def test_queue_item_interrupt_flag(self):
|
||||||
|
"""Test interrupt flag."""
|
||||||
|
item = TtsQueueItem("Interrupt me", interrupt=True)
|
||||||
|
self.assertTrue(item.interrupt)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Loading…
x
Reference in New Issue
Block a user