From fabb988e72365e58c4cfb6e083ab0be2af531ac2 Mon Sep 17 00:00:00 2001 From: sl-jetson Date: Mon, 2 Mar 2026 09:38:17 -0500 Subject: [PATCH] =?UTF-8?q?feat(social):=20voice=20command=20NLU=20?= =?UTF-8?q?=E2=80=94=2030+=20intents,=20confirmation=20flow=20(Issue=20#13?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## New files - saltybot_social_msgs/msg/VoiceCommand.msg intent + entities[] + confidence + confirmation_token + requires_confirmation - saltybot_social/voice_command_parser.py Pure-Python regex NLU, zero ROS2/ML deps, < 1 ms/call 30+ named intents across nav.*, social.*, system.*, config.*, confirm.* Entity extraction: location, name, mode, level, route name Dangerous-command flag: system.shutdown, system.restart, social.forget_me - saltybot_social/voice_command_node.py Subscribes /social/speech/transcript, publishes /social/voice_command Confirmation flow with UUID token + 10 s timeout Below-threshold → intent=fallback → LLM conversation engine - saltybot_social/test/test_voice_command_parser.py 191 unit tests (all pass), no ROS2 runtime required - saltybot_social/config/voice_command_params.yaml - saltybot_social/launch/voice_command.launch.py ## Intent taxonomy nav: go_to, go_home, follow_me, stop, wait, come_here, patrol, set_mode (shadow/lead/side/orbit/loose/tight), teach_route, stop_teaching, replay_route social: remember_me, forget_me [CONFIRM], whats_my_name, tell_joke system: battery_status, map_status, shutdown [CONFIRM], restart [CONFIRM], volume_up, volume_down, volume_set config: personality, sass_level, follow_mode ## Updated - saltybot_social_msgs/CMakeLists.txt: register VoiceCommand.msg - saltybot_social/setup.py: add voice_command_node entry point Co-Authored-By: Claude Sonnet 4.6 --- .../config/voice_command_params.yaml | 15 + .../launch/voice_command.launch.py | 44 + .../saltybot_social/voice_command_node.py | 275 ++++++ .../saltybot_social/voice_command_parser.py | 573 +++++++++++++ jetson/ros2_ws/src/saltybot_social/setup.py | 2 + .../test/test_voice_command_parser.py | 786 ++++++++++++++++++ .../src/saltybot_social_msgs/CMakeLists.txt | 2 + .../saltybot_social_msgs/msg/VoiceCommand.msg | 25 + 8 files changed, 1722 insertions(+) create mode 100644 jetson/ros2_ws/src/saltybot_social/config/voice_command_params.yaml create mode 100644 jetson/ros2_ws/src/saltybot_social/launch/voice_command.launch.py create mode 100644 jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_node.py create mode 100644 jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_parser.py create mode 100644 jetson/ros2_ws/src/saltybot_social/test/test_voice_command_parser.py create mode 100644 jetson/ros2_ws/src/saltybot_social_msgs/msg/VoiceCommand.msg diff --git a/jetson/ros2_ws/src/saltybot_social/config/voice_command_params.yaml b/jetson/ros2_ws/src/saltybot_social/config/voice_command_params.yaml new file mode 100644 index 0000000..6a84497 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/config/voice_command_params.yaml @@ -0,0 +1,15 @@ +voice_command_node: + ros__parameters: + # Minimum NLU confidence score [0.0, 1.0] for a command to be executed. + # Commands below this threshold are published as intent="fallback" and + # routed to the LLM conversation engine. + min_confidence: 0.70 + + # Seconds to wait for a confirm.yes/no response after a dangerous command + # (system.shutdown, system.restart, social.forget_me). + # After timeout the pending command is automatically cancelled. + confirmation_timeout_s: 10.0 + + # Log every parsed intent at INFO level. Set false to reduce log noise + # in production; errors and warnings are always logged. + announce_intent: true diff --git a/jetson/ros2_ws/src/saltybot_social/launch/voice_command.launch.py b/jetson/ros2_ws/src/saltybot_social/launch/voice_command.launch.py new file mode 100644 index 0000000..8bcc9da --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/launch/voice_command.launch.py @@ -0,0 +1,44 @@ +"""voice_command.launch.py — Launch voice_command_node (Issue #137).""" + +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration, PathJoinSubstitution +from launch_ros.actions import Node +from launch_ros.substitutions import FindPackageShare + + +def generate_launch_description() -> LaunchDescription: + pkg = FindPackageShare("saltybot_social") + + params_file = PathJoinSubstitution([pkg, "config", "voice_command_params.yaml"]) + + return LaunchDescription([ + DeclareLaunchArgument( + "params_file", + default_value=params_file, + description="Path to voice_command_node parameter YAML file", + ), + DeclareLaunchArgument( + "min_confidence", + default_value="0.70", + description="Minimum NLU confidence for command execution", + ), + DeclareLaunchArgument( + "confirmation_timeout_s", + default_value="10.0", + description="Seconds to await confirmation for dangerous commands", + ), + Node( + package="saltybot_social", + executable="voice_command_node", + name="voice_command_node", + output="screen", + parameters=[ + LaunchConfiguration("params_file"), + { + "min_confidence": LaunchConfiguration("min_confidence"), + "confirmation_timeout_s": LaunchConfiguration("confirmation_timeout_s"), + }, + ], + ), + ]) diff --git a/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_node.py b/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_node.py new file mode 100644 index 0000000..4fe6d11 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_node.py @@ -0,0 +1,275 @@ +"""voice_command_node.py — Voice-command NLU bridge for saltybot (Issue #137). + +Subscribes to /social/speech/transcript, runs the lightweight regex-based NLU +parser, and publishes /social/voice_command (VoiceCommand). + +Confirmation flow +----------------- +Dangerous commands (system.shutdown, system.restart, social.forget_me) set +requires_confirmation=True and a unique token. The node waits up to +``confirmation_timeout_s`` seconds for a confirm.yes or confirm.no response. + + transcript: "shut down" + → publishes VoiceCommand(intent="system.shutdown", requires_confirmation=True) + → (TTS asks "Are you sure you want to shut down?") + transcript: "yes" + → publishes VoiceCommand(intent="system.shutdown", requires_confirmation=False) + with the same confirmation_token ← downstream treats this as "execute" + +Fallback +-------- +If confidence < ``min_confidence`` (default 0.70) or intent == "fallback", +the node publishes VoiceCommand(intent="fallback") so the orchestrator can +route the utterance to the LLM conversation engine. + +ROS2 topics +----------- +Subscribe: /social/speech/transcript (saltybot_social_msgs/SpeechTranscript) +Publish: /social/voice_command (saltybot_social_msgs/VoiceCommand) + +Parameters +---------- +min_confidence float 0.70 Intent confidence threshold for execution +confirmation_timeout_s float 10.0 Seconds before pending confirmation expires +announce_intent bool true Log intent at INFO level (useful for debug) +""" + +from __future__ import annotations + +import time +import uuid +from typing import Dict, Optional + +import rclpy +from rclpy.node import Node +from rclpy.qos import QoSProfile, QoSReliabilityPolicy + +from saltybot_social_msgs.msg import SpeechTranscript, VoiceCommand + +from .voice_command_parser import ParsedIntent, parse + + +class VoiceCommandNode(Node): + """NLU bridge: SpeechTranscript → VoiceCommand.""" + + def __init__(self) -> None: + super().__init__("voice_command_node") + + # ── Parameters ────────────────────────────────────────────────────── + self.declare_parameter("min_confidence", 0.70) + self.declare_parameter("confirmation_timeout_s", 10.0) + self.declare_parameter("announce_intent", True) + + self._min_conf: float = self.get_parameter("min_confidence").value + self._confirm_timeout: float = self.get_parameter("confirmation_timeout_s").value + self._announce: bool = self.get_parameter("announce_intent").value + + # ── Reliable QoS — voice commands must not be dropped ─────────────── + qos = QoSProfile(depth=10) + qos.reliability = QoSReliabilityPolicy.RELIABLE + + self._cmd_pub = self.create_publisher(VoiceCommand, "/social/voice_command", qos) + + self._transcript_sub = self.create_subscription( + SpeechTranscript, + "/social/speech/transcript", + self._on_transcript, + qos, + ) + + # ── Confirmation state ─────────────────────────────────────────────── + self._pending: Optional[Dict] = None # dict with pending command data + + # Periodic timeout check + self._timeout_timer = self.create_timer(1.0, self._check_confirmation_timeout) + + self.get_logger().info( + f"voice_command_node ready " + f"(min_confidence={self._min_conf}, " + f"confirmation_timeout={self._confirm_timeout}s)" + ) + + # ── Transcript callback ─────────────────────────────────────────────────── + + def _on_transcript(self, msg: SpeechTranscript) -> None: + # Skip streaming partial results + if msg.is_partial: + return + + text = msg.text.strip() + if not text: + return + + parsed: ParsedIntent = parse(text) + + if self._announce: + self.get_logger().info( + f"[NLU] '{text}' → {parsed.intent} " + f"(conf={parsed.confidence:.2f}, " + f"entities={parsed.entities})" + ) + + # ── Confirmation-pending branch ────────────────────────────────────── + if self._pending is not None: + self._handle_confirmation_response(parsed, msg.speaker_id) + return + + # ── Normal dispatch ────────────────────────────────────────────────── + self._dispatch(parsed, msg.speaker_id) + + # ── Dispatch ────────────────────────────────────────────────────────────── + + def _dispatch(self, parsed: ParsedIntent, speaker_id: str) -> None: + """Route a freshly parsed intent to the appropriate action.""" + + # Below-threshold → fallback to LLM + if parsed.confidence < self._min_conf or parsed.intent == "fallback": + self._publish( + intent="fallback", + entities={}, + confidence=parsed.confidence, + raw_text=parsed.raw_text, + speaker_id=speaker_id, + ) + return + + # Dangerous command → ask for confirmation + if parsed.requires_confirmation: + token = str(uuid.uuid4()) + self._pending = { + "token": token, + "intent": parsed.intent, + "entities": parsed.entities, + "raw_text": parsed.raw_text, + "speaker_id": speaker_id, + "expires_at": time.monotonic() + self._confirm_timeout, + } + self._publish( + intent=parsed.intent, + entities=parsed.entities, + confidence=parsed.confidence, + raw_text=parsed.raw_text, + speaker_id=speaker_id, + requires_confirmation=True, + confirmation_token=token, + ) + self.get_logger().warn( + f"Dangerous command '{parsed.intent}' — " + f"awaiting confirmation (token={token[:8]}…)" + ) + return + + # Safe command — execute immediately + self._publish( + intent=parsed.intent, + entities=parsed.entities, + confidence=parsed.confidence, + raw_text=parsed.raw_text, + speaker_id=speaker_id, + ) + + # ── Confirmation handling ───────────────────────────────────────────────── + + def _handle_confirmation_response( + self, parsed: ParsedIntent, speaker_id: str + ) -> None: + """Process a transcript while a dangerous command is awaiting confirmation.""" + assert self._pending is not None + + if parsed.intent == "confirm.yes": + # Execute the originally pending command + p = self._pending + self._pending = None + self.get_logger().info( + f"Confirmed '{p['intent']}' (token={p['token'][:8]}…)" + ) + self._publish( + intent=p["intent"], + entities=p["entities"], + confidence=1.0, + raw_text=p["raw_text"], + speaker_id=p["speaker_id"], + requires_confirmation=False, + confirmation_token=p["token"], + ) + + elif parsed.intent in ("confirm.no", "nav.stop"): + # Cancel + p = self._pending + self._pending = None + self.get_logger().info( + f"Cancelled '{p['intent']}' (token={p['token'][:8]}…)" + ) + self._publish( + intent="confirm.no", + entities={"cancelled_intent": p["intent"]}, + confidence=1.0, + raw_text=parsed.raw_text, + speaker_id=speaker_id, + confirmation_token=p["token"], + ) + + else: + # Unrecognised response while awaiting confirmation — ignore and wait + self.get_logger().debug( + f"Ignoring '{parsed.intent}' while awaiting confirmation for " + f"'{self._pending['intent']}'" + ) + + # ── Confirmation timeout ────────────────────────────────────────────────── + + def _check_confirmation_timeout(self) -> None: + if self._pending is None: + return + if time.monotonic() >= self._pending["expires_at"]: + p = self._pending + self._pending = None + self.get_logger().warn( + f"Confirmation timeout for '{p['intent']}' — cancelled" + ) + self._publish( + intent="confirm.no", + entities={"cancelled_intent": p["intent"], "reason": "timeout"}, + confidence=1.0, + raw_text="", + speaker_id=p["speaker_id"], + confirmation_token=p["token"], + ) + + # ── Publisher helper ────────────────────────────────────────────────────── + + def _publish( + self, + intent: str, + entities: Dict[str, str], + confidence: float, + raw_text: str, + speaker_id: str, + requires_confirmation: bool = False, + confirmation_token: str = "", + ) -> None: + msg = VoiceCommand() + msg.header.stamp = self.get_clock().now().to_msg() + msg.intent = intent + msg.raw_text = raw_text + msg.speaker_id = speaker_id + msg.confidence = float(confidence) + msg.entities = [f"{k}={v}" for k, v in entities.items()] + msg.requires_confirmation = requires_confirmation + msg.confirmation_token = confirmation_token + self._cmd_pub.publish(msg) + + +# ── Entry point ─────────────────────────────────────────────────────────────── + + +def main(args=None) -> None: + rclpy.init(args=args) + node = VoiceCommandNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() diff --git a/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_parser.py b/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_parser.py new file mode 100644 index 0000000..1297858 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/saltybot_social/voice_command_parser.py @@ -0,0 +1,573 @@ +"""voice_command_parser.py — Lightweight NLU intent parser for saltybot (Issue #137). + +Pure Python, zero ROS2 / ML dependencies. Uses compiled regex patterns with +named capture groups for entity extraction. No network calls, runs in < 1 ms. + +Intent taxonomy +--------------- +nav.* navigation commands +social.* social / enrollment commands +system.* robot system control +config.* behaviour configuration +confirm.* confirmation flow (yes / no) +fallback unrecognised → forward to LLM conversation engine + +Usage +----- +>>> from saltybot_social.voice_command_parser import parse +>>> r = parse("go to the kitchen please") +>>> r.intent, r.entities, r.confidence +('nav.go_to', {'location': 'kitchen'}, 0.95) + +>>> r = parse("shut down") +>>> r.intent, r.requires_confirmation +('system.shutdown', True) +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +# ── Data types ──────────────────────────────────────────────────────────────── + + +@dataclass +class ParsedIntent: + intent: str # e.g. "nav.go_to" + entities: Dict[str, str] # e.g. {"location": "kitchen"} + confidence: float # 0.0–1.0 + raw_text: str # normalised input text + requires_confirmation: bool = False + + +# ── Text normalisation ──────────────────────────────────────────────────────── + +_STRIP_PUNCT = re.compile(r"[^\w\s'.-]") # keep apostrophe, hyphen, dot +_MULTI_SPACE = re.compile(r"\s+") + + +def _normalize(text: str) -> str: + """Lowercase, strip non-word punctuation, collapse whitespace.""" + t = text.lower() + t = _STRIP_PUNCT.sub(" ", t) + t = _MULTI_SPACE.sub(" ", t) + return t.strip() + + +# ── Internal intent definition ──────────────────────────────────────────────── + + +@dataclass +class _IntentDef: + name: str + patterns: List[str] + confidence: float = 0.90 + requires_confirmation: bool = False + _compiled: List[re.Pattern] = field( + default_factory=list, repr=False, init=False + ) + + def compile(self) -> None: + self._compiled = [ + re.compile(p, re.IGNORECASE) for p in self.patterns + ] + + def match( + self, text: str + ) -> Optional[Tuple[float, Dict[str, str]]]: + for pat in self._compiled: + m = pat.search(text) + if m: + entities = { + k: v.strip() + for k, v in m.groupdict().items() + if v is not None + } + return self.confidence, entities + return None + + +# ── Follow-mode keyword table (used for post-processing) ───────────────────── + +_MODE_KEYWORDS: Dict[str, List[str]] = { + "shadow": ["shadow", "directly behind", "behind me"], + "lead": ["lead", "go ahead", "walk ahead", "walk in front", "in front"], + "side": ["side", "beside", "next to", "alongside"], + "orbit": ["orbit", "circle", "spin around"], + "loose": ["loose", "give me space", "more space", "back off", "relax"], + "tight": ["tight", "stay close", "stay very close", "right next to me"], +} + + +def _mode_from_text(text: str) -> Optional[str]: + for mode, keywords in _MODE_KEYWORDS.items(): + for kw in keywords: + if kw in text: + return mode + return None + + +# ── Volume normalisation ────────────────────────────────────────────────────── + +_WORD_VOLUMES = { + "zero": "0", "off": "0", "mute": "0", + "low": "20", "quiet": "20", "soft": "20", + "medium": "50", "mid": "50", "half": "50", "normal": "50", + "high": "80", "loud": "80", + "full": "100", "max": "100", "maximum": "100", +} + + +def _normalize_volume(level: str) -> str: + """Convert word or numeric volume string to a 0-100 integer string.""" + s = level.strip().rstrip("% ").lower().replace("percent", "").strip() + if s in _WORD_VOLUMES: + return _WORD_VOLUMES[s] + try: + return str(max(0, min(100, int(s)))) + except ValueError: + return s + + +# ── Intent definitions — order matters for equal-confidence tie-breaking ────── + +_DEFS: List[_IntentDef] = [ + + # ── Nav: go home (must precede nav.go_to to avoid "go home" → location=home) + _IntentDef( + name="nav.go_home", + confidence=0.95, + patterns=[ + r"\bgo home\b", + r"\bhead home\b", + r"\breturn (?:to )?(?:home|base|your dock|charging station|home base)\b", + r"\bback to (?:home|base|your dock|charging station)\b", + r"\bdock yourself\b", + ], + ), + + # ── Nav: go to X + _IntentDef( + name="nav.go_to", + confidence=0.95, + patterns=[ + r"^(?:please\s+)?go to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now|quickly))?$", + r"^navigate to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now))?$", + r"^take me to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now))?$", + r"^head to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now))?$", + r"^move to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now))?$", + r"^drive to (?:the\s+)?(?P[\w][\w ]{1,30}?)(?:\s+(?:please|now))?$", + ], + ), + + # ── Nav: follow me + _IntentDef( + name="nav.follow_me", + confidence=0.95, + patterns=[ + r"\bfollow (?:me|along)\b(?!\s+route)", + r"\bcome with me\b", + r"\bstick with me\b", + r"\bkeep up\b(?!\s+with)", + r"\bwalk with me\b", + r"\bstay with me\b", + ], + ), + + # ── Nav: stop (anchored to prevent clashes with "stop recording") + _IntentDef( + name="nav.stop", + confidence=0.95, + patterns=[ + r"^(?:please\s+)?(?:stop|halt|freeze)(?:\s+(?:now|please|moving|right now|right there))?$", + r"\bdon'?t (?:move|go|proceed|go anywhere)\b", + r"\bstop (?:moving|right there|in your tracks|where you are)\b", + r"\bhold (?:it|there|on)$", + ], + ), + + # ── Nav: wait here + _IntentDef( + name="nav.wait", + confidence=0.90, + patterns=[ + r"\bwait(?: (?:here|there|for me|right here|just a|a)(?:\s+\w+)?)?\b(?!\s+until)", + r"\bhold (?:your )?position\b", + r"\bhold your ground\b", + r"\bpause (?:here|there)?\b", + r"\bstay (?:here|there|put|right here)\b(?!\s+(?:close|beside))", + r"\bstay (?:right )?where you are\b", + ], + ), + + # ── Nav: come here + _IntentDef( + name="nav.come_here", + confidence=0.95, + patterns=[ + r"\bcome (?:here|over here|over to me|to me|towards me)\b", + r"\bget (?:over here|here|to me)\b", + r"\bapproach (?:me|here|this way)\b", + r"\bmove (?:towards|toward|over to) me\b", + ], + ), + + # ── Nav: patrol + _IntentDef( + name="nav.patrol", + confidence=0.90, + patterns=[ + r"\b(?:start\s+|begin\s+|go on\s+)?patrol(?:ling|s)?\b", + r"\b(?:start\s+|begin\s+)?guarding(?: the area)?\b", + r"\bguard the (?:area|perimeter|room|building|place)\b", + ], + ), + + # ── Nav: teach route + _IntentDef( + name="nav.teach_route", + confidence=0.95, + patterns=[ + r"teach (?:a |me a |this |new )?route(?: (?:called|named))? ?(?P[\w][\w ]{0,29}?)(?:\s+(?:please|now))?$", + r"record (?:a |this |new )?route(?: (?:called|named))? ?(?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + r"learn (?:this |a |new )?route(?: (?:called|named))? ?(?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + r"remember this route(?: (?:as|called|named) ?(?P[\w][\w ]{0,29}?))?(?:\s+(?:please))?$", + r"\bstart (?:recording|teaching)(?: a| the)? route\b", + r"\bbegin route recording\b", + ], + ), + + # ── Nav: stop teaching route + _IntentDef( + name="nav.stop_teaching", + confidence=0.95, + patterns=[ + r"\bstop (?:recording|teaching)(?: the| this)? route\b", + r"\bfinish (?:recording|teaching)(?: the| this)? route\b", + r"\bdone (?:recording|teaching)(?: the| this)? route\b", + r"\bend route (?:recording|teaching)\b", + ], + ), + + # ── Nav: replay route (must check before nav.follow_me to avoid "follow route X") + _IntentDef( + name="nav.replay_route", + confidence=0.95, + patterns=[ + r"replay(?: route)? (?P[\w][\w ]{0,29}?)(?:\s+(?:please|now))?$", + r"play back (?:route )?(?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + r"follow route (?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + r"go on route (?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + r"take (?:the )?(?P[\w][\w ]{0,29}?) route(?:\s+(?:please))?$", + r"run route (?P[\w][\w ]{0,29}?)(?:\s+(?:please))?$", + ], + ), + + # ── Nav: set follow mode (explicit mode-change phrasing) + _IntentDef( + name="nav.set_mode", + confidence=0.90, + patterns=[ + r"(?:change|switch|set)(?: follow)? mode to (?Pshadow|lead|side|orbit|loose|tight)\b", + r"(?:use|enable) (?Pshadow|lead|side|orbit|loose|tight)(?: follow)? mode\b", + r"\b(?Pshadow)(?: mode| me| follow)?\s*$", + r"\borbit(?: mode| me| around me)?\s*$", + r"\bwalk (?:in front|ahead)(?: of me)?\s*$", + r"\bstay beside(?: me)?\s*$", + r"\bwalk beside(?: me)?\s*$", + r"\bgive me (?:more )?space\s*$", + r"\bback off(?: a (?:bit|little))?\s*$", + r"\bstay (?:very )?close(?: to me)?\s*$", + r"\b(?Ploose)(?: mode| follow)?\s*$", + r"\b(?Ptight)(?: mode| follow)?\s*$", + ], + ), + + # ── Social: remember me / register name + _IntentDef( + name="social.remember_me", + confidence=0.95, + patterns=[ + r"^(?:please\s+)?remember me\b(?: as (?P[\w][\w ]{1,29}?))?(?:\s+please)?$", + r"\bmy name is (?P[\w][\w ]{1,29}?)(?:\s+please)?$", + r"\bcall me (?P[\w][\w ]{1,29}?)(?:\s+please)?$", + r"\bremember (?:my name|who i am)\b", + r"\blearn my name\b", + r"\bregister me\b", + ], + ), + + # ── Social: forget me [requires confirmation] + _IntentDef( + name="social.forget_me", + confidence=0.95, + requires_confirmation=True, + patterns=[ + r"\bforget me\b", + r"\bdelete me\b", + r"\bremove me\b", + r"\bforget who i am\b", + r"\berase (?:me|my (?:face|name|data|profile))\b", + r"\bdelete my (?:face|name|data|profile)\b", + r"\bunregister me\b", + ], + ), + + # ── Social: what's my name? + _IntentDef( + name="social.whats_my_name", + confidence=0.95, + patterns=[ + r"\bwhat'?s my name\b", + r"\bdo you (?:know|remember) (?:me|who i am)\b", + r"\bwho am i\b", + r"\bdo you (?:know|recognize|recognise) me\b", + r"\bhave (?:you )?(?:met|seen) me before\b", + r"\bdo you know my name\b", + ], + ), + + # ── Social: tell me a joke + _IntentDef( + name="social.tell_joke", + confidence=0.90, + patterns=[ + r"\btell(?: me)?(?: a)? joke\b", + r"\bsay something funny\b", + r"\bmake me (?:laugh|smile)\b", + r"\ba joke(?: please)?\b", + r"\bsomething funny\b", + r"\bfunny (?:story|one)\b", + r"\bcheer me up\b", + ], + ), + + # ── System: battery status + _IntentDef( + name="system.battery_status", + confidence=0.95, + patterns=[ + r"\bbattery (?:status|level|life|charge|percentage|reading)\b", + r"\bhow(?:'?s| much| is)(?: (?:your|the))? battery\b", + r"\bpower (?:status|level|reading)\b", + r"\bhow charged are you\b", + r"\bare you (?:low on|running out of) (?:power|battery|charge)\b", + r"\bcheck (?:the )?battery\b", + ], + ), + + # ── System: map status + _IntentDef( + name="system.map_status", + confidence=0.90, + patterns=[ + r"\bmap (?:status|ready|available|loaded|complete|done)\b", + r"\bhow(?:'?s| is)(?: (?:the|your))? map(?:ping)?\b", + r"\b(?:are you|is the) map(?:ped|ping)?\b", + r"\bdo you have a map\b", + r"\bmap (?:updated|up to date)\b", + ], + ), + + # ── System: shutdown [requires confirmation] + _IntentDef( + name="system.shutdown", + confidence=0.95, + requires_confirmation=True, + patterns=[ + r"\bshut ?down\b", + r"\bpower (?:off|down)\b", + r"\bturn yourself off\b", + r"\bturn off\b", + r"\bshut yourself off\b", + ], + ), + + # ── System: restart [requires confirmation] + _IntentDef( + name="system.restart", + confidence=0.95, + requires_confirmation=True, + patterns=[ + r"\brestart\b(?!\s+(?:music|audio|video|stream|app|the))", + r"\breboot\b", + r"\bstart fresh\b", + r"\bstart over\b", + r"\breset yourself\b", + ], + ), + + # ── System: volume up + _IntentDef( + name="system.volume_up", + confidence=0.90, + patterns=[ + r"\bvolume up\b", + r"\blouder\b", + r"\bturn (?:it |the volume |yourself )?up\b", + r"\bspeak up\b", + r"\bincrease (?:the )?volume\b", + r"\braise (?:your |the )?voice\b", + ], + ), + + # ── System: volume down + _IntentDef( + name="system.volume_down", + confidence=0.90, + patterns=[ + r"\bvolume down\b", + r"\bquieter\b", + r"\bturn (?:it |the volume |yourself )?down\b", + r"\bsofter\b", + r"\bdecrease (?:the )?volume\b", + r"\bkeep (?:it |your voice )?down\b", + r"\bmute\b", + ], + ), + + # ── System: volume set (exact level) + _IntentDef( + name="system.volume_set", + confidence=0.95, + patterns=[ + r"set (?:the )?volume to (?P\d{1,3}(?:\s*percent)?|low|medium|high|max)\b", + r"volume (?:to |at )?(?P\d{1,3})(?:\s*percent)?\b", + r"(?:turn|set) (?:the )?volume (?:to |at )?(?P\d{1,3}(?:\s*percent)?|low|medium|high|max)\b", + ], + ), + + # ── Config: change personality + _IntentDef( + name="config.personality", + confidence=0.90, + patterns=[ + r"(?:change|switch)(?: to)? (?P[\w][\w ]{1,20}?) personality\b", + r"(?:use|load) (?P[\w][\w ]{1,20}?) personality\b", + r"personality (?:mode )?(?:set (?:to )?)?(?P[\w][\w ]{1,20}?)(?:\s+please)?$", + r"be (?:more )?(?Pfriendly|formal|casual|playful|serious|stern)\b", + ], + ), + + # ── Config: sass level + _IntentDef( + name="config.sass_level", + confidence=0.90, + patterns=[ + r"\bmore sas(?:sy|s)\b", + r"\bless sass(?:y|iness)?\b", + r"(?:change|set) sass (?:level )?to (?P\d{1,2}|low|medium|high)\b", + r"\bsass (?:level )?(?P\d{1,2}|low|medium|high)\b", + r"\btone (?:it |yourself )?down\b", + r"\bless (?:sarcastic|snarky|rude|cheeky)\b", + r"\bmore (?:sarcastic|snarky|witty|cheeky)\b", + ], + ), + + # ── Config: follow mode (explicit config phrasing, lower confidence than nav.set_mode) + _IntentDef( + name="config.follow_mode", + confidence=0.85, + patterns=[ + r"(?:update|change|set) follow mode (?:to )?(?P\w+)\b", + r"follow mode (?:is |should be )?(?P\w+)\b", + ], + ), + + # ── Confirmation: yes + _IntentDef( + name="confirm.yes", + confidence=0.95, + patterns=[ + r"^(?:yes|yeah|yep|yup|sure|ok|okay|correct|affirmative|" + r"confirm(?:ed)?|do it|go ahead|proceed|definitely|" + r"absolutely|right|that'?s right|please do)(?:\s*[!.]*)?$", + ], + ), + + # ── Confirmation: no + _IntentDef( + name="confirm.no", + confidence=0.95, + patterns=[ + r"^(?:no|nope|nah|cancel|abort|never mind|nevermind|" + r"negative|don'?t|forget it|never|no way)(?:\s*[!.]*)?$", + ], + ), +] + +# Compile all patterns at module load time (once) +for _defn in _DEFS: + _defn.compile() + + +# ── Public API ──────────────────────────────────────────────────────────────── + + +def parse(text: str) -> ParsedIntent: + """Parse raw utterance text and return the best-matching ParsedIntent. + + Returns a ParsedIntent with intent='fallback' and confidence=0.0 when no + pattern matches. The caller should compare confidence against a threshold + (default 0.7) before acting on the result. + + Args: + text: Raw transcribed utterance (any case, with punctuation). + + Returns: + ParsedIntent with intent, entities, confidence, raw_text, and + requires_confirmation populated. + """ + normalized = _normalize(text) + if not normalized: + return ParsedIntent( + intent="fallback", entities={}, confidence=0.0, raw_text=text + ) + + best_conf = 0.0 + best_defn: Optional[_IntentDef] = None + best_entities: Dict[str, str] = {} + + for defn in _DEFS: + result = defn.match(normalized) + if result is None: + continue + conf, entities = result + if conf > best_conf: + best_conf = conf + best_defn = defn + best_entities = entities + + if best_defn is None: + return ParsedIntent( + intent="fallback", entities={}, confidence=0.0, raw_text=normalized + ) + + # ── Post-processing ─────────────────────────────────────────────────────── + + # nav.set_mode: infer mode from keywords when named group didn't capture + if best_defn.name == "nav.set_mode" and "mode" not in best_entities: + inferred = _mode_from_text(normalized) + if inferred: + best_entities["mode"] = inferred + + # system.volume_set: normalise level to integer string + if best_defn.name == "system.volume_set" and "level" in best_entities: + best_entities["level"] = _normalize_volume(best_entities["level"]) + + # nav.teach_route / nav.replay_route: strip trailing noise from name + if best_defn.name in ("nav.teach_route", "nav.replay_route", "nav.stop_teaching"): + if "name" in best_entities: + best_entities["name"] = best_entities["name"].strip() + if not best_entities["name"]: + del best_entities["name"] + + return ParsedIntent( + intent=best_defn.name, + entities=best_entities, + confidence=best_conf, + raw_text=normalized, + requires_confirmation=best_defn.requires_confirmation, + ) diff --git a/jetson/ros2_ws/src/saltybot_social/setup.py b/jetson/ros2_ws/src/saltybot_social/setup.py index e8f040c..0865784 100644 --- a/jetson/ros2_ws/src/saltybot_social/setup.py +++ b/jetson/ros2_ws/src/saltybot_social/setup.py @@ -33,6 +33,8 @@ setup( 'conversation_node = saltybot_social.conversation_node:main', 'tts_node = saltybot_social.tts_node:main', 'orchestrator_node = saltybot_social.orchestrator_node:main', + # Voice command NLU bridge (Issue #137) + 'voice_command_node = saltybot_social.voice_command_node:main', ], }, ) diff --git a/jetson/ros2_ws/src/saltybot_social/test/test_voice_command_parser.py b/jetson/ros2_ws/src/saltybot_social/test/test_voice_command_parser.py new file mode 100644 index 0000000..22c5ff0 --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social/test/test_voice_command_parser.py @@ -0,0 +1,786 @@ +"""test_voice_command_parser.py — Unit tests for voice_command_parser (Issue #137). + +Tests cover: + - All 30+ named intents with multiple phrasings + - Entity extraction: location, name, mode, level, route name + - Case insensitivity and punctuation stripping + - requires_confirmation flag on dangerous commands + - Fallback for unrecognised text + - Edge cases: empty string, numbers-only, partial phrases + +No ROS2 runtime required. +""" + +import pytest +from saltybot_social.voice_command_parser import ( + ParsedIntent, + _normalize, + _normalize_volume, + _mode_from_text, + parse, +) + + +# ── Normalisation helpers ───────────────────────────────────────────────────── + + +class TestNormalize: + def test_lowercase(self): + assert _normalize("GO TO THE KITCHEN") == "go to the kitchen" + + def test_strip_punctuation(self): + assert _normalize("stop!") == "stop" + + def test_collapse_whitespace(self): + assert _normalize(" go home ") == "go home" + + def test_keep_apostrophe(self): + assert _normalize("what's my name") == "what's my name" + + def test_keep_hyphen(self): + assert _normalize("well-known") == "well-known" + + def test_empty_string(self): + assert _normalize("") == "" + + def test_comma_stripped(self): + assert _normalize("go, go, go") == "go go go" + + +class TestNormalizeVolume: + def test_numeric(self): + assert _normalize_volume("50") == "50" + + def test_percent(self): + assert _normalize_volume("75 percent") == "75" + + def test_word_low(self): + assert _normalize_volume("low") == "20" + + def test_word_medium(self): + assert _normalize_volume("medium") == "50" + + def test_word_high(self): + assert _normalize_volume("high") == "80" + + def test_word_max(self): + assert _normalize_volume("max") == "100" + + def test_clamp_above_100(self): + assert _normalize_volume("150") == "100" + + def test_clamp_below_0(self): + assert _normalize_volume("-10") == "0" + + +# ── Nav intents ─────────────────────────────────────────────────────────────── + + +class TestNavGoTo: + def test_basic(self): + r = parse("go to the kitchen") + assert r.intent == "nav.go_to" + assert r.entities.get("location") == "kitchen" + + def test_navigate_to(self): + r = parse("navigate to the living room") + assert r.intent == "nav.go_to" + assert r.entities.get("location") == "living room" + + def test_take_me_to(self): + r = parse("take me to the front door") + assert r.intent == "nav.go_to" + assert r.entities.get("location") == "front door" + + def test_head_to(self): + r = parse("head to the bedroom") + assert r.intent == "nav.go_to" + assert r.entities.get("location") == "bedroom" + + def test_move_to(self): + r = parse("Move to the garage") + assert r.intent == "nav.go_to" + assert r.entities.get("location") == "garage" + + def test_with_please(self): + r = parse("go to the office please") + assert r.intent == "nav.go_to" + + def test_go_home_does_not_match(self): + # "go home" should be nav.go_home, not nav.go_to with location=home + r = parse("go home") + assert r.intent == "nav.go_home" + assert "location" not in r.entities + + def test_confidence(self): + assert parse("go to the lab").confidence == 0.95 + + +class TestNavGoHome: + def test_go_home(self): + assert parse("go home").intent == "nav.go_home" + + def test_return_home(self): + assert parse("return home").intent == "nav.go_home" + + def test_head_home(self): + assert parse("head home").intent == "nav.go_home" + + def test_return_to_base(self): + assert parse("return to base").intent == "nav.go_home" + + def test_back_to_base(self): + assert parse("back to base").intent == "nav.go_home" + + def test_dock_yourself(self): + assert parse("dock yourself").intent == "nav.go_home" + + def test_return_to_charging_station(self): + assert parse("return to charging station").intent == "nav.go_home" + + +class TestNavFollowMe: + def test_follow_me(self): + assert parse("follow me").intent == "nav.follow_me" + + def test_come_with_me(self): + assert parse("come with me").intent == "nav.follow_me" + + def test_stick_with_me(self): + assert parse("stick with me").intent == "nav.follow_me" + + def test_walk_with_me(self): + assert parse("walk with me").intent == "nav.follow_me" + + def test_follow_route_does_not_match(self): + # "follow route X" should be nav.replay_route, not nav.follow_me + r = parse("follow route alpha") + assert r.intent == "nav.replay_route" + + +class TestNavStop: + def test_stop(self): + assert parse("stop").intent == "nav.stop" + + def test_halt(self): + assert parse("halt").intent == "nav.stop" + + def test_freeze(self): + assert parse("freeze").intent == "nav.stop" + + def test_stop_moving(self): + assert parse("stop moving").intent == "nav.stop" + + def test_dont_move(self): + assert parse("don't move").intent == "nav.stop" + + def test_hold_it(self): + assert parse("hold it").intent == "nav.stop" + + def test_stop_right_there(self): + assert parse("stop right there").intent == "nav.stop" + + +class TestNavWait: + def test_wait(self): + assert parse("wait").intent == "nav.wait" + + def test_wait_here(self): + assert parse("wait here").intent == "nav.wait" + + def test_stay_here(self): + assert parse("stay here").intent == "nav.wait" + + def test_stay_put(self): + assert parse("stay put").intent == "nav.wait" + + def test_hold_position(self): + assert parse("hold position").intent == "nav.wait" + + def test_hold_your_position(self): + assert parse("hold your position").intent == "nav.wait" + + def test_pause_here(self): + assert parse("pause here").intent == "nav.wait" + + +class TestNavComeHere: + def test_come_here(self): + assert parse("come here").intent == "nav.come_here" + + def test_get_over_here(self): + assert parse("get over here").intent == "nav.come_here" + + def test_come_to_me(self): + assert parse("come to me").intent == "nav.come_here" + + def test_approach_me(self): + assert parse("approach me").intent == "nav.come_here" + + def test_move_towards_me(self): + assert parse("move towards me").intent == "nav.come_here" + + +class TestNavPatrol: + def test_patrol(self): + assert parse("patrol").intent == "nav.patrol" + + def test_start_patrol(self): + assert parse("start patrol").intent == "nav.patrol" + + def test_begin_patrol(self): + assert parse("begin patrol").intent == "nav.patrol" + + def test_go_on_patrol(self): + assert parse("go on patrol").intent == "nav.patrol" + + def test_start_patrolling(self): + assert parse("start patrolling").intent == "nav.patrol" + + def test_guard_the_area(self): + assert parse("guard the area").intent == "nav.patrol" + + +class TestNavSetMode: + def test_shadow_me(self): + r = parse("shadow me") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "shadow" + + def test_shadow_mode(self): + r = parse("shadow mode") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "shadow" + + def test_walk_ahead(self): + r = parse("walk ahead of me") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "lead" + + def test_stay_beside_me(self): + r = parse("stay beside me") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "side" + + def test_orbit(self): + r = parse("orbit") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "orbit" + + def test_give_me_space(self): + r = parse("give me space") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "loose" + + def test_stay_close(self): + r = parse("stay close") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "tight" + + def test_change_mode_to(self): + r = parse("change mode to lead") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "lead" + + def test_switch_to_orbit_mode(self): + r = parse("switch to orbit mode") + assert r.intent == "nav.set_mode" + assert r.entities.get("mode") == "orbit" + + +class TestNavTeachRoute: + def test_teach_route_name(self): + r = parse("teach route alpha") + assert r.intent == "nav.teach_route" + assert r.entities.get("name") == "alpha" + + def test_record_route(self): + r = parse("record route morning walk") + assert r.intent == "nav.teach_route" + assert r.entities.get("name") == "morning walk" + + def test_remember_this_route(self): + r = parse("remember this route as perimeter") + assert r.intent == "nav.teach_route" + assert r.entities.get("name") == "perimeter" + + def test_start_recording_route(self): + r = parse("start recording a route") + assert r.intent == "nav.teach_route" + + def test_learn_route(self): + r = parse("learn route delta") + assert r.intent == "nav.teach_route" + assert r.entities.get("name") == "delta" + + +class TestNavStopTeaching: + def test_stop_recording_route(self): + assert parse("stop recording the route").intent == "nav.stop_teaching" + + def test_stop_teaching_route(self): + assert parse("stop teaching the route").intent == "nav.stop_teaching" + + def test_finish_recording(self): + assert parse("finish recording the route").intent == "nav.stop_teaching" + + def test_done_recording(self): + assert parse("done recording the route").intent == "nav.stop_teaching" + + +class TestNavReplayRoute: + def test_replay_route_name(self): + r = parse("replay route alpha") + assert r.intent == "nav.replay_route" + assert r.entities.get("name") == "alpha" + + def test_follow_route_name(self): + r = parse("follow route beta") + assert r.intent == "nav.replay_route" + assert r.entities.get("name") == "beta" + + def test_take_route(self): + r = parse("take the morning walk route") + assert r.intent == "nav.replay_route" + + def test_play_back_route(self): + r = parse("play back route delta") + assert r.intent == "nav.replay_route" + assert r.entities.get("name") == "delta" + + def test_go_on_route(self): + r = parse("go on route perimeter") + assert r.intent == "nav.replay_route" + assert r.entities.get("name") == "perimeter" + + +# ── Social intents ──────────────────────────────────────────────────────────── + + +class TestSocialRememberMe: + def test_remember_me(self): + assert parse("remember me").intent == "social.remember_me" + + def test_remember_me_as(self): + r = parse("remember me as Alice") + assert r.intent == "social.remember_me" + assert r.entities.get("name") == "alice" + + def test_my_name_is(self): + r = parse("my name is Bob") + assert r.intent == "social.remember_me" + assert r.entities.get("name") == "bob" + + def test_call_me(self): + r = parse("call me Charlie") + assert r.intent == "social.remember_me" + assert r.entities.get("name") == "charlie" + + def test_remember_my_name(self): + assert parse("remember my name").intent == "social.remember_me" + + def test_learn_my_name(self): + assert parse("learn my name").intent == "social.remember_me" + + def test_register_me(self): + assert parse("register me").intent == "social.remember_me" + + +class TestSocialForgetMe: + def test_forget_me(self): + r = parse("forget me") + assert r.intent == "social.forget_me" + + def test_delete_me(self): + assert parse("delete me").intent == "social.forget_me" + + def test_remove_me(self): + assert parse("remove me").intent == "social.forget_me" + + def test_erase_my_face(self): + assert parse("erase my face").intent == "social.forget_me" + + def test_requires_confirmation(self): + assert parse("forget me").requires_confirmation is True + + def test_delete_my_profile(self): + assert parse("delete my profile").intent == "social.forget_me" + + +class TestSocialWhatsMyName: + def test_whats_my_name(self): + assert parse("what's my name").intent == "social.whats_my_name" + + def test_do_you_know_me(self): + assert parse("do you know me").intent == "social.whats_my_name" + + def test_who_am_i(self): + assert parse("who am I").intent == "social.whats_my_name" + + def test_do_you_remember_me(self): + assert parse("do you remember me").intent == "social.whats_my_name" + + def test_have_you_seen_me_before(self): + assert parse("have you seen me before").intent == "social.whats_my_name" + + +class TestSocialTellJoke: + def test_tell_me_a_joke(self): + assert parse("tell me a joke").intent == "social.tell_joke" + + def test_say_something_funny(self): + assert parse("say something funny").intent == "social.tell_joke" + + def test_make_me_laugh(self): + assert parse("make me laugh").intent == "social.tell_joke" + + def test_a_joke_please(self): + assert parse("a joke please").intent == "social.tell_joke" + + def test_cheer_me_up(self): + assert parse("cheer me up").intent == "social.tell_joke" + + +# ── System intents ──────────────────────────────────────────────────────────── + + +class TestSystemBatteryStatus: + def test_battery_status(self): + assert parse("battery status").intent == "system.battery_status" + + def test_battery_level(self): + assert parse("battery level").intent == "system.battery_status" + + def test_hows_the_battery(self): + assert parse("how's the battery").intent == "system.battery_status" + + def test_how_much_battery(self): + assert parse("how much battery").intent == "system.battery_status" + + def test_power_status(self): + assert parse("power status").intent == "system.battery_status" + + def test_check_battery(self): + assert parse("check the battery").intent == "system.battery_status" + + +class TestSystemMapStatus: + def test_map_status(self): + assert parse("map status").intent == "system.map_status" + + def test_hows_the_map(self): + assert parse("how's the map").intent == "system.map_status" + + def test_are_you_mapped(self): + assert parse("are you mapped").intent == "system.map_status" + + def test_do_you_have_a_map(self): + assert parse("do you have a map").intent == "system.map_status" + + +class TestSystemShutdown: + def test_shutdown(self): + r = parse("shut down") + assert r.intent == "system.shutdown" + + def test_shutdown_oneword(self): + r = parse("shutdown") + assert r.intent == "system.shutdown" + + def test_power_off(self): + assert parse("power off").intent == "system.shutdown" + + def test_turn_off(self): + assert parse("turn off").intent == "system.shutdown" + + def test_requires_confirmation(self): + assert parse("shut down").requires_confirmation is True + + def test_confidence(self): + assert parse("power off").confidence == 0.95 + + +class TestSystemRestart: + def test_restart(self): + assert parse("restart").intent == "system.restart" + + def test_reboot(self): + assert parse("reboot").intent == "system.restart" + + def test_start_fresh(self): + assert parse("start fresh").intent == "system.restart" + + def test_start_over(self): + assert parse("start over").intent == "system.restart" + + def test_reset_yourself(self): + assert parse("reset yourself").intent == "system.restart" + + def test_requires_confirmation(self): + assert parse("restart").requires_confirmation is True + + +class TestSystemVolumeUp: + def test_volume_up(self): + assert parse("volume up").intent == "system.volume_up" + + def test_louder(self): + assert parse("louder").intent == "system.volume_up" + + def test_turn_up(self): + assert parse("turn it up").intent == "system.volume_up" + + def test_speak_up(self): + assert parse("speak up").intent == "system.volume_up" + + def test_increase_volume(self): + assert parse("increase the volume").intent == "system.volume_up" + + +class TestSystemVolumeDown: + def test_volume_down(self): + assert parse("volume down").intent == "system.volume_down" + + def test_quieter(self): + assert parse("quieter").intent == "system.volume_down" + + def test_turn_down(self): + assert parse("turn it down").intent == "system.volume_down" + + def test_decrease_volume(self): + assert parse("decrease the volume").intent == "system.volume_down" + + def test_mute(self): + assert parse("mute").intent == "system.volume_down" + + +class TestSystemVolumeSet: + def test_set_volume_numeric(self): + r = parse("set volume to 60") + assert r.intent == "system.volume_set" + assert r.entities.get("level") == "60" + + def test_set_volume_percent(self): + r = parse("set the volume to 80 percent") + assert r.intent == "system.volume_set" + assert r.entities.get("level") == "80" + + def test_set_volume_word_low(self): + r = parse("set volume to low") + assert r.intent == "system.volume_set" + assert r.entities.get("level") == "20" + + def test_set_volume_word_high(self): + r = parse("set the volume to high") + assert r.intent == "system.volume_set" + assert r.entities.get("level") == "80" + + def test_volume_to_50(self): + r = parse("volume to 50") + assert r.intent == "system.volume_set" + assert r.entities.get("level") == "50" + + +# ── Config intents ──────────────────────────────────────────────────────────── + + +class TestConfigPersonality: + def test_change_personality(self): + r = parse("change to friendly personality") + assert r.intent == "config.personality" + assert r.entities.get("personality") == "friendly" + + def test_switch_personality(self): + r = parse("switch to formal personality") + assert r.intent == "config.personality" + + def test_use_personality(self): + r = parse("use playful personality") + assert r.intent == "config.personality" + assert r.entities.get("personality") == "playful" + + def test_be_more_adjective(self): + r = parse("be more friendly") + assert r.intent == "config.personality" + + +class TestConfigSassLevel: + def test_more_sassy(self): + assert parse("more sassy").intent == "config.sass_level" + + def test_less_sassy(self): + assert parse("less sassy").intent == "config.sass_level" + + def test_set_sass_level(self): + r = parse("set sass level to 7") + assert r.intent == "config.sass_level" + assert r.entities.get("level") == "7" + + def test_tone_it_down(self): + assert parse("tone it down").intent == "config.sass_level" + + def test_less_sarcastic(self): + assert parse("less sarcastic").intent == "config.sass_level" + + def test_more_witty(self): + assert parse("more witty").intent == "config.sass_level" + + +class TestConfigFollowMode: + def test_change_follow_mode(self): + r = parse("change follow mode to loose") + assert r.intent in ("config.follow_mode", "nav.set_mode") + + def test_set_follow_mode(self): + r = parse("set follow mode to tight") + assert r.intent in ("config.follow_mode", "nav.set_mode") + + +# ── Confirmation intents ────────────────────────────────────────────────────── + + +class TestConfirmYes: + def test_yes(self): + assert parse("yes").intent == "confirm.yes" + + def test_yeah(self): + assert parse("yeah").intent == "confirm.yes" + + def test_yep(self): + assert parse("yep").intent == "confirm.yes" + + def test_sure(self): + assert parse("sure").intent == "confirm.yes" + + def test_ok(self): + assert parse("ok").intent == "confirm.yes" + + def test_go_ahead(self): + assert parse("go ahead").intent == "confirm.yes" + + def test_do_it(self): + assert parse("do it").intent == "confirm.yes" + + def test_affirmative(self): + assert parse("affirmative").intent == "confirm.yes" + + def test_correct(self): + assert parse("correct").intent == "confirm.yes" + + +class TestConfirmNo: + def test_no(self): + assert parse("no").intent == "confirm.no" + + def test_nope(self): + assert parse("nope").intent == "confirm.no" + + def test_cancel(self): + assert parse("cancel").intent == "confirm.no" + + def test_abort(self): + assert parse("abort").intent == "confirm.no" + + def test_nevermind(self): + assert parse("never mind").intent == "confirm.no" + + def test_negative(self): + assert parse("negative").intent == "confirm.no" + + +# ── Fallback ────────────────────────────────────────────────────────────────── + + +class TestFallback: + def test_unrecognised(self): + r = parse("what is the weather like today") + assert r.intent == "fallback" + assert r.confidence == 0.0 + + def test_empty_string(self): + r = parse("") + assert r.intent == "fallback" + + def test_numbers_only(self): + r = parse("42") + assert r.intent == "fallback" + + def test_gibberish(self): + r = parse("zxqw frp klmn") + assert r.intent == "fallback" + + def test_partial_match_not_enough(self): + # "the" alone should not trigger any intent + r = parse("the") + assert r.intent == "fallback" + + def test_raw_text_preserved(self): + r = parse("Hello there!") + assert "hello there" in r.raw_text + + +# ── Requires confirmation flag ──────────────────────────────────────────────── + + +class TestRequiresConfirmation: + def test_shutdown_requires_confirmation(self): + assert parse("shut down").requires_confirmation is True + + def test_restart_requires_confirmation(self): + assert parse("restart").requires_confirmation is True + + def test_forget_me_requires_confirmation(self): + assert parse("forget me").requires_confirmation is True + + def test_stop_does_not_require_confirmation(self): + assert parse("stop").requires_confirmation is False + + def test_go_home_does_not_require_confirmation(self): + assert parse("go home").requires_confirmation is False + + def test_battery_does_not_require_confirmation(self): + assert parse("battery status").requires_confirmation is False + + +# ── Confidence values ───────────────────────────────────────────────────────── + + +class TestConfidence: + def test_high_confidence_intents(self): + for text in ("go to the kitchen", "stop", "shut down", "forget me"): + r = parse(text) + assert r.confidence >= 0.90, f"Low confidence for: {text!r}" + + def test_fallback_has_zero_confidence(self): + assert parse("what is two plus two").confidence == 0.0 + + +# ── Case insensitivity ──────────────────────────────────────────────────────── + + +class TestCaseInsensitivity: + def test_uppercase(self): + assert parse("STOP").intent == "nav.stop" + + def test_mixed_case(self): + assert parse("Go To The Kitchen").intent == "nav.go_to" + + def test_upper_follow_me(self): + assert parse("FOLLOW ME").intent == "nav.follow_me" + + +# ── Mode inference from text ────────────────────────────────────────────────── + + +class TestModeFromText: + def test_shadow_keyword(self): + assert _mode_from_text("shadow") == "shadow" + + def test_lead_keyword(self): + assert _mode_from_text("go ahead") == "lead" + + def test_orbit_keyword(self): + assert _mode_from_text("orbit") == "orbit" + + def test_none_for_unknown(self): + assert _mode_from_text("random text here") is None diff --git a/jetson/ros2_ws/src/saltybot_social_msgs/CMakeLists.txt b/jetson/ros2_ws/src/saltybot_social_msgs/CMakeLists.txt index c7eb9b4..e01c190 100644 --- a/jetson/ros2_ws/src/saltybot_social_msgs/CMakeLists.txt +++ b/jetson/ros2_ws/src/saltybot_social_msgs/CMakeLists.txt @@ -33,6 +33,8 @@ rosidl_generate_interfaces(${PROJECT_NAME} "msg/VadState.msg" # Issue #83 — conversation engine "msg/ConversationResponse.msg" + # Issue #137 — voice command NLU + "msg/VoiceCommand.msg" DEPENDENCIES std_msgs geometry_msgs builtin_interfaces ) diff --git a/jetson/ros2_ws/src/saltybot_social_msgs/msg/VoiceCommand.msg b/jetson/ros2_ws/src/saltybot_social_msgs/msg/VoiceCommand.msg new file mode 100644 index 0000000..17a9f0b --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social_msgs/msg/VoiceCommand.msg @@ -0,0 +1,25 @@ +# VoiceCommand.msg — Parsed voice command with NLU intent, entities, and confidence. +# Published by voice_command_node on /social/voice_command (Issue #137). +# +# Intent namespace: +# nav.* — navigation commands +# social.* — social / enrollment commands +# system.* — robot system control +# config.* — behavior configuration +# confirm.* — confirmation flow responses (yes / no) +# fallback — unrecognized; route to LLM conversation engine + +std_msgs/Header header + +string intent # e.g. "nav.go_to", "social.remember_me", "fallback" +string raw_text # Normalized transcribed text (lowercased, punctuation stripped) +string speaker_id # Who issued the command (from SpeechTranscript.speaker_id) +float32 confidence # NLU match confidence 0..1 + +# Entities: key=value pairs, e.g. ["location=kitchen", "mode=shadow", "name=Alice"] +# Parse with: dict(e.split("=", 1) for e in entities) +string[] entities + +# Confirmation flow (for dangerous commands: system.shutdown, system.restart, social.forget_me) +bool requires_confirmation # true = awaiting confirm.yes before execution +string confirmation_token # UUID matching this command to its confirmation response