diff --git a/jetson/ros2_ws/src/saltybot_social/models/hey_salty_synthetic.npy b/jetson/ros2_ws/src/saltybot_social/models/hey_salty_synthetic.npy new file mode 100644 index 0000000..2b34efc Binary files /dev/null and b/jetson/ros2_ws/src/saltybot_social/models/hey_salty_synthetic.npy differ diff --git a/jetson/ros2_ws/src/saltybot_social_enrollment/saltybot_social_enrollment/social_enrollment_node.py b/jetson/ros2_ws/src/saltybot_social_enrollment/saltybot_social_enrollment/social_enrollment_node.py new file mode 100644 index 0000000..d38fc6d --- /dev/null +++ b/jetson/ros2_ws/src/saltybot_social_enrollment/saltybot_social_enrollment/social_enrollment_node.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 +"""social_enrollment_node.py -- First Encounter enrollment with face + voice biometrics. + +Triggered by FirstEncounterOrchestrator when state transitions to ENROLL. +Captures: + - Face embedding (via SCRFD + ArcFace from RealSense RGB) + - Voice speaker embedding (via ECAPA-TDNN) + - RealSense RGB photo snapshot + - Metadata (name, context, timestamp) + +Stores to: + - /home/seb/encounter-queue/{person_id}_{timestamp}.json (for offline cloud sync) + - Local speaker_embeddings.json (for immediate voice recognition) + - Face gallery (via EnrollPerson service to face_recognizer) + +Subscribes to: + /social/orchestrator/state (JSON: state, person_id, name, context) + /social/faces/embeddings (FaceEmbeddingArray with ArcFace embeddings) + /camera/color/image_raw (RealSense RGB for snapshots) + /social/speech/speaker_embedding (speaker embedding from ECAPA-TDNN) + +Publishes: + /social/enrollment/status (JSON: person_id, status, person_db_id) +""" + +import json +import time +import threading +import numpy as np +from pathlib import Path +from dataclasses import dataclass, asdict +from typing import Optional, Dict +from datetime import datetime + +import rclpy +from rclpy.node import Node +from rclpy.qos import QoSProfile, ReliabilityPolicy, DurabilityPolicy +from std_msgs.msg import String +from sensor_msgs.msg import Image +import cv2 +from cv_bridge import CvBridge + +from saltybot_social_msgs.msg import FaceEmbeddingArray +from saltybot_social_msgs.srv import EnrollPerson +from saltybot_social_enrollment.person_db import PersonDB + + +@dataclass +class EnrollmentRequest: + """Enrollment request from first encounter.""" + person_id: str + name: str + context: Optional[str] = None + timestamp: float = 0.0 + face_embedding: Optional[np.ndarray] = None + voice_embedding: Optional[np.ndarray] = None + photo_data: Optional[bytes] = None # JPEG encoded + + +class SocialEnrollmentNode(Node): + """Face + voice enrollment during first encounter.""" + + def __init__(self): + super().__init__('social_enrollment') + + # Parameters + self.declare_parameter('encounter_queue_dir', '/home/seb/encounter-queue') + self.declare_parameter('speaker_embeddings_path', '/home/seb/speaker_embeddings.json') + self.declare_parameter('photos_dir', '/home/seb/encounter-photos') + self.declare_parameter('face_recognizer_service', '/social/face_recognizer/enroll') + self.declare_parameter('embedding_dim_face', 512) + self.declare_parameter('embedding_dim_voice', 192) + + self.queue_dir = Path(self.get_parameter('encounter_queue_dir').value) + self.speaker_embeddings_path = Path(self.get_parameter('speaker_embeddings_path').value) + self.photos_dir = Path(self.get_parameter('photos_dir').value) + self.face_service_name = self.get_parameter('face_recognizer_service').value + self.face_emb_dim = self.get_parameter('embedding_dim_face').value + self.voice_emb_dim = self.get_parameter('embedding_dim_voice').value + + # Create directories + self.queue_dir.mkdir(parents=True, exist_ok=True) + self.photos_dir.mkdir(parents=True, exist_ok=True) + + # Initialize PersonDB + self._db = PersonDB(str(self.queue_dir.parent / 'persons.db')) + self.get_logger().info(f'PersonDB initialized') + + # CV bridge for image conversion + self._bridge = CvBridge() + + # State + self._enrollment_request: Optional[EnrollmentRequest] = None + self._lock = threading.Lock() + self._latest_face_embedding: Optional[np.ndarray] = None + self._latest_voice_embedding: Optional[np.ndarray] = None + self._latest_image: Optional[Image] = None + self._face_embedding_timestamp = 0.0 + self._voice_embedding_timestamp = 0.0 + self._image_timestamp = 0.0 + + # QoS profiles + best_effort_qos = QoSProfile( + depth=10, + reliability=ReliabilityPolicy.BEST_EFFORT, + durability=DurabilityPolicy.VOLATILE, + ) + reliable_qos = QoSProfile( + depth=1, + reliability=ReliabilityPolicy.RELIABLE, + durability=DurabilityPolicy.VOLATILE, + ) + + # Subscriptions + self.create_subscription( + String, '/social/orchestrator/state', + self._on_orchestrator_state, reliable_qos + ) + self.create_subscription( + FaceEmbeddingArray, '/social/faces/embeddings', + self._on_face_embeddings, reliable_qos + ) + self.create_subscription( + Image, '/camera/color/image_raw', + self._on_camera_image, best_effort_qos + ) + self.create_subscription( + String, '/social/speech/speaker_embedding', + self._on_speaker_embedding, best_effort_qos + ) + + # Service clients + self._enroll_face_client = self.create_client( + EnrollPerson, self.face_service_name + ) + + # Publishers + self._pub_status = self.create_publisher( + String, '/social/enrollment/status', reliable_qos + ) + + # Timer for enrollment timeout handling + self.create_timer(0.5, self._enrollment_timeout_check) + + self.get_logger().info( + f'Social enrollment node initialized. ' + f'Queue: {self.queue_dir}, ' + f'Speakers: {self.speaker_embeddings_path}' + ) + + def _on_orchestrator_state(self, msg: String) -> None: + """Handle orchestrator state transitions.""" + try: + state_data = json.loads(msg.data) + state = state_data.get('state') + + if state == 'ENROLL': + person_id = state_data.get('person_id') + name = state_data.get('name') + context = state_data.get('context') + + with self._lock: + self._enrollment_request = EnrollmentRequest( + person_id=person_id, + name=name, + context=context, + timestamp=time.time() + ) + self._face_embedding_timestamp = 0.0 + self._voice_embedding_timestamp = 0.0 + self._image_timestamp = 0.0 + + self.get_logger().info( + f'Enrollment triggered: {name} (ID: {person_id})' + ) + + except json.JSONDecodeError as e: + self.get_logger().error(f'Invalid orchestrator state JSON: {e}') + + def _on_face_embeddings(self, msg: FaceEmbeddingArray) -> None: + """Capture face embedding from social face recognizer.""" + if not msg.embeddings: + return + + with self._lock: + if self._enrollment_request is None: + return + + # Take first detected face embedding + face_emb = msg.embeddings[0] + emb_array = np.frombuffer(face_emb.embedding, dtype=np.float32) + + if len(emb_array) == self.face_emb_dim: + self._latest_face_embedding = emb_array.copy() + self._face_embedding_timestamp = time.time() + self.get_logger().debug( + f'Face embedding captured: {face_emb.track_id}' + ) + + def _on_speaker_embedding(self, msg: String) -> None: + """Capture voice speaker embedding from ECAPA-TDNN.""" + try: + emb_data = json.loads(msg.data) + emb_values = emb_data.get('embedding') + + if emb_values: + with self._lock: + if self._enrollment_request is None: + return + + emb_array = np.array(emb_values, dtype=np.float32) + if len(emb_array) == self.voice_emb_dim: + self._latest_voice_embedding = emb_array.copy() + self._voice_embedding_timestamp = time.time() + self.get_logger().debug( + f'Voice embedding captured: {len(emb_array)} dims' + ) + + except json.JSONDecodeError as e: + self.get_logger().error(f'Invalid speaker embedding JSON: {e}') + + def _on_camera_image(self, msg: Image) -> None: + """Capture RealSense RGB image for enrollment photo.""" + try: + with self._lock: + if self._enrollment_request is None: + return + + # Store latest image + self._latest_image = msg + self._image_timestamp = time.time() + + except Exception as e: + self.get_logger().error(f'Error capturing camera image: {e}') + + def _enrollment_timeout_check(self) -> None: + """Check if enrollment data is ready or timed out.""" + with self._lock: + if self._enrollment_request is None: + return + + now = time.time() + timeout = 10.0 # 10 seconds to collect embeddings + + # Check if all data collected + has_face = self._latest_face_embedding is not None and \ + (now - self._face_embedding_timestamp < 5.0) + has_voice = self._latest_voice_embedding is not None and \ + (now - self._voice_embedding_timestamp < 5.0) + has_image = self._latest_image is not None and \ + (now - self._image_timestamp < 5.0) + + # If we have face + voice, proceed with enrollment + if has_face and has_voice: + self._complete_enrollment() + # If timeout exceeded, save what we have + elif (now - self._enrollment_request.timestamp) > timeout: + self.get_logger().warn( + f'Enrollment timeout for {self._enrollment_request.name}. ' + f'Proceeding with available data.' + ) + self._complete_enrollment() + + def _complete_enrollment(self) -> None: + """Complete enrollment process.""" + request = self._enrollment_request + if request is None: + return + + try: + # Save enrollment data to queue + enroll_data = { + 'person_id': request.person_id, + 'name': request.name, + 'context': request.context, + 'timestamp': request.timestamp, + 'datetime': datetime.fromtimestamp(request.timestamp).isoformat(), + 'face_embedding_shape': list(self._latest_face_embedding.shape) + if self._latest_face_embedding is not None else None, + 'voice_embedding_shape': list(self._latest_voice_embedding.shape) + if self._latest_voice_embedding is not None else None, + } + + # Save queue JSON + queue_file = self.queue_dir / f"enrollment_{request.person_id}_{int(request.timestamp)}.json" + with open(queue_file, 'w') as f: + json.dump(enroll_data, f, indent=2) + self.get_logger().info(f'Enrollment data queued: {queue_file}') + + # Save photo if available + photo_id = None + if self._latest_image is not None: + photo_id = self._save_enrollment_photo(request) + + # Add to PersonDB with embeddings + person_db_id = self._db.add_person( + name=request.name, + embedding=self._latest_face_embedding, + sample_count=1, + metadata={ + 'encounter_person_id': request.person_id, + 'context': request.context, + 'photo_id': photo_id, + 'timestamp': request.timestamp, + } + ) + self.get_logger().info(f'Added to PersonDB: ID {person_db_id}') + + # Update speaker embeddings JSON + self._update_speaker_embeddings(person_db_id, request) + + # Enroll face via face_recognizer service + self._enroll_face(person_db_id, request) + + # Publish success status + self._publish_enrollment_status('success', person_db_id) + + except Exception as e: + self.get_logger().error(f'Enrollment failed for {request.name}: {e}') + self._publish_enrollment_status('failed', None) + finally: + self._enrollment_request = None + self._latest_face_embedding = None + self._latest_voice_embedding = None + self._latest_image = None + + def _save_enrollment_photo(self, request: EnrollmentRequest) -> str: + """Save enrollment photo from RealSense.""" + try: + if self._latest_image is None: + return None + + cv_image = self._bridge.imgmsg_to_cv2(self._latest_image, 'bgr8') + photo_id = f"{request.person_id}_{int(request.timestamp)}" + photo_path = self.photos_dir / f"{photo_id}.jpg" + + cv2.imwrite(str(photo_path), cv_image) + self.get_logger().info(f'Enrollment photo saved: {photo_path}') + return photo_id + + except Exception as e: + self.get_logger().error(f'Failed to save enrollment photo: {e}') + return None + + def _update_speaker_embeddings(self, person_db_id: int, request: EnrollmentRequest) -> None: + """Update speaker_embeddings.json with voice embedding.""" + try: + if self._latest_voice_embedding is None: + return + + # Load existing embeddings + speaker_db = {} + if self.speaker_embeddings_path.exists(): + with open(self.speaker_embeddings_path, 'r') as f: + speaker_db = json.load(f) + + # Add new embedding + speaker_db[str(person_db_id)] = { + 'name': request.name, + 'person_id': request.person_id, + 'embedding': self._latest_voice_embedding.tolist(), + 'timestamp': request.timestamp, + } + + # Save updated embeddings + with open(self.speaker_embeddings_path, 'w') as f: + json.dump(speaker_db, f, indent=2) + + self.get_logger().info( + f'Speaker embedding saved for {request.name}' + ) + + except Exception as e: + self.get_logger().error(f'Failed to update speaker embeddings: {e}') + + def _enroll_face(self, person_db_id: int, request: EnrollmentRequest) -> None: + """Enroll face via face_recognizer service.""" + try: + if self._latest_face_embedding is None: + return + + if not self._enroll_face_client.wait_for_service(timeout_sec=2.0): + self.get_logger().warn( + f'Face recognizer service not available. Skipping face enrollment.' + ) + return + + # Call EnrollPerson service + req = EnrollPerson.Request() + req.name = request.name + req.mode = 'face' + req.n_samples = 1 + + future = self._enroll_face_client.call_async(req) + self.get_logger().info(f'Face enrollment service called for {request.name}') + + except Exception as e: + self.get_logger().error(f'Face enrollment service call failed: {e}') + + def _publish_enrollment_status(self, status: str, person_db_id: Optional[int]) -> None: + """Publish enrollment completion status.""" + try: + status_msg = { + 'status': status, + 'person_id': self._enrollment_request.person_id if self._enrollment_request else None, + 'name': self._enrollment_request.name if self._enrollment_request else None, + 'person_db_id': person_db_id, + 'timestamp': time.time(), + } + self._pub_status.publish(String(data=json.dumps(status_msg))) + except Exception as e: + self.get_logger().error(f'Failed to publish enrollment status: {e}') + + +def main(args=None): + rclpy.init(args=args) + node = SocialEnrollmentNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main()