feat: first encounter orchestrator (Issue #400) #402
Binary file not shown.
@ -0,0 +1,428 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""social_enrollment_node.py -- First Encounter enrollment with face + voice biometrics.
|
||||||
|
|
||||||
|
Triggered by FirstEncounterOrchestrator when state transitions to ENROLL.
|
||||||
|
Captures:
|
||||||
|
- Face embedding (via SCRFD + ArcFace from RealSense RGB)
|
||||||
|
- Voice speaker embedding (via ECAPA-TDNN)
|
||||||
|
- RealSense RGB photo snapshot
|
||||||
|
- Metadata (name, context, timestamp)
|
||||||
|
|
||||||
|
Stores to:
|
||||||
|
- /home/seb/encounter-queue/{person_id}_{timestamp}.json (for offline cloud sync)
|
||||||
|
- Local speaker_embeddings.json (for immediate voice recognition)
|
||||||
|
- Face gallery (via EnrollPerson service to face_recognizer)
|
||||||
|
|
||||||
|
Subscribes to:
|
||||||
|
/social/orchestrator/state (JSON: state, person_id, name, context)
|
||||||
|
/social/faces/embeddings (FaceEmbeddingArray with ArcFace embeddings)
|
||||||
|
/camera/color/image_raw (RealSense RGB for snapshots)
|
||||||
|
/social/speech/speaker_embedding (speaker embedding from ECAPA-TDNN)
|
||||||
|
|
||||||
|
Publishes:
|
||||||
|
/social/enrollment/status (JSON: person_id, status, person_db_id)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
import numpy as np
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from typing import Optional, Dict
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
from rclpy.qos import QoSProfile, ReliabilityPolicy, DurabilityPolicy
|
||||||
|
from std_msgs.msg import String
|
||||||
|
from sensor_msgs.msg import Image
|
||||||
|
import cv2
|
||||||
|
from cv_bridge import CvBridge
|
||||||
|
|
||||||
|
from saltybot_social_msgs.msg import FaceEmbeddingArray
|
||||||
|
from saltybot_social_msgs.srv import EnrollPerson
|
||||||
|
from saltybot_social_enrollment.person_db import PersonDB
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class EnrollmentRequest:
|
||||||
|
"""Enrollment request from first encounter."""
|
||||||
|
person_id: str
|
||||||
|
name: str
|
||||||
|
context: Optional[str] = None
|
||||||
|
timestamp: float = 0.0
|
||||||
|
face_embedding: Optional[np.ndarray] = None
|
||||||
|
voice_embedding: Optional[np.ndarray] = None
|
||||||
|
photo_data: Optional[bytes] = None # JPEG encoded
|
||||||
|
|
||||||
|
|
||||||
|
class SocialEnrollmentNode(Node):
|
||||||
|
"""Face + voice enrollment during first encounter."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__('social_enrollment')
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
self.declare_parameter('encounter_queue_dir', '/home/seb/encounter-queue')
|
||||||
|
self.declare_parameter('speaker_embeddings_path', '/home/seb/speaker_embeddings.json')
|
||||||
|
self.declare_parameter('photos_dir', '/home/seb/encounter-photos')
|
||||||
|
self.declare_parameter('face_recognizer_service', '/social/face_recognizer/enroll')
|
||||||
|
self.declare_parameter('embedding_dim_face', 512)
|
||||||
|
self.declare_parameter('embedding_dim_voice', 192)
|
||||||
|
|
||||||
|
self.queue_dir = Path(self.get_parameter('encounter_queue_dir').value)
|
||||||
|
self.speaker_embeddings_path = Path(self.get_parameter('speaker_embeddings_path').value)
|
||||||
|
self.photos_dir = Path(self.get_parameter('photos_dir').value)
|
||||||
|
self.face_service_name = self.get_parameter('face_recognizer_service').value
|
||||||
|
self.face_emb_dim = self.get_parameter('embedding_dim_face').value
|
||||||
|
self.voice_emb_dim = self.get_parameter('embedding_dim_voice').value
|
||||||
|
|
||||||
|
# Create directories
|
||||||
|
self.queue_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.photos_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Initialize PersonDB
|
||||||
|
self._db = PersonDB(str(self.queue_dir.parent / 'persons.db'))
|
||||||
|
self.get_logger().info(f'PersonDB initialized')
|
||||||
|
|
||||||
|
# CV bridge for image conversion
|
||||||
|
self._bridge = CvBridge()
|
||||||
|
|
||||||
|
# State
|
||||||
|
self._enrollment_request: Optional[EnrollmentRequest] = None
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._latest_face_embedding: Optional[np.ndarray] = None
|
||||||
|
self._latest_voice_embedding: Optional[np.ndarray] = None
|
||||||
|
self._latest_image: Optional[Image] = None
|
||||||
|
self._face_embedding_timestamp = 0.0
|
||||||
|
self._voice_embedding_timestamp = 0.0
|
||||||
|
self._image_timestamp = 0.0
|
||||||
|
|
||||||
|
# QoS profiles
|
||||||
|
best_effort_qos = QoSProfile(
|
||||||
|
depth=10,
|
||||||
|
reliability=ReliabilityPolicy.BEST_EFFORT,
|
||||||
|
durability=DurabilityPolicy.VOLATILE,
|
||||||
|
)
|
||||||
|
reliable_qos = QoSProfile(
|
||||||
|
depth=1,
|
||||||
|
reliability=ReliabilityPolicy.RELIABLE,
|
||||||
|
durability=DurabilityPolicy.VOLATILE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Subscriptions
|
||||||
|
self.create_subscription(
|
||||||
|
String, '/social/orchestrator/state',
|
||||||
|
self._on_orchestrator_state, reliable_qos
|
||||||
|
)
|
||||||
|
self.create_subscription(
|
||||||
|
FaceEmbeddingArray, '/social/faces/embeddings',
|
||||||
|
self._on_face_embeddings, reliable_qos
|
||||||
|
)
|
||||||
|
self.create_subscription(
|
||||||
|
Image, '/camera/color/image_raw',
|
||||||
|
self._on_camera_image, best_effort_qos
|
||||||
|
)
|
||||||
|
self.create_subscription(
|
||||||
|
String, '/social/speech/speaker_embedding',
|
||||||
|
self._on_speaker_embedding, best_effort_qos
|
||||||
|
)
|
||||||
|
|
||||||
|
# Service clients
|
||||||
|
self._enroll_face_client = self.create_client(
|
||||||
|
EnrollPerson, self.face_service_name
|
||||||
|
)
|
||||||
|
|
||||||
|
# Publishers
|
||||||
|
self._pub_status = self.create_publisher(
|
||||||
|
String, '/social/enrollment/status', reliable_qos
|
||||||
|
)
|
||||||
|
|
||||||
|
# Timer for enrollment timeout handling
|
||||||
|
self.create_timer(0.5, self._enrollment_timeout_check)
|
||||||
|
|
||||||
|
self.get_logger().info(
|
||||||
|
f'Social enrollment node initialized. '
|
||||||
|
f'Queue: {self.queue_dir}, '
|
||||||
|
f'Speakers: {self.speaker_embeddings_path}'
|
||||||
|
)
|
||||||
|
|
||||||
|
def _on_orchestrator_state(self, msg: String) -> None:
|
||||||
|
"""Handle orchestrator state transitions."""
|
||||||
|
try:
|
||||||
|
state_data = json.loads(msg.data)
|
||||||
|
state = state_data.get('state')
|
||||||
|
|
||||||
|
if state == 'ENROLL':
|
||||||
|
person_id = state_data.get('person_id')
|
||||||
|
name = state_data.get('name')
|
||||||
|
context = state_data.get('context')
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
self._enrollment_request = EnrollmentRequest(
|
||||||
|
person_id=person_id,
|
||||||
|
name=name,
|
||||||
|
context=context,
|
||||||
|
timestamp=time.time()
|
||||||
|
)
|
||||||
|
self._face_embedding_timestamp = 0.0
|
||||||
|
self._voice_embedding_timestamp = 0.0
|
||||||
|
self._image_timestamp = 0.0
|
||||||
|
|
||||||
|
self.get_logger().info(
|
||||||
|
f'Enrollment triggered: {name} (ID: {person_id})'
|
||||||
|
)
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
self.get_logger().error(f'Invalid orchestrator state JSON: {e}')
|
||||||
|
|
||||||
|
def _on_face_embeddings(self, msg: FaceEmbeddingArray) -> None:
|
||||||
|
"""Capture face embedding from social face recognizer."""
|
||||||
|
if not msg.embeddings:
|
||||||
|
return
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
if self._enrollment_request is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Take first detected face embedding
|
||||||
|
face_emb = msg.embeddings[0]
|
||||||
|
emb_array = np.frombuffer(face_emb.embedding, dtype=np.float32)
|
||||||
|
|
||||||
|
if len(emb_array) == self.face_emb_dim:
|
||||||
|
self._latest_face_embedding = emb_array.copy()
|
||||||
|
self._face_embedding_timestamp = time.time()
|
||||||
|
self.get_logger().debug(
|
||||||
|
f'Face embedding captured: {face_emb.track_id}'
|
||||||
|
)
|
||||||
|
|
||||||
|
def _on_speaker_embedding(self, msg: String) -> None:
|
||||||
|
"""Capture voice speaker embedding from ECAPA-TDNN."""
|
||||||
|
try:
|
||||||
|
emb_data = json.loads(msg.data)
|
||||||
|
emb_values = emb_data.get('embedding')
|
||||||
|
|
||||||
|
if emb_values:
|
||||||
|
with self._lock:
|
||||||
|
if self._enrollment_request is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
emb_array = np.array(emb_values, dtype=np.float32)
|
||||||
|
if len(emb_array) == self.voice_emb_dim:
|
||||||
|
self._latest_voice_embedding = emb_array.copy()
|
||||||
|
self._voice_embedding_timestamp = time.time()
|
||||||
|
self.get_logger().debug(
|
||||||
|
f'Voice embedding captured: {len(emb_array)} dims'
|
||||||
|
)
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
self.get_logger().error(f'Invalid speaker embedding JSON: {e}')
|
||||||
|
|
||||||
|
def _on_camera_image(self, msg: Image) -> None:
|
||||||
|
"""Capture RealSense RGB image for enrollment photo."""
|
||||||
|
try:
|
||||||
|
with self._lock:
|
||||||
|
if self._enrollment_request is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Store latest image
|
||||||
|
self._latest_image = msg
|
||||||
|
self._image_timestamp = time.time()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Error capturing camera image: {e}')
|
||||||
|
|
||||||
|
def _enrollment_timeout_check(self) -> None:
|
||||||
|
"""Check if enrollment data is ready or timed out."""
|
||||||
|
with self._lock:
|
||||||
|
if self._enrollment_request is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
timeout = 10.0 # 10 seconds to collect embeddings
|
||||||
|
|
||||||
|
# Check if all data collected
|
||||||
|
has_face = self._latest_face_embedding is not None and \
|
||||||
|
(now - self._face_embedding_timestamp < 5.0)
|
||||||
|
has_voice = self._latest_voice_embedding is not None and \
|
||||||
|
(now - self._voice_embedding_timestamp < 5.0)
|
||||||
|
has_image = self._latest_image is not None and \
|
||||||
|
(now - self._image_timestamp < 5.0)
|
||||||
|
|
||||||
|
# If we have face + voice, proceed with enrollment
|
||||||
|
if has_face and has_voice:
|
||||||
|
self._complete_enrollment()
|
||||||
|
# If timeout exceeded, save what we have
|
||||||
|
elif (now - self._enrollment_request.timestamp) > timeout:
|
||||||
|
self.get_logger().warn(
|
||||||
|
f'Enrollment timeout for {self._enrollment_request.name}. '
|
||||||
|
f'Proceeding with available data.'
|
||||||
|
)
|
||||||
|
self._complete_enrollment()
|
||||||
|
|
||||||
|
def _complete_enrollment(self) -> None:
|
||||||
|
"""Complete enrollment process."""
|
||||||
|
request = self._enrollment_request
|
||||||
|
if request is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Save enrollment data to queue
|
||||||
|
enroll_data = {
|
||||||
|
'person_id': request.person_id,
|
||||||
|
'name': request.name,
|
||||||
|
'context': request.context,
|
||||||
|
'timestamp': request.timestamp,
|
||||||
|
'datetime': datetime.fromtimestamp(request.timestamp).isoformat(),
|
||||||
|
'face_embedding_shape': list(self._latest_face_embedding.shape)
|
||||||
|
if self._latest_face_embedding is not None else None,
|
||||||
|
'voice_embedding_shape': list(self._latest_voice_embedding.shape)
|
||||||
|
if self._latest_voice_embedding is not None else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Save queue JSON
|
||||||
|
queue_file = self.queue_dir / f"enrollment_{request.person_id}_{int(request.timestamp)}.json"
|
||||||
|
with open(queue_file, 'w') as f:
|
||||||
|
json.dump(enroll_data, f, indent=2)
|
||||||
|
self.get_logger().info(f'Enrollment data queued: {queue_file}')
|
||||||
|
|
||||||
|
# Save photo if available
|
||||||
|
photo_id = None
|
||||||
|
if self._latest_image is not None:
|
||||||
|
photo_id = self._save_enrollment_photo(request)
|
||||||
|
|
||||||
|
# Add to PersonDB with embeddings
|
||||||
|
person_db_id = self._db.add_person(
|
||||||
|
name=request.name,
|
||||||
|
embedding=self._latest_face_embedding,
|
||||||
|
sample_count=1,
|
||||||
|
metadata={
|
||||||
|
'encounter_person_id': request.person_id,
|
||||||
|
'context': request.context,
|
||||||
|
'photo_id': photo_id,
|
||||||
|
'timestamp': request.timestamp,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.get_logger().info(f'Added to PersonDB: ID {person_db_id}')
|
||||||
|
|
||||||
|
# Update speaker embeddings JSON
|
||||||
|
self._update_speaker_embeddings(person_db_id, request)
|
||||||
|
|
||||||
|
# Enroll face via face_recognizer service
|
||||||
|
self._enroll_face(person_db_id, request)
|
||||||
|
|
||||||
|
# Publish success status
|
||||||
|
self._publish_enrollment_status('success', person_db_id)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Enrollment failed for {request.name}: {e}')
|
||||||
|
self._publish_enrollment_status('failed', None)
|
||||||
|
finally:
|
||||||
|
self._enrollment_request = None
|
||||||
|
self._latest_face_embedding = None
|
||||||
|
self._latest_voice_embedding = None
|
||||||
|
self._latest_image = None
|
||||||
|
|
||||||
|
def _save_enrollment_photo(self, request: EnrollmentRequest) -> str:
|
||||||
|
"""Save enrollment photo from RealSense."""
|
||||||
|
try:
|
||||||
|
if self._latest_image is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
cv_image = self._bridge.imgmsg_to_cv2(self._latest_image, 'bgr8')
|
||||||
|
photo_id = f"{request.person_id}_{int(request.timestamp)}"
|
||||||
|
photo_path = self.photos_dir / f"{photo_id}.jpg"
|
||||||
|
|
||||||
|
cv2.imwrite(str(photo_path), cv_image)
|
||||||
|
self.get_logger().info(f'Enrollment photo saved: {photo_path}')
|
||||||
|
return photo_id
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Failed to save enrollment photo: {e}')
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _update_speaker_embeddings(self, person_db_id: int, request: EnrollmentRequest) -> None:
|
||||||
|
"""Update speaker_embeddings.json with voice embedding."""
|
||||||
|
try:
|
||||||
|
if self._latest_voice_embedding is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Load existing embeddings
|
||||||
|
speaker_db = {}
|
||||||
|
if self.speaker_embeddings_path.exists():
|
||||||
|
with open(self.speaker_embeddings_path, 'r') as f:
|
||||||
|
speaker_db = json.load(f)
|
||||||
|
|
||||||
|
# Add new embedding
|
||||||
|
speaker_db[str(person_db_id)] = {
|
||||||
|
'name': request.name,
|
||||||
|
'person_id': request.person_id,
|
||||||
|
'embedding': self._latest_voice_embedding.tolist(),
|
||||||
|
'timestamp': request.timestamp,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Save updated embeddings
|
||||||
|
with open(self.speaker_embeddings_path, 'w') as f:
|
||||||
|
json.dump(speaker_db, f, indent=2)
|
||||||
|
|
||||||
|
self.get_logger().info(
|
||||||
|
f'Speaker embedding saved for {request.name}'
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Failed to update speaker embeddings: {e}')
|
||||||
|
|
||||||
|
def _enroll_face(self, person_db_id: int, request: EnrollmentRequest) -> None:
|
||||||
|
"""Enroll face via face_recognizer service."""
|
||||||
|
try:
|
||||||
|
if self._latest_face_embedding is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
if not self._enroll_face_client.wait_for_service(timeout_sec=2.0):
|
||||||
|
self.get_logger().warn(
|
||||||
|
f'Face recognizer service not available. Skipping face enrollment.'
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Call EnrollPerson service
|
||||||
|
req = EnrollPerson.Request()
|
||||||
|
req.name = request.name
|
||||||
|
req.mode = 'face'
|
||||||
|
req.n_samples = 1
|
||||||
|
|
||||||
|
future = self._enroll_face_client.call_async(req)
|
||||||
|
self.get_logger().info(f'Face enrollment service called for {request.name}')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Face enrollment service call failed: {e}')
|
||||||
|
|
||||||
|
def _publish_enrollment_status(self, status: str, person_db_id: Optional[int]) -> None:
|
||||||
|
"""Publish enrollment completion status."""
|
||||||
|
try:
|
||||||
|
status_msg = {
|
||||||
|
'status': status,
|
||||||
|
'person_id': self._enrollment_request.person_id if self._enrollment_request else None,
|
||||||
|
'name': self._enrollment_request.name if self._enrollment_request else None,
|
||||||
|
'person_db_id': person_db_id,
|
||||||
|
'timestamp': time.time(),
|
||||||
|
}
|
||||||
|
self._pub_status.publish(String(data=json.dumps(status_msg)))
|
||||||
|
except Exception as e:
|
||||||
|
self.get_logger().error(f'Failed to publish enrollment status: {e}')
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
node = SocialEnrollmentNode()
|
||||||
|
try:
|
||||||
|
rclpy.spin(node)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
node.destroy_node()
|
||||||
|
rclpy.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
Loading…
x
Reference in New Issue
Block a user