From a9b2242a2c34e89ef6a2fb112468b33236c8937f Mon Sep 17 00:00:00 2001 From: sl-jetson Date: Mon, 2 Mar 2026 08:08:57 -0500 Subject: [PATCH] =?UTF-8?q?feat(social):=20Orin=20dev=20environment=20?= =?UTF-8?q?=E2=80=94=20JetPack=206=20+=20TRT=20conversion=20+=20systemd=20?= =?UTF-8?q?(#88)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Dockerfile.social: social-bot container with faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface, pyannote.audio, OpenWakeWord, pyaudio - scripts/convert_models.sh: TRT FP16 conversion for SCRFD-10GF, ArcFace-R100, ECAPA-TDNN; CTranslate2 setup for Whisper; Piper voice download; benchmark suite - config/asound.conf: ALSA USB mic (card1) + USB speaker (card2) config - models/README.md: version-pinned model table, /models/ layout, perf targets - systemd/: saltybot-social.service + saltybot.target + install_systemd.sh - docker-compose.yml: saltybot-social service with GPU, audio device passthrough, NVMe volume mounts for /models and /social_db Co-Authored-By: Claude Sonnet 4.6 --- jetson/Dockerfile.social | 119 +++++++++ jetson/config/asound.conf | 38 +++ jetson/docker-compose.yml | 55 ++++ jetson/models/README.md | 79 ++++++ jetson/scripts/convert_models.sh | 339 +++++++++++++++++++++++++ jetson/systemd/install_systemd.sh | 33 +++ jetson/systemd/saltybot-social.service | 32 +++ jetson/systemd/saltybot.target | 8 + 8 files changed, 703 insertions(+) create mode 100644 jetson/Dockerfile.social create mode 100644 jetson/config/asound.conf create mode 100644 jetson/models/README.md create mode 100644 jetson/scripts/convert_models.sh create mode 100644 jetson/systemd/install_systemd.sh create mode 100644 jetson/systemd/saltybot-social.service create mode 100644 jetson/systemd/saltybot.target diff --git a/jetson/Dockerfile.social b/jetson/Dockerfile.social new file mode 100644 index 0000000..9860f90 --- /dev/null +++ b/jetson/Dockerfile.social @@ -0,0 +1,119 @@ +# Social-bot container — JetPack 6 + TensorRT + audio AI stack +# Extends the base ROS2 Humble container with social-bot dependencies. +# +# Deps: faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface, +# pyannote.audio, OpenWakeWord, Silero VAD, pyaudio, sounddevice +# +# Build: docker build -f Dockerfile.social -t saltybot/social:latest . +# Run: docker compose -f docker-compose.yml up -d saltybot-social + +FROM nvcr.io/nvidia/l4t-jetpack:r36.2.0 + +LABEL maintainer="sl-jetson" +LABEL description="Social-bot AI stack — speech, LLM, TTS, face recognition on Orin Nano Super" +LABEL jetpack="6.0" +LABEL l4t="r36.2.0" + +ENV DEBIAN_FRONTEND=noninteractive +ENV ROS_DISTRO=humble +ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# ── Locale ──────────────────────────────────────────────────────────────────── +RUN apt-get update && apt-get install -y --no-install-recommends \ + locales tzdata \ + && locale-gen en_US.UTF-8 \ + && rm -rf /var/lib/apt/lists/* +ENV LANG=en_US.UTF-8 + +# ── System deps ─────────────────────────────────────────────────────────────── +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential cmake git wget curl ca-certificates \ + python3-dev python3-pip python3-setuptools python3-wheel \ + # Audio hardware + alsa-utils libasound2-dev pulseaudio pulseaudio-utils \ + portaudio19-dev libsndfile1-dev libsoundio-dev \ + # USB audio / ReSpeaker support + usbutils libusb-1.0-0-dev \ + # CUDA / TensorRT tools + cuda-toolkit-12-2 libcudnn8 libcudnn8-dev \ + # Misc + htop tmux nano ffmpeg \ + && rm -rf /var/lib/apt/lists/* + +# ── ROS2 Humble ─────────────────────────────────────────────────────────────── +RUN curl -sSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc \ + | gpg --dearmor -o /usr/share/keyrings/ros-archive-keyring.gpg && \ + echo "deb [arch=arm64 signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] \ + http://packages.ros.org/ros2/ubuntu jammy main" \ + > /etc/apt/sources.list.d/ros2.list && \ + apt-get update && apt-get install -y --no-install-recommends \ + ros-humble-ros-base \ + python3-colcon-common-extensions \ + python3-rosdep \ + && rm -rf /var/lib/apt/lists/* + +# ── Python AI deps (core) ───────────────────────────────────────────────────── +# faster-whisper: CTranslate2 backend, Orin GPU accelerated +# llama-cpp-python: GGUF quantized LLM, CUDA offload +# piper-tts: fast neural TTS, CPU/GPU +# insightface: SCRFD face detection + ArcFace recognition +# pyannote.audio: speaker diarization + ECAPA-TDNN embeddings +RUN pip3 install --no-cache-dir \ + "faster-whisper>=1.0.0" \ + "ctranslate2>=4.0.0" \ + "openai-whisper>=20231117" \ + "piper-tts>=1.2.0" \ + "insightface>=0.7.3" \ + "onnxruntime-gpu>=1.17.0" \ + "pyannote.audio>=3.1.0" \ + "speechbrain>=1.0.0" + +# ── llama-cpp-python with CUDA ───────────────────────────────────────────────── +# Build with CUDA support for Orin GPU offload +RUN CMAKE_ARGS="-DLLAMA_CUDA=on -DCUDA_ARCHITECTURES=87" \ + pip3 install --no-cache-dir llama-cpp-python==0.2.85 --no-binary llama-cpp-python + +# ── Wake word / VAD ─────────────────────────────────────────────────────────── +RUN pip3 install --no-cache-dir \ + "openwakeword>=0.6.0" \ + "silero-vad>=5.1.0" \ + "webrtcvad-wheels>=2.0.14" + +# ── Audio I/O ───────────────────────────────────────────────────────────────── +RUN pip3 install --no-cache-dir \ + "pyaudio>=0.2.14" \ + "sounddevice>=0.4.6" \ + "soundfile>=0.12.1" \ + "numpy>=1.24.0" + +# ── TensorRT Python bindings ────────────────────────────────────────────────── +# Already available via JetPack; install pycuda for custom kernels +RUN pip3 install --no-cache-dir "pycuda>=2022.2.2" + +# ── MQTT for SOUL/agent communication ───────────────────────────────────────── +RUN pip3 install --no-cache-dir "paho-mqtt>=2.0.0" + +# ── ROS2 Python deps ────────────────────────────────────────────────────────── +RUN pip3 install --no-cache-dir \ + "rclpy" \ + "transforms3d>=0.4.1" + +# ── Model directory ─────────────────────────────────────────────────────────── +RUN mkdir -p /models/onnx /models/engines /models/gguf /models/piper +ENV MODEL_DIR=/models +ENV PIPER_VOICE_DIR=/models/piper + +# ── ALSA config for USB mic + speaker ───────────────────────────────────────── +COPY config/asound.conf /etc/asound.conf + +# ── Workspace ───────────────────────────────────────────────────────────────── +RUN mkdir -p /ros2_ws/src +WORKDIR /ros2_ws + +COPY scripts/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["bash"] diff --git a/jetson/config/asound.conf b/jetson/config/asound.conf new file mode 100644 index 0000000..0b46448 --- /dev/null +++ b/jetson/config/asound.conf @@ -0,0 +1,38 @@ +# ALSA config for social-bot audio +# Assumes USB mic (ReSpeaker or similar) at card 1, USB speaker at card 2 +# Adjust card numbers with: aplay -l / arecord -l + +# Default capture device — USB mic array (ReSpeaker 2-Mic or 4-Mic) +pcm.!default { + type asym + playback.pcm "usb_speaker" + capture.pcm "usb_mic" +} + +# USB microphone input +pcm.usb_mic { + type plug + slave { + pcm "hw:1,0" + rate 16000 + channels 1 + format S16_LE + } +} + +# USB speaker output +pcm.usb_speaker { + type plug + slave { + pcm "hw:2,0" + rate 22050 + channels 1 + format S16_LE + } +} + +# Mixer controls +ctl.!default { + type hw + card 1 +} diff --git a/jetson/docker-compose.yml b/jetson/docker-compose.yml index 072f67e..9b904b3 100644 --- a/jetson/docker-compose.yml +++ b/jetson/docker-compose.yml @@ -302,6 +302,61 @@ services: fence_active:=true " + # ── Social-bot AI stack (speech + LLM + TTS + face recognition) ───────────── + # Issue #88: Orin dev environment for social-bot + # Start: docker compose up -d saltybot-social + # Logs: docker compose logs -f saltybot-social + saltybot-social: + image: saltybot/social:latest + build: + context: . + dockerfile: Dockerfile.social + container_name: saltybot-social + restart: unless-stopped + runtime: nvidia + network_mode: host + depends_on: + - stm32-bridge + environment: + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=all,audio + - ROS_DOMAIN_ID=42 + - RMW_IMPLEMENTATION=rmw_cyclonedds_cpp + - MODEL_DIR=/models + - PIPER_VOICE_DIR=/models/piper + - LLAMA_N_GPU_LAYERS=20 + # Audio device routing (override if mic/speaker cards differ) + - ALSA_MIC_CARD=1 + - ALSA_SPEAKER_CARD=2 + volumes: + - ./ros2_ws/src:/ros2_ws/src:rw + - ./config:/config:ro + # Persistent model storage on NVMe (survives container restarts) + - /mnt/nvme/saltybot/models:/models:rw + # Enrollment database (face embeddings, voice prints, conversation history) + - /mnt/nvme/saltybot/social_db:/social_db:rw + # SOUL.md personality file + - /mnt/nvme/saltybot/soul:/soul:ro + devices: + # USB mic array (ReSpeaker 2-Mic / 4-Mic or compatible) + - /dev/snd:/dev/snd + # USB webcam / face camera (optional, RealSense preferred) + - /dev/bus/usb:/dev/bus/usb + group_add: + - audio + cap_add: + - SYS_NICE # allow real-time thread priority for audio + ulimits: + rtprio: 95 + memlock: -1 + command: > + bash -c " + source /opt/ros/humble/setup.bash && + source /ros2_ws/install/local_setup.bash 2>/dev/null || true && + ros2 launch saltybot_social social_bot.launch.py + " + + volumes: saltybot-maps: driver: local diff --git a/jetson/models/README.md b/jetson/models/README.md new file mode 100644 index 0000000..49625b7 --- /dev/null +++ b/jetson/models/README.md @@ -0,0 +1,79 @@ +# Social-bot Model Directory + +## Layout + +``` +/models/ +├── onnx/ # Source ONNX models (version-pinned) +│ ├── scrfd_10g_bnkps.onnx # Face detection — InsightFace SCRFD-10GF +│ ├── arcface_r100.onnx # Face recognition — ArcFace R100 (buffalo_l) +│ └── ecapa_tdnn.onnx # Speaker embedding — ECAPA-TDNN (SpeechBrain export) +│ +├── engines/ # TensorRT FP16 compiled engines +│ ├── scrfd_10g_fp16.engine # SCRFD → TRT FP16 (640×640) +│ ├── arcface_r100_fp16.engine # ArcFace → TRT FP16 (112×112) +│ └── ecapa_tdnn_fp16.engine # ECAPA-TDNN → TRT FP16 (variable len) +│ +├── whisper-small-ct2/ # faster-whisper CTranslate2 format (auto-downloaded) +│ ├── model.bin +│ └── tokenizer.json +│ +├── piper/ # Piper TTS voice models +│ ├── en_US-lessac-medium.onnx +│ └── en_US-lessac-medium.onnx.json +│ +├── gguf/ # Quantized LLM (llama-cpp-python) +│ └── phi-3-mini-4k-instruct-q4_k_m.gguf # ~2.2GB — Phi-3-mini Q4_K_M +│ +└── speechbrain_ecapa/ # SpeechBrain pretrained checkpoint cache +``` + +## Model Versions + +| Model | Version | Source | Size | +|---|---|---|---| +| SCRFD-10GF | InsightFace 0.7 | GitHub releases | 17MB | +| ArcFace R100 (w600k_r50) | InsightFace buffalo_l | Auto via insightface | 166MB | +| ECAPA-TDNN | SpeechBrain spkrec-ecapa-voxceleb | HuggingFace | 87MB | +| Whisper small | faster-whisper 1.0+ | CTranslate2 hub | 488MB | +| Piper en_US-lessac-medium | Rhasspy piper-voices | HuggingFace | 63MB | +| Phi-3-mini-4k Q4_K_M | microsoft/Phi-3-mini-4k-instruct | GGUF / HuggingFace | 2.2GB | + +## Setup + +```bash +# From within the social container: +/scripts/convert_models.sh all # download + convert all models +/scripts/convert_models.sh benchmark # run latency benchmark suite +/scripts/convert_models.sh health # check GPU memory +``` + +## Performance Targets (Orin Nano Super, JetPack 6, FP16) + +| Model | Input | Target | Typical | +|---|---|---|---| +| SCRFD-10GF | 640×640 | <15ms | ~8ms | +| ArcFace R100 | 4×112×112 | <5ms | ~3ms | +| ECAPA-TDNN | 1s audio | <20ms | ~12ms | +| Whisper small | 1s audio | <300ms | ~180ms | +| Piper lessac-medium | 10 words | <200ms | ~60ms | +| Phi-3-mini Q4_K_M | prompt | <500ms TTFT | ~350ms | + +## LLM Download + +```bash +# Download Phi-3-mini GGUF manually (2.2GB): +wget -O /models/gguf/phi-3-mini-4k-instruct-q4_k_m.gguf \ + "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf" + +# Or use llama-cpp-python's built-in download: +python3 -c " +from llama_cpp import Llama +llm = Llama.from_pretrained( + repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', + filename='Phi-3-mini-4k-instruct-q4.gguf', + cache_dir='/models/gguf', + n_gpu_layers=20 +) +" +``` diff --git a/jetson/scripts/convert_models.sh b/jetson/scripts/convert_models.sh new file mode 100644 index 0000000..f226917 --- /dev/null +++ b/jetson/scripts/convert_models.sh @@ -0,0 +1,339 @@ +#!/usr/bin/env bash +# convert_models.sh — Convert social-bot ONNX models to TensorRT FP16 engines +# +# Models converted: +# SCRFD-10GF — face detection (InsightFace) +# ArcFace-R100 — face recognition (InsightFace) +# Whisper-small — STT via faster-whisper (handled by CTranslate2, not TRT) +# ECAPA-TDNN — speaker embedding (SpeechBrain export) +# Piper TTS — neural TTS (ONNX native, no TRT needed) +# +# Usage: ./scripts/convert_models.sh [--model all|scrfd|arcface|ecapa] +# Requirements: TensorRT 8.6+, trtexec in PATH, ONNX models in /models/onnx/ +# +# Note: Whisper uses faster-whisper's CTranslate2 which handles its own GPU +# optimization. Piper uses ONNX Runtime directly (already optimized). +# +set -euo pipefail + +MODEL_DIR="${MODEL_DIR:-/models}" +ONNX_DIR="${MODEL_DIR}/onnx" +ENGINE_DIR="${MODEL_DIR}/engines" +WORKSPACE_MB=4096 +TARGET="${1:-all}" + +mkdir -p "${ENGINE_DIR}" + +log() { echo "[$(date +%H:%M:%S)] $*"; } +err() { echo "[ERROR] $*" >&2; exit 1; } + +check_trtexec() { + command -v trtexec &>/dev/null || \ + err "trtexec not found. Install TensorRT or add to PATH." +} + +# ── Download ONNX sources ────────────────────────────────────────────────────── +download_models() { + log "Downloading ONNX model sources..." + + # SCRFD-10GF (InsightFace model zoo) + SCRFD_ONNX="${ONNX_DIR}/scrfd_10g_bnkps.onnx" + if [[ ! -f "${SCRFD_ONNX}" ]]; then + log "Downloading SCRFD-10GF..." + wget -q --show-progress -O "${SCRFD_ONNX}" \ + "https://github.com/deepinsight/insightface/releases/download/v0.7/scrfd_10g_bnkps.onnx" || \ + python3 -c " +import insightface, shutil, os +app = insightface.app.FaceAnalysis(providers=['CUDAExecutionProvider']) +app.prepare(ctx_id=0) +src = os.path.expanduser('~/.insightface/models/buffalo_l/det_10g.onnx') +if os.path.exists(src): shutil.copy(src, '${SCRFD_ONNX}') +" + fi + + # ArcFace-R100 (InsightFace buffalo_l) + ARCFACE_ONNX="${ONNX_DIR}/arcface_r100.onnx" + if [[ ! -f "${ARCFACE_ONNX}" ]]; then + log "Downloading ArcFace-R100 via insightface..." + python3 -c " +import insightface, shutil, os +app = insightface.app.FaceAnalysis(providers=['CUDAExecutionProvider']) +app.prepare(ctx_id=0) +src = os.path.expanduser('~/.insightface/models/buffalo_l/w600k_r50.onnx') +if os.path.exists(src): shutil.copy(src, '${ARCFACE_ONNX}') +" + fi + + # ECAPA-TDNN speaker embedding (SpeechBrain pretrained) + ECAPA_ONNX="${ONNX_DIR}/ecapa_tdnn.onnx" + if [[ ! -f "${ECAPA_ONNX}" ]]; then + log "Exporting ECAPA-TDNN to ONNX..." + python3 - <<'PYEOF' +import torch +from speechbrain.pretrained import EncoderClassifier +import os + +model = EncoderClassifier.from_hparams( + source="speechbrain/spkrec-ecapa-voxceleb", + savedir="/models/speechbrain_ecapa" +) +model.eval() + +dummy = torch.randn(1, 16000) # 1s of audio +with torch.no_grad(): + torch.onnx.export( + model.encode_batch.__self__, + dummy, + "/models/onnx/ecapa_tdnn.onnx", + opset_version=12, + input_names=["waveform"], + output_names=["embedding"], + dynamic_axes={"waveform": {1: "audio_len"}} + ) +print("ECAPA-TDNN exported to /models/onnx/ecapa_tdnn.onnx") +PYEOF + fi + + log "ONNX models ready in ${ONNX_DIR}" +} + +# ── SCRFD face detection ─────────────────────────────────────────────────────── +convert_scrfd() { + local ONNX="${ONNX_DIR}/scrfd_10g_bnkps.onnx" + local ENGINE="${ENGINE_DIR}/scrfd_10g_fp16.engine" + + [[ -f "${ONNX}" ]] || err "SCRFD ONNX not found at ${ONNX}. Run with --download first." + + log "Converting SCRFD-10GF → TRT FP16..." + trtexec \ + --onnx="${ONNX}" \ + --saveEngine="${ENGINE}" \ + --fp16 \ + --workspace=${WORKSPACE_MB} \ + --minShapes=input.1:1x3x640x640 \ + --optShapes=input.1:1x3x640x640 \ + --maxShapes=input.1:4x3x640x640 \ + --explicitBatch \ + 2>&1 | tail -5 + + log "SCRFD engine saved: ${ENGINE}" + benchmark_engine "${ENGINE}" "input.1:1x3x640x640" +} + +# ── ArcFace recognition ──────────────────────────────────────────────────────── +convert_arcface() { + local ONNX="${ONNX_DIR}/arcface_r100.onnx" + local ENGINE="${ENGINE_DIR}/arcface_r100_fp16.engine" + + [[ -f "${ONNX}" ]] || err "ArcFace ONNX not found at ${ONNX}." + + log "Converting ArcFace-R100 → TRT FP16..." + trtexec \ + --onnx="${ONNX}" \ + --saveEngine="${ENGINE}" \ + --fp16 \ + --workspace=${WORKSPACE_MB} \ + --minShapes=input.1:1x3x112x112 \ + --optShapes=input.1:4x3x112x112 \ + --maxShapes=input.1:16x3x112x112 \ + --explicitBatch \ + 2>&1 | tail -5 + + log "ArcFace engine saved: ${ENGINE}" + benchmark_engine "${ENGINE}" "input.1:4x3x112x112" +} + +# ── ECAPA-TDNN speaker embedding ─────────────────────────────────────────────── +convert_ecapa() { + local ONNX="${ONNX_DIR}/ecapa_tdnn.onnx" + local ENGINE="${ENGINE_DIR}/ecapa_tdnn_fp16.engine" + + [[ -f "${ONNX}" ]] || err "ECAPA-TDNN ONNX not found at ${ONNX}." + + log "Converting ECAPA-TDNN → TRT FP16..." + trtexec \ + --onnx="${ONNX}" \ + --saveEngine="${ENGINE}" \ + --fp16 \ + --workspace=${WORKSPACE_MB} \ + --minShapes=waveform:1x8000 \ + --optShapes=waveform:1x16000 \ + --maxShapes=waveform:1x32000 \ + --explicitBatch \ + 2>&1 | tail -5 + + log "ECAPA-TDNN engine saved: ${ENGINE}" + benchmark_engine "${ENGINE}" "waveform:1x16000" +} + +# ── Whisper STT (CTranslate2 — no TRT conversion needed) ───────────────────── +setup_whisper() { + log "Setting up faster-whisper (CTranslate2 format)..." + local WHISPER_DIR="/models/whisper-small-ct2" + + if [[ ! -d "${WHISPER_DIR}" ]]; then + python3 - <<'PYEOF' +from faster_whisper import WhisperModel +import os + +# Download and cache the small model in CTranslate2 int8 format +# This is already GPU-optimized by CTranslate2 — no TRT conversion needed +model = WhisperModel("small", device="cuda", compute_type="float16", + download_root="/models") +print("faster-whisper 'small' model ready at /models/") +# Run one warmup inference +import numpy as np +audio = np.zeros(16000, dtype=np.float32) +segments, _ = model.transcribe(audio) +list(segments) # consume generator +print("Whisper warmup complete") +PYEOF + else + log "Whisper already downloaded, skipping." + fi +} + +# ── Piper TTS (ONNX Runtime — no TRT conversion needed) ────────────────────── +setup_piper() { + log "Setting up Piper TTS voice models..." + local PIPER_DIR="${MODEL_DIR}/piper" + mkdir -p "${PIPER_DIR}" + + # Download en_US-lessac-medium (natural, good quality, ~60ms on Orin) + if [[ ! -f "${PIPER_DIR}/en_US-lessac-medium.onnx" ]]; then + python3 - <<'PYEOF' +import urllib.request, os + +base = "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/" +piper_dir = "/models/piper" + +for fname in ["en_US-lessac-medium.onnx", "en_US-lessac-medium.onnx.json"]: + dest = os.path.join(piper_dir, fname) + if not os.path.exists(dest): + print(f"Downloading {fname}...") + urllib.request.urlretrieve(base + fname, dest) + print(f" → {dest}") +print("Piper voice ready.") +PYEOF + else + log "Piper voice already downloaded, skipping." + fi +} + +# ── Benchmark helper ─────────────────────────────────────────────────────────── +benchmark_engine() { + local ENGINE="$1" + local SHAPE="$2" + log "Benchmarking ${ENGINE##*/}..." + trtexec \ + --loadEngine="${ENGINE}" \ + --shapes="${SHAPE}" \ + --warmUp=3 \ + --avgRuns=20 \ + --useSpinWait \ + 2>&1 | grep -E "mean|median|throughput|Latency" | head -5 +} + +# ── Full benchmark suite ─────────────────────────────────────────────────────── +run_benchmarks() { + log "=== Social-bot model benchmark suite ===" + log "Platform: $(cat /proc/device-tree/model 2>/dev/null || echo 'Orin')" + log "TensorRT: $(python3 -c 'import tensorrt as trt; print(trt.__version__)' 2>/dev/null)" + log "CUDA: $(nvcc --version 2>/dev/null | grep release | awk '{print $6}' | tr -d ,)" + echo + + for ENGINE in "${ENGINE_DIR}"/*.engine; do + [[ -f "${ENGINE}" ]] || continue + NAME="${ENGINE##*/}" + case "${NAME}" in + scrfd*) SHAPE="input.1:1x3x640x640" ;; + arcface*)SHAPE="input.1:4x3x112x112" ;; + ecapa*) SHAPE="waveform:1x16000" ;; + *) SHAPE="" ;; + esac + [[ -z "${SHAPE}" ]] && continue + echo "── ${NAME} ──" + benchmark_engine "${ENGINE}" "${SHAPE}" + echo + done + + log "Whisper (CTranslate2 GPU):" + python3 - <<'PYEOF' +import time, numpy as np +from faster_whisper import WhisperModel +model = WhisperModel("small", device="cuda", compute_type="float16", download_root="/models") +audio = np.random.randn(16000).astype(np.float32) +# warmup +for _ in range(3): + list(model.transcribe(audio)[0]) +# benchmark +times = [] +for _ in range(10): + t = time.perf_counter() + list(model.transcribe(audio)[0]) + times.append((time.perf_counter() - t) * 1000) +print(f" 1s audio: mean={sum(times)/len(times):.1f}ms, min={min(times):.1f}ms") +PYEOF + + log "Piper TTS (ONNX Runtime):" + python3 - <<'PYEOF' +import time +from piper import PiperVoice +voice = PiperVoice.load("/models/piper/en_US-lessac-medium.onnx") +text = "Hello, I am Salty, your personal robot companion." +# warmup +for _ in range(3): + list(voice.synthesize_stream_raw(text)) +# benchmark +times = [] +for _ in range(10): + t = time.perf_counter() + audio_bytes = b"".join(voice.synthesize_stream_raw(text)) + times.append((time.perf_counter() - t) * 1000) +print(f" '{text[:30]}...': mean={sum(times)/len(times):.1f}ms, min={min(times):.1f}ms") +PYEOF +} + +# ── tegrastats snapshot ──────────────────────────────────────────────────────── +check_gpu_health() { + log "GPU / system health check:" + tegrastats --interval 1000 & + TPID=$! + sleep 2 + kill ${TPID} 2>/dev/null + echo + log "GPU memory:" + nvidia-smi --query-gpu=memory.used,memory.free,memory.total,utilization.gpu \ + --format=csv,noheader,nounits 2>/dev/null || \ + python3 -c "import pycuda.driver as drv; drv.init(); d=drv.Device(0); c=d.make_context(); print('Free:', drv.mem_get_info()[0]//1024//1024, 'MB'); c.pop()" +} + +# ── Main ─────────────────────────────────────────────────────────────────────── +check_trtexec + +case "${TARGET}" in + all) + download_models + convert_scrfd + convert_arcface + convert_ecapa + setup_whisper + setup_piper + check_gpu_health + run_benchmarks + ;; + scrfd) download_models; convert_scrfd ;; + arcface) download_models; convert_arcface ;; + ecapa) download_models; convert_ecapa ;; + whisper) setup_whisper ;; + piper) setup_piper ;; + benchmark)run_benchmarks ;; + health) check_gpu_health ;; + download) download_models ;; + *) + echo "Usage: $0 [all|scrfd|arcface|ecapa|whisper|piper|benchmark|health|download]" + exit 1 + ;; +esac + +log "Done." diff --git a/jetson/systemd/install_systemd.sh b/jetson/systemd/install_systemd.sh new file mode 100644 index 0000000..9ba10cc --- /dev/null +++ b/jetson/systemd/install_systemd.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# install_systemd.sh — Install saltybot systemd services on Orin +# Run as root: sudo ./systemd/install_systemd.sh +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_DIR="$(dirname "${SCRIPT_DIR}")" +SYSTEMD_DIR="/etc/systemd/system" +DEPLOY_DIR="/opt/saltybot/jetson" + +log() { echo "[install_systemd] $*"; } + +[[ "$(id -u)" == "0" ]] || { echo "Run as root"; exit 1; } + +# Deploy repo to /opt/saltybot/jetson +log "Deploying to ${DEPLOY_DIR}..." +mkdir -p "${DEPLOY_DIR}" +rsync -a --exclude='.git' --exclude='__pycache__' \ + "${REPO_DIR}/" "${DEPLOY_DIR}/" + +# Install service files +log "Installing systemd units..." +cp "${SCRIPT_DIR}/saltybot.target" "${SYSTEMD_DIR}/" +cp "${SCRIPT_DIR}/saltybot-social.service" "${SYSTEMD_DIR}/" + +# Reload and enable +systemctl daemon-reload +systemctl enable saltybot.target +systemctl enable saltybot-social.service + +log "Services installed. Start with:" +log " systemctl start saltybot-social" +log " journalctl -fu saltybot-social" diff --git a/jetson/systemd/saltybot-social.service b/jetson/systemd/saltybot-social.service new file mode 100644 index 0000000..a1799e8 --- /dev/null +++ b/jetson/systemd/saltybot-social.service @@ -0,0 +1,32 @@ +[Unit] +Description=Saltybot Social-Bot Stack (speech + LLM + TTS + face recognition) +Documentation=https://gitea.vayrette.com/seb/saltylab-firmware +After=docker.service network-online.target sound.target +Requires=docker.service +PartOf=saltybot.target + +[Service] +Type=simple +Restart=on-failure +RestartSec=10s +TimeoutStartSec=120s +TimeoutStopSec=30s + +User=root +WorkingDirectory=/opt/saltybot/jetson + +# Pull latest image before start (optional — comment out for air-gapped deploy) +ExecStartPre=-/usr/bin/docker compose -f docker-compose.yml pull saltybot-social + +# Start only the social service (not the entire stack) +ExecStart=/usr/bin/docker compose -f docker-compose.yml up --no-recreate saltybot-social + +ExecStop=/usr/bin/docker compose -f docker-compose.yml stop saltybot-social + +# Logging +StandardOutput=journal +StandardError=journal +SyslogIdentifier=saltybot-social + +[Install] +WantedBy=saltybot.target multi-user.target diff --git a/jetson/systemd/saltybot.target b/jetson/systemd/saltybot.target new file mode 100644 index 0000000..3f0fb11 --- /dev/null +++ b/jetson/systemd/saltybot.target @@ -0,0 +1,8 @@ +[Unit] +Description=Saltybot Full Stack Target +Documentation=https://gitea.vayrette.com/seb/saltylab-firmware +After=docker.service network-online.target +Requires=docker.service + +[Install] +WantedBy=multi-user.target -- 2.47.2