saltylab-firmware/jetson/Dockerfile.social
sl-jetson a9b2242a2c feat(social): Orin dev environment — JetPack 6 + TRT conversion + systemd (#88)
- Dockerfile.social: social-bot container with faster-whisper, llama-cpp-python
  (CUDA), piper-tts, insightface, pyannote.audio, OpenWakeWord, pyaudio
- scripts/convert_models.sh: TRT FP16 conversion for SCRFD-10GF, ArcFace-R100,
  ECAPA-TDNN; CTranslate2 setup for Whisper; Piper voice download; benchmark suite
- config/asound.conf: ALSA USB mic (card1) + USB speaker (card2) config
- models/README.md: version-pinned model table, /models/ layout, perf targets
- systemd/: saltybot-social.service + saltybot.target + install_systemd.sh
- docker-compose.yml: saltybot-social service with GPU, audio device passthrough,
  NVMe volume mounts for /models and /social_db

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-02 08:08:57 -05:00

120 lines
6.0 KiB
Docker

# Social-bot container — JetPack 6 + TensorRT + audio AI stack
# Built on NVIDIA's L4T JetPack base image; installs ROS2 Humble and the social-bot dependencies.
#
# Deps: faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface,
# pyannote.audio, OpenWakeWord, Silero VAD, pyaudio, sounddevice
#
# Build: docker build -f Dockerfile.social -t saltybot/social:latest .
# Run: docker compose -f docker-compose.yml up -d saltybot-social
# Base image: NVIDIA L4T JetPack r36.2.0 pulled from the NGC registry.
FROM nvcr.io/nvidia/l4t-jetpack:r36.2.0

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="sl-jetson" \
      description="Social-bot AI stack — speech, LLM, TTS, face recognition on Orin Nano Super" \
      jetpack="6.0" \
      l4t="r36.2.0"
# Build-time only: stop apt/debconf from prompting during the RUN steps of
# this stage. Declared as ARG rather than ENV so it is exported to every RUN
# instruction but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# ROS 2 distribution name and its install prefix.
# These two must remain separate ENV instructions: variable substitution inside
# one instruction uses the values from *before* that instruction, so merging
# them would expand ${ROS_DISTRO} to an empty string.
ENV ROS_DISTRO=humble
ENV ROS_ROOT=/opt/ros/${ROS_DISTRO}

# Python runtime behaviour: skip writing .pyc files and keep stdout/stderr
# unbuffered.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# ── Locale ────────────────────────────────────────────────────────────────────
# Install locale and timezone data, generate en_US.UTF-8, and drop the apt
# package lists in the same layer. LANG then selects that locale.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        locales \
        tzdata \
 && locale-gen en_US.UTF-8 \
 && rm -rf /var/lib/apt/lists/*
ENV LANG=en_US.UTF-8
# ── System deps ───────────────────────────────────────────────────────────────
# OS-level packages, grouped by purpose and listed one per line. The apt lists
# are removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        # Build toolchain and download utilities
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        wget \
        # Python headers and packaging tools
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        # Audio stack: ALSA, PulseAudio, PortAudio, libsndfile, libsoundio
        alsa-utils \
        libasound2-dev \
        libsndfile1-dev \
        libsoundio-dev \
        portaudio19-dev \
        pulseaudio \
        pulseaudio-utils \
        # USB audio / ReSpeaker support
        libusb-1.0-0-dev \
        usbutils \
        # CUDA toolkit and cuDNN (runtime + headers)
        cuda-toolkit-12-2 \
        libcudnn8 \
        libcudnn8-dev \
        # Interactive / media utilities
        ffmpeg \
        htop \
        nano \
        tmux \
 && rm -rf /var/lib/apt/lists/*
# ── ROS2 Humble ───────────────────────────────────────────────────────────────
# Register the ROS 2 apt repository (jammy, arm64) and install ros-base together
# with the colcon and rosdep tooling.
#
# The signing key is downloaded to a temporary file and then dearmored, rather
# than being piped straight into gpg: RUN uses /bin/sh without pipefail, so a
# failed download on the left side of a pipe would not stop the && chain.
# `curl -f` additionally turns HTTP error responses into a non-zero exit, so a
# bad fetch fails here instead of as a confusing apt error further down.
# NOTE(review): this step relies on gpg being available in the image — confirm.
RUN curl -fsSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc \
        -o /tmp/ros.asc \
 && gpg --dearmor -o /usr/share/keyrings/ros-archive-keyring.gpg /tmp/ros.asc \
 && rm -f /tmp/ros.asc \
 && echo "deb [arch=arm64 signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu jammy main" \
        > /etc/apt/sources.list.d/ros2.list \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
        ros-humble-ros-base \
        python3-colcon-common-extensions \
        python3-rosdep \
 && rm -rf /var/lib/apt/lists/*
# ── Python AI deps (core) ─────────────────────────────────────────────────────
# Speech, TTS, face and speaker models, installed in one pip invocation so the
# resolver sees all constraints together:
#   faster-whisper / ctranslate2 : Whisper speech-to-text on the CTranslate2 backend
#   openai-whisper               : reference Whisper implementation
#   piper-tts                    : neural text-to-speech
#   insightface / onnxruntime-gpu: SCRFD face detection + ArcFace recognition
#   pyannote.audio / speechbrain : speaker diarization + ECAPA-TDNN embeddings
# (llama-cpp-python is built separately, with CUDA flags, in its own step.)
# NOTE(review): onnxruntime-gpu and GPU-enabled builds of some of these packages
# may not be published as aarch64 wheels on PyPI — confirm this step resolves
# and is GPU-accelerated on the Jetson target.
RUN pip3 install --no-cache-dir \
        "faster-whisper>=1.0.0" \
        "ctranslate2>=4.0.0" \
        "openai-whisper>=20231117" \
        "piper-tts>=1.2.0" \
        "insightface>=0.7.3" \
        "onnxruntime-gpu>=1.17.0" \
        "pyannote.audio>=3.1.0" \
        "speechbrain>=1.0.0"
# ── llama-cpp-python with CUDA ─────────────────────────────────────────────────
# Compile llama-cpp-python from source (--no-binary) with the CUDA backend,
# targeting compute capability 8.7 for the Orin GPU.
#   * GGML_CUDA is the CUDA switch for the llama.cpp revision vendored by the
#     pinned 0.2.85 release; LLAMA_CUDA is the older, deprecated spelling.
#   * CMAKE_CUDA_ARCHITECTURES is the CMake variable that selects the target
#     GPU architectures. A bare -DCUDA_ARCHITECTURES=... only creates an unused
#     cache entry (that name is a target property), so the build would fall
#     back to the project's default architecture list instead of sm_87.
RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=87" \
    pip3 install --no-cache-dir --no-binary llama-cpp-python \
        llama-cpp-python==0.2.85
# ── Wake word / VAD ───────────────────────────────────────────────────────────
# Wake-word detection (openwakeword) and voice-activity detection
# (silero-vad, webrtcvad-wheels).
RUN pip3 install --no-cache-dir \
        "openwakeword>=0.6.0" \
        "silero-vad>=5.1.0" \
        "webrtcvad-wheels>=2.0.14"
# ── Audio I/O ─────────────────────────────────────────────────────────────────
# Python audio capture/playback and sound-file bindings, plus numpy.
RUN pip3 install --no-cache-dir \
        "pyaudio>=0.2.14" \
        "sounddevice>=0.4.6" \
        "soundfile>=0.12.1" \
        "numpy>=1.24.0"
# ── TensorRT Python bindings ──────────────────────────────────────────────────
# The TensorRT bindings are expected to come from JetPack and are not installed
# here; this step only adds pycuda, for custom kernels.
RUN pip3 install \
        --no-cache-dir \
        "pycuda>=2022.2.2"
# ── MQTT for SOUL/agent communication ─────────────────────────────────────────
# MQTT client library used for SOUL/agent messaging.
RUN pip3 install \
        --no-cache-dir \
        "paho-mqtt>=2.0.0"
# ── ROS2 Python deps ──────────────────────────────────────────────────────────
# Pure-Python helpers used alongside ROS 2.
# rclpy is deliberately NOT installed with pip: it is delivered by the ROS 2 apt
# packages (ros-humble-ros-base, installed earlier in this file) and is built
# against that distro's rcl libraries. Requesting it from PyPI either fails or
# adds a second, unrelated copy outside /opt/ros.
RUN pip3 install --no-cache-dir \
        "transforms3d>=0.4.1"
# ── Model directory ───────────────────────────────────────────────────────────
# Create the /models layout (ONNX files, engines, GGUF files, Piper voices) and
# export its location through environment variables.
RUN mkdir -p \
        /models/onnx \
        /models/engines \
        /models/gguf \
        /models/piper
ENV MODEL_DIR=/models \
    PIPER_VOICE_DIR=/models/piper
# ── ALSA config for USB mic + speaker ─────────────────────────────────────────
# Install the repository's ALSA configuration as the system-wide /etc/asound.conf.
COPY ["config/asound.conf", "/etc/asound.conf"]
# ── Workspace ─────────────────────────────────────────────────────────────────
# Ship the entrypoint script, make it executable and create the colcon
# workspace source directory in one layer, then make the workspace the
# default working directory.
COPY scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh \
 && mkdir -p /ros2_ws/src
WORKDIR /ros2_ws

# Exec-form entrypoint; CMD supplies the default argument ("bash") passed to it.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["bash"]