- Dockerfile.social: social-bot container with faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface, pyannote.audio, OpenWakeWord, pyaudio
- scripts/convert_models.sh: TRT FP16 conversion for SCRFD-10GF, ArcFace-R100, ECAPA-TDNN; CTranslate2 setup for Whisper; Piper voice download; benchmark suite
- config/asound.conf: ALSA USB mic (card1) + USB speaker (card2) config
- models/README.md: version-pinned model table, /models/ layout, perf targets
- systemd/: saltybot-social.service + saltybot.target + install_systemd.sh
- docker-compose.yml: saltybot-social service with GPU, audio device passthrough, NVMe volume mounts for /models and /social_db

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
120 lines
6.0 KiB
Docker
120 lines
6.0 KiB
Docker
# Social-bot container — JetPack 6 + TensorRT + audio AI stack
|
|
# Extends the base ROS2 Humble container with social-bot dependencies.
|
|
#
|
|
# Deps: faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface,
|
|
# pyannote.audio, OpenWakeWord, Silero VAD, pyaudio, sounddevice
|
|
#
|
|
# Build: docker build -f Dockerfile.social -t saltybot/social:latest .
|
|
# Run: docker compose -f docker-compose.yml up -d saltybot-social
|
|
|
|
# Base image (L4T r36.2.0) followed by the image metadata labels.
FROM nvcr.io/nvidia/l4t-jetpack:r36.2.0

LABEL maintainer="sl-jetson" \
      description="Social-bot AI stack — speech, LLM, TTS, face recognition on Orin Nano Super" \
      jetpack="6.0" \
      l4t="r36.2.0"
|
|
|
|
# Build-time only: stops apt/debconf from prompting during the RUN steps that
# follow. Declared as ARG rather than ENV so the setting is not baked into the
# environment of every container started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
# ROS_ROOT is set in its own instruction because it expands ROS_DISTRO, which
# must already be defined by an earlier instruction.
ENV ROS_DISTRO=humble
ENV ROS_ROOT=/opt/ros/${ROS_DISTRO}

# Python: do not write .pyc files, and keep stdout/stderr unbuffered so log
# lines show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
|
|
|
|
# ── Locale ────────────────────────────────────────────────────────────────────
|
|
# Install locale/timezone data, generate en_US.UTF-8, and make it the default
# locale. The apt lists are removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        locales \
        tzdata \
 && locale-gen en_US.UTF-8 \
 && rm -rf /var/lib/apt/lists/*
ENV LANG=en_US.UTF-8
|
|
|
|
# ── System deps ───────────────────────────────────────────────────────────────
|
|
# System packages, one per line in alphabetical order. They cover:
#   - build toolchain and Python packaging (build-essential, cmake, python3-*)
#   - audio hardware (ALSA, PulseAudio, PortAudio, libsndfile, libsoundio)
#   - USB audio / ReSpeaker support (usbutils, libusb)
#   - CUDA toolkit and cuDNN
#   - misc utilities (htop, tmux, nano, ffmpeg)
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        alsa-utils \
        build-essential \
        ca-certificates \
        cmake \
        cuda-toolkit-12-2 \
        curl \
        ffmpeg \
        git \
        htop \
        libasound2-dev \
        libcudnn8 \
        libcudnn8-dev \
        libsndfile1-dev \
        libsoundio-dev \
        libusb-1.0-0-dev \
        nano \
        portaudio19-dev \
        pulseaudio \
        pulseaudio-utils \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        tmux \
        usbutils \
        wget \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
# ── ROS2 Humble ───────────────────────────────────────────────────────────────
|
|
# Register the ROS 2 apt repository and install ROS 2 Humble (base) together
# with the colcon and rosdep tooling.
#
# The signing key is downloaded to a file first and dearmored in a separate
# step. Piping curl straight into gpg under the default /bin/sh hides a failed
# download, because only the last command of a pipeline sets the exit status.
# curl -f additionally turns HTTP error responses into a non-zero exit instead
# of saving the error page as if it were the key.
RUN curl -fsSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc \
        -o /tmp/ros.asc \
 && gpg --dearmor -o /usr/share/keyrings/ros-archive-keyring.gpg /tmp/ros.asc \
 && rm -f /tmp/ros.asc \
 && echo "deb [arch=arm64 signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu jammy main" \
        > /etc/apt/sources.list.d/ros2.list \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
        python3-colcon-common-extensions \
        python3-rosdep \
        ros-humble-ros-base \
 && rm -rf /var/lib/apt/lists/*
|
|
|
|
# ── Python AI deps (core) ─────────────────────────────────────────────────────
|
|
# faster-whisper: CTranslate2 backend, Orin GPU accelerated
|
|
# llama-cpp-python: GGUF quantized LLM, CUDA offload
|
|
# piper-tts: fast neural TTS, CPU/GPU
|
|
# insightface: SCRFD face detection + ArcFace recognition
|
|
# pyannote.audio: speaker diarization + ECAPA-TDNN embeddings
|
|
# Core speech / TTS / face / speaker packages, installed in one pip run.
# Every specifier is a lower bound only, so the resolved versions depend on
# what the package index offers at build time.
# NOTE(review): confirm each package (onnxruntime-gpu in particular) has an
# aarch64 wheel on the index in use — TODO verify on the target platform.
RUN pip3 install --no-cache-dir \
        'ctranslate2>=4.0.0' \
        'faster-whisper>=1.0.0' \
        'insightface>=0.7.3' \
        'onnxruntime-gpu>=1.17.0' \
        'openai-whisper>=20231117' \
        'piper-tts>=1.2.0' \
        'pyannote.audio>=3.1.0' \
        'speechbrain>=1.0.0'
|
|
|
|
# ── llama-cpp-python with CUDA ─────────────────────────────────────────────────
|
|
# Build with CUDA support for Orin GPU offload
|
|
# Build llama-cpp-python 0.2.85 from source (--no-binary) with the CUDA
# backend enabled.
#   -DGGML_CUDA=on                 CUDA backend switch. llama.cpp renamed its
#                                  LLAMA_* build options to GGML_*; the old
#                                  LLAMA_CUDA spelling is deprecated.
#   -DCMAKE_CUDA_ARCHITECTURES=87  CMake's variable for the target GPU
#                                  architecture. A bare CUDA_ARCHITECTURES
#                                  entry is not read by CMake, so the previous
#                                  flag left the architecture pin unapplied.
RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=87" \
    pip3 install --no-cache-dir llama-cpp-python==0.2.85 --no-binary llama-cpp-python
|
|
|
|
# ── Wake word / VAD ───────────────────────────────────────────────────────────
|
|
# Wake-word detection and voice-activity-detection packages (lower-bound
# version specifiers only).
RUN pip3 install --no-cache-dir \
        'openwakeword>=0.6.0' \
        'silero-vad>=5.1.0' \
        'webrtcvad-wheels>=2.0.14'
|
|
|
|
# ── Audio I/O ─────────────────────────────────────────────────────────────────
|
|
# Audio capture/playback and sound-file packages, plus numpy (lower-bound
# version specifiers only).
RUN pip3 install --no-cache-dir \
        'numpy>=1.24.0' \
        'pyaudio>=0.2.14' \
        'sounddevice>=0.4.6' \
        'soundfile>=0.12.1'
|
|
|
|
# ── TensorRT Python bindings ──────────────────────────────────────────────────
|
|
# Already available via JetPack; install pycuda for custom kernels
|
|
# pycuda only; no TensorRT package is installed by this step.
RUN pip3 install --no-cache-dir \
        'pycuda>=2022.2.2'
|
|
|
|
# ── MQTT for SOUL/agent communication ─────────────────────────────────────────
|
|
# MQTT client library (paho-mqtt 2.x or newer).
RUN pip3 install --no-cache-dir \
        'paho-mqtt>=2.0.0'
|
|
|
|
# ── ROS2 Python deps ──────────────────────────────────────────────────────────
|
|
# Extra Python packages used alongside ROS 2.
# rclpy is intentionally NOT installed with pip: the ROS 2 Python client
# library ships with the ros-humble-ros-base apt package installed earlier in
# this file and is built against that distribution's native libraries. An
# unpinned "rclpy" from the Python package index is not that build.
RUN pip3 install --no-cache-dir \
        "transforms3d>=0.4.1"
|
|
|
|
# ── Model directory ───────────────────────────────────────────────────────────
|
|
# Create the model storage layout and export its location to the runtime.
RUN mkdir -p \
        /models/engines \
        /models/gguf \
        /models/onnx \
        /models/piper
ENV MODEL_DIR=/models \
    PIPER_VOICE_DIR=/models/piper
|
|
|
|
# ── ALSA config for USB mic + speaker ─────────────────────────────────────────
|
|
# Install the project's ALSA configuration as the system-wide /etc/asound.conf.
COPY ["config/asound.conf", "/etc/asound.conf"]
|
|
|
|
# ── Workspace ─────────────────────────────────────────────────────────────────
|
|
# WORKDIR creates /ros2_ws if it does not exist and makes it the working
# directory; the src/ subdirectory is then created relative to it.
WORKDIR /ros2_ws
RUN mkdir -p src
|
|
|
|
# Install the entrypoint script and mark it executable.
COPY ["scripts/entrypoint.sh", "/entrypoint.sh"]
RUN chmod +x /entrypoint.sh

# Exec-form ENTRYPOINT; CMD supplies the default argument ("bash") passed to it.
# NOTE(review): entrypoint.sh is not visible here — presumably it ends with
# `exec "$@"` so the CMD actually runs; confirm.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["bash"]
|