# Social-bot container — JetPack 6 + TensorRT + audio AI stack
# Builds on the NVIDIA L4T JetPack r36.2.0 base image and adds ROS2 Humble
# plus the social-bot dependencies.
#
# Deps: faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface,
#       pyannote.audio, OpenWakeWord, Silero VAD, pyaudio, sounddevice
#
# Build: docker build -f Dockerfile.social -t saltybot/social:latest .
# Run:   docker compose -f docker-compose.yml up -d saltybot-social

FROM nvcr.io/nvidia/l4t-jetpack:r36.2.0

LABEL maintainer="sl-jetson" \
      description="Social-bot AI stack — speech, LLM, TTS, face recognition on Orin Nano Super" \
      jetpack="6.0" \
      l4t="r36.2.0"

# Fail a RUN step when any stage of a pipeline fails (needed for `curl | gpg`
# below); the default /bin/sh -c only reports the last command's status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build-time only: keeps apt non-interactive during the build without baking
# the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

ENV ROS_DISTRO=humble
# ROS_ROOT is set in its own instruction so ${ROS_DISTRO} is already defined.
ENV ROS_ROOT=/opt/ros/${ROS_DISTRO} \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# ── Locale ────────────────────────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
        locales \
        tzdata \
    && locale-gen en_US.UTF-8 \
    && rm -rf /var/lib/apt/lists/*
ENV LANG=en_US.UTF-8

# ── System deps ───────────────────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential cmake git wget curl ca-certificates \
        # gnupg provides `gpg`, used to dearmor the ROS2 apt key below
        gnupg \
        python3-dev python3-pip python3-setuptools python3-wheel \
        # Audio hardware
        alsa-utils libasound2-dev pulseaudio pulseaudio-utils \
        portaudio19-dev libsndfile1-dev libsoundio-dev \
        # USB audio / ReSpeaker support
        usbutils libusb-1.0-0-dev \
        # CUDA / TensorRT tools
        cuda-toolkit-12-2 libcudnn8 libcudnn8-dev \
        # Misc
        htop tmux nano ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# ── ROS2 Humble ───────────────────────────────────────────────────────────────
# Register the ROS2 apt repository (signed-by keyring), then install ros-base.
# `curl -f` makes an HTTP error abort the step instead of writing an error page
# into the keyring.
RUN curl -fsSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc \
        | gpg --dearmor -o /usr/share/keyrings/ros-archive-keyring.gpg \
    && echo "deb [arch=arm64 signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu jammy main" \
        > /etc/apt/sources.list.d/ros2.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        ros-humble-ros-base \
        python3-colcon-common-extensions \
        python3-rosdep \
    && rm -rf /var/lib/apt/lists/*

# ── Python AI deps (core) ─────────────────────────────────────────────────────
# faster-whisper:   CTranslate2 backend, Orin GPU accelerated
# llama-cpp-python: GGUF quantized LLM, CUDA offload
# piper-tts:        fast neural TTS, CPU/GPU
# insightface:      SCRFD face detection + ArcFace recognition
# pyannote.audio:   speaker diarization + ECAPA-TDNN embeddings
# NOTE(review): confirm that PyPI serves an aarch64 wheel for onnxruntime-gpu
# and a CUDA-enabled aarch64 wheel for ctranslate2; Jetson targets may need
# NVIDIA-provided or source-built wheels instead.
RUN pip3 install --no-cache-dir \
        "faster-whisper>=1.0.0" \
        "ctranslate2>=4.0.0" \
        "openai-whisper>=20231117" \
        "piper-tts>=1.2.0" \
        "insightface>=0.7.3" \
        "onnxruntime-gpu>=1.17.0" \
        "pyannote.audio>=3.1.0" \
        "speechbrain>=1.0.0"

# ── llama-cpp-python with CUDA ────────────────────────────────────────────────
# Build from source with CUDA support for Orin GPU offload.
# GGML_CUDA is the current name of the CUDA switch (LLAMA_CUDA is deprecated),
# and CMAKE_CUDA_ARCHITECTURES is the CMake variable that selects the target
# SM; a bare CUDA_ARCHITECTURES define is not read by CMake. 87 = Orin (sm_87).
RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=87" \
    pip3 install --no-cache-dir llama-cpp-python==0.2.85 --no-binary llama-cpp-python

# ── Wake word / VAD ───────────────────────────────────────────────────────────
RUN pip3 install --no-cache-dir \
        "openwakeword>=0.6.0" \
        "silero-vad>=5.1.0" \
        "webrtcvad-wheels>=2.0.14"

# ── Audio I/O ─────────────────────────────────────────────────────────────────
RUN pip3 install --no-cache-dir \
        "pyaudio>=0.2.14" \
        "sounddevice>=0.4.6" \
        "soundfile>=0.12.1" \
        "numpy>=1.24.0"

# ── TensorRT Python bindings ──────────────────────────────────────────────────
# Already available via JetPack; install pycuda for custom kernels
RUN pip3 install --no-cache-dir "pycuda>=2022.2.2"

# ── MQTT for SOUL/agent communication ─────────────────────────────────────────
RUN pip3 install --no-cache-dir "paho-mqtt>=2.0.0"

# ── ROS2 Python deps ──────────────────────────────────────────────────────────
# rclpy is intentionally not pip-installed: it ships with the
# ros-humble-ros-base apt package installed above.
RUN pip3 install --no-cache-dir "transforms3d>=0.4.1"

# ── Model directory ───────────────────────────────────────────────────────────
RUN mkdir -p /models/onnx /models/engines /models/gguf /models/piper
ENV MODEL_DIR=/models \
    PIPER_VOICE_DIR=/models/piper

# ── ALSA config for USB mic + speaker ─────────────────────────────────────────
COPY config/asound.conf /etc/asound.conf

# ── Workspace ─────────────────────────────────────────────────────────────────
RUN mkdir -p /ros2_ws/src
WORKDIR /ros2_ws

COPY scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]
CMD ["bash"]