# saltylab-firmware/jetson/Dockerfile.social

# Social-bot container — JetPack 6 + TensorRT + audio AI stack
# Builds on the NVIDIA L4T JetPack base image, adding ROS2 Humble and the
# social-bot dependencies.
#
# Deps: faster-whisper, llama-cpp-python (CUDA), piper-tts, insightface,
#       pyannote.audio, OpenWakeWord, Silero VAD, pyaudio, sounddevice
#
# Build: docker build -f Dockerfile.social -t saltybot/social:latest .
# Run:   docker compose -f docker-compose.yml up -d saltybot-social

FROM nvcr.io/nvidia/l4t-jetpack:r36.2.0

# Image metadata; the jetpack / l4t labels mirror the base image tag above.
LABEL maintainer="sl-jetson" \
      description="Social-bot AI stack — speech, LLM, TTS, face recognition on Orin Nano Super" \
      jetpack="6.0" \
      l4t="r36.2.0"

# Build-time only: keeps apt (e.g. the tzdata install) from prompting during
# RUN steps. Declared as ARG rather than ENV so the setting is not baked into
# the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# ROS_ROOT is derived from ROS_DISTRO, so ROS_DISTRO must be set by an earlier
# ENV instruction for the substitution to resolve.
ENV ROS_DISTRO=humble
ENV ROS_ROOT=/opt/ros/${ROS_DISTRO}

# Python runtime behaviour: no .pyc files, unbuffered stdout/stderr.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# ── Locale ────────────────────────────────────────────────────────────────────
# Installs locale and timezone data, generates en_US.UTF-8 and makes it the
# default LANG. The apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        locales \
        tzdata \
    && locale-gen en_US.UTF-8 \
    && rm -rf /var/lib/apt/lists/*
ENV LANG=en_US.UTF-8

# ── System deps ───────────────────────────────────────────────────────────────
# Build toolchain, Python packaging tools, audio/USB libraries, CUDA/cuDNN
# packages and a few interactive utilities. gnupg is installed explicitly
# because the ROS2 repository setup below calls `gpg --dearmor`.
# Packages are listed one per line, sorted within each group, to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    # Build toolchain and download utilities
    build-essential \
    ca-certificates \
    cmake \
    curl \
    git \
    gnupg \
    wget \
    # Python packaging
    python3-dev \
    python3-pip \
    python3-setuptools \
    python3-wheel \
    # Audio hardware
    alsa-utils \
    libasound2-dev \
    libsndfile1-dev \
    libsoundio-dev \
    portaudio19-dev \
    pulseaudio \
    pulseaudio-utils \
    # USB audio / ReSpeaker support
    libusb-1.0-0-dev \
    usbutils \
    # CUDA / TensorRT tools
    cuda-toolkit-12-2 \
    libcudnn8 \
    libcudnn8-dev \
    # Misc
    ffmpeg \
    htop \
    nano \
    tmux \
    && rm -rf /var/lib/apt/lists/*

# ── ROS2 Humble ───────────────────────────────────────────────────────────────
# Registers the packages.ros.org apt repository (arm64, jammy) and installs
# ros-base together with the colcon and rosdep tooling.
#
# The signing key is downloaded to a temporary file instead of being piped
# straight into gpg: RUN uses /bin/sh without pipefail, so a failed download
# inside a pipeline would not fail the build. `curl -f` turns HTTP errors into
# a non-zero exit status.
RUN curl -fsSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc \
        -o /tmp/ros.asc \
    && gpg --output /usr/share/keyrings/ros-archive-keyring.gpg --dearmor /tmp/ros.asc \
    && rm -f /tmp/ros.asc \
    && echo "deb [arch=arm64 signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu jammy main" \
        > /etc/apt/sources.list.d/ros2.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3-colcon-common-extensions \
        python3-rosdep \
        ros-humble-ros-base \
    && rm -rf /var/lib/apt/lists/*

# ── Python AI deps (core) ─────────────────────────────────────────────────────
# Speech recognition, TTS, face recognition and speaker-analysis packages,
# installed in a single pip invocation with lower-bound version constraints.
#
# faster-whisper / ctranslate2: Whisper inference on the CTranslate2 backend
# openai-whisper: reference Whisper implementation, installed alongside
# llama-cpp-python: GGUF quantized LLM, CUDA offload (built in its own step below)
# piper-tts: fast neural TTS, CPU/GPU
# insightface + onnxruntime-gpu: SCRFD face detection + ArcFace recognition
# pyannote.audio / speechbrain: speaker diarization + ECAPA-TDNN embeddings
#
# NOTE(review): confirm that the default package index provides GPU-enabled
# aarch64 wheels for onnxruntime-gpu, ctranslate2 and the torch dependency
# pulled in by pyannote.audio on JetPack 6; otherwise this step may fail to
# resolve or silently install CPU-only builds.
# NOTE(review): only lower bounds are given, so rebuilds are not reproducible;
# consider pinning exact versions once a working set is known.
RUN pip3 install --no-cache-dir \
    "faster-whisper>=1.0.0" \
    "ctranslate2>=4.0.0" \
    "openai-whisper>=20231117" \
    "piper-tts>=1.2.0" \
    "insightface>=0.7.3" \
    "onnxruntime-gpu>=1.17.0" \
    "pyannote.audio>=3.1.0" \
    "speechbrain>=1.0.0"

# ── llama-cpp-python with CUDA ─────────────────────────────────────────────────
# Built from source (--no-binary) with the CUDA backend for Orin GPU offload.
#   GGML_CUDA=on                 CUDA switch for the llama.cpp vendored by this
#                                release; LLAMA_CUDA is its deprecated spelling.
#   CMAKE_CUDA_ARCHITECTURES=87  Targets compute capability 8.7 (Orin). The bare
#                                name CUDA_ARCHITECTURES is a CMake target
#                                property, so passing it with -D had no effect.
RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=87" \
    pip3 install --no-cache-dir llama-cpp-python==0.2.85 --no-binary llama-cpp-python

# ── Wake word / VAD ───────────────────────────────────────────────────────────
# Wake-word detection (openwakeword) and voice-activity detection
# (silero-vad, webrtcvad-wheels).
RUN pip3 install \
        --no-cache-dir \
        "openwakeword>=0.6.0" \
        "silero-vad>=5.1.0" \
        "webrtcvad-wheels>=2.0.14"

# ── Audio I/O ─────────────────────────────────────────────────────────────────
# Python bindings for audio capture/playback and sound-file access, plus numpy.
RUN pip3 install \
        --no-cache-dir \
        "pyaudio>=0.2.14" \
        "sounddevice>=0.4.6" \
        "soundfile>=0.12.1" \
        "numpy>=1.24.0"

# ── TensorRT Python bindings ──────────────────────────────────────────────────
# The TensorRT bindings themselves are expected to come from JetPack;
# only pycuda (for custom kernels) is added here.
RUN pip3 install \
        --no-cache-dir \
        "pycuda>=2022.2.2"

# ── MQTT for SOUL/agent communication ─────────────────────────────────────────
# MQTT client library used for SOUL/agent messaging.
RUN pip3 install \
        --no-cache-dir \
        "paho-mqtt>=2.0.0"

# ── ROS2 Python deps ──────────────────────────────────────────────────────────
# rclpy is deliberately NOT installed with pip: it ships with the
# ros-humble-ros-base apt package installed earlier in this file, and a pip
# copy would not be built against that distribution's rcl libraries.
# Only pure-Python helpers that ROS does not provide are installed here.
RUN pip3 install --no-cache-dir \
    "transforms3d>=0.4.1"

# ── Model directory ───────────────────────────────────────────────────────────
# Creates the model directory tree (one sub-directory per model format) and
# exports its location to the runtime environment.
RUN mkdir -p \
        /models/onnx \
        /models/engines \
        /models/gguf \
        /models/piper
ENV MODEL_DIR=/models \
    PIPER_VOICE_DIR=/models/piper

# ── ALSA config for USB mic + speaker ─────────────────────────────────────────
# Source path is relative to the build context.
COPY config/asound.conf /etc/asound.conf

# ── Workspace ─────────────────────────────────────────────────────────────────
# WORKDIR creates /ros2_ws if needed; the src/ sub-directory is added afterwards.
WORKDIR /ros2_ws
RUN mkdir -p /ros2_ws/src

# Entrypoint script is copied in and marked executable; the default command
# passed to it is an interactive bash shell.
COPY scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]
CMD ["bash"]