feat: TTS Stack - Chatterbox Multilingual + Whisper STT

- Chatterbox TTS Server (Multilingual, 23 Sprachen, Voice Cloning)
- Whisper STT Server (faster-whisper-small, CPU)
- RTX 4060 GPU auf Tdarr VM (node2, 10.2.1.104)
- Voice Profile: chantal.wav
- Chantal Telegram Voice Integration
This commit is contained in:
feldjaeger 2026-04-17 12:37:44 +02:00
commit 6115f3bc09
4 changed files with 167 additions and 0 deletions

50
compose-chatterbox.yaml Normal file
View file

@@ -0,0 +1,50 @@
# Docker Compose stack for the Chatterbox TTS server (multilingual TTS with
# voice cloning). Exposes HTTP on host port ${PORT:-8004} and requests one
# NVIDIA GPU via the modern 'deploy' mechanism.
services:
  chatterbox-tts-server:
    build:
      args:
        # Can be nvidia or cpu; default is nvidia
        - RUNTIME=nvidia
      context: .
      dockerfile: Dockerfile
    ports:
      # Quoted to avoid YAML's sexagesimal/number parsing of port mappings
      - "${PORT:-8004}:8004"
    volumes:
      # Mount local config file for persistence
      - ./config.yaml:/app/config.yaml
      # Mount local directories for persistent app data
      - ./voices:/app/voices
      - ./reference_audio:/app/reference_audio
      - ./outputs:/app/outputs
      - ./logs:/app/logs
      # Named volume for Hugging Face model cache to persist across
      # container rebuilds
      - hf_cache:/app/hf_cache
    # --- GPU Support (NVIDIA) ---
    # The 'deploy' key is the modern way to request GPU resources.
    # If you get a 'CDI device injection failed' error, comment out the
    # 'deploy' section and uncomment the 'runtime: nvidia' line below.
    # Method 1: Modern Docker Compose (recommended)
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Method 2: Legacy Docker Compose (for older setups)
    # runtime: nvidia
    restart: unless-stopped
    environment:
      # NOTE(review): placeholder only — do not commit a real token. Prefer
      # injecting it via an .env file or interpolation (HF_TOKEN=${HF_TOKEN}).
      - HF_TOKEN=YOUR_TOKEN_HERE
      # Enable faster Hugging Face downloads inside the container
      - HF_HUB_ENABLE_HF_TRANSFER=1
      # Make NVIDIA GPUs visible and specify capabilities for PyTorch
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Define the named volume for the Hugging Face cache
volumes:
  hf_cache: