feat: TTS Stack - Chatterbox Multilingual + Whisper STT

- Chatterbox TTS Server (Multilingual, 23 Sprachen, Voice Cloning)
- Whisper STT Server (faster-whisper-small, CPU)
- RTX 4060 GPU auf Tdarr VM (node2, 10.2.1.104)
- Voice Profile: chantal.wav
- Chantal Telegram Voice Integration
This commit is contained in:
feldjaeger 2026-04-17 12:37:44 +02:00
commit 6115f3bc09
4 changed files with 167 additions and 0 deletions

50
compose-chatterbox.yaml Normal file
View file

@@ -0,0 +1,50 @@
# Docker Compose stack for the Chatterbox TTS server (multilingual TTS with
# voice cloning). Exposes HTTP on host port ${PORT:-8004} and requests one
# NVIDIA GPU via the modern 'deploy' mechanism.
services:
  chatterbox-tts-server:
    build:
      args:
        # Can be nvidia or cpu; default is nvidia
        - RUNTIME=nvidia
      context: .
      dockerfile: Dockerfile
    ports:
      # Quoted to avoid YAML's sexagesimal/number parsing of port mappings
      - "${PORT:-8004}:8004"
    volumes:
      # Mount local config file for persistence
      - ./config.yaml:/app/config.yaml
      # Mount local directories for persistent app data
      - ./voices:/app/voices
      - ./reference_audio:/app/reference_audio
      - ./outputs:/app/outputs
      - ./logs:/app/logs
      # Named volume for Hugging Face model cache to persist across
      # container rebuilds
      - hf_cache:/app/hf_cache
    # --- GPU Support (NVIDIA) ---
    # The 'deploy' key is the modern way to request GPU resources.
    # If you get a 'CDI device injection failed' error, comment out the
    # 'deploy' section and uncomment the 'runtime: nvidia' line below.
    # Method 1: Modern Docker Compose (recommended)
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Method 2: Legacy Docker Compose (for older setups)
    # runtime: nvidia
    restart: unless-stopped
    environment:
      # NOTE(review): placeholder only — do not commit a real token. Prefer
      # injecting it via an .env file or interpolation (HF_TOKEN=${HF_TOKEN}).
      - HF_TOKEN=YOUR_TOKEN_HERE
      # Enable faster Hugging Face downloads inside the container
      - HF_HUB_ENABLE_HF_TRANSFER=1
      # Make NVIDIA GPUs visible and specify capabilities for PyTorch
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Define the named volume for the Hugging Face cache
volumes:
  hf_cache: