wyoming-services/wyoming-chatterbox.py
2026-04-19 09:57:30 +02:00

68 lines
2.9 KiB
Python

"""Wyoming protocol TTS wrapper for Chatterbox TTS server."""
import argparse
import asyncio
import io
import logging
import wave

import aiohttp
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event
from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice
from wyoming.server import AsyncEventHandler, AsyncServer
from wyoming.tts import Synthesize
# Module-level logger shared by the handler and main().
_LOGGER = logging.getLogger(__name__)
# Service description sent in reply to Describe events. It is a placeholder
# here and is replaced with a real Info object inside main() before the
# server starts accepting connections.
INFO = None
class ChatterboxHandler(AsyncEventHandler):
    """Per-connection Wyoming handler that forwards synthesis requests to Chatterbox.

    Describe events are answered with the module-level ``INFO`` object.
    Synthesize events are forwarded to the Chatterbox HTTP endpoint and the
    returned WAV is streamed back as AudioStart / AudioChunk / AudioStop.
    All other event types are ignored.
    """

    def __init__(self, reader, writer, cli_args, *a, **kw):
        """Store the parsed command-line options next to the stream pair.

        Args:
            reader: Stream reader passed through to ``AsyncEventHandler``.
            writer: Stream writer passed through to ``AsyncEventHandler``.
            cli_args: Parsed options; ``chatterbox_url`` and ``voice`` are read.
            *a: Extra positional arguments forwarded to the base class.
            **kw: Extra keyword arguments forwarded to the base class.
        """
        super().__init__(reader, writer, *a, **kw)
        self.cli_args = cli_args

    async def _fetch_wav(self, text: str) -> bytes:
        """Request speech for *text* from Chatterbox and return the raw WAV bytes.

        Raises:
            aiohttp.ClientResponseError: If the server answers with a 4xx/5xx status.
            aiohttp.ClientError, asyncio.TimeoutError: On transport failures.
        """
        # Tolerate a trailing slash in --chatterbox-url so the path is not "//v1/...".
        base_url = self.cli_args.chatterbox_url.rstrip("/")
        payload = {
            "model": "chatterbox",
            "input": text,
            "voice": self.cli_args.voice,
            "response_format": "wav",
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{base_url}/v1/audio/speech",
                json=payload,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as resp:
                # Fail here on an HTTP error instead of feeding an error body
                # to the WAV parser and getting a misleading wave.Error.
                resp.raise_for_status()
                return await resp.read()

    async def handle_event(self, event: Event) -> bool:
        """Handle one incoming Wyoming event.

        Args:
            event: The event read from the client connection.

        Returns:
            ``True`` to keep the connection open. ``False`` after a failed
            synthesis, which ends this connection so the client is not left
            waiting for an AudioStop that will never arrive.
        """
        if Describe.is_type(event.type):
            await self.write_event(INFO.event())
            return True
        if not Synthesize.is_type(event.type):
            return True

        synth = Synthesize.from_event(event)
        text = synth.text or ""
        _LOGGER.info("Synthesizing: %s", text[:80])
        try:
            wav_data = await self._fetch_wav(text)
            # Decode the WAV container to obtain raw PCM frames and their format.
            with io.BytesIO(wav_data) as wav_io, wave.open(wav_io, "rb") as wf:
                rate = wf.getframerate()
                width = wf.getsampwidth()
                channels = wf.getnchannels()
                audio = wf.readframes(wf.getnframes())
            # Announce the audio format before the payload, then send the
            # whole utterance as a single chunk and close the stream.
            await self.write_event(
                AudioStart(rate=rate, width=width, channels=channels).event()
            )
            await self.write_event(
                AudioChunk(rate=rate, width=width, channels=channels, audio=audio).event()
            )
            await self.write_event(AudioStop().event())
        except Exception:
            # Boundary handler: log the failure and drop this connection
            # rather than crash the server or leave the client hanging.
            _LOGGER.exception("TTS failed")
            return False
        return True
async def main():
    """Parse command-line options, build the service description, and serve.

    Recognised options are ``--port``, ``--chatterbox-url`` and ``--voice``.
    The module-level ``INFO`` is replaced with a description containing one
    TTS program with a single voice, then a TCP server bound to 0.0.0.0 on
    the chosen port runs until it stops.
    """
    global INFO

    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, default=10201)
    cli.add_argument("--chatterbox-url", default="http://10.2.1.104:8004")
    cli.add_argument("--voice", default="homer.mp3")
    args = cli.parse_args()

    # The same attribution is attached to both the program and its voice.
    credit = Attribution(
        name="Chatterbox",
        url="https://github.com/resemble-ai/chatterbox",
    )
    homer_voice = TtsVoice(
        name="homer",
        description="Homer Simpson",
        attribution=credit,
        version="1.0",
        languages=["de", "en"],
        installed=True,
    )
    program = TtsProgram(
        name="chatterbox",
        description="Chatterbox TTS (Homer)",
        attribution=credit,
        installed=True,
        version="1.0",
        voices=[homer_voice],
    )
    INFO = Info(tts=[program])

    def make_handler(reader, writer):
        # Each connection gets its own handler sharing the parsed options.
        return ChatterboxHandler(reader, writer, args)

    listen_uri = f"tcp://0.0.0.0:{args.port}"
    server = AsyncServer.from_uri(listen_uri)
    _LOGGER.info("Wyoming Chatterbox on port %d, voice=%s", args.port, args.voice)
    await server.run(make_handler)
if __name__ == "__main__":
    # Set up logging first so the startup message emitted by main() is shown.
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())