Add wyoming-chatterbox.py

2026-04-21 09:37:00 +02:00 · 2026-04-21 09:37:00 +02:00 · 66d5595c68
commit 66d5595c68
parent c7f3b0d3ed
1 changed files with 68 additions and 0 deletions
--- a/wyoming-chatterbox.py
+++ b/wyoming-chatterbox.py
@@ -0,0 +1,68 @@
 """Wyoming protocol TTS wrapper for Chatterbox TTS server."""
 import argparse
 import asyncio
 import io
 import logging
 import wave

 import aiohttp
 from wyoming.audio import AudioChunk, AudioStart, AudioStop
 from wyoming.event import Event
 from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice
 from wyoming.server import AsyncEventHandler, AsyncServer
 from wyoming.tts import Synthesize
 # Module-level logger, named after this module.
 _LOGGER = logging.getLogger(__name__)
 # Wyoming service description. Stays None until main() assigns the Info object;
 # the handler sends INFO.event() in reply to Describe events.
 INFO = None
 class ChatterboxHandler(AsyncEventHandler):
    """Wyoming event handler that forwards TTS requests to a Chatterbox server.

    Describe events are answered with the module-level ``INFO``. Synthesize
    events are turned into an HTTP POST against the Chatterbox
    ``/v1/audio/speech`` endpoint; the returned WAV is sent back as
    AudioStart, AudioChunk and AudioStop events. All other events are ignored.
    """

    def __init__(self, reader, writer, cli_args, *a, **kw):
        """Store the parsed CLI arguments and initialise the base handler.

        Args:
            reader: Stream reader passed through to ``AsyncEventHandler``.
            writer: Stream writer passed through to ``AsyncEventHandler``.
            cli_args: Parsed arguments; ``chatterbox_url`` and ``voice`` are
                read when a Synthesize event is handled.
            *a, **kw: Forwarded unchanged to ``AsyncEventHandler``.
        """
        super().__init__(reader, writer, *a, **kw)
        self.cli_args = cli_args

    async def handle_event(self, event: Event) -> bool:
        """Handle one incoming Wyoming event.

        Returns:
            Always ``True``; synthesis failures are logged, not raised.
        """
        if Describe.is_type(event.type):
            await self.write_event(INFO.event())
            return True
        if not Synthesize.is_type(event.type):
            return True
        synth = Synthesize.from_event(event)
        text = synth.text or ""
        _LOGGER.info("Synthesizing: %s", text[:80])
        try:
            wav_data = await self._fetch_wav(text)
            await self._send_audio(wav_data)
        except Exception:
            _LOGGER.exception("TTS failed")
        # Always terminate the audio stream, even after a failure, so a client
        # waiting for the end-of-audio marker is not left blocked.
        try:
            await self.write_event(AudioStop().event())
        except Exception:
            _LOGGER.exception("Failed to send AudioStop")
        return True

    async def _fetch_wav(self, text: str) -> bytes:
        """POST *text* to the Chatterbox speech endpoint and return the body.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status, so an error payload is never parsed as WAV.
        """
        # Tolerate a trailing slash in --chatterbox-url (avoids "//v1/...").
        base_url = self.cli_args.chatterbox_url.rstrip("/")
        payload = {
            "model": "chatterbox",
            "input": text,
            "voice": self.cli_args.voice,
            "response_format": "wav",
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{base_url}/v1/audio/speech",
                json=payload,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as resp:
                resp.raise_for_status()
                return await resp.read()

    async def _send_audio(self, wav_data: bytes) -> None:
        """Decode *wav_data* and emit AudioStart followed by one AudioChunk.

        Raises:
            wave.Error: If *wav_data* is not a readable WAV file.
        """
        with io.BytesIO(wav_data) as wav_io, wave.open(wav_io, "rb") as wf:
            rate = wf.getframerate()
            width = wf.getsampwidth()
            channels = wf.getnchannels()
            audio = wf.readframes(wf.getnframes())
        # Announce the audio format before the samples are sent.
        await self.write_event(
            AudioStart(rate=rate, width=width, channels=channels).event()
        )
        await self.write_event(
            AudioChunk(rate=rate, width=width, channels=channels, audio=audio).event()
        )
 async def main():
    """Parse CLI options, publish the service description, and run the server.

    Sets the module-level ``INFO`` that the handler returns for Describe
    events, then starts a Wyoming TCP server whose connections are each
    served by a ``ChatterboxHandler`` holding the parsed arguments.
    """
    global INFO
    parser = argparse.ArgumentParser(
        description="Wyoming protocol TTS wrapper for Chatterbox TTS server."
    )
    # The bind address used to be fixed; the default keeps the old behaviour.
    parser.add_argument("--host", default="0.0.0.0",
                        help="address the Wyoming server binds to")
    parser.add_argument("--port", type=int, default=10201,
                        help="TCP port the Wyoming server listens on")
    parser.add_argument("--chatterbox-url", default="http://10.2.1.104:8004",
                        help="base URL of the Chatterbox TTS server")
    parser.add_argument("--voice", default="deep.mp3",
                        help="voice value sent with every speech request")
    args = parser.parse_args()
    attr = Attribution(name="Chatterbox", url="https://github.com/resemble-ai/chatterbox")
    # NOTE(review): the advertised voice is fixed to "deep" and does not follow
    # --voice; confirm whether the two are meant to stay in sync.
    INFO = Info(
        tts=[TtsProgram(
            name="chatterbox", description="Chatterbox TTS (Homer)",
            attribution=attr, installed=True, version="1.0",
            voices=[TtsVoice(name="deep", description="Deep - Lässige Assistentin",
                             attribution=attr, version="1.0",
                             languages=["de", "en"], installed=True)],
        )]
    )
    server = AsyncServer.from_uri(f"tcp://{args.host}:{args.port}")
    _LOGGER.info("Wyoming Chatterbox on port %d, voice=%s", args.port, args.voice)
    await server.run(lambda r, w: ChatterboxHandler(r, w, args))
 if __name__ == "__main__":
    # Script entry point: enable INFO-level logging, then run the server.
    logging.basicConfig(level=logging.INFO)
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the server; exit without a traceback.
        pass