"""Wyoming protocol TTS wrapper for Chatterbox TTS server.""" import argparse, asyncio, io, logging, wave import aiohttp from wyoming.audio import AudioChunk, AudioStop from wyoming.event import Event from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice from wyoming.server import AsyncEventHandler, AsyncServer from wyoming.tts import Synthesize _LOGGER = logging.getLogger(__name__) INFO = None class ChatterboxHandler(AsyncEventHandler): def __init__(self, reader, writer, cli_args, *a, **kw): super().__init__(reader, writer, *a, **kw) self.cli_args = cli_args async def handle_event(self, event: Event) -> bool: if Describe.is_type(event.type): await self.write_event(INFO.event()) return True if not Synthesize.is_type(event.type): return True synth = Synthesize.from_event(event) text = synth.text or "" _LOGGER.info("Synthesizing: %s", text[:80]) try: async with aiohttp.ClientSession() as session: async with session.post( f"{self.cli_args.chatterbox_url}/v1/audio/speech", json={"model":"chatterbox","input":text,"voice":self.cli_args.voice,"response_format":"wav"}, timeout=aiohttp.ClientTimeout(total=60) ) as resp: wav_data = await resp.read() with io.BytesIO(wav_data) as wav_io: with wave.open(wav_io, "rb") as wf: audio = wf.readframes(wf.getnframes()) await self.write_event(AudioChunk(rate=wf.getframerate(), width=wf.getsampwidth(), channels=wf.getnchannels(), audio=audio).event()) await self.write_event(AudioStop().event()) except Exception: _LOGGER.exception("TTS failed") return True async def main(): global INFO parser = argparse.ArgumentParser() parser.add_argument("--port", type=int, default=10201) parser.add_argument("--chatterbox-url", default="http://10.2.1.104:8004") parser.add_argument("--voice", default="deep.mp3") args = parser.parse_args() attr = Attribution(name="Chatterbox", url="https://github.com/resemble-ai/chatterbox") INFO = Info( tts=[TtsProgram( name="chatterbox", description="Chatterbox TTS (Homer)", attribution=attr, installed=True, version="1.0", voices=[TtsVoice(name="deep", description="Deep - Lässige Assistentin", attribution=attr, version="1.0", languages=["de","en"], installed=True)], )] ) server = AsyncServer.from_uri(f"tcp://0.0.0.0:{args.port}") _LOGGER.info("Wyoming Chatterbox on port %d, voice=%s", args.port, args.voice) await server.run(lambda r,w: ChatterboxHandler(r, w, args)) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) asyncio.run(main())