68 lines
2.9 KiB
Python
68 lines
2.9 KiB
Python
"""Wyoming protocol TTS wrapper for Chatterbox TTS server."""
|
|
import argparse
import asyncio
import io
import logging
import wave

import aiohttp
from wyoming.audio import AudioChunk, AudioStart, AudioStop
from wyoming.event import Event
from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice
from wyoming.server import AsyncEventHandler, AsyncServer
from wyoming.tts import Synthesize
|
|
|
|
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
|
|
# Service description sent in reply to Describe events. Starts as None and is
# replaced with an Info object by main() before the server starts running.
INFO = None
|
|
|
|
class ChatterboxHandler(AsyncEventHandler):
    """Wyoming event handler that proxies synthesis requests to Chatterbox.

    ``Describe`` events are answered with the module-level ``INFO`` object.
    ``Synthesize`` events are forwarded as an HTTP POST to the Chatterbox
    ``/v1/audio/speech`` endpoint; the returned WAV file is decoded and sent
    back to the client as Wyoming audio events. All other events are ignored.
    """

    def __init__(self, reader, writer, cli_args, *a, **kw):
        """Create a handler for one client connection.

        Args:
            reader: Stream reader, passed through to ``AsyncEventHandler``.
            writer: Stream writer, passed through to ``AsyncEventHandler``.
            cli_args: Parsed command-line namespace; ``chatterbox_url`` and
                ``voice`` are read from it for every synthesis request.
            *a: Extra positional arguments for the base class.
            **kw: Extra keyword arguments for the base class.
        """
        super().__init__(reader, writer, *a, **kw)
        self.cli_args = cli_args

    async def handle_event(self, event: Event) -> bool:
        """Handle one incoming Wyoming event.

        Args:
            event: The event received from the client.

        Returns:
            Always ``True``. Presumably this tells the server loop to keep
            the connection open -- confirm against ``AsyncEventHandler``.
        """
        if Describe.is_type(event.type):
            await self.write_event(INFO.event())
            return True

        if not Synthesize.is_type(event.type):
            return True

        synth = Synthesize.from_event(event)
        text = synth.text or ""
        _LOGGER.info("Synthesizing: %s", text[:80])

        try:
            wav_data = await self._fetch_wav(text)
            rate, width, channels, audio = self._decode_wav(wav_data)

            # The Wyoming TTS flow is audio-start, audio-chunk(s), audio-stop;
            # announce the format first so clients that rely on audio-start
            # can set up their output before any samples arrive.
            await self.write_event(
                AudioStart(rate=rate, width=width, channels=channels).event()
            )
            await self.write_event(
                AudioChunk(
                    rate=rate, width=width, channels=channels, audio=audio
                ).event()
            )
            await self.write_event(AudioStop().event())
        except Exception:
            # Best effort: one failed request must not take the handler down.
            # NOTE(review): on failure nothing (not even AudioStop) is sent
            # back -- confirm that clients cope with the missing reply.
            _LOGGER.exception("TTS failed")

        return True

    async def _fetch_wav(self, text: str) -> bytes:
        """POST *text* to the Chatterbox speech endpoint and return the body.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status, so the failure is reported as an HTTP problem
                instead of a confusing WAV parse error further down.
        """
        # Tolerate a trailing slash in --chatterbox-url ("http://host:8004/").
        base_url = self.cli_args.chatterbox_url.rstrip("/")
        payload = {
            "model": "chatterbox",
            "input": text,
            "voice": self.cli_args.voice,
            "response_format": "wav",
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{base_url}/v1/audio/speech",
                json=payload,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as resp:
                resp.raise_for_status()
                return await resp.read()

    @staticmethod
    def _decode_wav(wav_data: bytes) -> tuple:
        """Decode WAV bytes into ``(rate, width, channels, pcm_frames)``."""
        with io.BytesIO(wav_data) as wav_io, wave.open(wav_io, "rb") as wf:
            return (
                wf.getframerate(),
                wf.getsampwidth(),
                wf.getnchannels(),
                wf.readframes(wf.getnframes()),
            )
|
|
|
|
async def main():
    """Parse CLI options, publish the service description, and run the server.

    Command-line options:
        --port: TCP port to listen on (int, default 10201).
        --chatterbox-url: Base URL of the Chatterbox HTTP server.
        --voice: Voice identifier sent to Chatterbox with every request.

    Side effects:
        Assigns the module-level ``INFO`` object, then serves on
        ``tcp://0.0.0.0:<port>`` until ``server.run`` returns.
    """
    global INFO

    cli = argparse.ArgumentParser()
    option_table = (
        ("--port", {"type": int, "default": 10201}),
        ("--chatterbox-url", {"default": "http://10.2.1.104:8004"}),
        ("--voice", {"default": "deep.mp3"}),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    args = cli.parse_args()

    # Build the description bottom-up: attribution -> voice -> program -> info.
    credit = Attribution(
        name="Chatterbox",
        url="https://github.com/resemble-ai/chatterbox",
    )
    deep_voice = TtsVoice(
        name="deep",
        description="Deep - Lässige Assistentin",
        attribution=credit,
        version="1.0",
        languages=["de", "en"],
        installed=True,
    )
    program = TtsProgram(
        name="chatterbox",
        description="Chatterbox TTS (Homer)",
        attribution=credit,
        installed=True,
        version="1.0",
        voices=[deep_voice],
    )
    INFO = Info(tts=[program])

    def make_handler(reader, writer):
        """Create a handler for one connection, sharing the parsed options."""
        return ChatterboxHandler(reader, writer, args)

    server = AsyncServer.from_uri(f"tcp://0.0.0.0:{args.port}")
    _LOGGER.info("Wyoming Chatterbox on port %d, voice=%s", args.port, args.voice)
    await server.run(make_handler)
|
|
|
|
if __name__ == "__main__":
    # Configure logging first so the startup message from main() is shown.
    logging.basicConfig(level=logging.INFO)
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Treat Ctrl-C as a normal shutdown request rather than letting it
        # surface as a crash with a traceback.
        pass
|