feat: add voice playback settings with 4-provider support (#608)

Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
memeflyfly
2026-05-10 20:08:38 +08:00
committed by GitHub
parent 838791a740
commit 15195f0795
18 changed files with 1237 additions and 20 deletions
+39 -1
View File
@@ -1,5 +1,5 @@
import type { Context } from 'koa'
import { textToSpeech } from '../../services/hermes/tts'
import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'
export async function generate(ctx: Context) {
const { text, lang } = ctx.request.body as {
@@ -26,3 +26,41 @@ export async function generate(ctx: Context) {
ctx.set('X-TTS-Engine', engine)
ctx.body = audio
}
/**
 * OpenAI-compatible TTS endpoint.
 *
 * Request body (JSON): `{ model, input, voice, speed }`.
 * - `input` is required: a non-empty string of at most 5000 characters.
 * - `voice` and `model` must be strings when supplied.
 * - `speed` must be a finite number when supplied.
 * - A `null` optional field is treated as omitted.
 *
 * On success, responds with the audio produced by `openaiCompatibleTts` as
 * `audio/mpeg`, and sets `Content-Length` and `X-TTS-Engine`.
 * On invalid input, responds with status 400 and `{ error: string }` without
 * calling the TTS service.
 */
export async function openaiProxy(ctx: Context): Promise<void> {
  const MAX_INPUT_LENGTH = 5000

  const badRequest = (message: string): void => {
    ctx.status = 400
    ctx.body = { error: message }
  }
  const isOptionalString = (value: unknown): value is string | undefined =>
    value === undefined || typeof value === 'string'
  const isOptionalFiniteNumber = (value: unknown): value is number | undefined =>
    value === undefined || (typeof value === 'number' && Number.isFinite(value))

  // The body is untrusted and may be absent entirely (no parser ran, empty
  // request, JSON `null`); fall back to an empty object so the request is
  // rejected with a 400 instead of throwing on property access.
  const raw: unknown = ctx.request.body
  const fields: Record<string, unknown> =
    typeof raw === 'object' && raw !== null ? (raw as Record<string, unknown>) : {}

  const input = fields.input
  if (typeof input !== 'string' || input.length === 0) {
    badRequest('input is required')
    return
  }
  if (input.length > MAX_INPUT_LENGTH) {
    badRequest(`input is too long (max ${MAX_INPUT_LENGTH} characters)`)
    return
  }

  // Normalise JSON `null` to "not provided" so the service only ever sees the
  // declared `T | undefined` shapes.
  const voice = fields.voice ?? undefined
  const speed = fields.speed ?? undefined
  const model = fields.model ?? undefined

  if (!isOptionalString(voice)) {
    badRequest('voice must be a string')
    return
  }
  if (!isOptionalFiniteNumber(speed)) {
    badRequest('speed must be a finite number')
    return
  }
  if (!isOptionalString(model)) {
    badRequest('model must be a string')
    return
  }

  const { audio, engine } = await openaiCompatibleTts({ input, voice, speed, model })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}