feat: add voice playback settings with 4-provider support (#608)

Add WebSpeech, OpenAI TTS, custom endpoint, and Edge TTS providers.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
2026-05-10 20:08:38 +08:00
parent 838791a740
commit 15195f0795
18 changed files with 1237 additions and 20 deletions
@@ -1,5 +1,5 @@
 import type { Context } from 'koa'
-import { textToSpeech } from '../../services/hermes/tts'
+import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'

 export async function generate(ctx: Context) {
  const { text, lang } = ctx.request.body as {
@@ -26,3 +26,41 @@ export async function generate(ctx: Context) {
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
 }
+
+/**
+ * OpenAI-compatible TTS endpoint: checks `input`, then delegates synthesis to openaiCompatibleTts.
+ * Accepts: { model, input, voice, speed }. Only `input` is validated (non-empty string, length <= 5000).
+ * Responds 400 with { error } on bad input; otherwise sends the audio as audio/mpeg plus X-TTS-Engine.
+ */
+export async function openaiProxy(ctx: Context) {
+  const body = ctx.request.body as { // compile-time cast only; field types are not checked at runtime
+    input?: string
+    voice?: string
+    speed?: number
+    model?: string
+  }
+
+  if (!body.input || typeof body.input !== 'string') { // rejects missing, empty, or non-string input
+    ctx.status = 400
+    ctx.body = { error: 'input is required' }
+    return
+  }
+
+  if (body.input.length > 5000) { // .length counts UTF-16 code units
+    ctx.status = 400
+    ctx.body = { error: 'input is too long (max 5000 characters)' }
+    return
+  }
+
+  const { audio, engine } = await openaiCompatibleTts({ // a rejection is not caught in this handler
+    input: body.input,
+    voice: body.voice, // voice, speed and model are forwarded as received
+    speed: body.speed,
+    model: body.model,
+  })
+
+  ctx.set('Content-Type', 'audio/mpeg')
+  ctx.set('Content-Length', String(audio.length)) // assumes audio.length is a byte count (e.g. a Buffer) — TODO confirm
+  ctx.set('X-TTS-Engine', engine)
+  ctx.body = audio
+}