packages/server/src/controllers/hermes/tts.ts

import type { Context } from 'koa'
import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'

/**
 * Hermes TTS endpoint: synthesizes speech for the posted text.
 *
 * Reads `{ text, lang }` from the parsed request body. Responds with status
 * 400 and an `{ error }` payload when `text` is missing, empty, not a string,
 * or longer than 5000 characters. Otherwise it awaits `textToSpeech` and sends
 * the returned audio with `Content-Type: audio/mpeg`, reporting the engine
 * name in the `X-TTS-Engine` header.
 *
 * Errors thrown by `textToSpeech` are not caught here; they propagate to the
 * surrounding Koa middleware.
 *
 * @param ctx - Koa request context whose `request.body` is expected to have
 *   been populated by a body-parsing middleware.
 */
export async function generate(ctx: Context): Promise<void> {
  const maxChars = 5000

  // The body can be undefined/null (no parser ran, or a literal `null`
  // payload). Fall back to an empty object so that case is answered with the
  // 400 below instead of a TypeError from destructuring.
  const { text, lang } = (ctx.request.body ?? {}) as {
    text?: string
    lang?: string
  }

  if (typeof text !== 'string' || text.length === 0) {
    ctx.status = 400
    ctx.body = { error: 'text is required' }
    return
  }

  if (text.length > maxChars) {
    ctx.status = 400
    ctx.body = { error: `text is too long (max ${maxChars} characters)` }
    return
  }

  const { audio, engine } = await textToSpeech({ text, lang })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}

/**
 * OpenAI-compatible TTS endpoint.
 *
 * Accepts a parsed request body of `{ input, voice, speed, model, rate, pitch }`;
 * only `input` is validated here, the other fields are forwarded unchanged to
 * `openaiCompatibleTts`.
 *
 * Responds with status 400 and an `{ error }` payload when `input` is missing,
 * empty, not a string, or longer than 5000 characters. Otherwise it sends the
 * returned audio with `Content-Type: audio/mpeg` and reports the engine name
 * in the `X-TTS-Engine` header.
 *
 * Errors thrown by `openaiCompatibleTts` are not caught here; they propagate
 * to the surrounding Koa middleware.
 *
 * @param ctx - Koa request context whose `request.body` is expected to have
 *   been populated by a body-parsing middleware.
 */
export async function openaiProxy(ctx: Context): Promise<void> {
  const maxChars = 5000

  // The body can be undefined/null (no parser ran, or a literal `null`
  // payload). Fall back to an empty object so that case is answered with the
  // 400 below instead of a TypeError on property access.
  const { input, voice, speed, model, rate, pitch } = (ctx.request.body ?? {}) as {
    input?: string
    voice?: string
    speed?: number
    model?: string
    rate?: string
    pitch?: string
  }

  if (typeof input !== 'string' || input.length === 0) {
    ctx.status = 400
    ctx.body = { error: 'input is required' }
    return
  }

  if (input.length > maxChars) {
    ctx.status = 400
    ctx.body = { error: `input is too long (max ${maxChars} characters)` }
    return
  }

  const { audio, engine } = await openaiCompatibleTts({
    input,
    voice,
    speed,
    model,
    rate,
    pitch,
  })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}
add hermes tts playback (#541 ) 2026-05-08 15:34:11 +08:00			`import type { Context } from 'koa'`
feat: add voice playback settings with 4-provider support (#608 ) 2026-05-10 20:08:38 +08:00			`import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'`
add hermes tts playback (#541 ) 2026-05-08 15:34:11 +08:00
			`export async function generate(ctx: Context) {`
			`const { text, lang } = ctx.request.body as {`
			`text?: string`
			`lang?: string`
			`}`

			`if (!text \|\| typeof text !== 'string') {`
			`ctx.status = 400`
			`ctx.body = { error: 'text is required' }`
			`return`
			`}`

			`if (text.length > 5000) {`
			`ctx.status = 400`
			`ctx.body = { error: 'text is too long (max 5000 characters)' }`
			`return`
			`}`

			`const { audio, engine } = await textToSpeech({ text, lang })`

			`ctx.set('Content-Type', 'audio/mpeg')`
			`ctx.set('Content-Length', String(audio.length))`
			`ctx.set('X-TTS-Engine', engine)`
			`ctx.body = audio`
			`}`
feat: add voice playback settings with 4-provider support (#608 ) 2026-05-10 20:08:38 +08:00
			`/**`
			`* OpenAI-compatible TTS endpoint.`
			`* Accepts: { model, input, voice, speed }`
			`* Returns audio/mpeg stream.`
			`*/`
			`export async function openaiProxy(ctx: Context) {`
			`const body = ctx.request.body as {`
			`input?: string`
			`voice?: string`
			`speed?: number`
			`model?: string`
feat: add Edge TTS rate/pitch sliders to voice settings (#629 ) 2026-05-11 21:56:11 +08:00			`rate?: string`
			`pitch?: string`
feat: add voice playback settings with 4-provider support (#608 ) 2026-05-10 20:08:38 +08:00			`}`

			`if (!body.input \|\| typeof body.input !== 'string') {`
			`ctx.status = 400`
			`ctx.body = { error: 'input is required' }`
			`return`
			`}`

			`if (body.input.length > 5000) {`
			`ctx.status = 400`
			`ctx.body = { error: 'input is too long (max 5000 characters)' }`
			`return`
			`}`

			`const { audio, engine } = await openaiCompatibleTts({`
			`input: body.input,`
			`voice: body.voice,`
			`speed: body.speed,`
			`model: body.model,`
feat: add Edge TTS rate/pitch sliders to voice settings (#629 ) 2026-05-11 21:56:11 +08:00			`rate: body.rate,`
			`pitch: body.pitch,`
feat: add voice playback settings with 4-provider support (#608 ) 2026-05-10 20:08:38 +08:00			`})`

			`ctx.set('Content-Type', 'audio/mpeg')`
			`ctx.set('Content-Length', String(audio.length))`
			`ctx.set('X-TTS-Engine', engine)`
			`ctx.body = audio`
			`}`