add hermes tts playback (#541)
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
import type { Context } from 'koa'
|
||||
import { textToSpeech } from '../../services/hermes/tts'
|
||||
|
||||
/**
 * Koa handler that converts the posted `text` to speech and replies with the audio.
 *
 * Expected request body: `{ text: string, lang?: string }`.
 *
 * Responds with HTTP 400 and `{ error }` when `text` is missing, is not a
 * string, is blank, or is longer than 5000 characters. On success the response
 * body is the audio buffer with `Content-Type: audio/mpeg`, an explicit
 * `Content-Length`, and the producing engine in `X-TTS-Engine`.
 *
 * Errors thrown by `textToSpeech` are not caught here; they propagate to Koa.
 */
export async function generate(ctx: Context): Promise<void> {
  const maxTextLength = 5000

  // The body parser may leave `body` undefined or null (no payload) or hand
  // over a non-object such as a raw string. Normalise first so that reading
  // the fields can never throw and turn a bad request into a 500.
  const rawBody: unknown = ctx.request.body
  const payload: { text?: unknown; lang?: unknown } =
    typeof rawBody === 'object' && rawBody !== null ? rawBody : {}
  const { text, lang } = payload

  // Blank text is rejected together with missing text: there is nothing to speak.
  if (typeof text !== 'string' || text.trim().length === 0) {
    ctx.status = 400
    ctx.body = { error: 'text is required' }
    return
  }

  if (text.length > maxTextLength) {
    ctx.status = 400
    ctx.body = { error: 'text is too long (max 5000 characters)' }
    return
  }

  // Only forward `lang` when the client really sent a string.
  const { audio, engine } = await textToSpeech({
    text,
    lang: typeof lang === 'string' ? lang : undefined,
  })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}
|
||||
@@ -0,0 +1,6 @@
|
||||
import Router from '@koa/router'
|
||||
import * as ctrl from '../../controllers/hermes/tts'
|
||||
|
||||
// Router for the Hermes text-to-speech API: a single POST endpoint handled by
// the TTS controller's `generate`.
export const ttsRoutes = new Router().post('/api/hermes/tts', ctrl.generate)
|
||||
@@ -26,6 +26,7 @@ import { downloadRoutes } from './hermes/download'
|
||||
import { jobRoutes } from './hermes/jobs'
|
||||
import { cronHistoryRoutes } from './hermes/cron-history'
|
||||
import { kanbanRoutes } from './hermes/kanban'
|
||||
import { ttsRoutes } from './hermes/tts'
|
||||
import { proxyRoutes, proxyMiddleware } from './hermes/proxy'
|
||||
import { groupChatRoutes, setGroupChatServer } from './hermes/group-chat'
|
||||
|
||||
@@ -66,6 +67,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
|
||||
app.use(jobRoutes.routes()) // Must be before proxy
|
||||
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
||||
app.use(kanbanRoutes.routes()) // Must be before proxy
|
||||
app.use(ttsRoutes.routes()) // Must be before proxy
|
||||
app.use(proxyRoutes.routes())
|
||||
|
||||
// Proxy catch-all middleware (must be last)
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import { EdgeTTS } from 'node-edge-tts'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
import { readFile, unlink } from 'fs/promises'
|
||||
import { randomUUID } from 'crypto'
|
||||
import { logger } from '../logger'
|
||||
|
||||
// Voice, speaking rate and pitch used for every synthesis in this module.
// They are passed as the `voice`, `rate` and `pitch` options of EdgeTTS and
// are also included in the debug log entry written after a successful run.
const FIXED_VOICE = 'zh-CN-XiaoxiaoNeural'
const FIXED_RATE = '+4%'
const FIXED_PITCH = '+12Hz'
|
||||
|
||||
/** Input accepted by the text-to-speech helpers in this module. */
export interface TtsOptions {
  /** The text to synthesise. */
  text: string
  /**
   * Optional language supplied by the caller.
   * NOTE(review): `edgeTts` does not read this field (the voice is fixed) —
   * confirm whether it is reserved for a future engine.
   */
  lang?: string
}
|
||||
|
||||
/**
 * Synthesises `opts.text` with EdgeTTS and returns the generated audio bytes.
 *
 * The engine writes its output to a uniquely named `tts-<uuid>.mp3` file in
 * the OS temp directory; the file is read back into memory and then removed.
 * Voice, rate and pitch come from the module-level constants; `opts.lang` is
 * not consulted.
 *
 * @param opts - Synthesis input; only `text` is used.
 * @returns The contents of the generated audio file.
 * @throws Whatever `ttsPromise` or `readFile` rejects with. The temp file is
 *   cleaned up in that case as well.
 */
export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
  const tmpFile = join(tmpdir(), `tts-${randomUUID()}.mp3`)

  try {
    const tts = new EdgeTTS({
      voice: FIXED_VOICE,
      rate: FIXED_RATE,
      pitch: FIXED_PITCH,
      timeout: 15000, // presumably milliseconds — confirm against node-edge-tts
    })

    await tts.ttsPromise(opts.text, tmpFile)
    return await readFile(tmpFile)
  } finally {
    // Cleanup stays best-effort (it must never mask the real result or error),
    // but it is awaited so the file is gone once this function settles, and
    // unexpected failures are logged instead of vanishing silently.
    await unlink(tmpFile).catch((err: unknown) => {
      const code =
        typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined
      // ENOENT only means synthesis failed before the file was created.
      if (code !== 'ENOENT') {
        logger.warn({ err, tmpFile }, 'Failed to remove temporary TTS file')
      }
    })
  }
}
|
||||
|
||||
/**
 * Public entry point for speech synthesis.
 *
 * Delegates to `edgeTts`, writes a debug log entry describing the engine and
 * the fixed voice settings, and returns the audio together with the engine name.
 *
 * @param opts - Synthesis input, passed to `edgeTts` unchanged.
 * @returns The audio buffer and the label of the engine that produced it.
 */
export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> {
  const engine = 'edge'
  const audio = await edgeTts(opts)

  const logFields = { engine, voice: FIXED_VOICE, rate: FIXED_RATE, pitch: FIXED_PITCH }
  logger.debug(logFields, 'TTS generated via Edge')

  return { audio, engine }
}
|
||||
Reference in New Issue
Block a user