2026-05-08 15:34:11 +08:00
|
|
|
import { EdgeTTS } from 'node-edge-tts'
|
|
|
|
|
import { tmpdir } from 'os'
|
|
|
|
|
import { join } from 'path'
|
|
|
|
|
import { readFile, unlink } from 'fs/promises'
|
|
|
|
|
import { randomUUID } from 'crypto'
|
|
|
|
|
import { logger } from '../logger'
|
|
|
|
|
|
|
|
|
|
// Voice name used whenever the caller supplies no (or an empty) `voice`.
const FIXED_VOICE = 'zh-CN-XiaoxiaoNeural'
|
|
|
|
|
// Speaking-rate string used whenever the caller supplies no (or an empty) `rate`.
const FIXED_RATE = '+4%'
|
|
|
|
|
// Pitch string used whenever the caller supplies no (or an empty) `pitch`.
const FIXED_PITCH = '+12Hz'
|
|
|
|
|
|
|
|
|
|
/**
 * Input accepted by the TTS entry points in this module.
 *
 * Only `text` is required. `voice`, `rate` and `pitch` fall back to the
 * module-level defaults when they are omitted or empty.
 */
export interface TtsOptions {
  /** Text to synthesize. */
  text: string
  /** NOTE(review): not read by any function in this part of the file — confirm whether it is still needed. */
  lang?: string
  /** Voice name handed to Edge TTS, e.g. `zh-CN-XiaoxiaoNeural`. */
  voice?: string
  /** Rate string handed to Edge TTS, e.g. `+4%`. */
  rate?: string
  /** Pitch string handed to Edge TTS, e.g. `+12Hz`. */
  pitch?: string
}
|
|
|
|
|
|
|
|
|
|
/**
 * Synthesize `opts.text` with Edge TTS and return the generated audio bytes.
 *
 * The audio is written to a uniquely named `tts-<uuid>.mp3` file in the OS
 * temp directory, read back into memory, and the file is then removed.
 *
 * @param opts - Text plus optional voice/rate/pitch overrides; an override
 *   that is missing or empty falls back to the module-level default.
 * @returns The full contents of the generated audio file.
 * @throws Rejects with whatever `ttsPromise` or `readFile` rejects with.
 */
export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
  const outputPath = join(tmpdir(), `tts-${randomUUID()}.mp3`)

  try {
    const synthesizer = new EdgeTTS({
      voice: opts.voice || FIXED_VOICE,
      rate: opts.rate || FIXED_RATE,
      pitch: opts.pitch || FIXED_PITCH,
      timeout: 15000,
    })

    await synthesizer.ttsPromise(opts.text, outputPath)
    return await readFile(outputPath)
  } finally {
    // Best-effort cleanup: deliberately not awaited, and any failure (for
    // example the file never having been created) is ignored.
    void unlink(outputPath).catch(() => {})
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Generate speech for `opts` through the Edge engine and report which engine
 * produced it.
 *
 * @param opts - Synthesis options, forwarded unchanged to `edgeTts`.
 * @returns The audio buffer together with the engine label `'edge'`.
 * @throws Rejects with whatever `edgeTts` rejects with; nothing is logged in
 *   that case.
 */
export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> {
  const engine = 'edge'

  // Resolve the effective settings up front purely for the debug log below;
  // `edgeTts` applies the same fallbacks itself.
  const logFields = {
    engine,
    voice: opts.voice || FIXED_VOICE,
    rate: opts.rate || FIXED_RATE,
    pitch: opts.pitch || FIXED_PITCH,
  }

  const audio = await edgeTts(opts)
  logger.debug(logFields, 'TTS generated via Edge')

  return { audio, engine }
}
|
2026-05-10 20:08:38 +08:00
|
|
|
|
|
|
|
|
/**
 * Convert a speed multiplier (0.5-2.0) to an Edge TTS rate string.
 *
 * Edge TTS rate format: "+/-NN%", expressed relative to normal speed, so a
 * multiplier of 1 maps to "+0%", 1.5 to "+50%" and 0.5 to "-50%".
 *
 * @param speed - Speed multiplier where 1 means unchanged. Values outside
 *   0.5-2.0 are converted as-is, without clamping.
 * @returns The signed percentage string. A non-finite `speed` (`NaN`,
 *   `±Infinity`) yields the neutral "+0%" so the result always matches the
 *   rate format.
 */
export function speedToEdgeRate(speed: number): string {
  // Without this guard the result would be "NaN%" or "+Infinity%".
  if (!Number.isFinite(speed)) {
    return '+0%'
  }

  const percent = Math.round((speed - 1) * 100)
  // Negative numbers already carry their "-" sign; only "+" must be added.
  return percent >= 0 ? `+${percent}%` : `${percent}%`
}
|
|
|
|
|
|
|
|
|
|
/**
 * Request body in the OpenAI TTS format: { model, input, voice, speed }.
 *
 * `openaiCompatibleTts` converts this shape into the internal `TtsOptions`.
 */
export interface OpenaiTtsRequest {
  /** Accepted for format compatibility; `openaiCompatibleTts` does not read it. */
  model?: string
  /** Text to synthesize. */
  input: string
  /** Voice name; forwarded unchanged as the Edge TTS voice when non-empty. */
  voice?: string
  /** Speed multiplier where 1 means unchanged; converted to an Edge rate string. */
  speed?: number
}
|
|
|
|
|
|
|
|
|
|
/**
 * Serve an OpenAI-format TTS request by translating it to `TtsOptions` and
 * delegating to `textToSpeech`.
 *
 * Mapping: `input` becomes the text, `voice` is passed through (default voice
 * when missing or empty), `speed` is converted to a rate string (default rate
 * when missing or 0), and the pitch is always the default. `model` is ignored.
 *
 * NOTE(review): `voice` is forwarded verbatim, so it presumably has to be a
 * voice name Edge TTS recognises rather than an OpenAI one — confirm with
 * callers.
 *
 * @param body - The OpenAI-style request.
 * @returns The audio buffer and the engine label from `textToSpeech`.
 */
export async function openaiCompatibleTts(
  body: OpenaiTtsRequest,
): Promise<{ audio: Buffer; engine: string }> {
  let rate: string
  if (body.speed) {
    rate = speedToEdgeRate(body.speed)
  } else {
    // Missing, 0 or NaN speed: use the module default rate.
    rate = FIXED_RATE
  }

  const options: TtsOptions = {
    text: body.input,
    voice: body.voice || FIXED_VOICE,
    rate,
    pitch: FIXED_PITCH,
  }
  return textToSpeech(options)
}
|