add hermes tts playback (#541)
This commit is contained in:
+2
-1
@@ -67,6 +67,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"eventsource": "^4.1.0",
|
"eventsource": "^4.1.0",
|
||||||
"js-tiktoken": "^1.0.21",
|
"js-tiktoken": "^1.0.21",
|
||||||
|
"node-edge-tts": "^1.2.10",
|
||||||
"node-pty": "^1.1.0",
|
"node-pty": "^1.1.0",
|
||||||
"socket.io": "^4.8.3",
|
"socket.io": "^4.8.3",
|
||||||
"socket.io-client": "^4.8.3"
|
"socket.io-client": "^4.8.3"
|
||||||
@@ -124,4 +125,4 @@
|
|||||||
"vue-tsc": "^3.2.6",
|
"vue-tsc": "^3.2.6",
|
||||||
"ws": "^8.20.0"
|
"ws": "^8.20.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
/**
 * Payload serialized as the JSON body of the Hermes TTS request.
 */
export interface TtsOptions {
  /** Text to be synthesized into speech. */
  text: string
  /** Optional language tag, e.g. 'zh-CN'. */
  lang?: string
}
|
||||||
|
|
||||||
|
/**
 * Request synthesized speech for `opts` from the Hermes server.
 *
 * The server base URL and API key are read from localStorage
 * (`hermes_server_url`, `hermes_api_key`); both default to an empty string.
 *
 * @param opts - Text (and optional language) to synthesize; sent as the JSON body.
 * @returns The audio payload as a Blob plus the engine name reported in the
 *          `X-TTS-Engine` response header (`'unknown'` when the header is absent).
 * @throws Error when the response status is not OK. The message always starts
 *         with `TTS request failed: <status>`; if the server answered with a
 *         JSON body containing a string `error` field, it is appended in parentheses.
 */
export async function generateSpeech(opts: TtsOptions): Promise<{ audio: Blob; engine: string }> {
  // A stored base URL ending in "/" would otherwise produce "//api/hermes/tts".
  const baseUrl = (localStorage.getItem('hermes_server_url') || '').replace(/\/+$/, '')
  const apiKey = localStorage.getItem('hermes_api_key') || ''

  const res = await fetch(`${baseUrl}/api/hermes/tts`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(opts),
  })

  if (!res.ok) {
    let detail = ''
    try {
      const payload = (await res.json()) as { error?: unknown } | null
      if (typeof payload?.error === 'string' && payload.error) {
        detail = ` (${payload.error})`
      }
    } catch {
      // Body was empty or not JSON; report the status code alone.
    }
    throw new Error(`TTS request failed: ${res.status}${detail}`)
  }

  const audio = await res.blob()
  const engine = res.headers.get('X-TTS-Engine') || 'unknown'
  return { audio, engine }
}
|
||||||
|
|
||||||
|
/**
 * Start playing an audio blob and return the element that drives playback.
 *
 * A temporary object URL is created for the blob and revoked once the element
 * fires `ended` or `error`. The cleanup is registered with `addEventListener`
 * rather than the `on*` properties, so callers can assign their own
 * `onended` / `onerror` handlers without removing it.
 *
 * If `play()` is rejected with `NotAllowedError` (playback was not permitted,
 * e.g. by an autoplay policy), an `error` event is dispatched on the element so
 * that a caller's error handler can react. Other rejections are ignored here:
 * `AbortError` means playback was interrupted, and load failures are expected
 * to surface through the element's own `error` event.
 *
 * @param blob - Audio data to play.
 * @returns The audio element; playback has already been requested.
 */
export function playAudioBlob(blob: Blob): HTMLAudioElement {
  const url = URL.createObjectURL(blob)
  const audio = new Audio(url)

  const release = () => URL.revokeObjectURL(url)
  audio.addEventListener('ended', release, { once: true })
  audio.addEventListener('error', release, { once: true })

  audio.play().catch((err: unknown) => {
    if (err instanceof DOMException && err.name === 'NotAllowedError') {
      // No native error event accompanies a blocked play(); raise one so callers notice.
      audio.dispatchEvent(new Event('error'))
    }
  })

  return audio
}
|
||||||
@@ -370,38 +370,7 @@ function handleSpeechToggle() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
const content = props.message.content || ''
|
const content = props.message.content || ''
|
||||||
speech.toggle(props.message.id, content, getSpeechOptions())
|
speech.toggle(props.message.id, content)
|
||||||
}
|
|
||||||
|
|
||||||
function getSpeechOptions() {
|
|
||||||
// 尝试获取男声语音包
|
|
||||||
const allVoices = speech.getAllVoices()
|
|
||||||
let maleVoice: SpeechSynthesisVoice | null = null
|
|
||||||
|
|
||||||
// 查找可能的男声语音包
|
|
||||||
for (const voice of allVoices) {
|
|
||||||
const name = voice.name.toLowerCase()
|
|
||||||
// 常见男声关键词
|
|
||||||
if (name.includes('male') || name.includes('david') || name.includes('daniel') ||
|
|
||||||
name.includes('mark') || name.includes('yaoyao') || name.includes('google')) {
|
|
||||||
// 优先选择中文男声
|
|
||||||
if (voice.lang.startsWith('zh')) {
|
|
||||||
maleVoice = voice
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// 如果没有找到中文男声,记住第一个男声
|
|
||||||
if (!maleVoice) {
|
|
||||||
maleVoice = voice
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 快速男声:语速快、音调低
|
|
||||||
return {
|
|
||||||
pitch: 0.5, // 低沉
|
|
||||||
rate: 1.2, // 快速
|
|
||||||
voice: maleVoice || undefined, // 使用男声,如果没有就用默认
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 监听自动播放事件
|
// 监听自动播放事件
|
||||||
@@ -411,7 +380,7 @@ onMounted(() => {
|
|||||||
autoPlayHandler = (e: Event) => {
|
autoPlayHandler = (e: Event) => {
|
||||||
const customEvent = e as CustomEvent<{ messageId: string; content: string }>
|
const customEvent = e as CustomEvent<{ messageId: string; content: string }>
|
||||||
if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
|
if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
|
||||||
speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '', getSpeechOptions())
|
speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
window.addEventListener('auto-play-speech', autoPlayHandler)
|
window.addEventListener('auto-play-speech', autoPlayHandler)
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
import { ref, computed, onUnmounted } from 'vue'
|
import { ref, computed, onUnmounted } from 'vue'
|
||||||
|
import { generateSpeech, playAudioBlob } from '@/api/hermes/tts'
|
||||||
|
|
||||||
export interface SpeechOptions {
|
export interface SpeechOptions {
|
||||||
rate?: number // 语速 0.1-10,默认 1
|
|
||||||
pitch?: number // 音调 0-2,默认 1
|
|
||||||
volume?: number // 音量 0-1,默认 1
|
|
||||||
voice?: SpeechSynthesisVoice | null
|
|
||||||
lang?: string // 语言 'zh-CN', 'en-US' 等
|
lang?: string // 语言 'zh-CN', 'en-US' 等
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -13,6 +10,7 @@ export interface SpeechState {
|
|||||||
isPaused: boolean
|
isPaused: boolean
|
||||||
currentMessageId: string | null
|
currentMessageId: string | null
|
||||||
progress: number // 当前进度(字符数)
|
progress: number // 当前进度(字符数)
|
||||||
|
engine: 'none' | 'tts' | 'browser' // 当前使用的引擎
|
||||||
}
|
}
|
||||||
|
|
||||||
interface SpeechQueueItem {
|
interface SpeechQueueItem {
|
||||||
@@ -22,7 +20,8 @@ interface SpeechQueueItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Web Speech API 语音播放 Composable
|
* 语音播放 Composable
|
||||||
|
* Prefers the server-side TTS endpoint (currently Edge TTS only); falls back to the browser's speechSynthesis when it fails
|
||||||
*/
|
*/
|
||||||
export function useSpeech() {
|
export function useSpeech() {
|
||||||
const synth = window.speechSynthesis
|
const synth = window.speechSynthesis
|
||||||
@@ -32,9 +31,11 @@ export function useSpeech() {
|
|||||||
isPaused: false,
|
isPaused: false,
|
||||||
currentMessageId: null,
|
currentMessageId: null,
|
||||||
progress: 0,
|
progress: 0,
|
||||||
|
engine: 'none',
|
||||||
})
|
})
|
||||||
|
|
||||||
let utterance: SpeechSynthesisUtterance | null = null
|
let utterance: SpeechSynthesisUtterance | null = null
|
||||||
|
let currentAudio: HTMLAudioElement | null = null
|
||||||
let playbackToken = 0
|
let playbackToken = 0
|
||||||
const speechQueue: SpeechQueueItem[] = []
|
const speechQueue: SpeechQueueItem[] = []
|
||||||
|
|
||||||
@@ -43,9 +44,8 @@ export function useSpeech() {
|
|||||||
availableVoices.value = synth.getVoices()
|
availableVoices.value = synth.getVoices()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 浏览器会在语音列表变化时触发 voiceschanged 事件
|
|
||||||
synth.addEventListener('voiceschanged', loadVoices)
|
synth.addEventListener('voiceschanged', loadVoices)
|
||||||
loadVoices() // 初始加载
|
loadVoices()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 从文本中提取纯文本内容,过滤代码块、thinking 标签等
|
* 从文本中提取纯文本内容,过滤代码块、thinking 标签等
|
||||||
@@ -66,86 +66,110 @@ export function useSpeech() {
|
|||||||
// 移除 HTML 标签
|
// 移除 HTML 标签
|
||||||
text = text.replace(/<[^>]+>/g, '')
|
text = text.replace(/<[^>]+>/g, '')
|
||||||
|
|
||||||
// 只保留:字母、数字、空格、常用标点、中文
|
|
||||||
// 保留的标点:。!?;,,。!?;:、""''()【】《》
|
|
||||||
// 移除:*# 等特殊符号、表情符号、emoji 等
|
|
||||||
text = text.replace(/[^\p{L}\p{N}\s。!?;,,。!?;:、""''()【】《》\n一-鿿㐀-䶿]/gu, '')
|
text = text.replace(/[^\p{L}\p{N}\s。!?;,,。!?;:、""''()【】《》\n一-鿿㐀-䶿]/gu, '')
|
||||||
|
|
||||||
// 移除多余的空白
|
|
||||||
text = text.replace(/\s+/g, ' ').trim()
|
text = text.replace(/\s+/g, ' ').trim()
|
||||||
|
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 检查浏览器是否支持 Web Speech API
|
|
||||||
*/
|
|
||||||
const isSupported = computed(() => {
|
const isSupported = computed(() => {
|
||||||
return 'speechSynthesis' in window && 'SpeechSynthesisUtterance' in window
|
return 'speechSynthesis' in window && 'SpeechSynthesisUtterance' in window
|
||||||
})
|
})
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取默认语音(优先选择中文)
|
|
||||||
*/
|
|
||||||
function getDefaultVoice(): SpeechSynthesisVoice | null {
|
function getDefaultVoice(): SpeechSynthesisVoice | null {
|
||||||
const voices = availableVoices.value
|
const voices = availableVoices.value
|
||||||
if (voices.length === 0) return null
|
if (voices.length === 0) return null
|
||||||
|
|
||||||
// 优先选择中文语音
|
|
||||||
const zhVoice = voices.find(v => v.lang.startsWith('zh'))
|
const zhVoice = voices.find(v => v.lang.startsWith('zh'))
|
||||||
if (zhVoice) return zhVoice
|
if (zhVoice) return zhVoice
|
||||||
|
|
||||||
// 其次选择英文语音
|
|
||||||
const enVoice = voices.find(v => v.lang.startsWith('en'))
|
const enVoice = voices.find(v => v.lang.startsWith('en'))
|
||||||
if (enVoice) return enVoice
|
if (enVoice) return enVoice
|
||||||
|
|
||||||
// 默认第一个
|
|
||||||
return voices[0]
|
return voices[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取所有可用语音(用于调试)
|
|
||||||
*/
|
|
||||||
function getAllVoices(): SpeechSynthesisVoice[] {
|
|
||||||
return availableVoices.value
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 停止当前播放
|
|
||||||
*/
|
|
||||||
function stop(clearQueue = true) {
|
function stop(clearQueue = true) {
|
||||||
playbackToken += 1
|
playbackToken += 1
|
||||||
if (clearQueue) {
|
if (clearQueue) {
|
||||||
speechQueue.length = 0
|
speechQueue.length = 0
|
||||||
}
|
}
|
||||||
|
// Stop TTS audio
|
||||||
|
if (currentAudio) {
|
||||||
|
currentAudio.pause()
|
||||||
|
currentAudio.src = ''
|
||||||
|
currentAudio = null
|
||||||
|
}
|
||||||
|
// Stop browser speech
|
||||||
if (synth.speaking || synth.pending || synth.paused) {
|
if (synth.speaking || synth.pending || synth.paused) {
|
||||||
synth.cancel()
|
synth.cancel()
|
||||||
}
|
}
|
||||||
if (utterance) {
|
utterance = null
|
||||||
utterance = null
|
|
||||||
}
|
|
||||||
state.value = {
|
state.value = {
|
||||||
isPlaying: false,
|
isPlaying: false,
|
||||||
isPaused: false,
|
isPaused: false,
|
||||||
currentMessageId: null,
|
currentMessageId: null,
|
||||||
progress: 0,
|
progress: 0,
|
||||||
|
engine: 'none',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function speak(messageId: string, text: string, options: SpeechOptions = {}) {
|
// ─── TTS Engine (server-side) ───────────────────────────────
|
||||||
const token = ++playbackToken
|
|
||||||
|
|
||||||
|
/**
 * Speak `text` using audio generated by the server-side TTS endpoint.
 *
 * Playback state is switched to "playing" before the request is sent. If the
 * request or the audio playback fails, the same text is handed to
 * `speakViaBrowser`. Every asynchronous continuation first compares `token`
 * with `playbackToken` and does nothing when they differ.
 *
 * @param messageId - Message the speech belongs to; stored as `currentMessageId`.
 * @param text - Text to synthesize.
 * @param options - Speech options; only `lang` is used here (default 'zh-CN').
 * @param token - Value of `playbackToken` captured when this playback was started.
 */
async function speakViaTts(messageId: string, text: string, options: SpeechOptions, token: number) {
  // Set playing state immediately so UI shows breathing animation right away
  state.value.isPlaying = true
  state.value.isPaused = false
  state.value.currentMessageId = messageId
  state.value.progress = 0
  state.value.engine = 'tts'

  try {
    const lang = options.lang || 'zh-CN'
    const { audio } = await generateSpeech({ text, lang })

    // Playback was superseded or stopped while the request was in flight.
    if (token !== playbackToken) return

    const element = playAudioBlob(audio)
    currentAudio = element

    // Registered as listeners instead of assigning `onended` / `onerror`, so any
    // cleanup handler playAudioBlob installed on the element is not replaced.
    element.addEventListener('ended', () => {
      if (token !== playbackToken) return
      state.value.isPlaying = false
      state.value.isPaused = false
      state.value.currentMessageId = null
      state.value.progress = text.length
      state.value.engine = 'none'
      currentAudio = null
      if (speechQueue.length > 0) {
        setTimeout(playNextQueuedSpeech, 0)
      }
    })

    element.addEventListener(
      'error',
      () => {
        if (token !== playbackToken) return
        // TTS playback failed, fallback to browser
        console.warn('[useSpeech] TTS audio playback error, falling back to browser')
        // Drop the failed element so it is no longer treated as the active audio.
        if (currentAudio === element) {
          currentAudio = null
        }
        speakViaBrowser(messageId, text, options, token)
      },
      // A single failure must trigger a single fallback.
      { once: true },
    )
  } catch (err) {
    if (token !== playbackToken) return
    console.warn('[useSpeech] TTS API failed, falling back to browser:', err)
    speakViaBrowser(messageId, text, options, token)
  }
}
|
||||||
|
|
||||||
|
// ─── Browser Engine (Web Speech API) ────────────────────────
|
||||||
|
|
||||||
|
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) {
|
||||||
utterance = new SpeechSynthesisUtterance(text)
|
utterance = new SpeechSynthesisUtterance(text)
|
||||||
const activeUtterance = utterance
|
const activeUtterance = utterance
|
||||||
const activeText = text
|
|
||||||
|
|
||||||
// 设置语音参数
|
utterance.rate = 1
|
||||||
utterance.rate = options.rate ?? 1
|
utterance.pitch = 1
|
||||||
utterance.pitch = options.pitch ?? 1
|
utterance.volume = 1
|
||||||
utterance.volume = options.volume ?? 1
|
utterance.voice = getDefaultVoice()
|
||||||
utterance.voice = options.voice ?? getDefaultVoice()
|
|
||||||
|
|
||||||
console.log('[useSpeech] Selected voice:', utterance.voice?.name, utterance.voice?.lang)
|
|
||||||
|
|
||||||
if (options.lang) {
|
if (options.lang) {
|
||||||
utterance.lang = options.lang
|
utterance.lang = options.lang
|
||||||
@@ -153,15 +177,11 @@ export function useSpeech() {
|
|||||||
utterance.lang = utterance.voice.lang
|
utterance.lang = utterance.voice.lang
|
||||||
}
|
}
|
||||||
|
|
||||||
// 事件监听
|
state.value.engine = 'browser'
|
||||||
utterance.onstart = () => {
|
state.value.isPlaying = true
|
||||||
if (token !== playbackToken || utterance !== activeUtterance) return
|
state.value.isPaused = false
|
||||||
console.log('[useSpeech] onstart fired')
|
state.value.currentMessageId = messageId
|
||||||
state.value.isPlaying = true
|
state.value.progress = 0
|
||||||
state.value.isPaused = false
|
|
||||||
state.value.currentMessageId = messageId
|
|
||||||
state.value.progress = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
utterance.onboundary = (event) => {
|
utterance.onboundary = (event) => {
|
||||||
if (token !== playbackToken || utterance !== activeUtterance) return
|
if (token !== playbackToken || utterance !== activeUtterance) return
|
||||||
@@ -172,66 +192,62 @@ export function useSpeech() {
|
|||||||
|
|
||||||
utterance.onend = () => {
|
utterance.onend = () => {
|
||||||
if (token !== playbackToken || utterance !== activeUtterance) return
|
if (token !== playbackToken || utterance !== activeUtterance) return
|
||||||
console.log('[useSpeech] onend fired')
|
|
||||||
state.value.isPlaying = false
|
state.value.isPlaying = false
|
||||||
state.value.isPaused = false
|
state.value.isPaused = false
|
||||||
state.value.currentMessageId = null
|
state.value.currentMessageId = null
|
||||||
state.value.progress = activeText.length
|
state.value.progress = text.length
|
||||||
|
state.value.engine = 'none'
|
||||||
utterance = null
|
utterance = null
|
||||||
if (speechQueue.length > 0) {
|
if (speechQueue.length > 0) {
|
||||||
window.setTimeout(playNextQueuedSpeech, 0)
|
setTimeout(playNextQueuedSpeech, 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
utterance.onerror = (event) => {
|
utterance.onerror = () => {
|
||||||
if (token !== playbackToken || utterance !== activeUtterance) return
|
if (token !== playbackToken || utterance !== activeUtterance) return
|
||||||
console.error('[useSpeech] Speech synthesis error:', event.error)
|
|
||||||
state.value.isPlaying = false
|
state.value.isPlaying = false
|
||||||
state.value.isPaused = false
|
state.value.isPaused = false
|
||||||
state.value.currentMessageId = null
|
state.value.currentMessageId = null
|
||||||
|
state.value.engine = 'none'
|
||||||
utterance = null
|
utterance = null
|
||||||
if (speechQueue.length > 0) {
|
if (speechQueue.length > 0) {
|
||||||
window.setTimeout(playNextQueuedSpeech, 0)
|
setTimeout(playNextQueuedSpeech, 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 开始播放
|
|
||||||
console.log('[useSpeech] Calling synth.speak()')
|
|
||||||
synth.speak(utterance)
|
synth.speak(utterance)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Unified speak ──────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Begin speaking `text` for `messageId` under a freshly issued playback token.
 *
 * @param messageId - Message the speech belongs to.
 * @param text - Already-extracted readable text.
 * @param options - Speech options forwarded to the engine.
 */
function speak(messageId: string, text: string, options: SpeechOptions = {}) {
  // Bump the shared counter; callbacks holding an older token compare against it and bail out.
  playbackToken += 1

  // Server-side TTS is tried first; speakViaTts performs the browser fallback itself.
  speakViaTts(messageId, text, options, playbackToken)
}
|
||||||
|
|
||||||
function playNextQueuedSpeech() {
|
function playNextQueuedSpeech() {
|
||||||
if (state.value.isPlaying || state.value.isPaused || synth.speaking || synth.pending) return
|
if (state.value.isPlaying || state.value.isPaused) return
|
||||||
const next = speechQueue.shift()
|
const next = speechQueue.shift()
|
||||||
if (!next) return
|
if (!next) return
|
||||||
|
|
||||||
const text = extractReadableText(next.content)
|
const text = extractReadableText(next.content)
|
||||||
if (!text) {
|
if (!text) {
|
||||||
window.setTimeout(playNextQueuedSpeech, 0)
|
setTimeout(playNextQueuedSpeech, 0)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[useSpeech] Playing queued text:', text.substring(0, 50) + '...')
|
|
||||||
speak(next.messageId, text, next.options)
|
speak(next.messageId, text, next.options)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 播放文本
|
|
||||||
*/
|
|
||||||
function play(messageId: string, content: string, options: SpeechOptions = {}) {
|
function play(messageId: string, content: string, options: SpeechOptions = {}) {
|
||||||
if (!isSupported.value) {
|
// If playing other message, stop first
|
||||||
console.warn('[useSpeech] Speech synthesis not supported')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('[useSpeech] play called:', messageId)
|
|
||||||
|
|
||||||
// 如果正在播放其他消息,先停止
|
|
||||||
if (state.value.currentMessageId && state.value.currentMessageId !== messageId) {
|
if (state.value.currentMessageId && state.value.currentMessageId !== messageId) {
|
||||||
stop()
|
stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 如果已经在播放这条消息,暂停/恢复
|
// Toggle play/pause for same message
|
||||||
if (state.value.currentMessageId === messageId) {
|
if (state.value.currentMessageId === messageId) {
|
||||||
if (state.value.isPaused) {
|
if (state.value.isPaused) {
|
||||||
resume()
|
resume()
|
||||||
@@ -241,59 +257,40 @@ export function useSpeech() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 提取可读文本
|
|
||||||
const text = extractReadableText(content)
|
const text = extractReadableText(content)
|
||||||
if (!text) {
|
if (!text) return
|
||||||
console.warn('[useSpeech] No readable text found')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('[useSpeech] Playing text:', text.substring(0, 50) + '...')
|
|
||||||
|
|
||||||
// 停止当前播放
|
|
||||||
stop()
|
stop()
|
||||||
speak(messageId, text, options)
|
speak(messageId, text, options)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 自动播放入队:不打断当前语音,按完成顺序依次播放。
|
|
||||||
*/
|
|
||||||
function enqueue(messageId: string, content: string, options: SpeechOptions = {}) {
|
function enqueue(messageId: string, content: string, options: SpeechOptions = {}) {
|
||||||
if (!isSupported.value) {
|
if (!extractReadableText(content)) return
|
||||||
console.warn('[useSpeech] Speech synthesis not supported')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if (!extractReadableText(content)) {
|
|
||||||
console.warn('[useSpeech] No readable text found')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
speechQueue.push({ messageId, content, options })
|
speechQueue.push({ messageId, content, options })
|
||||||
playNextQueuedSpeech()
|
playNextQueuedSpeech()
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 暂停播放
|
|
||||||
*/
|
|
||||||
function pause() {
|
function pause() {
|
||||||
if (synth.speaking && !state.value.isPaused) {
|
if (state.value.engine === 'tts' && currentAudio) {
|
||||||
|
currentAudio.pause()
|
||||||
|
state.value.isPaused = true
|
||||||
|
} else if (synth.speaking && !state.value.isPaused) {
|
||||||
synth.pause()
|
synth.pause()
|
||||||
state.value.isPaused = true
|
state.value.isPaused = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 恢复播放
|
|
||||||
*/
|
|
||||||
function resume() {
|
function resume() {
|
||||||
if (state.value.isPaused) {
|
if (state.value.isPaused) {
|
||||||
synth.resume()
|
if (state.value.engine === 'tts' && currentAudio) {
|
||||||
|
currentAudio.play()
|
||||||
|
} else {
|
||||||
|
synth.resume()
|
||||||
|
}
|
||||||
state.value.isPaused = false
|
state.value.isPaused = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 切换播放/暂停
|
|
||||||
*/
|
|
||||||
function toggle(messageId: string, content: string, options: SpeechOptions = {}) {
|
function toggle(messageId: string, content: string, options: SpeechOptions = {}) {
|
||||||
if (state.value.currentMessageId === messageId && state.value.isPlaying) {
|
if (state.value.currentMessageId === messageId && state.value.isPlaying) {
|
||||||
if (state.value.isPaused) {
|
if (state.value.isPaused) {
|
||||||
@@ -306,22 +303,20 @@ export function useSpeech() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 清理
|
|
||||||
onUnmounted(() => {
|
onUnmounted(() => {
|
||||||
stop()
|
stop()
|
||||||
synth.removeEventListener('voiceschanged', loadVoices)
|
synth.removeEventListener('voiceschanged', loadVoices)
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
// 状态
|
|
||||||
isSupported,
|
isSupported,
|
||||||
availableVoices,
|
availableVoices,
|
||||||
isPlaying: computed(() => state.value.isPlaying),
|
isPlaying: computed(() => state.value.isPlaying),
|
||||||
isPaused: computed(() => state.value.isPaused),
|
isPaused: computed(() => state.value.isPaused),
|
||||||
currentMessageId: computed(() => state.value.currentMessageId),
|
currentMessageId: computed(() => state.value.currentMessageId),
|
||||||
progress: computed(() => state.value.progress),
|
progress: computed(() => state.value.progress),
|
||||||
|
engine: computed(() => state.value.engine),
|
||||||
|
|
||||||
// 方法
|
|
||||||
play,
|
play,
|
||||||
pause,
|
pause,
|
||||||
resume,
|
resume,
|
||||||
@@ -329,12 +324,10 @@ export function useSpeech() {
|
|||||||
toggle,
|
toggle,
|
||||||
enqueue,
|
enqueue,
|
||||||
getDefaultVoice,
|
getDefaultVoice,
|
||||||
getAllVoices,
|
|
||||||
extractReadableText,
|
extractReadableText,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 单例模式,全局共享一个语音实例
|
|
||||||
let globalSpeech: ReturnType<typeof useSpeech> | null = null
|
let globalSpeech: ReturnType<typeof useSpeech> | null = null
|
||||||
|
|
||||||
export function useGlobalSpeech() {
|
export function useGlobalSpeech() {
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
import type { Context } from 'koa'
|
||||||
|
import { textToSpeech } from '../../services/hermes/tts'
|
||||||
|
|
||||||
|
/**
 * POST handler that synthesizes speech for the `text` field of the request body.
 *
 * Responds with 400 and a JSON `{ error }` body when `text` is missing, not a
 * string, blank, or longer than 5000 characters. On success the response body
 * is the audio buffer with `Content-Type: audio/mpeg` and the producing engine
 * in the `X-TTS-Engine` header. Errors thrown by `textToSpeech` are not caught
 * here and propagate to the surrounding middleware.
 *
 * @param ctx - Koa request context.
 */
export async function generate(ctx: Context) {
  // Guard against an absent body so destructuring cannot throw.
  const { text, lang } = (ctx.request.body ?? {}) as {
    text?: unknown
    lang?: unknown
  }

  // Blank text is rejected up front instead of being sent to the synthesizer.
  if (typeof text !== 'string' || !text.trim()) {
    ctx.status = 400
    ctx.body = { error: 'text is required' }
    return
  }

  if (text.length > 5000) {
    ctx.status = 400
    ctx.body = { error: 'text is too long (max 5000 characters)' }
    return
  }

  // Only forward `lang` when it really is a string; anything else is treated as absent.
  const requestedLang = typeof lang === 'string' ? lang : undefined

  const { audio, engine } = await textToSpeech({ text, lang: requestedLang })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
import Router from '@koa/router'
|
||||||
|
import * as ctrl from '../../controllers/hermes/tts'
|
||||||
|
|
||||||
|
export const ttsRoutes = new Router()
|
||||||
|
|
||||||
|
ttsRoutes.post('/api/hermes/tts', ctrl.generate)
|
||||||
@@ -26,6 +26,7 @@ import { downloadRoutes } from './hermes/download'
|
|||||||
import { jobRoutes } from './hermes/jobs'
|
import { jobRoutes } from './hermes/jobs'
|
||||||
import { cronHistoryRoutes } from './hermes/cron-history'
|
import { cronHistoryRoutes } from './hermes/cron-history'
|
||||||
import { kanbanRoutes } from './hermes/kanban'
|
import { kanbanRoutes } from './hermes/kanban'
|
||||||
|
import { ttsRoutes } from './hermes/tts'
|
||||||
import { proxyRoutes, proxyMiddleware } from './hermes/proxy'
|
import { proxyRoutes, proxyMiddleware } from './hermes/proxy'
|
||||||
import { groupChatRoutes, setGroupChatServer } from './hermes/group-chat'
|
import { groupChatRoutes, setGroupChatServer } from './hermes/group-chat'
|
||||||
|
|
||||||
@@ -66,6 +67,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
|
|||||||
app.use(jobRoutes.routes()) // Must be before proxy
|
app.use(jobRoutes.routes()) // Must be before proxy
|
||||||
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
||||||
app.use(kanbanRoutes.routes()) // Must be before proxy
|
app.use(kanbanRoutes.routes()) // Must be before proxy
|
||||||
|
app.use(ttsRoutes.routes()) // Must be before proxy
|
||||||
app.use(proxyRoutes.routes())
|
app.use(proxyRoutes.routes())
|
||||||
|
|
||||||
// Proxy catch-all middleware (must be last)
|
// Proxy catch-all middleware (must be last)
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
import { EdgeTTS } from 'node-edge-tts'
|
||||||
|
import { tmpdir } from 'os'
|
||||||
|
import { join } from 'path'
|
||||||
|
import { readFile, unlink } from 'fs/promises'
|
||||||
|
import { randomUUID } from 'crypto'
|
||||||
|
import { logger } from '../logger'
|
||||||
|
|
||||||
|
const FIXED_VOICE = 'zh-CN-XiaoxiaoNeural'
|
||||||
|
const FIXED_RATE = '+4%'
|
||||||
|
const FIXED_PITCH = '+12Hz'
|
||||||
|
|
||||||
|
/**
 * Input accepted by the TTS service functions in this module.
 */
export interface TtsOptions {
  /** Text to synthesize. */
  text: string
  /**
   * Optional language tag supplied by the caller.
   * NOTE: `edgeTts` in this file synthesizes with a fixed voice and does not read this field.
   */
  lang?: string
}
|
||||||
|
|
||||||
|
/**
 * Synthesize `opts.text` with Edge TTS and return the resulting audio bytes.
 *
 * The library call writes its output to a file path, so the audio is staged in
 * a uniquely named file under the OS temp directory, read back, and the file is
 * then removed. Voice, rate and pitch are the fixed module-level constants;
 * `opts.lang` is not used.
 *
 * @param opts - Synthesis input; only `text` is read.
 * @returns The contents of the generated audio file.
 * @throws Whatever `ttsPromise` or `readFile` rejects with.
 */
export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
  const tmpFile = join(tmpdir(), `tts-${randomUUID()}.mp3`)

  try {
    const tts = new EdgeTTS({
      voice: FIXED_VOICE,
      rate: FIXED_RATE,
      pitch: FIXED_PITCH,
      timeout: 15000, // presumably milliseconds — confirm against node-edge-tts
    })

    await tts.ttsPromise(opts.text, tmpFile)
    return await readFile(tmpFile)
  } finally {
    // Best-effort cleanup: deliberately not awaited, so it neither delays the
    // result nor replaces an error thrown above.
    unlink(tmpFile).catch((err: unknown) => {
      // A missing file just means synthesis failed before anything was written.
      if ((err as { code?: unknown } | null)?.code === 'ENOENT') return
      logger.warn({ err, tmpFile }, 'Failed to remove temporary TTS file')
    })
  }
}
|
||||||
|
|
||||||
|
/**
 * Service entry point for speech synthesis.
 *
 * Delegates to `edgeTts` and labels the result with the engine name `'edge'`.
 *
 * @param opts - Synthesis input forwarded unchanged to `edgeTts`.
 * @returns The audio buffer together with the name of the engine that produced it.
 */
export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> {
  const engine = 'edge'
  const audio = await edgeTts(opts)

  logger.debug({ engine, voice: FIXED_VOICE, rate: FIXED_RATE, pitch: FIXED_PITCH }, 'TTS generated via Edge')

  return { audio, engine }
}
|
||||||
Reference in New Issue
Block a user