feat: add voice playback settings with 4-provider support (#608)
Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers. Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
@@ -3,6 +3,14 @@ import { generateSpeech, playAudioBlob } from '@/api/hermes/tts'
|
||||
|
||||
/** Options accepted by the browser (Web Speech API) playback path. */
export interface SpeechOptions {
  /** Language tag for the utterance, e.g. 'zh-CN' or 'en-US'. */
  lang?: string
  /** Name of the WebSpeech voice to use; the default voice is used when it is not found. */
  voiceName?: string
}
|
||||
|
||||
/** Connection and voice parameters for an OpenAI-compatible TTS endpoint. */
export interface OpenaiTtsOptions {
  /** Base URL of the API; `openaiPlay` strips trailing slashes and appends `/audio/speech`. */
  baseUrl: string
  /** Optional API key; when set, `openaiPlay` sends it as a `Bearer` Authorization header. */
  apiKey?: string
  /** Model name; `openaiPlay` falls back to 'tts-1' when omitted or empty. */
  model?: string
  /** Voice name; `openaiPlay` falls back to 'alloy' when omitted or empty. */
  voice?: string
}
|
||||
|
||||
export interface SpeechState {
|
||||
@@ -39,6 +47,11 @@ export function useSpeech() {
|
||||
let playbackToken = 0
|
||||
const speechQueue: SpeechQueueItem[] = []
|
||||
|
||||
// Playback state for the custom TTS providers (OpenAI / Custom / Edge)
// True from the moment a custom TTS playback is requested until it ends or fails.
const isCustomPlaying = ref<boolean>(false)
// True while the current custom TTS playback is paused.
const isCustomPaused = ref<boolean>(false)
// Id of the message being spoken via custom TTS, or null when none.
const currentCustomMessageId = ref<string | null>(null)
|
||||
|
||||
// 加载可用语音列表
|
||||
function loadVoices() {
|
||||
availableVoices.value = synth.getVoices()
|
||||
@@ -162,14 +175,25 @@ export function useSpeech() {
|
||||
|
||||
// ─── Browser Engine (Web Speech API) ────────────────────────
|
||||
|
||||
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) {
|
||||
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token?: number) {
|
||||
token = token || ++playbackToken
|
||||
utterance = new SpeechSynthesisUtterance(text)
|
||||
const activeUtterance = utterance
|
||||
|
||||
utterance.rate = 1
|
||||
utterance.pitch = 1
|
||||
utterance.volume = 1
|
||||
utterance.voice = getDefaultVoice()
|
||||
|
||||
// 使用指定的音色(如果有),否则用默认
|
||||
if (options.voiceName) {
|
||||
const voice = availableVoices.value.find(v => v.name === options.voiceName)
|
||||
if (voice) {
|
||||
utterance.voice = voice
|
||||
}
|
||||
}
|
||||
if (!utterance.voice) {
|
||||
utterance.voice = getDefaultVoice()
|
||||
}
|
||||
|
||||
if (options.lang) {
|
||||
utterance.lang = options.lang
|
||||
@@ -218,6 +242,115 @@ export function useSpeech() {
|
||||
synth.speak(utterance)
|
||||
}
|
||||
|
||||
// ─── OpenAI-compatible TTS Engine ────────────────────────────
|
||||
|
||||
// Audio element of the custom TTS clip currently loaded, or null when there is none.
// openaiPlay assigns it; openaiToggle pauses, resumes, or discards it.
let customAudio: HTMLAudioElement | null = null
|
||||
|
||||
/**
 * Synthesizes `content` through an OpenAI-compatible `/audio/speech` endpoint
 * and plays the returned audio.
 *
 * Each call takes a fresh `playbackToken`; if another playback bumps the token
 * while this one is still fetching or playing, this call stops touching the
 * shared state and returns quietly.
 *
 * @param messageId Id of the message being read aloud; exposed via `currentCustomMessageId`.
 * @param content Raw message content; only the text returned by `extractReadableText` is spoken.
 * @param opts Endpoint, credentials, model and voice to use.
 * @throws Re-throws request and playback errors after logging them, unless this
 *   playback has already been superseded by a newer one.
 */
async function openaiPlay(
  messageId: string,
  content: string,
  opts: OpenaiTtsOptions,
): Promise<void> {
  const text = extractReadableText(content)
  if (!text) return

  // A clip from an earlier call may still be loaded. Stop it and free its blob
  // URL so two clips never overlap and the old blob can be collected.
  const previous = customAudio
  if (previous) {
    previous.onended = null
    previous.onerror = null
    previous.pause()
    if (previous.src.startsWith('blob:')) URL.revokeObjectURL(previous.src)
    customAudio = null
  }

  const token = ++playbackToken

  const resetCustomState = (): void => {
    isCustomPlaying.value = false
    isCustomPaused.value = false
    currentCustomMessageId.value = null
  }

  isCustomPlaying.value = true
  isCustomPaused.value = false
  currentCustomMessageId.value = messageId

  const url = `${opts.baseUrl.replace(/\/+$/, '')}/audio/speech`
  // `||` (not `??`) on purpose: an empty string from the settings form also falls back.
  const body: Record<string, string> = {
    model: opts.model || 'tts-1',
    input: text,
    voice: opts.voice || 'alloy',
  }

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  if (opts.apiKey) {
    headers['Authorization'] = `Bearer ${opts.apiKey}`
  }

  try {
    const res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
    })

    // Superseded while the request was in flight: nothing was allocated yet.
    if (token !== playbackToken) return

    if (!res.ok) {
      const errText = await res.text().catch(() => '')
      throw new Error(`OpenAI TTS 返回 ${res.status}: ${errText || res.statusText}`)
    }

    const audioBlob = await res.blob()
    if (token !== playbackToken) return

    const audioUrl = URL.createObjectURL(audioBlob)
    const audio = new Audio(audioUrl)
    customAudio = audio

    // Frees the blob URL and drops the shared reference, but only if it still
    // points at this clip (a newer playback may have replaced it).
    const releaseAudio = (): void => {
      URL.revokeObjectURL(audioUrl)
      if (customAudio === audio) customAudio = null
    }

    audio.onended = () => {
      // Always free the blob, even when superseded; only the current playback
      // is allowed to reset the shared state.
      releaseAudio()
      if (token !== playbackToken) return
      resetCustomState()
    }

    audio.onerror = () => {
      releaseAudio()
      if (token !== playbackToken) return
      console.warn('[useSpeech] Custom TTS audio playback error')
      resetCustomState()
    }

    try {
      await audio.play()
    } catch (playErr) {
      // play() rejects e.g. when autoplay is blocked; neither onended nor
      // onerror is guaranteed to fire then, so release the clip here.
      releaseAudio()
      throw playErr
    }
  } catch (err) {
    if (token !== playbackToken) return
    console.error('[useSpeech] OpenAI TTS 请求失败:', err)
    resetCustomState()
    throw err
  }
}
|
||||
|
||||
/**
 * Play/pause toggle for custom TTS playback of a message.
 *
 * - If `messageId` is the message currently playing: pauses it, or resumes it
 *   when it is already paused.
 * - Otherwise: stops any other speech, discards the loaded custom clip and
 *   starts a new `openaiPlay` for this message.
 *
 * @param messageId Id of the message whose playback is toggled.
 * @param content Raw message content handed to `openaiPlay` when starting.
 * @param opts Endpoint options handed to `openaiPlay` when starting.
 */
function openaiToggle(messageId: string, content: string, opts: OpenaiTtsOptions): void {
  const isActiveMessage = currentCustomMessageId.value === messageId && isCustomPlaying.value

  if (!isActiveMessage) {
    // Stop other speech and start new
    stop(false)
    const previous = customAudio
    if (previous) {
      // Detach handlers first so the discarded clip cannot touch shared state,
      // then free its blob URL (pausing alone would leak it).
      previous.onended = null
      previous.onerror = null
      previous.pause()
      if (previous.src.startsWith('blob:')) URL.revokeObjectURL(previous.src)
      customAudio = null
    }
    // openaiPlay logs the failure and resets the playback state before it
    // rethrows; swallow here only to avoid an unhandled promise rejection.
    openaiPlay(messageId, content, opts).catch(() => {})
    return
  }

  const audio = customAudio
  if (isCustomPaused.value) {
    // Resume
    isCustomPaused.value = false
    if (audio) {
      audio.play().catch((err: unknown) => {
        console.warn('[useSpeech] Custom TTS resume failed:', err)
        // Still the same clip and it did not start: reflect that it is paused.
        if (customAudio === audio) isCustomPaused.value = true
      })
    }
  } else {
    // Pause
    if (audio) {
      audio.pause()
    }
    isCustomPaused.value = true
  }
}
|
||||
|
||||
// ─── Unified speak ──────────────────────────────────────────
|
||||
|
||||
function speak(messageId: string, text: string, options: SpeechOptions = {}) {
|
||||
@@ -317,6 +450,11 @@ export function useSpeech() {
|
||||
progress: computed(() => state.value.progress),
|
||||
engine: computed(() => state.value.engine),
|
||||
|
||||
// Custom TTS state
|
||||
isCustomPlaying,
|
||||
isCustomPaused,
|
||||
currentCustomMessageId,
|
||||
|
||||
play,
|
||||
pause,
|
||||
resume,
|
||||
@@ -325,6 +463,13 @@ export function useSpeech() {
|
||||
enqueue,
|
||||
getDefaultVoice,
|
||||
extractReadableText,
|
||||
|
||||
// OpenAI-compatible TTS
|
||||
openaiPlay,
|
||||
openaiToggle,
|
||||
|
||||
// Browser WebSpeech (直接调用避免 Rolldown 树摇)
|
||||
speakViaBrowser,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
import { ref, watch } from 'vue'
|
||||
|
||||
/** Identifier of the text-to-speech backend selected in the voice settings. */
export type TtsProvider = 'webspeech' | 'openai' | 'custom' | 'edge'
|
||||
|
||||
/** Shape of the voice settings object persisted to localStorage. */
export interface VoiceSettingsData {
  /** Currently selected TTS backend. */
  provider: TtsProvider

  // ── WebSpeech ──
  /** WebSpeech voice name; defaults to an empty string. */
  webspeechVoice: string

  // ── OpenAI ──
  /** API key for the OpenAI provider. */
  openaiApiKey: string
  /** Base URL for the OpenAI provider. */
  openaiBaseUrl: string
  /** Model name; defaults to 'tts-1'. */
  openaiModel: string
  /** Voice name; defaults to 'alloy'. */
  openaiVoice: string

  // ── Custom endpoint (OpenAI-compatible) ──
  /** URL of the custom endpoint; a legacy `gptsovitsUrl` is migrated into this field. */
  customUrl: string
  /** API key for the custom endpoint. */
  customApiKey: string

  // ── Edge TTS ──
  /** Legacy adapter URL; `sanitize` blanks it whenever settings are loaded. */
  edgeUrl: string
  /** Edge voice name; defaults to 'zh-CN-XiaoxiaoNeural'. */
  edgeVoice: string
}
|
||||
|
||||
/** localStorage key under which the voice settings are stored as JSON. */
const STORAGE_KEY = 'hermes-tts-settings-v2'
|
||||
|
||||
/**
 * One-time migration of the legacy `hermes-tts-settings` localStorage entry
 * into the current `STORAGE_KEY` format.
 *
 * - The legacy 'gptsovits' provider becomes 'custom', and its `gptsovitsUrl`
 *   is copied to `customUrl` unless a `customUrl` is already present.
 * - The migrated object (defaults overlaid with the legacy values) is written
 *   under `STORAGE_KEY` and the legacy entry is removed.
 * - A legacy entry that is not a JSON object (corrupt JSON, `null`, an array,
 *   a primitive) carries no settings; it is removed without touching the
 *   current settings instead of overwriting them with defaults.
 *
 * Storage failures are logged and otherwise ignored so that importing the
 * module never throws.
 */
function migrateOldKeys(): void {
  const oldKey = 'hermes-tts-settings'
  try {
    const old = localStorage.getItem(oldKey)
    if (!old) return

    let parsed: unknown
    try {
      parsed = JSON.parse(old)
    } catch {
      parsed = undefined
    }

    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      // Nothing usable to migrate; drop the entry so it is not re-examined on every import.
      localStorage.removeItem(oldKey)
      return
    }

    const legacy = parsed as Record<string, unknown>
    // Old 'custom' provider maps to new 'custom' (no change needed).
    // Old 'gptsovits' provider maps to new 'custom'.
    if (legacy.provider === 'gptsovits') {
      legacy.provider = 'custom'
      // old gptsovitsUrl -> customUrl
      if (legacy.gptsovitsUrl && !legacy.customUrl) {
        legacy.customUrl = legacy.gptsovitsUrl
      }
    }

    // Store as new format
    localStorage.setItem(STORAGE_KEY, JSON.stringify({ ...DEFAULT, ...legacy }))
    localStorage.removeItem(oldKey)
  } catch (err) {
    // localStorage may be unavailable or full; migration is best-effort.
    console.warn('[useVoiceSettings] Failed to migrate legacy TTS settings:', err)
  }
}
|
||||
|
||||
/**
 * Baseline voice settings. Used as the fallback when nothing is stored, as the
 * base that stored values are overlaid on, and as the target of `reset()`.
 */
const DEFAULT: VoiceSettingsData = {
  provider: 'webspeech',

  // WebSpeech
  webspeechVoice: '',

  // OpenAI
  openaiApiKey: '',
  openaiBaseUrl: '',
  openaiModel: 'tts-1',
  openaiVoice: 'alloy',

  // Custom endpoint
  customUrl: '',
  customApiKey: '',

  // Edge TTS
  edgeUrl: '',
  edgeVoice: 'zh-CN-XiaoxiaoNeural',
}
|
||||
|
||||
/**
 * Normalizes loaded settings in place and returns the same object.
 *
 * Any stored Edge TTS adapter URL is blanked: per the original note, Edge TTS
 * now uses the internal node-edge-tts, so old adapter URLs are obsolete.
 */
function sanitize(data: VoiceSettingsData): VoiceSettingsData {
  const hasStaleEdgeUrl = Boolean(data.edgeUrl)
  if (hasStaleEdgeUrl) data.edgeUrl = ''
  return data
}
|
||||
|
||||
/**
 * Reads the persisted voice settings from localStorage.
 *
 * Stored values are overlaid on `DEFAULT` and passed through `sanitize`.
 * A fresh copy of `DEFAULT` is returned when nothing is stored or when
 * reading or parsing fails.
 */
function load(): VoiceSettingsData {
  const defaults = (): VoiceSettingsData => ({ ...DEFAULT })
  try {
    const stored = localStorage.getItem(STORAGE_KEY)
    if (!stored) return defaults()
    const merged = { ...DEFAULT, ...JSON.parse(stored) }
    return sanitize(merged)
  } catch {
    /* ignore: fall back to defaults */
    return defaults()
  }
}
|
||||
|
||||
// Run migration once on import
migrateOldKeys()

// ── Reactive state ──
// Read and parse the persisted settings a single time; every ref below is
// seeded from this one snapshot instead of re-reading localStorage per field.
const initialSettings = load()

const provider = ref<TtsProvider>(initialSettings.provider)

// WebSpeech
const webspeechVoice = ref<string>(initialSettings.webspeechVoice)

// OpenAI
const openaiApiKey = ref<string>(initialSettings.openaiApiKey)
const openaiBaseUrl = ref<string>(initialSettings.openaiBaseUrl)
const openaiModel = ref<string>(initialSettings.openaiModel)
const openaiVoice = ref<string>(initialSettings.openaiVoice)

// Custom
const customUrl = ref<string>(initialSettings.customUrl)
const customApiKey = ref<string>(initialSettings.customApiKey)

// Edge TTS
const edgeUrl = ref<string>(initialSettings.edgeUrl)
const edgeVoice = ref<string>(initialSettings.edgeVoice)
|
||||
|
||||
// Auto-persist on change
// NOTE(review): this stores the API keys in localStorage as plain JSON — confirm that is acceptable.
watch(
  [provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
    customUrl, customApiKey, edgeUrl, edgeVoice],
  () => {
    const snapshot: VoiceSettingsData = {
      provider: provider.value,
      webspeechVoice: webspeechVoice.value,
      openaiApiKey: openaiApiKey.value,
      openaiBaseUrl: openaiBaseUrl.value,
      openaiModel: openaiModel.value,
      openaiVoice: openaiVoice.value,
      customUrl: customUrl.value,
      customApiKey: customApiKey.value,
      edgeUrl: edgeUrl.value,
      edgeVoice: edgeVoice.value,
    }
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(snapshot))
    } catch (err) {
      // setItem throws when storage is full or unavailable; keep the in-memory
      // settings working and report the failure instead of throwing from the watcher.
      console.warn('[useVoiceSettings] Failed to persist voice settings:', err)
    }
  },
)
|
||||
|
||||
/**
 * Accessor for the voice settings.
 *
 * The refs are module-level, so every caller receives the same reactive
 * state. Returns the refs themselves, one setter per field, and `reset()`,
 * which restores every field to its `DEFAULT` value.
 */
export function useVoiceSettings() {
  const setProvider = (v: TtsProvider): void => {
    provider.value = v
  }
  const setWebSpeechVoice = (v: string): void => {
    webspeechVoice.value = v
  }
  const setOpenaiApiKey = (v: string): void => {
    openaiApiKey.value = v
  }
  const setOpenaiBaseUrl = (v: string): void => {
    openaiBaseUrl.value = v
  }
  const setOpenaiModel = (v: string): void => {
    openaiModel.value = v
  }
  const setOpenaiVoice = (v: string): void => {
    openaiVoice.value = v
  }
  const setCustomUrl = (v: string): void => {
    customUrl.value = v
  }
  const setCustomApiKey = (v: string): void => {
    customApiKey.value = v
  }
  const setEdgeUrl = (v: string): void => {
    edgeUrl.value = v
  }
  const setEdgeVoice = (v: string): void => {
    edgeVoice.value = v
  }

  // Restore every setting to its default, in declaration order.
  const reset = (): void => {
    provider.value = DEFAULT.provider
    webspeechVoice.value = DEFAULT.webspeechVoice
    openaiApiKey.value = DEFAULT.openaiApiKey
    openaiBaseUrl.value = DEFAULT.openaiBaseUrl
    openaiModel.value = DEFAULT.openaiModel
    openaiVoice.value = DEFAULT.openaiVoice
    customUrl.value = DEFAULT.customUrl
    customApiKey.value = DEFAULT.customApiKey
    edgeUrl.value = DEFAULT.edgeUrl
    edgeVoice.value = DEFAULT.edgeVoice
  }

  return {
    provider,
    webspeechVoice,
    openaiApiKey,
    openaiBaseUrl,
    openaiModel,
    openaiVoice,
    customUrl,
    customApiKey,
    edgeUrl,
    edgeVoice,

    setProvider,
    setWebSpeechVoice,
    setOpenaiApiKey,
    setOpenaiBaseUrl,
    setOpenaiModel,
    setOpenaiVoice,
    setCustomUrl,
    setCustomApiKey,
    setEdgeUrl,
    setEdgeVoice,

    reset,
  }
}
|
||||
Reference in New Issue
Block a user