diff --git a/package.json b/package.json index 9323b38..de6b38e 100644 --- a/package.json +++ b/package.json @@ -111,7 +111,7 @@ "vue": "^3.5.32", "vue-i18n": "^11.3.2", "vue-router": "^4.6.4", - "vue-tsc": "^3.2.6", + "vue-tsc": "^3.2.8", "ws": "^8.20.0" } -} \ No newline at end of file +} diff --git a/packages/client/src/components/hermes/chat/MessageItem.vue b/packages/client/src/components/hermes/chat/MessageItem.vue index b863801..502f703 100644 --- a/packages/client/src/components/hermes/chat/MessageItem.vue +++ b/packages/client/src/components/hermes/chat/MessageItem.vue @@ -16,6 +16,7 @@ import { renderHighlightedCodeBlock, } from "./highlight"; import { useGlobalSpeech } from "@/composables/useSpeech"; +import { useVoiceSettings } from "@/composables/useVoiceSettings"; const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000; @@ -79,6 +80,7 @@ const previewUrl = ref(null); const chatStore = useChatStore(); const settingsStore = useSettingsStore(); const speech = useGlobalSpeech(); +const voiceSettings = useVoiceSettings(); // Copy entire bubble content const copyableContent = computed(() => { @@ -351,25 +353,90 @@ const renderedToolResult = computed(() => { // 语音播放相关 const canPlaySpeech = computed(() => { - // 只有 assistant 消息可以播放,且浏览器支持 Web Speech API - return props.message.role === 'assistant' && - speech.isSupported && - copyableContent.value; -}); + // 只有 assistant 消息可以播放 + if (props.message.role !== 'assistant') return false + if (!copyableContent.value) return false + // OpenAI / Custom / Edge 不依赖浏览器 Web Speech API + if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') return true + return speech.isSupported +}) const isPlayingThisMessage = computed(() => { - return speech.currentMessageId.value === props.message.id && speech.isPlaying.value; -}); + // OpenAI / Custom / Edge 模式 + if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || 
voiceSettings.provider.value === 'edge') { + return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPlaying.value + } + return speech.currentMessageId.value === props.message.id && speech.isPlaying.value +}) const isPausedThisMessage = computed(() => { - return speech.currentMessageId.value === props.message.id && speech.isPaused.value; -}); + // OpenAI / Custom / Edge 模式 + if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') { + return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPaused.value + } + return speech.currentMessageId.value === props.message.id && speech.isPaused.value +}) function handleSpeechToggle() { if (!canPlaySpeech.value) { return } const content = props.message.content || '' + + // OpenAI TTS 模式 + if (voiceSettings.provider.value === 'openai') { + const apiUrl = voiceSettings.openaiBaseUrl.value + if (!apiUrl) { + console.warn('[MessageItem] OpenAI TTS 地址为空') + return + } + speech.openaiToggle(props.message.id, content, { + baseUrl: voiceSettings.openaiBaseUrl.value, + apiKey: voiceSettings.openaiApiKey.value, + model: voiceSettings.openaiModel.value, + voice: voiceSettings.openaiVoice.value, + }) + return + } + + // 自定义端点模式(OpenAI 兼容,如 GPT-SoVITS) + if (voiceSettings.provider.value === 'custom') { + const apiUrl = voiceSettings.customUrl.value + if (!apiUrl) { + console.warn('[MessageItem] 自定义 TTS 地址为空') + return + } + speech.openaiToggle(props.message.id, content, { + baseUrl: voiceSettings.customUrl.value, + apiKey: voiceSettings.customApiKey.value || undefined, + }) + return + } + + // Edge TTS 模式 + if (voiceSettings.provider.value === 'edge') { + // URL 为空时使用内建后端代理 + const apiUrl = voiceSettings.edgeUrl.value || '/api/tts/proxy' + speech.openaiToggle(props.message.id, content, { + baseUrl: apiUrl, + voice: voiceSettings.edgeVoice.value, + }) + return + } + + // Web Speech API 模式 + if 
(voiceSettings.provider.value === 'webspeech') { + const text = speech.extractReadableText(content) + if (text) { + speech.stop(false) + speech.speakViaBrowser(props.message.id, text, { + voiceName: voiceSettings.webspeechVoice.value || undefined, + }) + } + return + } + + // 后备(无 provider 匹配时) speech.toggle(props.message.id, content) } @@ -380,7 +447,37 @@ onMounted(() => { autoPlayHandler = (e: Event) => { const customEvent = e as CustomEvent<{ messageId: string; content: string }> if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) { - speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '') + const content = customEvent.detail.content || props.message.content || '' + if (voiceSettings.provider.value === 'openai') { + const apiUrl = voiceSettings.openaiBaseUrl.value + if (apiUrl) speech.openaiPlay(props.message.id, content, { + baseUrl: voiceSettings.openaiBaseUrl.value, + apiKey: voiceSettings.openaiApiKey.value, + model: voiceSettings.openaiModel.value, + voice: voiceSettings.openaiVoice.value, + }) + } else if (voiceSettings.provider.value === 'custom') { + const apiUrl = voiceSettings.customUrl.value + if (apiUrl) speech.openaiPlay(props.message.id, content, { + baseUrl: voiceSettings.customUrl.value, + apiKey: voiceSettings.customApiKey.value || undefined, + }) + } else if (voiceSettings.provider.value === 'edge') { + speech.openaiPlay(props.message.id, content, { + baseUrl: '/api/tts/proxy', + voice: voiceSettings.edgeVoice.value, + }) + } else if (voiceSettings.provider.value === 'webspeech') { + const text = speech.extractReadableText(content) + if (text) { + speech.stop(false) + speech.speakViaBrowser(props.message.id, text, { + voiceName: voiceSettings.webspeechVoice.value || undefined, + }) + } + } else { + speech.enqueue(props.message.id, content) + } } } window.addEventListener('auto-play-speech', autoPlayHandler) diff --git 
a/packages/client/src/components/hermes/settings/VoiceSettings.vue b/packages/client/src/components/hermes/settings/VoiceSettings.vue new file mode 100644 index 0000000..42d1686 --- /dev/null +++ b/packages/client/src/components/hermes/settings/VoiceSettings.vue @@ -0,0 +1,327 @@ + + + + + diff --git a/packages/client/src/composables/useSpeech.ts b/packages/client/src/composables/useSpeech.ts index 208a1b0..78631a6 100644 --- a/packages/client/src/composables/useSpeech.ts +++ b/packages/client/src/composables/useSpeech.ts @@ -3,6 +3,14 @@ import { generateSpeech, playAudioBlob } from '@/api/hermes/tts' export interface SpeechOptions { lang?: string // 语言 'zh-CN', 'en-US' 等 + voiceName?: string // 指定 WebSpeech 音色名称 +} + +export interface OpenaiTtsOptions { + baseUrl: string + apiKey?: string + model?: string + voice?: string } export interface SpeechState { @@ -39,6 +47,11 @@ export function useSpeech() { let playbackToken = 0 const speechQueue: SpeechQueueItem[] = [] + // 自定义 TTS(OpenAI / Custom / Edge)播放状态 + const isCustomPlaying = ref(false) + const isCustomPaused = ref(false) + const currentCustomMessageId = ref(null) + // 加载可用语音列表 function loadVoices() { availableVoices.value = synth.getVoices() @@ -162,14 +175,25 @@ export function useSpeech() { // ─── Browser Engine (Web Speech API) ──────────────────────── - function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) { + function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token?: number) { + token = token || ++playbackToken utterance = new SpeechSynthesisUtterance(text) const activeUtterance = utterance utterance.rate = 1 utterance.pitch = 1 utterance.volume = 1 - utterance.voice = getDefaultVoice() + + // 使用指定的音色(如果有),否则用默认 + if (options.voiceName) { + const voice = availableVoices.value.find(v => v.name === options.voiceName) + if (voice) { + utterance.voice = voice + } + } + if (!utterance.voice) { + utterance.voice = getDefaultVoice() + } 
if (options.lang) { utterance.lang = options.lang @@ -218,6 +242,115 @@ export function useSpeech() { synth.speak(utterance) } + // ─── OpenAI-compatible TTS Engine ──────────────────────────── + + let customAudio: HTMLAudioElement | null = null + + async function openaiPlay( + messageId: string, + content: string, + opts: OpenaiTtsOptions, + ) { + const text = extractReadableText(content) + if (!text) return + + const token = ++playbackToken + + isCustomPlaying.value = true + isCustomPaused.value = false + currentCustomMessageId.value = messageId + + const url = `${opts.baseUrl.replace(/\/+$/, '')}/audio/speech` + const body: Record = { + model: opts.model || 'tts-1', + input: text, + voice: opts.voice || 'alloy', + } + + const headers: Record = { + 'Content-Type': 'application/json', + } + if (opts.apiKey) { + headers['Authorization'] = `Bearer ${opts.apiKey}` + } + + try { + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(body), + }) + + if (token !== playbackToken) return + + if (!res.ok) { + const errText = await res.text().catch(() => '') + throw new Error(`OpenAI TTS 返回 ${res.status}: ${errText || res.statusText}`) + } + + const audioBlob = await res.blob() + if (token !== playbackToken) return + + const audioUrl = URL.createObjectURL(audioBlob) + const audio = new Audio(audioUrl) + customAudio = audio + + audio.onended = () => { + if (token !== playbackToken) return + URL.revokeObjectURL(audioUrl) + isCustomPlaying.value = false + isCustomPaused.value = false + currentCustomMessageId.value = null + customAudio = null + } + + audio.onerror = () => { + if (token !== playbackToken) return + URL.revokeObjectURL(audioUrl) + console.warn('[useSpeech] Custom TTS audio playback error') + isCustomPlaying.value = false + isCustomPaused.value = false + currentCustomMessageId.value = null + customAudio = null + } + + await audio.play() + } catch (err) { + if (token !== playbackToken) return + console.error('[useSpeech] OpenAI TTS 
请求失败:', err) + isCustomPlaying.value = false + isCustomPaused.value = false + currentCustomMessageId.value = null + throw err + } + } + + function openaiToggle(messageId: string, content: string, opts: OpenaiTtsOptions) { + if (currentCustomMessageId.value === messageId && isCustomPlaying.value) { + if (isCustomPaused.value) { + // Resume + if (customAudio) { + customAudio.play() + } + isCustomPaused.value = false + } else { + // Pause + if (customAudio) { + customAudio.pause() + } + isCustomPaused.value = true + } + } else { + // Stop other speech and start new + stop(false) + if (customAudio) { + customAudio.pause() + customAudio = null + } + openaiPlay(messageId, content, opts) + } + } + // ─── Unified speak ────────────────────────────────────────── function speak(messageId: string, text: string, options: SpeechOptions = {}) { @@ -317,6 +450,11 @@ export function useSpeech() { progress: computed(() => state.value.progress), engine: computed(() => state.value.engine), + // Custom TTS state + isCustomPlaying, + isCustomPaused, + currentCustomMessageId, + play, pause, resume, @@ -325,6 +463,13 @@ export function useSpeech() { enqueue, getDefaultVoice, extractReadableText, + + // OpenAI-compatible TTS + openaiPlay, + openaiToggle, + + // Browser WebSpeech (直接调用避免 Rolldown 树摇) + speakViaBrowser, } } diff --git a/packages/client/src/composables/useVoiceSettings.ts b/packages/client/src/composables/useVoiceSettings.ts new file mode 100644 index 0000000..a77f30b --- /dev/null +++ b/packages/client/src/composables/useVoiceSettings.ts @@ -0,0 +1,164 @@ +import { ref, watch } from 'vue' + +export type TtsProvider = 'webspeech' | 'openai' | 'custom' | 'edge' + +export interface VoiceSettingsData { + provider: TtsProvider + + // WebSpeech + webspeechVoice: string + + // OpenAI + openaiApiKey: string + openaiBaseUrl: string + openaiModel: string + openaiVoice: string + + // Custom endpoint (OpenAI-compatible) + customUrl: string + customApiKey: string + + // Edge TTS + 
edgeUrl: string + edgeVoice: string +} + +const STORAGE_KEY = 'hermes-tts-settings-v2' + +function migrateOldKeys() { + const oldKey = 'hermes-tts-settings' + try { + const old = localStorage.getItem(oldKey) + if (old) { + const parsed = JSON.parse(old) + // Old 'custom' provider maps to new 'custom' + // Old 'gptsovits' provider maps to new 'custom' + if (parsed.provider === 'gptsovits') { + parsed.provider = 'custom' + // old gptsovitsUrl -> customUrl + if (parsed.gptsovitsUrl && !parsed.customUrl) { + parsed.customUrl = parsed.gptsovitsUrl + } + } + // Store as new format + const data = { ...DEFAULT, ...parsed } + localStorage.setItem(STORAGE_KEY, JSON.stringify(data)) + localStorage.removeItem(oldKey) + } + } catch { /* ignore */ } +} + +const DEFAULT: VoiceSettingsData = { + provider: 'webspeech', + + webspeechVoice: '', + + openaiApiKey: '', + openaiBaseUrl: '', + openaiModel: 'tts-1', + openaiVoice: 'alloy', + + customUrl: '', + customApiKey: '', + + edgeUrl: '', + edgeVoice: 'zh-CN-XiaoxiaoNeural', +} + +function sanitize(data: VoiceSettingsData): VoiceSettingsData { + // Clear old Edge TTS adapter URLs — now uses internal node-edge-tts + if (data.edgeUrl && data.edgeUrl !== '') { + data.edgeUrl = '' + } + return data +} + +function load(): VoiceSettingsData { + try { + const raw = localStorage.getItem(STORAGE_KEY) + if (raw) return sanitize({ ...DEFAULT, ...JSON.parse(raw) }) + } catch { /* ignore */ } + return { ...DEFAULT } +} + +// Run migration once on import +migrateOldKeys() + +// ── Reactive state ── +const provider = ref(load().provider) + +// WebSpeech +const webspeechVoice = ref(load().webspeechVoice) + +// OpenAI +const openaiApiKey = ref(load().openaiApiKey) +const openaiBaseUrl = ref(load().openaiBaseUrl) +const openaiModel = ref(load().openaiModel) +const openaiVoice = ref(load().openaiVoice) + +// Custom +const customUrl = ref(load().customUrl) +const customApiKey = ref(load().customApiKey) + +// Edge TTS +const edgeUrl = ref(load().edgeUrl) 
+const edgeVoice = ref(load().edgeVoice) + +// Auto-persist on change +watch( + [provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice, + customUrl, customApiKey, edgeUrl, edgeVoice], + () => { + localStorage.setItem(STORAGE_KEY, JSON.stringify({ + provider: provider.value, + webspeechVoice: webspeechVoice.value, + openaiApiKey: openaiApiKey.value, + openaiBaseUrl: openaiBaseUrl.value, + openaiModel: openaiModel.value, + openaiVoice: openaiVoice.value, + customUrl: customUrl.value, + customApiKey: customApiKey.value, + edgeUrl: edgeUrl.value, + edgeVoice: edgeVoice.value, + })) + }, +) + +export function useVoiceSettings() { + return { + provider, + webspeechVoice, + openaiApiKey, + openaiBaseUrl, + openaiModel, + openaiVoice, + customUrl, + customApiKey, + edgeUrl, + edgeVoice, + + setProvider(v: TtsProvider) { provider.value = v }, + setWebSpeechVoice(v: string) { webspeechVoice.value = v }, + setOpenaiApiKey(v: string) { openaiApiKey.value = v }, + setOpenaiBaseUrl(v: string) { openaiBaseUrl.value = v }, + setOpenaiModel(v: string) { openaiModel.value = v }, + setOpenaiVoice(v: string) { openaiVoice.value = v }, + setCustomUrl(v: string) { customUrl.value = v }, + setCustomApiKey(v: string) { customApiKey.value = v }, + setEdgeUrl(v: string) { edgeUrl.value = v }, + setEdgeVoice(v: string) { edgeVoice.value = v }, + + reset() { + provider.value = DEFAULT.provider + webspeechVoice.value = DEFAULT.webspeechVoice + openaiApiKey.value = DEFAULT.openaiApiKey + openaiBaseUrl.value = DEFAULT.openaiBaseUrl + openaiModel.value = DEFAULT.openaiModel + openaiVoice.value = DEFAULT.openaiVoice + customUrl.value = DEFAULT.customUrl + customApiKey.value = DEFAULT.customApiKey + edgeUrl.value = DEFAULT.edgeUrl + edgeVoice.value = DEFAULT.edgeVoice + }, + } +} diff --git a/packages/client/src/i18n/locales/de.ts b/packages/client/src/i18n/locales/de.ts index fff4982..b9eb4b3 100644 --- a/packages/client/src/i18n/locales/de.ts +++ 
b/packages/client/src/i18n/locales/de.ts @@ -511,6 +511,8 @@ jobTriggered: 'Job ausgelost', session: 'Sitzung', privacy: 'Datenschutz', apiServer: 'API-Server', + models: 'Modelle', + voice: 'Sprache', }, display: { streaming: 'Streaming-Antworten', @@ -589,6 +591,55 @@ jobTriggered: 'Job ausgelost', cors: 'CORS-Ursprunge', corsHint: 'Erlaubte Cross-Origin-Quellen', }, + voice: { + ttsProvider: 'TTS-Anbieter', + ttsProviderHint: 'Waehlen Sie die Sprachsynthese-Engine fuer die Nachrichtenwiedergabe', + providerWebSpeech: 'WebSpeech API (Browser)', + providerOpenai: 'OpenAI TTS', + providerCustom: 'Benutzerdefinierter Endpunkt (OpenAI-kompatibel)', + providerEdge: 'Edge TTS (Kostenlos, kein API-Key erforderlich)', + + // WebSpeech + webspeechVoice: 'Stimme', + webspeechVoiceHint: 'Waehlen Sie eine Stimme aus Ihrem Browser oder Betriebssystem', + webspeechVoicePlaceholder: 'Auto (Standardstimme)', + + // OpenAI + openaiKey: 'API-Key', + openaiKeyHint: 'Ihr OpenAI API-Key mit TTS-Zugriff', + openaiUrl: 'API-Basis-URL', + openaiUrlHint: 'z.B. https://api.openai.com/v1/audio/speech', + openaiModel: 'Modell', + openaiModelHint: 'tts-1 (schneller) / tts-1-hd (hoehere Qualitaet)', + openaiVoice: 'Stimme', + openaiVoiceHint: 'Stimme fuer die Synthese', + + // Custom endpoint + customHint: 'Jede OpenAI-kompatible TTS-API verwenden — funktioniert mit GPT-SoVITS, CosyVoice, usw.', + customUrl: 'API-URL', + customUrlHint: 'Basis-URL Ihres TTS-Dienstes', + customUrlPlaceholder: 'Die im lokalen Adapter konfigurierte Adresse, z.B. http://127.0.0.1:9880', + customApiKey: 'API-Key (optional)', + customApiKeyHint: 'Einige benutzerdefinierte Endpunkte erfordern Authentifizierung', + customApiKeyPlaceholder: 'Leer lassen wenn nicht benoetigt', + + // Edge TTS + edgeHint: 'Angetrieben von Microsoft Edge TTS (node-edge-tts).', + edgeUrl: 'Adapter-URL', + edgeUrlHint: 'Adresse des Edge TTS-Adapters, z.B. 
http://127.0.0.1:9882', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: 'Stimme', + edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese', + + // Test + testTitle: 'Sprachtest', + testText: 'Testtext', + testTextPlaceholder: 'Text zum Testen eingeben...', + testTextDefault: 'Hallo, dies ist ein Sprachtest.', + testButton: 'Testen', + testButtonPlaying: 'Wiedergabe...', + testFailed: 'Test fehlgeschlagen: {error}', + }, lockedIps: { title: 'Gesperrte IPs', count: '{count} gesperrt', diff --git a/packages/client/src/i18n/locales/en.ts b/packages/client/src/i18n/locales/en.ts index 1b67312..dc4c39e 100644 --- a/packages/client/src/i18n/locales/en.ts +++ b/packages/client/src/i18n/locales/en.ts @@ -651,6 +651,7 @@ export default { privacy: 'Privacy', apiServer: 'API Server', models: 'Models', + voice: 'Voice', }, models: { apiKey: 'API Key', @@ -747,6 +748,54 @@ export default { unlocked: 'IP unlocked', allUnlocked: '{count} IPs unlocked', }, + voice: { + ttsProvider: 'TTS Provider', + ttsProviderHint: 'Choose the text-to-speech engine for message playback', + providerWebSpeech: 'WebSpeech API (Browser)', + providerOpenai: 'OpenAI TTS', + providerCustom: 'Custom Endpoint (OpenAI-compatible)', + providerEdge: 'Edge TTS (Free, no API Key)', + + // WebSpeech + webspeechVoice: 'Voice', + webspeechVoiceHint: 'Select a voice from your browser or OS', + webspeechVoicePlaceholder: 'Auto (default voice)', + + // OpenAI + openaiKey: 'API Key', + openaiKeyHint: 'Your OpenAI API key with TTS access', + openaiUrl: 'API Base URL', + openaiUrlHint: 'e.g. 
https://api.openai.com/v1/audio/speech', + openaiModel: 'Model', + openaiModelHint: 'tts-1 (faster) / tts-1-hd (higher quality)', + openaiVoice: 'Voice', + openaiVoiceHint: 'Voice to use for synthesis', + + // Custom endpoint + customHint: 'Use any OpenAI-compatible TTS API — works with GPT-SoVITS, CosyVoice, etc.', + customUrl: 'API URL', + customUrlHint: 'Base URL of your TTS service', + customUrlPlaceholder: 'The address configured in the local adapter, e.g. http://127.0.0.1:9880', + customApiKey: 'API Key (Optional)', + customApiKeyHint: 'Some custom endpoints require authentication', + customApiKeyPlaceholder: 'Leave blank if not needed', + // Edge TTS + edgeHint: 'Powered by Microsoft Edge TTS (node-edge-tts).', + edgeUrl: 'Adapter URL', + edgeUrlHint: 'Address of your Edge TTS adapter, e.g. http://127.0.0.1:9882', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: 'Voice', + edgeVoiceHint: 'Select a voice for speech synthesis', + + // Test + testTitle: 'Test Voice', + testText: 'Test Text', + testTextPlaceholder: 'Enter text to test...', + testTextDefault: 'Hello, this is a voice test.', + testButton: 'Test', + testButtonPlaying: 'Playing...', + testFailed: 'Test failed: {error}', + }, }, // Platform channel settings diff --git a/packages/client/src/i18n/locales/es.ts b/packages/client/src/i18n/locales/es.ts index 05bcb11..dcd3f51 100644 --- a/packages/client/src/i18n/locales/es.ts +++ b/packages/client/src/i18n/locales/es.ts @@ -511,6 +511,8 @@ jobTriggered: 'Job ejecutado', session: 'Sesion', privacy: 'Privacidad', apiServer: 'Servidor API', + models: 'Modelos', + voice: 'Voz', }, display: { streaming: 'Respuestas en streaming', @@ -589,6 +591,55 @@ jobTriggered: 'Job ejecutado', cors: 'Origenes CORS', corsHint: 'Fuentes cross-origin permitidas', }, + voice: { + ttsProvider: 'Proveedor TTS', + ttsProviderHint: 'Elija el motor de texto a voz para la reproduccion de mensajes', + providerWebSpeech: 'WebSpeech API (Navegador)', + providerOpenai: 
'OpenAI TTS', + providerCustom: 'Endpoint personalizado (compatible con OpenAI)', + providerEdge: 'Edge TTS (Gratuito, sin clave API)', + + // WebSpeech + webspeechVoice: 'Voz', + webspeechVoiceHint: 'Seleccione una voz de su navegador o sistema operativo', + webspeechVoicePlaceholder: 'Auto (voz predeterminada)', + + // OpenAI + openaiKey: 'Clave API', + openaiKeyHint: 'Su clave API de OpenAI con acceso TTS', + openaiUrl: 'URL base de API', + openaiUrlHint: 'ej. https://api.openai.com/v1/audio/speech', + openaiModel: 'Modelo', + openaiModelHint: 'tts-1 (mas rapido) / tts-1-hd (mayor calidad)', + openaiVoice: 'Voz', + openaiVoiceHint: 'Voz a utilizar para la sintesis', + + // Custom endpoint + customHint: 'Utilice cualquier API TTS compatible con OpenAI — funciona con GPT-SoVITS, CosyVoice, etc.', + customUrl: 'URL de API', + customUrlHint: 'URL base de su servicio TTS', + customUrlPlaceholder: 'Direccion configurada en el adaptador local, ej. http://127.0.0.1:9880', + customApiKey: 'Clave API (opcional)', + customApiKeyHint: 'Algunos endpoints personalizados requieren autenticacion', + customApiKeyPlaceholder: 'Dejar en blanco si no es necesario', + + // Edge TTS + edgeHint: 'Impulsado por Microsoft Edge TTS (node-edge-tts).', + edgeUrl: 'URL del adaptador', + edgeUrlHint: 'Direccion del adaptador Edge TTS, ej. 
http://127.0.0.1:9882', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: 'Voz', + edgeVoiceHint: 'Seleccione una voz para la sintesis de voz', + + // Test + testTitle: 'Prueba de voz', + testText: 'Texto de prueba', + testTextPlaceholder: 'Ingrese texto para probar...', + testTextDefault: 'Hola, esta es una prueba de voz.', + testButton: 'Probar', + testButtonPlaying: 'Reproduciendo...', + testFailed: 'Prueba fallida: {error}', + }, lockedIps: { title: 'IPs bloqueadas', count: '{count} bloqueadas', diff --git a/packages/client/src/i18n/locales/fr.ts b/packages/client/src/i18n/locales/fr.ts index acf288a..263cd31 100644 --- a/packages/client/src/i18n/locales/fr.ts +++ b/packages/client/src/i18n/locales/fr.ts @@ -511,6 +511,8 @@ jobTriggered: 'Job declenche', session: 'Session', privacy: 'Confidentialite', apiServer: 'Serveur API', + models: 'Modèles', + voice: 'Voix', }, display: { streaming: 'Reponses en continu', @@ -589,6 +591,55 @@ jobTriggered: 'Job declenche', cors: 'Origines CORS', corsHint: 'Sources cross-origin autorisees', }, + voice: { + ttsProvider: 'Fournisseur TTS', + ttsProviderHint: 'Choisir le moteur de synthese vocale pour la lecture des messages', + providerWebSpeech: 'WebSpeech API (Navigateur)', + providerOpenai: 'OpenAI TTS', + providerCustom: "Point d'acces personnalise (compatible OpenAI)", + providerEdge: 'Edge TTS (Gratuit, sans cle API)', + + // WebSpeech + webspeechVoice: 'Voix', + webspeechVoiceHint: "Choisir une voix depuis le navigateur ou l'OS", + webspeechVoicePlaceholder: 'Auto (voix par defaut)', + + // OpenAI + openaiKey: 'Cle API', + openaiKeyHint: 'Votre cle API OpenAI avec acces TTS', + openaiUrl: 'URL de base API', + openaiUrlHint: 'ex. 
https://api.openai.com/v1/audio/speech', + openaiModel: 'Modele', + openaiModelHint: 'tts-1 (rapide) / tts-1-hd (haute qualite)', + openaiVoice: 'Voix', + openaiVoiceHint: 'Voix a utiliser pour la synthese', + + // Custom endpoint + customHint: 'Utilisez toute API TTS compatible OpenAI — fonctionne avec GPT-SoVITS, CosyVoice, etc.', + customUrl: 'URL API', + customUrlHint: 'URL de base de votre service TTS', + customUrlPlaceholder: "Adresse configuree dans l'adaptateur local, ex. http://127.0.0.1:9880", + customApiKey: 'Cle API (optionnelle)', + customApiKeyHint: "Certains points d'acces personnalises necessitent une authentification", + customApiKeyPlaceholder: 'Laisser vide si inutile', + + // Edge TTS + edgeHint: 'Propulse par Microsoft Edge TTS (node-edge-tts).', + edgeUrl: "URL de l'adaptateur", + edgeUrlHint: "Adresse de l'adaptateur Edge TTS, ex. http://127.0.0.1:9882", + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: 'Voix', + edgeVoiceHint: 'Choisir une voix pour la synthese vocale', + + // Test + testTitle: 'Test vocal', + testText: 'Texte de test', + testTextPlaceholder: 'Entrez le texte a tester...', + testTextDefault: 'Bonjour, ceci est un test vocal.', + testButton: 'Tester', + testButtonPlaying: 'Lecture...', + testFailed: 'Echec du test : {error}', + }, lockedIps: { title: 'IPs bloquees', count: '{count} bloquees', diff --git a/packages/client/src/i18n/locales/ja.ts b/packages/client/src/i18n/locales/ja.ts index 337ca60..46020ff 100644 --- a/packages/client/src/i18n/locales/ja.ts +++ b/packages/client/src/i18n/locales/ja.ts @@ -511,6 +511,8 @@ export default { session: 'セッション', privacy: 'プライバシー', apiServer: 'API サーバー', + models: 'モデル', + voice: '音声', }, display: { streaming: 'ストリームレスポンス', @@ -589,6 +591,55 @@ export default { cors: 'CORS 許可元', corsHint: '許可するクロスオリジン', }, + voice: { + ttsProvider: 'TTS プロバイダー', + ttsProviderHint: 'メッセージ読み上げに使用する音声合成エンジンを選択', + providerWebSpeech: 'WebSpeech API(ブラウザ)', + providerOpenai: 'OpenAI TTS', + 
providerCustom: 'カスタムエンドポイント(OpenAI 互換)', + providerEdge: 'Edge TTS(無料、API Key 不要)', + + // WebSpeech + webspeechVoice: '音声', + webspeechVoiceHint: 'ブラウザまたは OS から音声を選択', + webspeechVoicePlaceholder: '自動(デフォルト音声)', + + // OpenAI + openaiKey: 'API キー', + openaiKeyHint: 'TTS アクセス権のある OpenAI API キー', + openaiUrl: 'API ベース URL', + openaiUrlHint: '例: https://api.openai.com/v1/audio/speech', + openaiModel: 'モデル', + openaiModelHint: 'tts-1(高速)/ tts-1-hd(高音質)', + openaiVoice: '音色', + openaiVoiceHint: '合成に使用する音色', + + // Custom endpoint + customHint: 'OpenAI 互換の TTS API を使用可能 — GPT-SoVITS、CosyVoice などに対応', + customUrl: 'API URL', + customUrlHint: 'TTS サービスのベース URL', + customUrlPlaceholder: 'ローカルアダプターで設定したアドレス(例:http://127.0.0.1:9880)', + customApiKey: 'API キー(オプション)', + customApiKeyHint: '一部のカスタムエンドポイントは認証が必要', + customApiKeyPlaceholder: '不要な場合は空欄', + + // Edge TTS + edgeHint: 'Microsoft Edge TTS を搭載(node-edge-tts)。', + edgeUrl: 'アダプター URL', + edgeUrlHint: 'Edge TTS アダプターのアドレス(例:http://127.0.0.1:9882)', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: '音色', + edgeVoiceHint: '音声合成に使用する音色を選択', + + // Test + testTitle: '音声テスト', + testText: 'テストテキスト', + testTextPlaceholder: 'テストするテキストを入力...', + testTextDefault: 'こんにちは、これは音声テストです。', + testButton: 'テスト', + testButtonPlaying: '再生中...', + testFailed: 'テスト失敗:{error}', + }, lockedIps: { title: 'ロック済みIP管理', count: '{count}件ロック中', diff --git a/packages/client/src/i18n/locales/ko.ts b/packages/client/src/i18n/locales/ko.ts index 0f7965b..b85f113 100644 --- a/packages/client/src/i18n/locales/ko.ts +++ b/packages/client/src/i18n/locales/ko.ts @@ -511,6 +511,8 @@ export default { session: '세션', privacy: '개인정보', apiServer: 'API 서버', + models: '모델', + voice: '음성', }, display: { streaming: '스트리밍 응답', @@ -589,6 +591,55 @@ export default { cors: 'CORS 출처', corsHint: '허용된 교차 출처', }, + voice: { + ttsProvider: 'TTS 제공자', + ttsProviderHint: '메시지 재생에 사용할 텍스트 음성 변환 엔진 선택', + providerWebSpeech: 'WebSpeech API (브라우저)', + providerOpenai: 
'OpenAI TTS', + providerCustom: '사용자 정의 엔드포인트 (OpenAI 호환)', + providerEdge: 'Edge TTS (무료, API Key 불필요)', + + // WebSpeech + webspeechVoice: '음성', + webspeechVoiceHint: '브라우저 또는 OS에서 음성 선택', + webspeechVoicePlaceholder: '자동 (기본 음성)', + + // OpenAI + openaiKey: 'API 키', + openaiKeyHint: 'TTS 접근 권한이 있는 OpenAI API 키', + openaiUrl: 'API 기본 URL', + openaiUrlHint: '예: https://api.openai.com/v1/audio/speech', + openaiModel: '모델', + openaiModelHint: 'tts-1 (빠름) / tts-1-hd (고음질)', + openaiVoice: '음색', + openaiVoiceHint: '합성에 사용할 음색', + + // Custom endpoint + customHint: '모든 OpenAI 호환 TTS API 사용 가능 — GPT-SoVITS, CosyVoice 등 지원', + customUrl: 'API URL', + customUrlHint: 'TTS 서비스의 기본 URL', + customUrlPlaceholder: '로컬 어댑터에 설정된 주소 (예: http://127.0.0.1:9880)', + customApiKey: 'API 키 (선택사항)', + customApiKeyHint: '일부 사용자 정의 엔드포인트는 인증 필요', + customApiKeyPlaceholder: '필요하지 않으면 비워둠', + + // Edge TTS + edgeHint: 'Microsoft Edge TTS 기반 (node-edge-tts).', + edgeUrl: '어댑터 URL', + edgeUrlHint: 'Edge TTS 어댑터 주소 (예: http://127.0.0.1:9882)', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: '음색', + edgeVoiceHint: '음성 합성에 사용할 음색 선택', + + // Test + testTitle: '음성 테스트', + testText: '테스트 텍스트', + testTextPlaceholder: '테스트할 텍스트 입력...', + testTextDefault: '안녕하세요, 음성 테스트입니다.', + testButton: '테스트', + testButtonPlaying: '재생 중...', + testFailed: '테스트 실패: {error}', + }, lockedIps: { title: '잠긴 IP 관리', count: '{count}개 잠김', diff --git a/packages/client/src/i18n/locales/pt.ts b/packages/client/src/i18n/locales/pt.ts index 0ee6fde..0c9a55b 100644 --- a/packages/client/src/i18n/locales/pt.ts +++ b/packages/client/src/i18n/locales/pt.ts @@ -511,6 +511,8 @@ jobTriggered: 'Job acionado', session: 'Sessao', privacy: 'Privacidade', apiServer: 'Servidor API', + models: 'Modelos', + voice: 'Voz', }, display: { streaming: 'Respostas em streaming', @@ -589,6 +591,55 @@ jobTriggered: 'Job acionado', cors: 'Origens CORS', corsHint: 'Fontes cross-origin permitidas', }, + voice: { + ttsProvider: 'Provedor TTS', 
+ ttsProviderHint: 'Escolha o mecanismo de texto para fala para reproducao de mensagens', + providerWebSpeech: 'WebSpeech API (Navegador)', + providerOpenai: 'OpenAI TTS', + providerCustom: 'Endpoint personalizado (compativel com OpenAI)', + providerEdge: 'Edge TTS (Gratuito, sem chave API)', + + // WebSpeech + webspeechVoice: 'Voz', + webspeechVoiceHint: 'Selecione uma voz do seu navegador ou SO', + webspeechVoicePlaceholder: 'Auto (voz padrao)', + + // OpenAI + openaiKey: 'Chave API', + openaiKeyHint: 'Sua chave API OpenAI com acesso TTS', + openaiUrl: 'URL base da API', + openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech', + openaiModel: 'Modelo', + openaiModelHint: 'tts-1 (mais rapido) / tts-1-hd (qualidade superior)', + openaiVoice: 'Voz', + openaiVoiceHint: 'Voz a ser usada para sintese', + + // Custom endpoint + customHint: 'Use qualquer API TTS compativel com OpenAI — funciona com GPT-SoVITS, CosyVoice, etc.', + customUrl: 'URL da API', + customUrlHint: 'URL base do seu servico TTS', + customUrlPlaceholder: 'Endereco configurado no adaptador local, ex. http://127.0.0.1:9880', + customApiKey: 'Chave API (opcional)', + customApiKeyHint: 'Alguns endpoints personalizados exigem autenticacao', + customApiKeyPlaceholder: 'Deixe em branco se nao for necessario', + + // Edge TTS + edgeHint: 'Desenvolvido por Microsoft Edge TTS (node-edge-tts).', + edgeUrl: 'URL do adaptador', + edgeUrlHint: 'Endereco do adaptador Edge TTS, ex. 
http://127.0.0.1:9882', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: 'Voz', + edgeVoiceHint: 'Selecione uma voz para sintese de fala', + + // Test + testTitle: 'Teste de voz', + testText: 'Texto de teste', + testTextPlaceholder: 'Insira o texto para testar...', + testTextDefault: 'Ola, este e um teste de voz.', + testButton: 'Testar', + testButtonPlaying: 'Reproduzindo...', + testFailed: 'Teste falhou: {error}', + }, lockedIps: { title: 'IPs bloqueadas', count: '{count} bloqueadas', diff --git a/packages/client/src/i18n/locales/zh.ts b/packages/client/src/i18n/locales/zh.ts index a4e0164..38efcce 100644 --- a/packages/client/src/i18n/locales/zh.ts +++ b/packages/client/src/i18n/locales/zh.ts @@ -643,6 +643,7 @@ export default { privacy: '隐私', apiServer: 'API 服务器', models: '模型', + voice: '语音', }, models: { apiKey: 'API Key', @@ -739,6 +740,54 @@ export default { unlocked: 'IP 已解锁', allUnlocked: '已解锁 {count} 个 IP', }, + voice: { + ttsProvider: 'TTS 提供者', + ttsProviderHint: '选择消息朗读使用的语音引擎', + providerWebSpeech: 'WebSpeech API(浏览器内置)', + providerOpenai: 'OpenAI TTS', + providerCustom: '自定义端点(兼容 OpenAI)', + providerEdge: 'Edge TTS(免费,无需 API Key)', + + // WebSpeech + webspeechVoice: '音色', + webspeechVoiceHint: '从浏览器或系统提供的语音中选择', + webspeechVoicePlaceholder: '自动(默认语音)', + + // OpenAI + openaiKey: 'API 密钥', + openaiKeyHint: '具有 TTS 权限的 OpenAI API Key', + openaiUrl: 'API 基础地址', + openaiUrlHint: '例如 https://api.openai.com/v1/audio/speech', + openaiModel: '模型', + openaiModelHint: 'tts-1(快速)/ tts-1-hd(高音质)', + openaiVoice: '音色', + openaiVoiceHint: '用于语音合成的音色', + + // 自定义端点 + customHint: '支持任何 OpenAI 兼容的 TTS 服务——可用于 GPT-SoVITS、CosyVoice 等自部署服务。', + customUrl: 'API 地址', + customUrlHint: 'TTS 服务的完整基础地址', + customUrlPlaceholder: '本地适配器中配置的地址 如:http://127.0.0.1:9880', + customApiKey: 'API 密钥(可选)', + customApiKeyHint: '部分自部署服务需要身份验证', + customApiKeyPlaceholder: '不需要则留空', + // Edge TTS + edgeHint: '由 Microsoft Edge TTS 驱动(node-edge-tts)。', + edgeUrl: '适配器地址', + 
edgeUrlHint: 'Edge TTS 适配器地址,例如 http://127.0.0.1:9882', + edgeUrlPlaceholder: 'http://127.0.0.1:9882', + edgeVoice: '音色', + edgeVoiceHint: '选择用于语音合成的音色', + + // 试听 + testTitle: '试听测试', + testText: '测试文本', + testTextPlaceholder: '输入测试文本...', + testTextDefault: '你好,这是一个语音测试。', + testButton: '试听', + testButtonPlaying: '播放中...', + testFailed: '测试失败:{error}', + }, }, // 平台频道设置 diff --git a/packages/client/src/views/hermes/SettingsView.vue b/packages/client/src/views/hermes/SettingsView.vue index a58693b..83e25f8 100644 --- a/packages/client/src/views/hermes/SettingsView.vue +++ b/packages/client/src/views/hermes/SettingsView.vue @@ -14,6 +14,7 @@ import SessionSettings from "@/components/hermes/settings/SessionSettings.vue"; import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue"; import ModelSettings from "@/components/hermes/settings/ModelSettings.vue"; import AccountSettings from "@/components/hermes/settings/AccountSettings.vue"; +import VoiceSettings from "@/components/hermes/settings/VoiceSettings.vue"; const settingsStore = useSettingsStore(); const { t } = useI18n(); @@ -57,6 +58,9 @@ onMounted(() => { + + + diff --git a/packages/server/src/controllers/hermes/tts.ts b/packages/server/src/controllers/hermes/tts.ts index 808e333..5cc2963 100644 --- a/packages/server/src/controllers/hermes/tts.ts +++ b/packages/server/src/controllers/hermes/tts.ts @@ -1,5 +1,5 @@ import type { Context } from 'koa' -import { textToSpeech } from '../../services/hermes/tts' +import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts' export async function generate(ctx: Context) { const { text, lang } = ctx.request.body as { @@ -26,3 +26,41 @@ export async function generate(ctx: Context) { ctx.set('X-TTS-Engine', engine) ctx.body = audio } + +/** + * OpenAI-compatible TTS endpoint. + * Accepts: { model, input, voice, speed } + * Returns audio/mpeg stream. 
+ */ +export async function openaiProxy(ctx: Context) { + const body = ctx.request.body as { + input?: string + voice?: string + speed?: number + model?: string + } + + if (!body.input || typeof body.input !== 'string') { + ctx.status = 400 + ctx.body = { error: 'input is required' } + return + } + + if (body.input.length > 5000) { + ctx.status = 400 + ctx.body = { error: 'input is too long (max 5000 characters)' } + return + } + + const { audio, engine } = await openaiCompatibleTts({ + input: body.input, + voice: body.voice, + speed: body.speed, + model: body.model, + }) + + ctx.set('Content-Type', 'audio/mpeg') + ctx.set('Content-Length', String(audio.length)) + ctx.set('X-TTS-Engine', engine) + ctx.body = audio +} diff --git a/packages/server/src/routes/hermes/tts.ts b/packages/server/src/routes/hermes/tts.ts index ecf4f81..f1e6c99 100644 --- a/packages/server/src/routes/hermes/tts.ts +++ b/packages/server/src/routes/hermes/tts.ts @@ -4,3 +4,4 @@ import * as ctrl from '../../controllers/hermes/tts' export const ttsRoutes = new Router() ttsRoutes.post('/api/hermes/tts', ctrl.generate) +ttsRoutes.post('/api/tts/proxy/audio/speech', ctrl.openaiProxy) diff --git a/packages/server/src/routes/index.ts b/packages/server/src/routes/index.ts index 8a2d555..24e10da 100644 --- a/packages/server/src/routes/index.ts +++ b/packages/server/src/routes/index.ts @@ -41,6 +41,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next) app.use(healthRoutes.routes()) app.use(webhookRoutes.routes()) app.use(authPublicRoutes.routes()) + app.use(ttsRoutes.routes()) // TTS proxy/generation — must be before auth // --- Auth middleware: all routes below require authentication --- app.use(requireAuth) @@ -69,7 +70,6 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next) app.use(jobRoutes.routes()) // Must be before proxy app.use(cronHistoryRoutes.routes()) // Must be before proxy app.use(kanbanRoutes.routes()) // Must be before proxy - 
app.use(ttsRoutes.routes()) // Must be before proxy app.use(proxyRoutes.routes()) // Proxy catch-all middleware (must be last) diff --git a/packages/server/src/services/hermes/tts.ts b/packages/server/src/services/hermes/tts.ts index ce676f1..5df175c 100644 --- a/packages/server/src/services/hermes/tts.ts +++ b/packages/server/src/services/hermes/tts.ts @@ -12,6 +12,9 @@ const FIXED_PITCH = '+12Hz' export interface TtsOptions { text: string lang?: string + voice?: string + rate?: string + pitch?: string } export async function edgeTts(opts: TtsOptions): Promise { @@ -20,9 +23,9 @@ export async function edgeTts(opts: TtsOptions): Promise { try { const tts = new EdgeTTS({ - voice: FIXED_VOICE, - rate: FIXED_RATE, - pitch: FIXED_PITCH, + voice: opts.voice || FIXED_VOICE, + rate: opts.rate || FIXED_RATE, + pitch: opts.pitch || FIXED_PITCH, timeout: 15000, }) @@ -35,7 +38,41 @@ export async function edgeTts(opts: TtsOptions): Promise { } export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> { + const voice = opts.voice || FIXED_VOICE + const rate = opts.rate || FIXED_RATE + const pitch = opts.pitch || FIXED_PITCH const audio = await edgeTts(opts) - logger.debug({ engine: 'edge', voice: FIXED_VOICE, rate: FIXED_RATE, pitch: FIXED_PITCH }, 'TTS generated via Edge') + logger.debug({ engine: 'edge', voice, rate, pitch }, 'TTS generated via Edge') return { audio, engine: 'edge' } } + +/** + * Convert speed multiplier (0.5-2.0) to Edge TTS rate string. + * Edge TTS rate format: "+/-NN%" + */ +export function speedToEdgeRate(speed: number): string { + const percent = Math.round((speed - 1) * 100) + return percent >= 0 ? `+${percent}%` : `${percent}%` +} + +/** + * Convert OpenAI TTS request to internal TtsOptions. 
/**
 * OpenAI-style TTS request body: `{ model, input, voice, speed }`.
 *
 * `openaiCompatibleTts` reads `input`, `voice` and `speed`; `model` is part of
 * the accepted shape but is not read by that function.
 */
export interface OpenaiTtsRequest {
  model?: string
  input: string
  voice?: string
  speed?: number
}

/**
 * Translate an OpenAI-style TTS request into internal `TtsOptions` and
 * synthesize it via `textToSpeech`.
 *
 * The declared field types are compile-time only, and callers may hand over an
 * unvalidated request body, so the optional fields are re-checked at runtime:
 * - `voice` is used only when it is a non-empty string; otherwise `FIXED_VOICE`.
 * - `speed` is used only when it is a finite number greater than zero;
 *   otherwise `FIXED_RATE`. This keeps values such as `NaN`, `Infinity`,
 *   negative numbers or non-numeric JSON from being formatted into a rate
 *   string like "NaN%".
 * Pitch is always `FIXED_PITCH`.
 *
 * @param body - OpenAI-style request; `body.input` is the text to speak.
 * @returns The promise returned by `textToSpeech` (audio buffer plus engine name).
 */
export async function openaiCompatibleTts(
  body: OpenaiTtsRequest,
): Promise<{ audio: Buffer; engine: string }> {
  // Runtime guards: the static types say string/number, the wire may not.
  const hasUsableVoice = typeof body.voice === 'string' && body.voice !== ''
  const hasUsableSpeed =
    typeof body.speed === 'number' && Number.isFinite(body.speed) && body.speed > 0

  return textToSpeech({
    text: body.input,
    voice: hasUsableVoice ? (body.voice as string) : FIXED_VOICE,
    rate: hasUsableSpeed ? speedToEdgeRate(body.speed as number) : FIXED_RATE,
    pitch: FIXED_PITCH,
  })
}