feat: add voice playback settings with 4-provider support (#608)

Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers. Co-authored-by: Hermes Agent <noreply@nousresearch.com>
2026-05-10 20:08:38 +08:00
parent 838791a740
commit 15195f0795
18 changed files with 1237 additions and 20 deletions
@@ -111,7 +111,7 @@
    "vue": "^3.5.32",
    "vue-i18n": "^11.3.2",
    "vue-router": "^4.6.4",
-    "vue-tsc": "^3.2.6",
+    "vue-tsc": "^3.2.8",
    "ws": "^8.20.0"
  }
-}
+}
@@ -16,6 +16,7 @@ import {
  renderHighlightedCodeBlock,
 } from "./highlight";
 import { useGlobalSpeech } from "@/composables/useSpeech";
+import { useVoiceSettings } from "@/composables/useVoiceSettings";

 const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;

@@ -79,6 +80,7 @@ const previewUrl = ref<string | null>(null);
 const chatStore = useChatStore();
 const settingsStore = useSettingsStore();
 const speech = useGlobalSpeech();
+const voiceSettings = useVoiceSettings();

 // Copy entire bubble content
 const copyableContent = computed(() => {
@@ -351,25 +353,90 @@ const renderedToolResult = computed(() => {

 // Speech playback
 const canPlaySpeech = computed(() => {
-  // 只有 assistant 消息可以播放，且浏览器支持 Web Speech API
-  return props.message.role === 'assistant' &&
-         speech.isSupported &&
-         copyableContent.value;
-});
+  // Only assistant messages can be read aloud
+  if (props.message.role !== 'assistant') return false
+  // Nothing to read when the bubble has no copyable content
+  if (!copyableContent.value) return false
+  // OpenAI / Custom / Edge do not depend on the browser's Web Speech API
+  if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') return true
+  // Any other provider needs browser speech support
+  return speech.isSupported
+})

 const isPlayingThisMessage = computed(() => {
-  return speech.currentMessageId.value === props.message.id && speech.isPlaying.value;
-});
+  // OpenAI / Custom / Edge mode: read the custom-TTS playback refs
+  if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
+    return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPlaying.value
+  }
+  // Otherwise read the browser-speech playback state
+  return speech.currentMessageId.value === props.message.id && speech.isPlaying.value
+})

 const isPausedThisMessage = computed(() => {
-  return speech.currentMessageId.value === props.message.id && speech.isPaused.value;
-});
+  // OpenAI / Custom / Edge mode: read the custom-TTS pause ref
+  if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
+    return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPaused.value
+  }
+  // Otherwise read the browser-speech pause state
+  return speech.currentMessageId.value === props.message.id && speech.isPaused.value
+})

function handleSpeechToggle() {
+  // Click handler for the speech button: dispatches this message's content to
+  // the TTS provider selected in the voice settings. Does nothing when the
+  // message cannot be played.
  if (!canPlaySpeech.value) {
    return
  }
  const content = props.message.content || ''
+
+  // OpenAI TTS mode
+  if (voiceSettings.provider.value === 'openai') {
+    const apiUrl = voiceSettings.openaiBaseUrl.value
+    if (!apiUrl) {
+      console.warn('[MessageItem] OpenAI TTS 地址为空')
+      return
+    }
+    speech.openaiToggle(props.message.id, content, {
+      baseUrl: voiceSettings.openaiBaseUrl.value,
+      apiKey: voiceSettings.openaiApiKey.value,
+      model: voiceSettings.openaiModel.value,
+      voice: voiceSettings.openaiVoice.value,
+    })
+    return
+  }
+
+  // Custom endpoint mode (OpenAI-compatible, e.g. GPT-SoVITS)
+  if (voiceSettings.provider.value === 'custom') {
+    const apiUrl = voiceSettings.customUrl.value
+    if (!apiUrl) {
+      console.warn('[MessageItem] 自定义 TTS 地址为空')
+      return
+    }
+    speech.openaiToggle(props.message.id, content, {
+      baseUrl: voiceSettings.customUrl.value,
+      apiKey: voiceSettings.customApiKey.value || undefined,
+    })
+    return
+  }
+
+  // Edge TTS mode
+  if (voiceSettings.provider.value === 'edge') {
+    // Fall back to the built-in backend proxy when no URL is configured
+    const apiUrl = voiceSettings.edgeUrl.value || '/api/tts/proxy'
+    speech.openaiToggle(props.message.id, content, {
+      baseUrl: apiUrl,
+      voice: voiceSettings.edgeVoice.value,
+    })
+    return
+  }
+
+  // Web Speech API mode
+  if (voiceSettings.provider.value === 'webspeech') {
+    const text = speech.extractReadableText(content)
+    if (text) {
+      // Stop whatever is currently speaking before starting this message
+      speech.stop(false)
+      speech.speakViaBrowser(props.message.id, text, {
+        voiceName: voiceSettings.webspeechVoice.value || undefined,
+      })
+    }
+    return
+  }
+
+  // Fallback (when no provider matched)
  speech.toggle(props.message.id, content)
 }

@@ -380,7 +447,37 @@ onMounted(() => {
  autoPlayHandler = (e: Event) => {
    const customEvent = e as CustomEvent<{ messageId: string; content: string }>
    if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
-      speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '')
+      // Prefer the content carried by the event; fall back to the stored message
+      const content = customEvent.detail.content || props.message.content || ''
+      // Auto-play is fire-and-forget. useSpeech's openaiPlay logs a failed
+      // request and then rethrows, so the rejection is swallowed here to avoid
+      // an unhandled promise rejection.
+      const playRemote = (opts: Parameters<typeof speech.openaiPlay>[2]) => {
+        void Promise.resolve(speech.openaiPlay(props.message.id, content, opts)).catch(() => {})
+      }
+      if (voiceSettings.provider.value === 'openai') {
+        // Skip silently when no endpoint is configured
+        const apiUrl = voiceSettings.openaiBaseUrl.value
+        if (apiUrl) playRemote({
+          baseUrl: apiUrl,
+          apiKey: voiceSettings.openaiApiKey.value,
+          model: voiceSettings.openaiModel.value,
+          voice: voiceSettings.openaiVoice.value,
+        })
+      } else if (voiceSettings.provider.value === 'custom') {
+        // Skip silently when no endpoint is configured
+        const apiUrl = voiceSettings.customUrl.value
+        if (apiUrl) playRemote({
+          baseUrl: apiUrl,
+          apiKey: voiceSettings.customApiKey.value || undefined,
+        })
+      } else if (voiceSettings.provider.value === 'edge') {
+        // Edge TTS always goes through the built-in backend proxy here
+        playRemote({
+          baseUrl: '/api/tts/proxy',
+          voice: voiceSettings.edgeVoice.value,
+        })
+      } else if (voiceSettings.provider.value === 'webspeech') {
+        const text = speech.extractReadableText(content)
+        if (text) {
+          // Stop whatever is currently speaking before starting this message
+          speech.stop(false)
+          speech.speakViaBrowser(props.message.id, text, {
+            voiceName: voiceSettings.webspeechVoice.value || undefined,
+          })
+        }
+      } else {
+        // Unknown provider: keep the previous queue-based behaviour
+        speech.enqueue(props.message.id, content)
+      }
    }
  }
  window.addEventListener('auto-play-speech', autoPlayHandler)
@@ -0,0 +1,327 @@
+<script setup lang="ts">
+import { ref, onMounted, onBeforeUnmount } from 'vue'
+import { NSelect, NInput, NButton } from 'naive-ui'
+import { useI18n } from 'vue-i18n'
+import { useVoiceSettings } from '@/composables/useVoiceSettings'
+import { useSpeech } from '@/composables/useSpeech'
+import SettingRow from './SettingRow.vue'
+
+const { t } = useI18n()
+const vs = useVoiceSettings()
+const speech = useSpeech()
+
+const testText = ref(t('settings.voice.testTextDefault'))
+const testPlaying = ref(false)
+
+const providerOptions = [
+  { label: t('settings.voice.providerWebSpeech'), value: 'webspeech' },
+  { label: t('settings.voice.providerOpenai'), value: 'openai' },
+  { label: t('settings.voice.providerCustom'), value: 'custom' },
+  { label: t('settings.voice.providerEdge'), value: 'edge' },
+]
+
+const openaiModelOptions = [
+  { label: 'tts-1', value: 'tts-1' },
+  { label: 'tts-1-hd', value: 'tts-1-hd' },
+]
+
+const openaiVoiceOptions = [
+  { label: 'Alloy', value: 'alloy' },
+  { label: 'Echo', value: 'echo' },
+  { label: 'Fable', value: 'fable' },
+  { label: 'Nova', value: 'nova' },
+  { label: 'Onyx', value: 'onyx' },
+  { label: 'Shimmer', value: 'shimmer' },
+]
+
+const edgeVoiceOptions = [
+  { label: '晓晓 (zh-CN-XiaoxiaoNeural)', value: 'zh-CN-XiaoxiaoNeural' },
+  { label: '晓萱 (zh-CN-XiaoxuanNeural)', value: 'zh-CN-XiaoxuanNeural' },
+  { label: '云希 (zh-CN-YunxiNeural)', value: 'zh-CN-YunxiNeural' },
+  { label: '云健 (zh-CN-YunjianNeural)', value: 'zh-CN-YunjianNeural' },
+  { label: '云扬 (zh-CN-YunyangNeural)', value: 'zh-CN-YunyangNeural' },
+  { label: 'Jenny (en-US-JennyNeural)', value: 'en-US-JennyNeural' },
+  { label: 'Aria (en-US-AriaNeural)', value: 'en-US-AriaNeural' },
+  { label: 'Guy (en-US-GuyNeural)', value: 'en-US-GuyNeural' },
+  { label: 'Sonia (en-GB-SoniaNeural)', value: 'en-GB-SoniaNeural' },
+  { label: 'Ryan (en-GB-RyanNeural)', value: 'en-GB-RyanNeural' },
+  { label: 'Nanami (ja-JP-NanamiNeural)', value: 'ja-JP-NanamiNeural' },
+  { label: 'Keita (ja-JP-KeitaNeural)', value: 'ja-JP-KeitaNeural' },
+  { label: 'Sun-Hi (ko-KR-SunHiNeural)', value: 'ko-KR-SunHiNeural' },
+  { label: 'InJoon (ko-KR-InJoonNeural)', value: 'ko-KR-InJoonNeural' },
+  { label: 'Denise (fr-FR-DeniseNeural)', value: 'fr-FR-DeniseNeural' },
+  { label: 'Henri (fr-FR-HenriNeural)', value: 'fr-FR-HenriNeural' },
+  { label: 'Katja (de-DE-KatjaNeural)', value: 'de-DE-KatjaNeural' },
+  { label: 'Conrad (de-DE-ConradNeural)', value: 'de-DE-ConradNeural' },
+]
+
+// Voices offered by the browser's Web Speech API, shown in the voice picker.
+const webspeechVoices = ref<SpeechSynthesisVoice[]>([])
+
+/** Copy the browser's current voice list into the reactive ref. */
+function refreshWebspeechVoices() {
+  webspeechVoices.value = window.speechSynthesis.getVoices()
+}
+
+onMounted(() => {
+  if (!('speechSynthesis' in window)) return
+  // The list may already be available; if not, it stays empty until the event fires
+  refreshWebspeechVoices()
+  // Use an event listener instead of assigning `onvoiceschanged`, so a handler
+  // installed elsewhere on the shared speechSynthesis object is not overwritten
+  window.speechSynthesis.addEventListener('voiceschanged', refreshWebspeechVoices)
+})
+
+onBeforeUnmount(() => {
+  if (!('speechSynthesis' in window)) return
+  // Detach so the unmounted component is no longer referenced by the browser
+  window.speechSynthesis.removeEventListener('voiceschanged', refreshWebspeechVoices)
+})
+
+/**
+ * Speak the audition text with the currently selected provider.
+ * `testPlaying` is raised for the duration of the call and always lowered
+ * again; failures are logged to the console rather than rethrown.
+ */
+async function handleTest() {
+  const sample = testText.value.trim()
+  if (sample.length === 0) return
+
+  testPlaying.value = true
+  try {
+    switch (vs.provider.value) {
+      case 'webspeech': {
+        // Browser speech is started synchronously; stop anything already speaking
+        speech.stop(false)
+        speech.speakViaBrowser('__test__', sample, {
+          voiceName: vs.webspeechVoice.value || undefined,
+        })
+        break
+      }
+      case 'openai': {
+        const endpoint = vs.openaiBaseUrl.value
+        if (!endpoint) {
+          console.warn('[VoiceSettings] OpenAI base URL empty')
+          return
+        }
+        await speech.openaiPlay('__test__', sample, {
+          baseUrl: endpoint,
+          apiKey: vs.openaiApiKey.value || undefined,
+          model: vs.openaiModel.value,
+          voice: vs.openaiVoice.value,
+        })
+        break
+      }
+      case 'custom': {
+        const endpoint = vs.customUrl.value
+        if (!endpoint) {
+          console.warn('[VoiceSettings] Custom URL empty')
+          return
+        }
+        await speech.openaiPlay('__test__', sample, {
+          baseUrl: endpoint,
+          apiKey: vs.customApiKey.value || undefined,
+        })
+        break
+      }
+      case 'edge': {
+        // Edge TTS is served by the built-in backend proxy
+        await speech.openaiPlay('__test__', sample, {
+          baseUrl: '/api/tts/proxy',
+          voice: vs.edgeVoice.value,
+        })
+        break
+      }
+    }
+  } catch (err) {
+    console.error('[VoiceSettings] Test failed:', err)
+  } finally {
+    testPlaying.value = false
+  }
+}
+</script>
+
+<template>
+  <div class="voice-settings">
+    <SettingRow
+      :label="t('settings.voice.ttsProvider')"
+      :hint="t('settings.voice.ttsProviderHint')"
+    >
+      <NSelect
+        :value="vs.provider.value"
+        :options="providerOptions"
+        size="small"
+        style="width: 300px"
+        @update:value="vs.setProvider"
+      />
+    </SettingRow>
+
+    <!-- ════ WebSpeech API ════ -->
+    <template v-if="vs.provider.value === 'webspeech'">
+      <SettingRow
+        :label="t('settings.voice.webspeechVoice')"
+        :hint="t('settings.voice.webspeechVoiceHint')"
+      >
+        <NSelect
+          :value="vs.webspeechVoice.value"
+          size="small"
+          filterable
+          style="width: 320px"
+          :placeholder="t('settings.voice.webspeechVoicePlaceholder')"
+          :consistent-menu-width="false"
+          :options="webspeechVoices.map(v => ({
+            label: `${v.name} (${v.lang})`,
+            value: v.name,
+          }))"
+          @update:value="vs.setWebSpeechVoice"
+        />
+      </SettingRow>
+
+    </template>
+
+    <!-- ════ OpenAI TTS ════ -->
+    <template v-if="vs.provider.value === 'openai'">
+      <SettingRow
+        :label="t('settings.voice.openaiKey')"
+        :hint="t('settings.voice.openaiKeyHint')"
+      >
+        <NInput
+          :value="vs.openaiApiKey.value"
+          type="password"
+          size="small"
+          show-password-on="click"
+          style="width: 360px"
+          placeholder="sk-..."
+          @update:value="vs.setOpenaiApiKey"
+        />
+      </SettingRow>
+
+      <SettingRow
+        :label="t('settings.voice.openaiUrl')"
+        :hint="t('settings.voice.openaiUrlHint')"
+      >
+        <NInput
+          :value="vs.openaiBaseUrl.value"
+          size="small"
+          style="width: 360px"
+          placeholder="https://api.openai.com/v1/audio/speech"
+          @update:value="vs.setOpenaiBaseUrl"
+        />
+      </SettingRow>
+
+      <SettingRow
+        :label="t('settings.voice.openaiModel')"
+        :hint="t('settings.voice.openaiModelHint')"
+      >
+        <NSelect
+          :value="vs.openaiModel.value"
+          :options="openaiModelOptions"
+          size="small"
+          style="width: 200px"
+          @update:value="vs.setOpenaiModel"
+        />
+      </SettingRow>
+
+      <SettingRow
+        :label="t('settings.voice.openaiVoice')"
+        :hint="t('settings.voice.openaiVoiceHint')"
+      >
+        <NSelect
+          :value="vs.openaiVoice.value"
+          :options="openaiVoiceOptions"
+          size="small"
+          style="width: 200px"
+          @update:value="vs.setOpenaiVoice"
+        />
+      </SettingRow>
+
+    </template>
+
+    <!-- ════ Custom Endpoint ════ -->
+    <template v-if="vs.provider.value === 'custom'">
+      <div class="provider-hint">
+        {{ t('settings.voice.customHint') }}
+      </div>
+
+      <SettingRow
+        :label="t('settings.voice.customUrl')"
+        :hint="t('settings.voice.customUrlHint')"
+      >
+        <NInput
+          :value="vs.customUrl.value"
+          size="small"
+          style="width: 360px"
+          :placeholder="t('settings.voice.customUrlPlaceholder')"
+          @update:value="vs.setCustomUrl"
+        />
+      </SettingRow>
+
+      <SettingRow
+        :label="t('settings.voice.customApiKey')"
+        :hint="t('settings.voice.customApiKeyHint')"
+      >
+        <NInput
+          :value="vs.customApiKey.value"
+          type="password"
+          size="small"
+          show-password-on="click"
+          style="width: 360px"
+          :placeholder="t('settings.voice.customApiKeyPlaceholder')"
+          @update:value="vs.setCustomApiKey"
+        />
+      </SettingRow>
+
+
+    </template>
+
+    <!-- ════ Edge TTS ════ -->
+    <template v-if="vs.provider.value === 'edge'">
+      <div class="provider-hint">
+        {{ t('settings.voice.edgeHint') }}
+      </div>
+
+<SettingRow
+        :label="t('settings.voice.edgeVoice')"
+        :hint="t('settings.voice.edgeVoiceHint')"
+      >
+        <NSelect
+          :value="vs.edgeVoice.value"
+          :options="edgeVoiceOptions"
+          size="small"
+          filterable
+          style="width: 320px"
+          :consistent-menu-width="false"
+          @update:value="vs.setEdgeVoice"
+        />
+      </SettingRow>
+
+    </template>
+
+    <!-- ─── Test / Audition ─── -->
+    <div class="test-section">
+      <h4 class="test-title">{{ t('settings.voice.testTitle') }}</h4>
+      <div class="test-row">
+        <NInput
+          v-model:value="testText"
+          size="small"
+          style="width: 360px"
+          :placeholder="t('settings.voice.testTextPlaceholder')"
+          :disabled="testPlaying"
+          @keyup.enter="handleTest"
+        />
+        <NButton
+          size="small"
+          type="primary"
+          :loading="testPlaying"
+          :disabled="testPlaying"
+          @click="handleTest"
+        >
+          {{ testPlaying ? t('settings.voice.testButtonPlaying') : t('settings.voice.testButton') }}
+        </NButton>
+      </div>
+    </div>
+  </div>
+</template>
+
+<style scoped lang="scss">
+.voice-settings {
+  display: flex;
+  flex-direction: column;
+  gap: 16px;
+}
+
+.provider-hint {
+  font-size: 12px;
+  color: #888;
+  line-height: 1.5;
+  padding: 0 0 4px 0;
+}
+
+.test-section {
+  padding-top: 16px;
+
+  .test-title {
+    margin: 0 0 8px 0;
+    font-size: 14px;
+    font-weight: 600;
+  }
+
+  .test-row {
+    display: flex;
+    gap: 8px;
+    align-items: center;
+  }
+}
+</style>
@@ -3,6 +3,14 @@ import { generateSpeech, playAudioBlob } from '@/api/hermes/tts'

 export interface SpeechOptions {
  lang?: string      // Language tag, e.g. 'zh-CN', 'en-US'
+  voiceName?: string // Name of the WebSpeech voice to use
+}
+
+/** Options for a request to an OpenAI-compatible text-to-speech service. */
+export interface OpenaiTtsOptions {
+  // Service root; openaiPlay posts to its `/audio/speech` path
+  baseUrl: string
+  // Sent as a Bearer token in the Authorization header when set
+  apiKey?: string
+  // openaiPlay falls back to 'tts-1' when omitted
+  model?: string
+  // openaiPlay falls back to 'alloy' when omitted
+  voice?: string
 }

 export interface SpeechState {
@@ -39,6 +47,11 @@ export function useSpeech() {
  let playbackToken = 0
  const speechQueue: SpeechQueueItem[] = []

+  // Playback state for the custom TTS engines (OpenAI / Custom / Edge)
+  const isCustomPlaying = ref(false)
+  const isCustomPaused = ref(false)
+  const currentCustomMessageId = ref<string | null>(null)
+
  // 加载可用语音列表
  function loadVoices() {
    availableVoices.value = synth.getVoices()
@@ -162,14 +175,25 @@ export function useSpeech() {

  // ─── Browser Engine (Web Speech API) ────────────────────────

-  function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) {
+  function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token?: number) {
+    token = token || ++playbackToken
    utterance = new SpeechSynthesisUtterance(text)
    const activeUtterance = utterance

    utterance.rate = 1
    utterance.pitch = 1
    utterance.volume = 1
-    utterance.voice = getDefaultVoice()
+
+    // 使用指定的音色（如果有），否则用默认
+    if (options.voiceName) {
+      const voice = availableVoices.value.find(v => v.name === options.voiceName)
+      if (voice) {
+        utterance.voice = voice
+      }
+    }
+    if (!utterance.voice) {
+      utterance.voice = getDefaultVoice()
+    }

    if (options.lang) {
      utterance.lang = options.lang
@@ -218,6 +242,115 @@ export function useSpeech() {
    synth.speak(utterance)
  }

+  // ─── OpenAI-compatible TTS Engine ────────────────────────────
+
+  let customAudio: HTMLAudioElement | null = null
+
+  /**
+   * Resolve the speech endpoint of an OpenAI-compatible service.
+   * Accepts either the service root (".../v1") or the full endpoint
+   * (".../v1/audio/speech"), so the path is never appended twice.
+   */
+  function resolveSpeechEndpoint(baseUrl: string): string {
+    const trimmed = baseUrl.replace(/\/+$/, '')
+    return trimmed.endsWith('/audio/speech') ? trimmed : `${trimmed}/audio/speech`
+  }
+
+  /** Pause the active custom audio element, if any, and free its blob URL. */
+  function releaseCustomAudio() {
+    if (!customAudio) return
+    customAudio.pause()
+    if (customAudio.src.startsWith('blob:')) URL.revokeObjectURL(customAudio.src)
+    customAudio = null
+  }
+
+  /** Mark custom TTS playback as idle. */
+  function resetCustomState() {
+    isCustomPlaying.value = false
+    isCustomPaused.value = false
+    currentCustomMessageId.value = null
+  }
+
+  /**
+   * Synthesize `content` through an OpenAI-compatible endpoint and play it.
+   *
+   * @param messageId - Id recorded as the message currently being played.
+   * @param content - Raw message content; reduced to readable text first.
+   * @param opts - Endpoint, credentials and voice selection.
+   * @returns Resolves once playback has started, or immediately when there
+   *   is nothing to read or a newer playback superseded this one.
+   * @throws Rethrows request and playback-start failures after logging them,
+   *   unless a newer playback has superseded this one.
+   */
+  async function openaiPlay(
+    messageId: string,
+    content: string,
+    opts: OpenaiTtsOptions,
+  ) {
+    const text = extractReadableText(content)
+    if (!text) return
+
+    // Every playback takes a fresh token; stale async continuations compare
+    // against it and bail out.
+    const token = ++playbackToken
+
+    // Stop a clip that is still playing so two clips never overlap
+    releaseCustomAudio()
+
+    isCustomPlaying.value = true
+    isCustomPaused.value = false
+    currentCustomMessageId.value = messageId
+
+    const url = resolveSpeechEndpoint(opts.baseUrl)
+    const body = {
+      model: opts.model || 'tts-1',
+      input: text,
+      voice: opts.voice || 'alloy',
+    }
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    }
+    if (opts.apiKey) {
+      headers['Authorization'] = `Bearer ${opts.apiKey}`
+    }
+
+    try {
+      const res = await fetch(url, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify(body),
+      })
+
+      if (token !== playbackToken) return
+
+      if (!res.ok) {
+        const errText = await res.text().catch(() => '')
+        throw new Error(`OpenAI TTS 返回 ${res.status}: ${errText || res.statusText}`)
+      }
+
+      const audioBlob = await res.blob()
+      if (token !== playbackToken) return
+
+      const audioUrl = URL.createObjectURL(audioBlob)
+      const audio = new Audio(audioUrl)
+      customAudio = audio
+
+      // Shared teardown for normal completion and decode/playback errors
+      const finish = (failed: boolean) => {
+        if (token !== playbackToken) return
+        URL.revokeObjectURL(audioUrl)
+        if (failed) console.warn('[useSpeech] Custom TTS audio playback error')
+        resetCustomState()
+        customAudio = null
+      }
+      audio.onended = () => finish(false)
+      audio.onerror = () => finish(true)
+
+      await audio.play()
+    } catch (err) {
+      if (token !== playbackToken) return
+      console.error('[useSpeech] OpenAI TTS 请求失败:', err)
+      // play() may reject after the element was created; drop it and its blob
+      releaseCustomAudio()
+      resetCustomState()
+      throw err
+    }
+  }
+
+  /**
+   * Toggle custom-TTS playback for a message.
+   * If `messageId` is the message currently playing, pause or resume it;
+   * otherwise stop other speech and start synthesizing this message.
+   *
+   * @param messageId - Id of the message whose speech button was used.
+   * @param content - Raw message content handed to openaiPlay.
+   * @param opts - Endpoint, credentials and voice selection.
+   */
+  function openaiToggle(messageId: string, content: string, opts: OpenaiTtsOptions) {
+    const isActiveMessage = currentCustomMessageId.value === messageId && isCustomPlaying.value
+
+    if (!isActiveMessage) {
+      // Stop other speech and start new
+      stop(false)
+      if (customAudio) {
+        customAudio.pause()
+        // Free the blob backing the abandoned clip
+        if (customAudio.src.startsWith('blob:')) URL.revokeObjectURL(customAudio.src)
+        customAudio = null
+      }
+      // openaiPlay logs and rethrows failures; nothing awaits this call, so
+      // swallow the rejection to avoid an unhandled promise rejection.
+      void openaiPlay(messageId, content, opts).catch(() => {})
+      return
+    }
+
+    if (isCustomPaused.value) {
+      // Resume
+      isCustomPaused.value = false
+      if (customAudio) {
+        customAudio.play().catch((err: unknown) => {
+          // Resuming can be refused by the browser; fall back to the paused state
+          console.warn('[useSpeech] Failed to resume custom TTS audio:', err)
+          isCustomPaused.value = true
+        })
+      }
+    } else {
+      // Pause
+      if (customAudio) {
+        customAudio.pause()
+      }
+      isCustomPaused.value = true
+    }
+  }
+
  // ─── Unified speak ──────────────────────────────────────────

  function speak(messageId: string, text: string, options: SpeechOptions = {}) {
@@ -317,6 +450,11 @@ export function useSpeech() {
    progress: computed(() => state.value.progress),
    engine: computed(() => state.value.engine),

+    // Custom TTS state
+    isCustomPlaying,
+    isCustomPaused,
+    currentCustomMessageId,
+
    play,
    pause,
    resume,
@@ -325,6 +463,13 @@ export function useSpeech() {
    enqueue,
    getDefaultVoice,
    extractReadableText,
+
+    // OpenAI-compatible TTS
+    openaiPlay,
+    openaiToggle,
+
+    // Browser WebSpeech (直接调用避免 Rolldown 树摇)
+    speakViaBrowser,
  }
 }

@@ -0,0 +1,164 @@
+import { ref, watch } from 'vue'
+
+/** Identifier of a text-to-speech backend selectable in the voice settings. */
+export type TtsProvider = 'webspeech' | 'openai' | 'custom' | 'edge'
+
+/** Shape of the voice settings object persisted to localStorage. */
+export interface VoiceSettingsData {
+  // Currently selected backend
+  provider: TtsProvider
+
+  // WebSpeech
+  webspeechVoice: string
+
+  // OpenAI
+  openaiApiKey: string
+  openaiBaseUrl: string
+  openaiModel: string
+  openaiVoice: string
+
+  // Custom endpoint (OpenAI-compatible)
+  customUrl: string
+  customApiKey: string
+
+  // Edge TTS
+  // NOTE: sanitize() clears edgeUrl every time settings are loaded
+  edgeUrl: string
+  edgeVoice: string
+}
+
+const STORAGE_KEY = 'hermes-tts-settings-v2'
+
+/**
+ * One-shot migration from the legacy `hermes-tts-settings` entry to the
+ * current storage key. The legacy `gptsovits` provider becomes `custom`,
+ * carrying its URL over when no custom URL is set. Any storage or parse
+ * failure is ignored and leaves the legacy entry in place.
+ */
+function migrateOldKeys() {
+  const legacyKey = 'hermes-tts-settings'
+  try {
+    const legacyRaw = localStorage.getItem(legacyKey)
+    if (!legacyRaw) return
+
+    const legacy = JSON.parse(legacyRaw)
+    // The old 'custom' provider already matches the new name; only
+    // 'gptsovits' needs to be rewritten.
+    const wasGptSovits = legacy.provider === 'gptsovits'
+    if (wasGptSovits) {
+      legacy.provider = 'custom'
+      const shouldCarryUrl = legacy.gptsovitsUrl && !legacy.customUrl
+      if (shouldCarryUrl) {
+        legacy.customUrl = legacy.gptsovitsUrl
+      }
+    }
+
+    // Fill in defaults for fields the legacy entry did not have
+    const migrated = { ...DEFAULT, ...legacy }
+    localStorage.setItem(STORAGE_KEY, JSON.stringify(migrated))
+    localStorage.removeItem(legacyKey)
+  } catch {
+    /* ignore */
+  }
+}
+
+/**
+ * Baseline settings: merged underneath stored values on load, and the
+ * values restored by `reset()`.
+ */
+const DEFAULT: VoiceSettingsData = {
+  provider: 'webspeech',
+
+  // Empty string means no specific WebSpeech voice was chosen
+  webspeechVoice: '',
+
+  openaiApiKey: '',
+  openaiBaseUrl: '',
+  openaiModel: 'tts-1',
+  openaiVoice: 'alloy',
+
+  customUrl: '',
+  customApiKey: '',
+
+  edgeUrl: '',
+  edgeVoice: 'zh-CN-XiaoxiaoNeural',
+}
+
+/**
+ * Clear any stored Edge TTS adapter URL (Edge TTS now uses the internal
+ * node-edge-tts backend). Mutates `data` in place and returns it.
+ */
+function sanitize(data: VoiceSettingsData): VoiceSettingsData {
+  const hasStoredEdgeUrl = Boolean(data.edgeUrl)
+  if (hasStoredEdgeUrl) {
+    data.edgeUrl = ''
+  }
+  return data
+}
+
+/**
+ * Read the persisted settings, layered over DEFAULT and sanitized.
+ * Returns a fresh copy of DEFAULT when nothing is stored or when reading
+ * or parsing fails.
+ */
+function load(): VoiceSettingsData {
+  try {
+    const stored = localStorage.getItem(STORAGE_KEY)
+    if (stored) {
+      const merged = { ...DEFAULT, ...JSON.parse(stored) }
+      return sanitize(merged)
+    }
+  } catch {
+    /* ignore */
+  }
+  return { ...DEFAULT }
+}
+
+// Run migration once on import
+migrateOldKeys()
+
+// ── Reactive state ──
+// Read and parse storage once; every ref below is seeded from this snapshot
+// instead of re-reading localStorage per field.
+const initial = load()
+
+const provider = ref<TtsProvider>(initial.provider)
+
+// WebSpeech
+const webspeechVoice = ref<string>(initial.webspeechVoice)
+
+// OpenAI
+const openaiApiKey = ref<string>(initial.openaiApiKey)
+const openaiBaseUrl = ref<string>(initial.openaiBaseUrl)
+const openaiModel = ref<string>(initial.openaiModel)
+const openaiVoice = ref<string>(initial.openaiVoice)
+
+// Custom
+const customUrl = ref<string>(initial.customUrl)
+const customApiKey = ref<string>(initial.customApiKey)
+
+// Edge TTS
+const edgeUrl = ref<string>(initial.edgeUrl)
+const edgeVoice = ref<string>(initial.edgeVoice)
+
+// Auto-persist on change: any setting update rewrites the whole stored object
+watch(
+  [provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
+   customUrl, customApiKey, edgeUrl, edgeVoice],
+  () => {
+    const snapshot: VoiceSettingsData = {
+      provider: provider.value,
+      webspeechVoice: webspeechVoice.value,
+      openaiApiKey: openaiApiKey.value,
+      openaiBaseUrl: openaiBaseUrl.value,
+      openaiModel: openaiModel.value,
+      openaiVoice: openaiVoice.value,
+      customUrl: customUrl.value,
+      customApiKey: customApiKey.value,
+      edgeUrl: edgeUrl.value,
+      edgeVoice: edgeVoice.value,
+    }
+    try {
+      localStorage.setItem(STORAGE_KEY, JSON.stringify(snapshot))
+    } catch (err) {
+      // setItem can throw (e.g. storage full or unavailable); keep the
+      // in-memory settings usable, matching the guarded reads in load()
+      console.warn('[useVoiceSettings] Failed to persist voice settings:', err)
+    }
+  },
+)
+
+/**
+ * Access the shared voice settings. The refs are module-level, so every
+ * caller sees and mutates the same state; changes are persisted by the
+ * module's watcher.
+ */
+export function useVoiceSettings() {
+  /** Restore every setting to its DEFAULT value. */
+  function reset() {
+    provider.value = DEFAULT.provider
+    webspeechVoice.value = DEFAULT.webspeechVoice
+    openaiApiKey.value = DEFAULT.openaiApiKey
+    openaiBaseUrl.value = DEFAULT.openaiBaseUrl
+    openaiModel.value = DEFAULT.openaiModel
+    openaiVoice.value = DEFAULT.openaiVoice
+    customUrl.value = DEFAULT.customUrl
+    customApiKey.value = DEFAULT.customApiKey
+    edgeUrl.value = DEFAULT.edgeUrl
+    edgeVoice.value = DEFAULT.edgeVoice
+  }
+
+  return {
+    // Reactive values
+    provider,
+    webspeechVoice,
+    openaiApiKey,
+    openaiBaseUrl,
+    openaiModel,
+    openaiVoice,
+    customUrl,
+    customApiKey,
+    edgeUrl,
+    edgeVoice,
+
+    // Setters, usable directly as `@update:value` handlers
+    setProvider: (v: TtsProvider) => { provider.value = v },
+    setWebSpeechVoice: (v: string) => { webspeechVoice.value = v },
+    setOpenaiApiKey: (v: string) => { openaiApiKey.value = v },
+    setOpenaiBaseUrl: (v: string) => { openaiBaseUrl.value = v },
+    setOpenaiModel: (v: string) => { openaiModel.value = v },
+    setOpenaiVoice: (v: string) => { openaiVoice.value = v },
+    setCustomUrl: (v: string) => { customUrl.value = v },
+    setCustomApiKey: (v: string) => { customApiKey.value = v },
+    setEdgeUrl: (v: string) => { edgeUrl.value = v },
+    setEdgeVoice: (v: string) => { edgeVoice.value = v },
+
+    reset,
+  }
+}
@@ -511,6 +511,8 @@ jobTriggered: 'Job ausgelost',
      session: 'Sitzung',
      privacy: 'Datenschutz',
      apiServer: 'API-Server',
+      models: 'Modelle',
+      voice: 'Sprache',
    },
    display: {
      streaming: 'Streaming-Antworten',
@@ -589,6 +591,55 @@ jobTriggered: 'Job ausgelost',
      cors: 'CORS-Ursprunge',
      corsHint: 'Erlaubte Cross-Origin-Quellen',
    },
+    voice: {
+      ttsProvider: 'TTS-Anbieter',
+      ttsProviderHint: 'Waehlen Sie die Sprachsynthese-Engine fuer die Nachrichtenwiedergabe',
+      providerWebSpeech: 'WebSpeech API (Browser)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: 'Benutzerdefinierter Endpunkt (OpenAI-kompatibel)',
+      providerEdge: 'Edge TTS (Kostenlos, kein API-Key erforderlich)',
+
+      // WebSpeech
+      webspeechVoice: 'Stimme',
+      webspeechVoiceHint: 'Waehlen Sie eine Stimme aus Ihrem Browser oder Betriebssystem',
+      webspeechVoicePlaceholder: 'Auto (Standardstimme)',
+
+      // OpenAI
+      openaiKey: 'API-Key',
+      openaiKeyHint: 'Ihr OpenAI API-Key mit TTS-Zugriff',
+      openaiUrl: 'API-Basis-URL',
+      openaiUrlHint: 'z.B. https://api.openai.com/v1/audio/speech',
+      openaiModel: 'Modell',
+      openaiModelHint: 'tts-1 (schneller) / tts-1-hd (hoehere Qualitaet)',
+      openaiVoice: 'Stimme',
+      openaiVoiceHint: 'Stimme fuer die Synthese',
+
+      // Custom endpoint
+      customHint: 'Jede OpenAI-kompatible TTS-API verwenden — funktioniert mit GPT-SoVITS, CosyVoice, usw.',
+      customUrl: 'API-URL',
+      customUrlHint: 'Basis-URL Ihres TTS-Dienstes',
+      customUrlPlaceholder: 'Die im lokalen Adapter konfigurierte Adresse, z.B. http://127.0.0.1:9880',
+      customApiKey: 'API-Key (optional)',
+      customApiKeyHint: 'Einige benutzerdefinierte Endpunkte erfordern Authentifizierung',
+      customApiKeyPlaceholder: 'Leer lassen wenn nicht benoetigt',
+
+      // Edge TTS
+      edgeHint: 'Angetrieben von Microsoft Edge TTS (node-edge-tts).',
+      edgeUrl: 'Adapter-URL',
+      edgeUrlHint: 'Adresse des Edge TTS-Adapters, z.B. http://127.0.0.1:9882',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: 'Stimme',
+      edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese',
+
+      // Test
+      testTitle: 'Sprachtest',
+      testText: 'Testtext',
+      testTextPlaceholder: 'Text zum Testen eingeben...',
+      testTextDefault: 'Hallo, dies ist ein Sprachtest.',
+      testButton: 'Testen',
+      testButtonPlaying: 'Wiedergabe...',
+      testFailed: 'Test fehlgeschlagen: {error}',
+    },
    lockedIps: {
      title: 'Gesperrte IPs',
      count: '{count} gesperrt',
@@ -651,6 +651,7 @@ export default {
      privacy: 'Privacy',
      apiServer: 'API Server',
      models: 'Models',
+      voice: 'Voice',
    },
    models: {
      apiKey: 'API Key',
@@ -747,6 +748,54 @@ export default {
      unlocked: 'IP unlocked',
      allUnlocked: '{count} IPs unlocked',
    },
+    voice: {
+      ttsProvider: 'TTS Provider',
+      ttsProviderHint: 'Choose the text-to-speech engine for message playback',
+      providerWebSpeech: 'WebSpeech API (Browser)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: 'Custom Endpoint (OpenAI-compatible)',
+      providerEdge: 'Edge TTS (Free, no API Key)',
+
+      // WebSpeech
+      webspeechVoice: 'Voice',
+      webspeechVoiceHint: 'Select a voice from your browser or OS',
+      webspeechVoicePlaceholder: 'Auto (default voice)',
+
+      // OpenAI
+      openaiKey: 'API Key',
+      openaiKeyHint: 'Your OpenAI API key with TTS access',
+      openaiUrl: 'API Base URL',
+      openaiUrlHint: 'e.g. https://api.openai.com/v1/audio/speech',
+      openaiModel: 'Model',
+      openaiModelHint: 'tts-1 (faster) / tts-1-hd (higher quality)',
+      openaiVoice: 'Voice',
+      openaiVoiceHint: 'Voice to use for synthesis',
+
+      // Custom endpoint
+      customHint: 'Use any OpenAI-compatible TTS API — works with GPT-SoVITS, CosyVoice, etc.',
+      customUrl: 'API URL',
+      customUrlHint: 'Base URL of your TTS service',
+      customUrlPlaceholder: 'The address configured in the local adapter, e.g. http://127.0.0.1:9880',
+      customApiKey: 'API Key (Optional)',
+      customApiKeyHint: 'Some custom endpoints require authentication',
+      customApiKeyPlaceholder: 'Leave blank if not needed',
+      // Edge TTS
+      edgeHint: 'Powered by Microsoft Edge TTS (node-edge-tts).',
+      edgeUrl: 'Adapter URL',
+      edgeUrlHint: 'Address of your Edge TTS adapter, e.g. http://127.0.0.1:9882',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: 'Voice',
+      edgeVoiceHint: 'Select a voice for speech synthesis',
+
+      // Test
+      testTitle: 'Test Voice',
+      testText: 'Test Text',
+      testTextPlaceholder: 'Enter text to test...',
+      testTextDefault: 'Hello, this is a voice test.',
+      testButton: 'Test',
+      testButtonPlaying: 'Playing...',
+      testFailed: 'Test failed: {error}',
+    },
  },

  // Platform channel settings
@@ -511,6 +511,8 @@ jobTriggered: 'Job ejecutado',
      session: 'Sesion',
      privacy: 'Privacidad',
      apiServer: 'Servidor API',
+      models: 'Modelos',
+      voice: 'Voz',
    },
    display: {
      streaming: 'Respuestas en streaming',
@@ -589,6 +591,55 @@ jobTriggered: 'Job ejecutado',
      cors: 'Origenes CORS',
      corsHint: 'Fuentes cross-origin permitidas',
    },
+    voice: {
+      ttsProvider: 'Proveedor TTS',
+      ttsProviderHint: 'Elija el motor de texto a voz para la reproduccion de mensajes',
+      providerWebSpeech: 'WebSpeech API (Navegador)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: 'Endpoint personalizado (compatible con OpenAI)',
+      providerEdge: 'Edge TTS (Gratuito, sin clave API)',
+
+      // WebSpeech
+      webspeechVoice: 'Voz',
+      webspeechVoiceHint: 'Seleccione una voz de su navegador o sistema operativo',
+      webspeechVoicePlaceholder: 'Auto (voz predeterminada)',
+
+      // OpenAI
+      openaiKey: 'Clave API',
+      openaiKeyHint: 'Su clave API de OpenAI con acceso TTS',
+      openaiUrl: 'URL base de API',
+      openaiUrlHint: 'ej. https://api.openai.com/v1/audio/speech',
+      openaiModel: 'Modelo',
+      openaiModelHint: 'tts-1 (mas rapido) / tts-1-hd (mayor calidad)',
+      openaiVoice: 'Voz',
+      openaiVoiceHint: 'Voz a utilizar para la sintesis',
+
+      // Custom endpoint
+      customHint: 'Utilice cualquier API TTS compatible con OpenAI — funciona con GPT-SoVITS, CosyVoice, etc.',
+      customUrl: 'URL de API',
+      customUrlHint: 'URL base de su servicio TTS',
+      customUrlPlaceholder: 'Direccion configurada en el adaptador local, ej. http://127.0.0.1:9880',
+      customApiKey: 'Clave API (opcional)',
+      customApiKeyHint: 'Algunos endpoints personalizados requieren autenticacion',
+      customApiKeyPlaceholder: 'Dejar en blanco si no es necesario',
+
+      // Edge TTS
+      edgeHint: 'Impulsado por Microsoft Edge TTS (node-edge-tts).',
+      edgeUrl: 'URL del adaptador',
+      edgeUrlHint: 'Direccion del adaptador Edge TTS, ej. http://127.0.0.1:9882',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: 'Voz',
+      edgeVoiceHint: 'Seleccione una voz para la sintesis de voz',
+
+      // Test
+      testTitle: 'Prueba de voz',
+      testText: 'Texto de prueba',
+      testTextPlaceholder: 'Ingrese texto para probar...',
+      testTextDefault: 'Hola, esta es una prueba de voz.',
+      testButton: 'Probar',
+      testButtonPlaying: 'Reproduciendo...',
+      testFailed: 'Prueba fallida: {error}',
+    },
    lockedIps: {
      title: 'IPs bloqueadas',
      count: '{count} bloqueadas',
@@ -511,6 +511,8 @@ jobTriggered: 'Job declenche',
      session: 'Session',
      privacy: 'Confidentialite',
      apiServer: 'Serveur API',
+      models: 'Modèles',
+      voice: 'Voix',
    },
    display: {
      streaming: 'Reponses en continu',
@@ -589,6 +591,55 @@ jobTriggered: 'Job declenche',
      cors: 'Origines CORS',
      corsHint: 'Sources cross-origin autorisees',
    },
+    voice: {
+      ttsProvider: 'Fournisseur TTS',
+      ttsProviderHint: 'Choisir le moteur de synthese vocale pour la lecture des messages',
+      providerWebSpeech: 'WebSpeech API (Navigateur)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: "Point d'acces personnalise (compatible OpenAI)",
+      providerEdge: 'Edge TTS (Gratuit, sans cle API)',
+
+      // WebSpeech
+      webspeechVoice: 'Voix',
+      webspeechVoiceHint: "Choisir une voix depuis le navigateur ou l'OS",
+      webspeechVoicePlaceholder: 'Auto (voix par defaut)',
+
+      // OpenAI
+      openaiKey: 'Cle API',
+      openaiKeyHint: 'Votre cle API OpenAI avec acces TTS',
+      openaiUrl: 'URL de base API',
+      openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
+      openaiModel: 'Modele',
+      openaiModelHint: 'tts-1 (rapide) / tts-1-hd (haute qualite)',
+      openaiVoice: 'Voix',
+      openaiVoiceHint: 'Voix a utiliser pour la synthese',
+
+      // Custom endpoint
+      customHint: 'Utilisez toute API TTS compatible OpenAI — fonctionne avec GPT-SoVITS, CosyVoice, etc.',
+      customUrl: 'URL API',
+      customUrlHint: 'URL de base de votre service TTS',
+      customUrlPlaceholder: "Adresse configuree dans l'adaptateur local, ex. http://127.0.0.1:9880",
+      customApiKey: 'Cle API (optionnelle)',
+      customApiKeyHint: "Certains points d'acces personnalises necessitent une authentification",
+      customApiKeyPlaceholder: 'Laisser vide si inutile',
+
+      // Edge TTS
+      edgeHint: 'Propulse par Microsoft Edge TTS (node-edge-tts).',
+      edgeUrl: "URL de l'adaptateur",
+      edgeUrlHint: "Adresse de l'adaptateur Edge TTS, ex. http://127.0.0.1:9882",
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: 'Voix',
+      edgeVoiceHint: 'Choisir une voix pour la synthese vocale',
+
+      // Test
+      testTitle: 'Test vocal',
+      testText: 'Texte de test',
+      testTextPlaceholder: 'Entrez le texte a tester...',
+      testTextDefault: 'Bonjour, ceci est un test vocal.',
+      testButton: 'Tester',
+      testButtonPlaying: 'Lecture...',
+      testFailed: 'Echec du test : {error}',
+    },
    lockedIps: {
      title: 'IPs bloquees',
      count: '{count} bloquees',
@@ -511,6 +511,8 @@ export default {
      session: 'セッション',
      privacy: 'プライバシー',
      apiServer: 'API サーバー',
+      models: 'モデル',
+      voice: '音声',
    },
    display: {
      streaming: 'ストリームレスポンス',
@@ -589,6 +591,55 @@ export default {
      cors: 'CORS 許可元',
      corsHint: '許可するクロスオリジン',
    },
+    voice: {
+      ttsProvider: 'TTS プロバイダー',
+      ttsProviderHint: 'メッセージ読み上げに使用する音声合成エンジンを選択',
+      providerWebSpeech: 'WebSpeech API（ブラウザ）',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: 'カスタムエンドポイント（OpenAI 互換）',
+      providerEdge: 'Edge TTS（無料、API Key 不要）',
+
+      // WebSpeech
+      webspeechVoice: '音声',
+      webspeechVoiceHint: 'ブラウザまたは OS から音声を選択',
+      webspeechVoicePlaceholder: '自動（デフォルト音声）',
+
+      // OpenAI
+      openaiKey: 'API キー',
+      openaiKeyHint: 'TTS アクセス権のある OpenAI API キー',
+      openaiUrl: 'API ベース URL',
+      openaiUrlHint: '例: https://api.openai.com/v1/audio/speech',
+      openaiModel: 'モデル',
+      openaiModelHint: 'tts-1（高速）/ tts-1-hd（高音質）',
+      openaiVoice: '音色',
+      openaiVoiceHint: '合成に使用する音色',
+
+      // Custom endpoint
+      customHint: 'OpenAI 互換の TTS API を使用可能 — GPT-SoVITS、CosyVoice などに対応',
+      customUrl: 'API URL',
+      customUrlHint: 'TTS サービスのベース URL',
+      customUrlPlaceholder: 'ローカルアダプターで設定したアドレス（例：http://127.0.0.1:9880）',
+      customApiKey: 'API キー（オプション）',
+      customApiKeyHint: '一部のカスタムエンドポイントは認証が必要',
+      customApiKeyPlaceholder: '不要な場合は空欄',
+
+      // Edge TTS
+      edgeHint: 'Microsoft Edge TTS を搭載（node-edge-tts）。',
+      edgeUrl: 'アダプター URL',
+      edgeUrlHint: 'Edge TTS アダプターのアドレス（例：http://127.0.0.1:9882）',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: '音色',
+      edgeVoiceHint: '音声合成に使用する音色を選択',
+
+      // Test
+      testTitle: '音声テスト',
+      testText: 'テストテキスト',
+      testTextPlaceholder: 'テストするテキストを入力...',
+      testTextDefault: 'こんにちは、これは音声テストです。',
+      testButton: 'テスト',
+      testButtonPlaying: '再生中...',
+      testFailed: 'テスト失敗：{error}',
+    },
    lockedIps: {
      title: 'ロック済みIP管理',
      count: '{count}件ロック中',
@@ -511,6 +511,8 @@ export default {
      session: '세션',
      privacy: '개인정보',
      apiServer: 'API 서버',
+      models: '모델',
+      voice: '음성',
    },
    display: {
      streaming: '스트리밍 응답',
@@ -589,6 +591,55 @@ export default {
      cors: 'CORS 출처',
      corsHint: '허용된 교차 출처',
    },
+    voice: {
+      ttsProvider: 'TTS 제공자',
+      ttsProviderHint: '메시지 재생에 사용할 텍스트 음성 변환 엔진 선택',
+      providerWebSpeech: 'WebSpeech API (브라우저)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: '사용자 정의 엔드포인트 (OpenAI 호환)',
+      providerEdge: 'Edge TTS (무료, API Key 불필요)',
+
+      // WebSpeech
+      webspeechVoice: '음성',
+      webspeechVoiceHint: '브라우저 또는 OS에서 음성 선택',
+      webspeechVoicePlaceholder: '자동 (기본 음성)',
+
+      // OpenAI
+      openaiKey: 'API 키',
+      openaiKeyHint: 'TTS 접근 권한이 있는 OpenAI API 키',
+      openaiUrl: 'API 기본 URL',
+      openaiUrlHint: '예: https://api.openai.com/v1/audio/speech',
+      openaiModel: '모델',
+      openaiModelHint: 'tts-1 (빠름) / tts-1-hd (고음질)',
+      openaiVoice: '음색',
+      openaiVoiceHint: '합성에 사용할 음색',
+
+      // Custom endpoint
+      customHint: '모든 OpenAI 호환 TTS API 사용 가능 — GPT-SoVITS, CosyVoice 등 지원',
+      customUrl: 'API URL',
+      customUrlHint: 'TTS 서비스의 기본 URL',
+      customUrlPlaceholder: '로컬 어댑터에 설정된 주소 (예: http://127.0.0.1:9880)',
+      customApiKey: 'API 키 (선택사항)',
+      customApiKeyHint: '일부 사용자 정의 엔드포인트는 인증 필요',
+      customApiKeyPlaceholder: '필요하지 않으면 비워둠',
+
+      // Edge TTS
+      edgeHint: 'Microsoft Edge TTS 기반 (node-edge-tts).',
+      edgeUrl: '어댑터 URL',
+      edgeUrlHint: 'Edge TTS 어댑터 주소 (예: http://127.0.0.1:9882)',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: '음색',
+      edgeVoiceHint: '음성 합성에 사용할 음색 선택',
+
+      // Test
+      testTitle: '음성 테스트',
+      testText: '테스트 텍스트',
+      testTextPlaceholder: '테스트할 텍스트 입력...',
+      testTextDefault: '안녕하세요, 음성 테스트입니다.',
+      testButton: '테스트',
+      testButtonPlaying: '재생 중...',
+      testFailed: '테스트 실패: {error}',
+    },
    lockedIps: {
      title: '잠긴 IP 관리',
      count: '{count}개 잠김',
@@ -511,6 +511,8 @@ jobTriggered: 'Job acionado',
      session: 'Sessao',
      privacy: 'Privacidade',
      apiServer: 'Servidor API',
+      models: 'Modelos',
+      voice: 'Voz',
    },
    display: {
      streaming: 'Respostas em streaming',
@@ -589,6 +591,55 @@ jobTriggered: 'Job acionado',
      cors: 'Origens CORS',
      corsHint: 'Fontes cross-origin permitidas',
    },
+    voice: {
+      ttsProvider: 'Provedor TTS',
+      ttsProviderHint: 'Escolha o mecanismo de texto para fala para reproducao de mensagens',
+      providerWebSpeech: 'WebSpeech API (Navegador)',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: 'Endpoint personalizado (compativel com OpenAI)',
+      providerEdge: 'Edge TTS (Gratuito, sem chave API)',
+
+      // WebSpeech
+      webspeechVoice: 'Voz',
+      webspeechVoiceHint: 'Selecione uma voz do seu navegador ou SO',
+      webspeechVoicePlaceholder: 'Auto (voz padrao)',
+
+      // OpenAI
+      openaiKey: 'Chave API',
+      openaiKeyHint: 'Sua chave API OpenAI com acesso TTS',
+      openaiUrl: 'URL base da API',
+      openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
+      openaiModel: 'Modelo',
+      openaiModelHint: 'tts-1 (mais rapido) / tts-1-hd (qualidade superior)',
+      openaiVoice: 'Voz',
+      openaiVoiceHint: 'Voz a ser usada para sintese',
+
+      // Custom endpoint
+      customHint: 'Use qualquer API TTS compativel com OpenAI — funciona com GPT-SoVITS, CosyVoice, etc.',
+      customUrl: 'URL da API',
+      customUrlHint: 'URL base do seu servico TTS',
+      customUrlPlaceholder: 'Endereco configurado no adaptador local, ex. http://127.0.0.1:9880',
+      customApiKey: 'Chave API (opcional)',
+      customApiKeyHint: 'Alguns endpoints personalizados exigem autenticacao',
+      customApiKeyPlaceholder: 'Deixe em branco se nao for necessario',
+
+      // Edge TTS
+      edgeHint: 'Desenvolvido por Microsoft Edge TTS (node-edge-tts).',
+      edgeUrl: 'URL do adaptador',
+      edgeUrlHint: 'Endereco do adaptador Edge TTS, ex. http://127.0.0.1:9882',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: 'Voz',
+      edgeVoiceHint: 'Selecione uma voz para sintese de fala',
+
+      // Test
+      testTitle: 'Teste de voz',
+      testText: 'Texto de teste',
+      testTextPlaceholder: 'Insira o texto para testar...',
+      testTextDefault: 'Ola, este e um teste de voz.',
+      testButton: 'Testar',
+      testButtonPlaying: 'Reproduzindo...',
+      testFailed: 'Teste falhou: {error}',
+    },
    lockedIps: {
      title: 'IPs bloqueadas',
      count: '{count} bloqueadas',
@@ -643,6 +643,7 @@ export default {
      privacy: '隐私',
      apiServer: 'API 服务器',
      models: '模型',
+      voice: '语音',
    },
    models: {
      apiKey: 'API Key',
@@ -739,6 +740,54 @@ export default {
      unlocked: 'IP 已解锁',
      allUnlocked: '已解锁 {count} 个 IP',
    },
+    voice: {
+      ttsProvider: 'TTS 提供者',
+      ttsProviderHint: '选择消息朗读使用的语音引擎',
+      providerWebSpeech: 'WebSpeech API（浏览器内置）',
+      providerOpenai: 'OpenAI TTS',
+      providerCustom: '自定义端点（兼容 OpenAI）',
+      providerEdge: 'Edge TTS（免费，无需 API Key）',
+
+      // WebSpeech
+      webspeechVoice: '音色',
+      webspeechVoiceHint: '从浏览器或系统提供的语音中选择',
+      webspeechVoicePlaceholder: '自动（默认语音）',
+
+      // OpenAI
+      openaiKey: 'API 密钥',
+      openaiKeyHint: '具有 TTS 权限的 OpenAI API Key',
+      openaiUrl: 'API 基础地址',
+      openaiUrlHint: '例如 https://api.openai.com/v1/audio/speech',
+      openaiModel: '模型',
+      openaiModelHint: 'tts-1（快速）/ tts-1-hd（高音质）',
+      openaiVoice: '音色',
+      openaiVoiceHint: '用于语音合成的音色',
+
+      // 自定义端点
+      customHint: '支持任何 OpenAI 兼容的 TTS 服务——可用于 GPT-SoVITS、CosyVoice 等自部署服务。',
+      customUrl: 'API 地址',
+      customUrlHint: 'TTS 服务的完整基础地址',
+      customUrlPlaceholder: '本地适配器中配置的地址 如：http://127.0.0.1:9880',
+      customApiKey: 'API 密钥（可选）',
+      customApiKeyHint: '部分自部署服务需要身份验证',
+      customApiKeyPlaceholder: '不需要则留空',
+      // Edge TTS
+      edgeHint: '由 Microsoft Edge TTS 驱动（node-edge-tts）。',
+      edgeUrl: '适配器地址',
+      edgeUrlHint: 'Edge TTS 适配器地址，例如 http://127.0.0.1:9882',
+      edgeUrlPlaceholder: 'http://127.0.0.1:9882',
+      edgeVoice: '音色',
+      edgeVoiceHint: '选择用于语音合成的音色',
+
+      // 试听
+      testTitle: '试听测试',
+      testText: '测试文本',
+      testTextPlaceholder: '输入测试文本...',
+      testTextDefault: '你好，这是一个语音测试。',
+      testButton: '试听',
+      testButtonPlaying: '播放中...',
+      testFailed: '测试失败：{error}',
+    },
  },

  // 平台频道设置
@@ -14,6 +14,7 @@ import SessionSettings from "@/components/hermes/settings/SessionSettings.vue";
 import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue";
 import ModelSettings from "@/components/hermes/settings/ModelSettings.vue";
 import AccountSettings from "@/components/hermes/settings/AccountSettings.vue";
+import VoiceSettings from "@/components/hermes/settings/VoiceSettings.vue";

 const settingsStore = useSettingsStore();
 const { t } = useI18n();
@@ -57,6 +58,9 @@ onMounted(() => {
          <NTabPane name="models" :tab="t('settings.tabs.models')">
            <ModelSettings />
          </NTabPane>
+          <NTabPane name="voice" :tab="t('settings.tabs.voice')">
+            <VoiceSettings />
+          </NTabPane>
        </NTabs>
      </NSpin>
    </div>
@@ -1,5 +1,5 @@
 import type { Context } from 'koa'
-import { textToSpeech } from '../../services/hermes/tts'
+import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'

 export async function generate(ctx: Context) {
  const { text, lang } = ctx.request.body as {
@@ -26,3 +26,41 @@ export async function generate(ctx: Context) {
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
 }
+
+/**
+ * OpenAI-compatible TTS endpoint, routed at `POST /api/tts/proxy/audio/speech`.
+ *
+ * Request body: `{ model?, input, voice?, speed? }`, forwarded to `openaiCompatibleTts`.
+ * Success: the synthesized audio as `audio/mpeg`, with the engine name in the
+ * `X-TTS-Engine` response header.
+ * Failure: 400 with `{ error }` when `input` is missing, not a string, or longer than
+ * 5000 characters; when `voice` is present but not a string; or when `speed` is present
+ * but not a positive finite number.
+ *
+ * NOTE(review): the TTS routes are mounted ahead of the auth middleware, so this handler
+ * appears to be reachable without authentication — confirm that is intended.
+ */
+export async function openaiProxy(ctx: Context) {
+  // The payload is untrusted JSON, so every field is checked at runtime before use; a
+  // missing body is treated as an empty object instead of throwing on property access.
+  const body = (ctx.request.body ?? {}) as {
+    input?: unknown
+    voice?: unknown
+    speed?: unknown
+    model?: unknown
+  }
+  const { input, voice, speed, model } = body
+
+  if (typeof input !== 'string' || !input) {
+    ctx.status = 400
+    ctx.body = { error: 'input is required' }
+    return
+  }
+
+  if (input.length > 5000) {
+    ctx.status = 400
+    ctx.body = { error: 'input is too long (max 5000 characters)' }
+    return
+  }
+
+  // `== null` lets both an omitted field and an explicit JSON null mean "use the default".
+  if (voice != null && typeof voice !== 'string') {
+    ctx.status = 400
+    ctx.body = { error: 'voice must be a string' }
+    return
+  }
+
+  // A non-numeric speed would otherwise be turned into a malformed rate such as "NaN%".
+  if (speed != null && (typeof speed !== 'number' || !Number.isFinite(speed) || speed <= 0)) {
+    ctx.status = 400
+    ctx.body = { error: 'speed must be a positive number' }
+    return
+  }
+
+  const { audio, engine } = await openaiCompatibleTts({
+    input,
+    voice: typeof voice === 'string' ? voice : undefined,
+    speed: typeof speed === 'number' ? speed : undefined,
+    model: typeof model === 'string' ? model : undefined,
+  })
+
+  ctx.set('Content-Type', 'audio/mpeg')
+  ctx.set('Content-Length', String(audio.length))
+  ctx.set('X-TTS-Engine', engine)
+  ctx.body = audio
+}
@@ -4,3 +4,4 @@ import * as ctrl from '../../controllers/hermes/tts'
 export const ttsRoutes = new Router() // router holding the TTS endpoints registered below

 ttsRoutes.post('/api/hermes/tts', ctrl.generate) // handler reads { text, lang } from the body
+ttsRoutes.post('/api/tts/proxy/audio/speech', ctrl.openaiProxy) // OpenAI-style body: { model, input, voice, speed }
@@ -41,6 +41,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
  app.use(healthRoutes.routes())
  app.use(webhookRoutes.routes())
  app.use(authPublicRoutes.routes())
+  app.use(ttsRoutes.routes())              // TTS proxy/generation — must be before auth

  // --- Auth middleware: all routes below require authentication ---
  app.use(requireAuth)
@@ -69,7 +70,6 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
  app.use(jobRoutes.routes())               // Must be before proxy
  app.use(cronHistoryRoutes.routes())        // Must be before proxy
  app.use(kanbanRoutes.routes())             // Must be before proxy
-  app.use(ttsRoutes.routes())                // Must be before proxy
  app.use(proxyRoutes.routes())

  // Proxy catch-all middleware (must be last)
@@ -12,6 +12,9 @@ const FIXED_PITCH = '+12Hz'
 export interface TtsOptions { // options accepted by edgeTts and textToSpeech
  text: string // text to synthesize
  lang?: string
+  voice?: string // Edge voice name; FIXED_VOICE is used when unset or empty
+  rate?: string // Edge rate string in "+/-NN%" form; FIXED_RATE is used when unset or empty
+  pitch?: string // Edge pitch string such as "+12Hz"; FIXED_PITCH is used when unset or empty
 }

 export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
@@ -20,9 +23,9 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {

  try {
    const tts = new EdgeTTS({
-      voice: FIXED_VOICE,
-      rate: FIXED_RATE,
-      pitch: FIXED_PITCH,
+      voice: opts.voice || FIXED_VOICE,
+      rate: opts.rate || FIXED_RATE,
+      pitch: opts.pitch || FIXED_PITCH,
      timeout: 15000,
    })

@@ -35,7 +38,41 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
 }

 export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> { // synthesizes via edgeTts and reports which engine produced the audio
+  const voice = opts.voice || FIXED_VOICE // resolved here only for the debug log; edgeTts applies the same fallback itself
+  const rate = opts.rate || FIXED_RATE // same: log-only copy of the effective rate
+  const pitch = opts.pitch || FIXED_PITCH // same: log-only copy of the effective pitch
  const audio = await edgeTts(opts) // opts is passed through unchanged
-  logger.debug({ engine: 'edge', voice: FIXED_VOICE, rate: FIXED_RATE, pitch: FIXED_PITCH }, 'TTS generated via Edge')
+  logger.debug({ engine: 'edge', voice, rate, pitch }, 'TTS generated via Edge')
  return { audio, engine: 'edge' } // engine is always 'edge' on this path
 }
+
+/**
+ * Convert a speed multiplier to an Edge TTS rate string.
+ *
+ * The multiplier is clamped to the documented 0.5-2.0 range, so the result always lies
+ * between "-50%" and "+100%". NaN (for example from a malformed request value) maps to
+ * the neutral "+0%" instead of the invalid string "NaN%".
+ *
+ * @param speed - Speed multiplier, where 1 means normal speed.
+ * @returns Signed percentage in the Edge TTS rate format "+/-NN%".
+ */
+export function speedToEdgeRate(speed: number): string {
+  if (Number.isNaN(speed)) return '+0%'
+  // Clamping also tames +/-Infinity and negative multipliers.
+  const clamped = Math.min(2, Math.max(0.5, speed))
+  const percent = Math.round((clamped - 1) * 100)
+  return percent >= 0 ? `+${percent}%` : `${percent}%`
+}
+
+/**
+ * Request body in OpenAI TTS format: { model, input, voice, speed }.
+ * openaiCompatibleTts converts it to the internal TtsOptions.
+ */
+export interface OpenaiTtsRequest {
+  model?: string // accepted for compatibility; not read by openaiCompatibleTts
+  input: string // text to synthesize
+  voice?: string // passed through as the Edge voice name
+  speed?: number // speed multiplier; converted to an Edge rate string
+}
+
+/**
+ * Synthesize speech for an OpenAI-format request through textToSpeech.
+ *
+ * Mapping: `input` becomes the text, `voice` the Edge voice (FIXED_VOICE when unset or
+ * empty) and `speed` the Edge rate string; the pitch is always FIXED_PITCH and `model`
+ * is ignored. Because the body originates from an HTTP request, `voice` and `speed` are
+ * re-checked at runtime: a non-string voice falls back to FIXED_VOICE, and any speed that
+ * is not a positive finite number falls back to FIXED_RATE rather than producing a
+ * malformed rate string.
+ *
+ * @param body - OpenAI-style TTS request.
+ * @returns The audio buffer and engine name returned by textToSpeech.
+ */
+export async function openaiCompatibleTts(
+  body: OpenaiTtsRequest,
+): Promise<{ audio: Buffer; engine: string }> {
+  const { voice, speed } = body
+  const rate =
+    typeof speed === 'number' && Number.isFinite(speed) && speed > 0
+      ? speedToEdgeRate(speed)
+      : FIXED_RATE
+
+  return textToSpeech({
+    text: body.input,
+    voice: typeof voice === 'string' && voice ? voice : FIXED_VOICE,
+    rate,
+    pitch: FIXED_PITCH,
+  })
+}