feat: add voice playback settings with 4-provider support (#608)

Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
memeflyfly
2026-05-10 20:08:38 +08:00
committed by GitHub
parent 838791a740
commit 15195f0795
18 changed files with 1237 additions and 20 deletions
@@ -16,6 +16,7 @@ import {
renderHighlightedCodeBlock,
} from "./highlight";
import { useGlobalSpeech } from "@/composables/useSpeech";
import { useVoiceSettings } from "@/composables/useVoiceSettings";
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
@@ -79,6 +80,7 @@ const previewUrl = ref<string | null>(null);
const chatStore = useChatStore();
const settingsStore = useSettingsStore();
const speech = useGlobalSpeech();
const voiceSettings = useVoiceSettings();
// Copy entire bubble content
const copyableContent = computed(() => {
@@ -351,25 +353,90 @@ const renderedToolResult = computed(() => {
// Speech playback state
// NOTE(review): this span appears to show the removed body followed by the added body of the
// same computed (diff markers were lost in rendering) — confirm against the real file.
const canPlaySpeech = computed(() => {
// Only assistant messages can be played, and the browser must support the Web Speech API
return props.message.role === 'assistant' &&
speech.isSupported &&
copyableContent.value;
});
// Only assistant messages can be played
if (props.message.role !== 'assistant') return false
if (!copyableContent.value) return false
// OpenAI / Custom / Edge do not depend on the browser's Web Speech API
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') return true
return speech.isSupported
})
// True while this message is the one currently being spoken.
// NOTE(review): the first three lines look like the removed body and the rest the added body
// of the same computed (diff markers were lost in rendering) — confirm against the real file.
const isPlayingThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value;
});
// OpenAI / Custom / Edge mode: these providers are tracked via the separate "custom" playback refs
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPlaying.value
}
// Otherwise use the browser-speech playback refs
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value
})
// True while this message is the current one and its playback is paused.
// NOTE(review): the first three lines look like the removed body and the rest the added body
// of the same computed (diff markers were lost in rendering) — confirm against the real file.
const isPausedThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPaused.value;
});
// OpenAI / Custom / Edge mode: these providers are tracked via the separate "custom" playback refs
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPaused.value
}
// Otherwise use the browser-speech playback refs
return speech.currentMessageId.value === props.message.id && speech.isPaused.value
})
/**
 * Toggle speech playback for this message using the configured TTS provider.
 *
 * - `openai`, `custom` and `edge` all go through `speech.openaiToggle`; they differ only in
 *   the endpoint and options passed.
 * - `webspeech` extracts readable text and speaks it through the browser.
 * - Any other provider value falls back to `speech.toggle`.
 *
 * Does nothing when `canPlaySpeech` is false, or when the selected provider requires an
 * endpoint URL and none is configured (a warning is logged in that case).
 */
function handleSpeechToggle() {
  if (!canPlaySpeech.value) {
    return
  }

  const messageId = props.message.id
  const content = props.message.content || ''

  switch (voiceSettings.provider.value) {
    // OpenAI TTS
    case 'openai': {
      const baseUrl = voiceSettings.openaiBaseUrl.value
      if (!baseUrl) {
        console.warn('[MessageItem] OpenAI TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl,
        // An empty key is passed as undefined, the same way the custom branch treats its key.
        apiKey: voiceSettings.openaiApiKey.value || undefined,
        model: voiceSettings.openaiModel.value,
        voice: voiceSettings.openaiVoice.value,
      })
      return
    }

    // Custom OpenAI-compatible endpoint (e.g. GPT-SoVITS)
    case 'custom': {
      const baseUrl = voiceSettings.customUrl.value
      if (!baseUrl) {
        console.warn('[MessageItem] 自定义 TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl,
        apiKey: voiceSettings.customApiKey.value || undefined,
      })
      return
    }

    // Edge TTS: an empty URL means "use the built-in backend proxy"
    case 'edge': {
      speech.openaiToggle(messageId, content, {
        baseUrl: voiceSettings.edgeUrl.value || '/api/tts/proxy',
        voice: voiceSettings.edgeVoice.value,
      })
      return
    }

    // Browser Web Speech API
    case 'webspeech': {
      const text = speech.extractReadableText(content)
      if (text) {
        // NOTE(review): this always stops and restarts, so clicking while this message is
        // already playing restarts it instead of pausing — confirm that is intended.
        speech.stop(false)
        speech.speakViaBrowser(messageId, text, {
          voiceName: voiceSettings.webspeechVoice.value || undefined,
        })
      }
      return
    }

    // Fallback when no known provider matches
    default:
      speech.toggle(messageId, content)
  }
}
@@ -380,7 +447,37 @@ onMounted(() => {
// Handler for the window-level 'auto-play-speech' event. It only reacts when the event
// targets this message and the message is playable, and it starts exactly one playback
// path, chosen by the configured TTS provider.
autoPlayHandler = (e: Event) => {
  const { detail } = e as CustomEvent<{ messageId: string; content: string }>
  if (detail.messageId !== props.message.id || !canPlaySpeech.value) {
    return
  }

  // Prefer the content carried by the event, then the message's own content.
  const content = detail.content || props.message.content || ''

  switch (voiceSettings.provider.value) {
    case 'openai': {
      const baseUrl = voiceSettings.openaiBaseUrl.value
      // Silently skip auto-play when no endpoint is configured.
      if (baseUrl) {
        speech.openaiPlay(props.message.id, content, {
          baseUrl,
          apiKey: voiceSettings.openaiApiKey.value || undefined,
          model: voiceSettings.openaiModel.value,
          voice: voiceSettings.openaiVoice.value,
        })
      }
      break
    }

    case 'custom': {
      const baseUrl = voiceSettings.customUrl.value
      if (baseUrl) {
        speech.openaiPlay(props.message.id, content, {
          baseUrl,
          apiKey: voiceSettings.customApiKey.value || undefined,
        })
      }
      break
    }

    case 'edge':
      speech.openaiPlay(props.message.id, content, {
        // Same endpoint rule as the manual toggle: a configured URL wins, otherwise the
        // built-in backend proxy is used.
        baseUrl: voiceSettings.edgeUrl.value || '/api/tts/proxy',
        voice: voiceSettings.edgeVoice.value,
      })
      break

    case 'webspeech': {
      const text = speech.extractReadableText(content)
      if (text) {
        speech.stop(false)
        speech.speakViaBrowser(props.message.id, text, {
          voiceName: voiceSettings.webspeechVoice.value || undefined,
        })
      }
      break
    }

    // Unknown provider: keep the queue-based behaviour.
    default:
      speech.enqueue(props.message.id, content)
  }
}
window.addEventListener('auto-play-speech', autoPlayHandler)
@@ -0,0 +1,327 @@
<script setup lang="ts">
import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
import { NSelect, NInput, NButton } from 'naive-ui'
import { useI18n } from 'vue-i18n'
import { useVoiceSettings } from '@/composables/useVoiceSettings'
import { useSpeech } from '@/composables/useSpeech'
import SettingRow from './SettingRow.vue'
// Translation function used for every user-facing label in this panel
const { t } = useI18n()
// Voice settings state and setters (provider selection plus per-provider options)
const vs = useVoiceSettings()
// Speech playback helpers used by the test button
const speech = useSpeech()
// Text spoken by the test button; seeded once from the translated default
const testText = ref(t('settings.voice.testTextDefault'))
// True while handleTest is running; drives the test input/button disabled and loading state
const testPlaying = ref(false)
// Choices for the TTS provider selector. Built inside `computed` so the labels are
// re-translated when the active locale changes, instead of being frozen at setup time.
// The template unwraps the ref automatically, so `:options="providerOptions"` is unchanged.
const providerOptions = computed(() => [
  { label: t('settings.voice.providerWebSpeech'), value: 'webspeech' },
  { label: t('settings.voice.providerOpenai'), value: 'openai' },
  { label: t('settings.voice.providerCustom'), value: 'custom' },
  { label: t('settings.voice.providerEdge'), value: 'edge' },
])
// Choices for the OpenAI model selector; label and value are both the model id.
const openaiModelOptions = ['tts-1', 'tts-1-hd'].map((modelId) => ({
  label: modelId,
  value: modelId,
}))
// Choices for the OpenAI voice selector. The value is the lowercase voice id and the
// label is the same id with its first letter capitalised.
const openaiVoiceOptions = ['alloy', 'echo', 'fable', 'nova', 'onyx', 'shimmer'].map(
  (voiceId) => ({
    label: voiceId.charAt(0).toUpperCase() + voiceId.slice(1),
    value: voiceId,
  }),
)
// Choices for the Edge TTS voice selector. Each entry is a [display name, voice id] pair;
// the label is rendered as "<display name> (<voice id>)" and the value is the voice id.
const edgeVoiceOptions = (
  [
    ['晓晓', 'zh-CN-XiaoxiaoNeural'],
    ['晓萱', 'zh-CN-XiaoxuanNeural'],
    ['云希', 'zh-CN-YunxiNeural'],
    ['云健', 'zh-CN-YunjianNeural'],
    ['云扬', 'zh-CN-YunyangNeural'],
    ['Jenny', 'en-US-JennyNeural'],
    ['Aria', 'en-US-AriaNeural'],
    ['Guy', 'en-US-GuyNeural'],
    ['Sonia', 'en-GB-SoniaNeural'],
    ['Ryan', 'en-GB-RyanNeural'],
    ['Nanami', 'ja-JP-NanamiNeural'],
    ['Keita', 'ja-JP-KeitaNeural'],
    ['Sun-Hi', 'ko-KR-SunHiNeural'],
    ['InJoon', 'ko-KR-InJoonNeural'],
    ['Denise', 'fr-FR-DeniseNeural'],
    ['Henri', 'fr-FR-HenriNeural'],
    ['Katja', 'de-DE-KatjaNeural'],
    ['Conrad', 'de-DE-ConradNeural'],
  ] as Array<[string, string]>
).map(([displayName, voiceId]) => ({
  label: `${displayName} (${voiceId})`,
  value: voiceId,
}))
// Voices offered by the browser's speech synthesis engine, shown in the WebSpeech selector.
const webspeechVoices = ref<SpeechSynthesisVoice[]>([])

// Re-read the voice list from the browser into the reactive ref.
function refreshWebspeechVoices() {
  webspeechVoices.value = window.speechSynthesis.getVoices()
}

onMounted(() => {
  if (!('speechSynthesis' in window)) return
  // The list may already be available; if not, it stays empty until 'voiceschanged' fires.
  refreshWebspeechVoices()
  // Subscribe with addEventListener rather than assigning `onvoiceschanged`, so a handler
  // installed elsewhere on the shared speechSynthesis object is not overwritten.
  // The optional call tolerates engines that do not expose EventTarget methods here.
  window.speechSynthesis.addEventListener?.('voiceschanged', refreshWebspeechVoices)
})

onBeforeUnmount(() => {
  // Detach the listener so it does not keep updating state after this component is gone.
  if ('speechSynthesis' in window) {
    window.speechSynthesis.removeEventListener?.('voiceschanged', refreshWebspeechVoices)
  }
})
/**
 * Speak the test phrase with the currently selected provider, using the same endpoint and
 * options that real message playback would use.
 *
 * Does nothing for blank text. For `openai` and `custom`, an empty endpoint URL logs a
 * warning and aborts. Errors thrown by playback are logged rather than rethrown, and
 * `testPlaying` is always reset when the function finishes.
 */
async function handleTest() {
  const text = testText.value.trim()
  if (!text) return

  testPlaying.value = true
  try {
    switch (vs.provider.value) {
      case 'webspeech':
        speech.stop(false)
        speech.speakViaBrowser('__test__', text, {
          voiceName: vs.webspeechVoice.value || undefined,
        })
        break

      case 'openai': {
        const baseUrl = vs.openaiBaseUrl.value
        if (!baseUrl) {
          console.warn('[VoiceSettings] OpenAI base URL empty')
          return
        }
        await speech.openaiPlay('__test__', text, {
          baseUrl,
          apiKey: vs.openaiApiKey.value || undefined,
          model: vs.openaiModel.value,
          voice: vs.openaiVoice.value,
        })
        break
      }

      case 'custom': {
        const baseUrl = vs.customUrl.value
        if (!baseUrl) {
          console.warn('[VoiceSettings] Custom URL empty')
          return
        }
        await speech.openaiPlay('__test__', text, {
          baseUrl,
          apiKey: vs.customApiKey.value || undefined,
        })
        break
      }

      case 'edge':
        await speech.openaiPlay('__test__', text, {
          // Test against the endpoint playback actually uses: a configured Edge URL wins,
          // otherwise the built-in backend proxy.
          baseUrl: vs.edgeUrl.value || '/api/tts/proxy',
          voice: vs.edgeVoice.value,
        })
        break
    }
  } catch (err) {
    console.error('[VoiceSettings] Test failed:', err)
  } finally {
    testPlaying.value = false
  }
}
</script>
<template>
<div class="voice-settings">
<SettingRow
:label="t('settings.voice.ttsProvider')"
:hint="t('settings.voice.ttsProviderHint')"
>
<NSelect
:value="vs.provider.value"
:options="providerOptions"
size="small"
style="width: 300px"
@update:value="vs.setProvider"
/>
</SettingRow>
<!-- WebSpeech API -->
<template v-if="vs.provider.value === 'webspeech'">
<SettingRow
:label="t('settings.voice.webspeechVoice')"
:hint="t('settings.voice.webspeechVoiceHint')"
>
<NSelect
:value="vs.webspeechVoice.value"
size="small"
filterable
style="width: 320px"
:placeholder="t('settings.voice.webspeechVoicePlaceholder')"
:consistent-menu-width="false"
:options="webspeechVoices.map(v => ({
label: `${v.name} (${v.lang})`,
value: v.name,
}))"
@update:value="vs.setWebSpeechVoice"
/>
</SettingRow>
</template>
<!-- OpenAI TTS -->
<template v-if="vs.provider.value === 'openai'">
<SettingRow
:label="t('settings.voice.openaiKey')"
:hint="t('settings.voice.openaiKeyHint')"
>
<NInput
:value="vs.openaiApiKey.value"
type="password"
size="small"
show-password-on="click"
style="width: 360px"
placeholder="sk-..."
@update:value="vs.setOpenaiApiKey"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiUrl')"
:hint="t('settings.voice.openaiUrlHint')"
>
<NInput
:value="vs.openaiBaseUrl.value"
size="small"
style="width: 360px"
placeholder="https://api.openai.com/v1/audio/speech"
@update:value="vs.setOpenaiBaseUrl"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiModel')"
:hint="t('settings.voice.openaiModelHint')"
>
<NSelect
:value="vs.openaiModel.value"
:options="openaiModelOptions"
size="small"
style="width: 200px"
@update:value="vs.setOpenaiModel"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiVoice')"
:hint="t('settings.voice.openaiVoiceHint')"
>
<NSelect
:value="vs.openaiVoice.value"
:options="openaiVoiceOptions"
size="small"
style="width: 200px"
@update:value="vs.setOpenaiVoice"
/>
</SettingRow>
</template>
<!-- Custom Endpoint -->
<template v-if="vs.provider.value === 'custom'">
<div class="provider-hint">
{{ t('settings.voice.customHint') }}
</div>
<SettingRow
:label="t('settings.voice.customUrl')"
:hint="t('settings.voice.customUrlHint')"
>
<NInput
:value="vs.customUrl.value"
size="small"
style="width: 360px"
:placeholder="t('settings.voice.customUrlPlaceholder')"
@update:value="vs.setCustomUrl"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.customApiKey')"
:hint="t('settings.voice.customApiKeyHint')"
>
<NInput
:value="vs.customApiKey.value"
type="password"
size="small"
show-password-on="click"
style="width: 360px"
:placeholder="t('settings.voice.customApiKeyPlaceholder')"
@update:value="vs.setCustomApiKey"
/>
</SettingRow>
</template>
<!-- Edge TTS -->
<template v-if="vs.provider.value === 'edge'">
<div class="provider-hint">
{{ t('settings.voice.edgeHint') }}
</div>
<SettingRow
:label="t('settings.voice.edgeVoice')"
:hint="t('settings.voice.edgeVoiceHint')"
>
<NSelect
:value="vs.edgeVoice.value"
:options="edgeVoiceOptions"
size="small"
filterable
style="width: 320px"
:consistent-menu-width="false"
@update:value="vs.setEdgeVoice"
/>
</SettingRow>
</template>
<!-- Test / Audition -->
<div class="test-section">
<h4 class="test-title">{{ t('settings.voice.testTitle') }}</h4>
<div class="test-row">
<NInput
v-model:value="testText"
size="small"
style="width: 360px"
:placeholder="t('settings.voice.testTextPlaceholder')"
:disabled="testPlaying"
@keyup.enter="handleTest"
/>
<NButton
size="small"
type="primary"
:loading="testPlaying"
:disabled="testPlaying"
@click="handleTest"
>
{{ testPlaying ? t('settings.voice.testButtonPlaying') : t('settings.voice.testButton') }}
</NButton>
</div>
</div>
</div>
</template>
<style scoped lang="scss">
.voice-settings {
display: flex;
flex-direction: column;
gap: 16px;
}
.provider-hint {
font-size: 12px;
color: #888;
line-height: 1.5;
padding: 0 0 4px 0;
}
.test-section {
padding-top: 16px;
.test-title {
margin: 0 0 8px 0;
font-size: 14px;
font-weight: 600;
}
.test-row {
display: flex;
gap: 8px;
align-items: center;
}
}
</style>