feat: add voice playback settings with 4-provider support (#608)
Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers. Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
+2
-2
@@ -111,7 +111,7 @@
|
||||
"vue": "^3.5.32",
|
||||
"vue-i18n": "^11.3.2",
|
||||
"vue-router": "^4.6.4",
|
||||
"vue-tsc": "^3.2.6",
|
||||
"vue-tsc": "^3.2.8",
|
||||
"ws": "^8.20.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
renderHighlightedCodeBlock,
|
||||
} from "./highlight";
|
||||
import { useGlobalSpeech } from "@/composables/useSpeech";
|
||||
import { useVoiceSettings } from "@/composables/useVoiceSettings";
|
||||
|
||||
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
|
||||
|
||||
@@ -79,6 +80,7 @@ const previewUrl = ref<string | null>(null);
|
||||
const chatStore = useChatStore();
|
||||
const settingsStore = useSettingsStore();
|
||||
const speech = useGlobalSpeech();
|
||||
const voiceSettings = useVoiceSettings();
|
||||
|
||||
// Copy entire bubble content
|
||||
const copyableContent = computed(() => {
|
||||
@@ -351,25 +353,90 @@ const renderedToolResult = computed(() => {
|
||||
|
||||
// Speech playback
// NOTE(review): this span looks like a diff hunk rendered without +/- markers — the
// single-expression `return` appears to be the removed implementation and the
// guard-clause body after it the added one; confirm against the real file.
|
||||
const canPlaySpeech = computed(() => {
|
||||
// Only assistant messages can be played, and only if the browser supports the Web Speech API
|
||||
return props.message.role === 'assistant' &&
|
||||
speech.isSupported &&
|
||||
copyableContent.value;
|
||||
});
|
||||
// Only assistant messages can be played
|
||||
if (props.message.role !== 'assistant') return false
|
||||
if (!copyableContent.value) return false
|
||||
// OpenAI / Custom / Edge do not depend on the browser's Web Speech API
|
||||
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') return true
|
||||
return speech.isSupported
|
||||
})
|
||||
|
||||
// True when this message is the one currently being played.
// NOTE(review): the first `return` / `});` pair appears to be the removed side of a
// diff hunk and the provider-aware body below it the added side — confirm.
const isPlayingThisMessage = computed(() => {
|
||||
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value;
|
||||
});
|
||||
// OpenAI / Custom / Edge mode: read the custom-TTS playback state
|
||||
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
|
||||
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPlaying.value
|
||||
}
|
||||
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value
|
||||
})
|
||||
|
||||
// True when this message is the current one and its playback is paused.
// NOTE(review): the first `return` / `});` pair appears to be the removed side of a
// diff hunk and the provider-aware body below it the added side — confirm.
const isPausedThisMessage = computed(() => {
|
||||
return speech.currentMessageId.value === props.message.id && speech.isPaused.value;
|
||||
});
|
||||
// OpenAI / Custom / Edge mode: read the custom-TTS paused state
|
||||
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
|
||||
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPaused.value
|
||||
}
|
||||
return speech.currentMessageId.value === props.message.id && speech.isPaused.value
|
||||
})
|
||||
|
||||
/**
 * Start or toggle speech playback of this message with the configured provider.
 *
 * - `openai` / `custom` / `edge` are routed through `speech.openaiToggle`.
 *   OpenAI and Custom require a configured URL; Edge falls back to the
 *   built-in `/api/tts/proxy` when no URL is set.
 * - `webspeech` stops any current speech and speaks the readable text with the
 *   selected browser voice.
 * - Any other provider value falls back to `speech.toggle`.
 *
 * Does nothing when `canPlaySpeech` is false.
 */
function handleSpeechToggle() {
  if (!canPlaySpeech.value) {
    return
  }

  const messageId = props.message.id
  const content = props.message.content || ''

  switch (voiceSettings.provider.value) {
    // OpenAI TTS
    case 'openai': {
      const baseUrl = voiceSettings.openaiBaseUrl.value
      if (!baseUrl) {
        console.warn('[MessageItem] OpenAI TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl,
        apiKey: voiceSettings.openaiApiKey.value,
        model: voiceSettings.openaiModel.value,
        voice: voiceSettings.openaiVoice.value,
      })
      return
    }

    // Custom endpoint (OpenAI-compatible, e.g. GPT-SoVITS)
    case 'custom': {
      const baseUrl = voiceSettings.customUrl.value
      if (!baseUrl) {
        console.warn('[MessageItem] 自定义 TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl,
        apiKey: voiceSettings.customApiKey.value || undefined,
      })
      return
    }

    // Edge TTS: an empty URL means "use the built-in backend proxy"
    case 'edge': {
      speech.openaiToggle(messageId, content, {
        baseUrl: voiceSettings.edgeUrl.value || '/api/tts/proxy',
        voice: voiceSettings.edgeVoice.value,
      })
      return
    }

    // Web Speech API
    // NOTE(review): this branch always restarts playback instead of
    // pausing/resuming — confirm that is intended for a "toggle" handler.
    case 'webspeech': {
      const text = speech.extractReadableText(content)
      if (text) {
        speech.stop(false)
        speech.speakViaBrowser(messageId, text, {
          voiceName: voiceSettings.webspeechVoice.value || undefined,
        })
      }
      return
    }

    // Fallback when no provider matches
    default:
      speech.toggle(messageId, content)
  }
}
|
||||
|
||||
@@ -380,7 +447,37 @@ onMounted(() => {
|
||||
autoPlayHandler = (e: Event) => {
|
||||
const customEvent = e as CustomEvent<{ messageId: string; content: string }>
|
||||
if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
|
||||
speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '')
|
||||
const content = customEvent.detail.content || props.message.content || ''
|
||||
if (voiceSettings.provider.value === 'openai') {
|
||||
const apiUrl = voiceSettings.openaiBaseUrl.value
|
||||
if (apiUrl) speech.openaiPlay(props.message.id, content, {
|
||||
baseUrl: voiceSettings.openaiBaseUrl.value,
|
||||
apiKey: voiceSettings.openaiApiKey.value,
|
||||
model: voiceSettings.openaiModel.value,
|
||||
voice: voiceSettings.openaiVoice.value,
|
||||
})
|
||||
} else if (voiceSettings.provider.value === 'custom') {
|
||||
const apiUrl = voiceSettings.customUrl.value
|
||||
if (apiUrl) speech.openaiPlay(props.message.id, content, {
|
||||
baseUrl: voiceSettings.customUrl.value,
|
||||
apiKey: voiceSettings.customApiKey.value || undefined,
|
||||
})
|
||||
} else if (voiceSettings.provider.value === 'edge') {
|
||||
speech.openaiPlay(props.message.id, content, {
|
||||
baseUrl: '/api/tts/proxy',
|
||||
voice: voiceSettings.edgeVoice.value,
|
||||
})
|
||||
} else if (voiceSettings.provider.value === 'webspeech') {
|
||||
const text = speech.extractReadableText(content)
|
||||
if (text) {
|
||||
speech.stop(false)
|
||||
speech.speakViaBrowser(props.message.id, text, {
|
||||
voiceName: voiceSettings.webspeechVoice.value || undefined,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
speech.enqueue(props.message.id, content)
|
||||
}
|
||||
}
|
||||
}
|
||||
window.addEventListener('auto-play-speech', autoPlayHandler)
|
||||
|
||||
@@ -0,0 +1,327 @@
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted, onBeforeUnmount } from 'vue'
import { NSelect, NInput, NButton } from 'naive-ui'
import { useI18n } from 'vue-i18n'
import { useVoiceSettings } from '@/composables/useVoiceSettings'
import { useSpeech } from '@/composables/useSpeech'
import SettingRow from './SettingRow.vue'
|
||||
|
||||
const { t } = useI18n()
const vs = useVoiceSettings()
const speech = useSpeech()

// Text spoken by the test control, seeded with the localized default sentence.
const testText = ref(t('settings.voice.testTextDefault'))
// True while a test playback request is running; the template disables the
// test input and button while it is set.
const testPlaying = ref(false)

// Selectable TTS providers. Labels are resolved once, when the component is set up.
const providerOptions = [
  { label: t('settings.voice.providerWebSpeech'), value: 'webspeech' },
  { label: t('settings.voice.providerOpenai'), value: 'openai' },
  { label: t('settings.voice.providerCustom'), value: 'custom' },
  { label: t('settings.voice.providerEdge'), value: 'edge' },
]

// Models offered for the OpenAI provider.
const openaiModelOptions = [
  { label: 'tts-1', value: 'tts-1' },
  { label: 'tts-1-hd', value: 'tts-1-hd' },
]

// Voices offered for the OpenAI provider.
const openaiVoiceOptions = [
  { label: 'Alloy', value: 'alloy' },
  { label: 'Echo', value: 'echo' },
  { label: 'Fable', value: 'fable' },
  { label: 'Nova', value: 'nova' },
  { label: 'Onyx', value: 'onyx' },
  { label: 'Shimmer', value: 'shimmer' },
]

// Voices offered for the Edge TTS provider; `value` is the voice identifier sent with the request.
const edgeVoiceOptions = [
  { label: '晓晓 (zh-CN-XiaoxiaoNeural)', value: 'zh-CN-XiaoxiaoNeural' },
  { label: '晓萱 (zh-CN-XiaoxuanNeural)', value: 'zh-CN-XiaoxuanNeural' },
  { label: '云希 (zh-CN-YunxiNeural)', value: 'zh-CN-YunxiNeural' },
  { label: '云健 (zh-CN-YunjianNeural)', value: 'zh-CN-YunjianNeural' },
  { label: '云扬 (zh-CN-YunyangNeural)', value: 'zh-CN-YunyangNeural' },
  { label: 'Jenny (en-US-JennyNeural)', value: 'en-US-JennyNeural' },
  { label: 'Aria (en-US-AriaNeural)', value: 'en-US-AriaNeural' },
  { label: 'Guy (en-US-GuyNeural)', value: 'en-US-GuyNeural' },
  { label: 'Sonia (en-GB-SoniaNeural)', value: 'en-GB-SoniaNeural' },
  { label: 'Ryan (en-GB-RyanNeural)', value: 'en-GB-RyanNeural' },
  { label: 'Nanami (ja-JP-NanamiNeural)', value: 'ja-JP-NanamiNeural' },
  { label: 'Keita (ja-JP-KeitaNeural)', value: 'ja-JP-KeitaNeural' },
  { label: 'Sun-Hi (ko-KR-SunHiNeural)', value: 'ko-KR-SunHiNeural' },
  { label: 'InJoon (ko-KR-InJoonNeural)', value: 'ko-KR-InJoonNeural' },
  { label: 'Denise (fr-FR-DeniseNeural)', value: 'fr-FR-DeniseNeural' },
  { label: 'Henri (fr-FR-HenriNeural)', value: 'fr-FR-HenriNeural' },
  { label: 'Katja (de-DE-KatjaNeural)', value: 'de-DE-KatjaNeural' },
  { label: 'Conrad (de-DE-ConradNeural)', value: 'de-DE-ConradNeural' },
]
|
||||
|
||||
// Voices exposed by the browser's Web Speech API; populated once the component is mounted.
const webspeechVoices = ref<SpeechSynthesisVoice[]>([])

/** Copy the browser's current voice list into `webspeechVoices`. */
function refreshWebspeechVoices() {
  webspeechVoices.value = window.speechSynthesis.getVoices()
}

onMounted(() => {
  if (!('speechSynthesis' in window)) return

  const voices = window.speechSynthesis.getVoices()
  if (voices.length) {
    webspeechVoices.value = voices
  }

  // Some browsers deliver the voice list asynchronously. Subscribe with
  // addEventListener rather than assigning `onvoiceschanged`, so a handler
  // installed by other code on the shared speechSynthesis object is not replaced.
  if (typeof window.speechSynthesis.addEventListener === 'function') {
    window.speechSynthesis.addEventListener('voiceschanged', refreshWebspeechVoices)
  }
})

onBeforeUnmount(() => {
  // Drop the listener so an unmounted component is not kept alive by it.
  if ('speechSynthesis' in window && typeof window.speechSynthesis.removeEventListener === 'function') {
    window.speechSynthesis.removeEventListener('voiceschanged', refreshWebspeechVoices)
  }
})
|
||||
|
||||
/**
 * Speak the trimmed `testText` with the currently selected provider.
 *
 * `testPlaying` is set for the duration of the call and always cleared
 * afterwards. Calls made while a test is already running are ignored.
 * Failures are logged to the console and not rethrown.
 */
async function handleTest() {
  // Ignore re-entrant triggers (e.g. Enter pressed while a request is pending).
  if (testPlaying.value) return

  const text = testText.value.trim()
  if (!text) return

  testPlaying.value = true
  try {
    switch (vs.provider.value) {
      case 'webspeech':
        speech.stop(false)
        speech.speakViaBrowser('__test__', text, {
          voiceName: vs.webspeechVoice.value || undefined,
        })
        break

      case 'openai':
        if (!vs.openaiBaseUrl.value) {
          console.warn('[VoiceSettings] OpenAI base URL empty')
          return
        }
        await speech.openaiPlay('__test__', text, {
          baseUrl: vs.openaiBaseUrl.value,
          apiKey: vs.openaiApiKey.value || undefined,
          model: vs.openaiModel.value,
          voice: vs.openaiVoice.value,
        })
        break

      case 'custom':
        if (!vs.customUrl.value) {
          console.warn('[VoiceSettings] Custom URL empty')
          return
        }
        await speech.openaiPlay('__test__', text, {
          baseUrl: vs.customUrl.value,
          apiKey: vs.customApiKey.value || undefined,
        })
        break

      case 'edge':
        // Honour a configured Edge URL and fall back to the built-in proxy,
        // the same way message playback does.
        await speech.openaiPlay('__test__', text, {
          baseUrl: vs.edgeUrl.value || '/api/tts/proxy',
          voice: vs.edgeVoice.value,
        })
        break
    }
  } catch (err) {
    console.error('[VoiceSettings] Test failed:', err)
  } finally {
    testPlaying.value = false
  }
}
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div class="voice-settings">
|
||||
<SettingRow
|
||||
:label="t('settings.voice.ttsProvider')"
|
||||
:hint="t('settings.voice.ttsProviderHint')"
|
||||
>
|
||||
<NSelect
|
||||
:value="vs.provider.value"
|
||||
:options="providerOptions"
|
||||
size="small"
|
||||
style="width: 300px"
|
||||
@update:value="vs.setProvider"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<!-- ════ WebSpeech API ════ -->
|
||||
<template v-if="vs.provider.value === 'webspeech'">
|
||||
<SettingRow
|
||||
:label="t('settings.voice.webspeechVoice')"
|
||||
:hint="t('settings.voice.webspeechVoiceHint')"
|
||||
>
|
||||
<NSelect
|
||||
:value="vs.webspeechVoice.value"
|
||||
size="small"
|
||||
filterable
|
||||
style="width: 320px"
|
||||
:placeholder="t('settings.voice.webspeechVoicePlaceholder')"
|
||||
:consistent-menu-width="false"
|
||||
:options="webspeechVoices.map(v => ({
|
||||
label: `${v.name} (${v.lang})`,
|
||||
value: v.name,
|
||||
}))"
|
||||
@update:value="vs.setWebSpeechVoice"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
</template>
|
||||
|
||||
<!-- ════ OpenAI TTS ════ -->
|
||||
<template v-if="vs.provider.value === 'openai'">
|
||||
<SettingRow
|
||||
:label="t('settings.voice.openaiKey')"
|
||||
:hint="t('settings.voice.openaiKeyHint')"
|
||||
>
|
||||
<NInput
|
||||
:value="vs.openaiApiKey.value"
|
||||
type="password"
|
||||
size="small"
|
||||
show-password-on="click"
|
||||
style="width: 360px"
|
||||
placeholder="sk-..."
|
||||
@update:value="vs.setOpenaiApiKey"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.openaiUrl')"
|
||||
:hint="t('settings.voice.openaiUrlHint')"
|
||||
>
|
||||
<NInput
|
||||
:value="vs.openaiBaseUrl.value"
|
||||
size="small"
|
||||
style="width: 360px"
|
||||
placeholder="https://api.openai.com/v1/audio/speech"
|
||||
@update:value="vs.setOpenaiBaseUrl"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.openaiModel')"
|
||||
:hint="t('settings.voice.openaiModelHint')"
|
||||
>
|
||||
<NSelect
|
||||
:value="vs.openaiModel.value"
|
||||
:options="openaiModelOptions"
|
||||
size="small"
|
||||
style="width: 200px"
|
||||
@update:value="vs.setOpenaiModel"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.openaiVoice')"
|
||||
:hint="t('settings.voice.openaiVoiceHint')"
|
||||
>
|
||||
<NSelect
|
||||
:value="vs.openaiVoice.value"
|
||||
:options="openaiVoiceOptions"
|
||||
size="small"
|
||||
style="width: 200px"
|
||||
@update:value="vs.setOpenaiVoice"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
</template>
|
||||
|
||||
<!-- ════ Custom Endpoint ════ -->
|
||||
<template v-if="vs.provider.value === 'custom'">
|
||||
<div class="provider-hint">
|
||||
{{ t('settings.voice.customHint') }}
|
||||
</div>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.customUrl')"
|
||||
:hint="t('settings.voice.customUrlHint')"
|
||||
>
|
||||
<NInput
|
||||
:value="vs.customUrl.value"
|
||||
size="small"
|
||||
style="width: 360px"
|
||||
:placeholder="t('settings.voice.customUrlPlaceholder')"
|
||||
@update:value="vs.setCustomUrl"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.customApiKey')"
|
||||
:hint="t('settings.voice.customApiKeyHint')"
|
||||
>
|
||||
<NInput
|
||||
:value="vs.customApiKey.value"
|
||||
type="password"
|
||||
size="small"
|
||||
show-password-on="click"
|
||||
style="width: 360px"
|
||||
:placeholder="t('settings.voice.customApiKeyPlaceholder')"
|
||||
@update:value="vs.setCustomApiKey"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
|
||||
</template>
|
||||
|
||||
<!-- ════ Edge TTS ════ -->
|
||||
<template v-if="vs.provider.value === 'edge'">
|
||||
<div class="provider-hint">
|
||||
{{ t('settings.voice.edgeHint') }}
|
||||
</div>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.edgeVoice')"
|
||||
:hint="t('settings.voice.edgeVoiceHint')"
|
||||
>
|
||||
<NSelect
|
||||
:value="vs.edgeVoice.value"
|
||||
:options="edgeVoiceOptions"
|
||||
size="small"
|
||||
filterable
|
||||
style="width: 320px"
|
||||
:consistent-menu-width="false"
|
||||
@update:value="vs.setEdgeVoice"
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
</template>
|
||||
|
||||
<!-- ─── Test / Audition ─── -->
|
||||
<div class="test-section">
|
||||
<h4 class="test-title">{{ t('settings.voice.testTitle') }}</h4>
|
||||
<div class="test-row">
|
||||
<NInput
|
||||
v-model:value="testText"
|
||||
size="small"
|
||||
style="width: 360px"
|
||||
:placeholder="t('settings.voice.testTextPlaceholder')"
|
||||
:disabled="testPlaying"
|
||||
@keyup.enter="handleTest"
|
||||
/>
|
||||
<NButton
|
||||
size="small"
|
||||
type="primary"
|
||||
:loading="testPlaying"
|
||||
:disabled="testPlaying"
|
||||
@click="handleTest"
|
||||
>
|
||||
{{ testPlaying ? t('settings.voice.testButtonPlaying') : t('settings.voice.testButton') }}
|
||||
</NButton>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped lang="scss">
|
||||
.voice-settings {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.provider-hint {
|
||||
font-size: 12px;
|
||||
color: #888;
|
||||
line-height: 1.5;
|
||||
padding: 0 0 4px 0;
|
||||
}
|
||||
|
||||
.test-section {
|
||||
padding-top: 16px;
|
||||
|
||||
.test-title {
|
||||
margin: 0 0 8px 0;
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.test-row {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
align-items: center;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
@@ -3,6 +3,14 @@ import { generateSpeech, playAudioBlob } from '@/api/hermes/tts'
|
||||
|
||||
/** Options for browser (Web Speech API) playback. */
export interface SpeechOptions {
  /** Language tag, e.g. 'zh-CN' or 'en-US'. */
  lang?: string
  /** Name of the WebSpeech voice to use. */
  voiceName?: string
}
|
||||
|
||||
/** Request options for an OpenAI-compatible text-to-speech endpoint. */
export interface OpenaiTtsOptions {
  /** Base URL of the service; the request is posted to its `/audio/speech` endpoint. */
  baseUrl: string
  /** Optional API key; when set it is sent as a Bearer token. */
  apiKey?: string
  /** Model name; `openaiPlay` uses 'tts-1' when omitted. */
  model?: string
  /** Voice name; `openaiPlay` uses 'alloy' when omitted. */
  voice?: string
}
|
||||
|
||||
export interface SpeechState {
|
||||
@@ -39,6 +47,11 @@ export function useSpeech() {
|
||||
let playbackToken = 0
|
||||
const speechQueue: SpeechQueueItem[] = []
|
||||
|
||||
// Playback state for the custom TTS engine (OpenAI / Custom / Edge)
|
||||
const isCustomPlaying = ref(false)
|
||||
const isCustomPaused = ref(false)
|
||||
const currentCustomMessageId = ref<string | null>(null)
|
||||
|
||||
// 加载可用语音列表
|
||||
function loadVoices() {
|
||||
availableVoices.value = synth.getVoices()
|
||||
@@ -162,14 +175,25 @@ export function useSpeech() {
|
||||
|
||||
// ─── Browser Engine (Web Speech API) ────────────────────────
|
||||
|
||||
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) {
|
||||
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token?: number) {
|
||||
token = token || ++playbackToken
|
||||
utterance = new SpeechSynthesisUtterance(text)
|
||||
const activeUtterance = utterance
|
||||
|
||||
utterance.rate = 1
|
||||
utterance.pitch = 1
|
||||
utterance.volume = 1
|
||||
utterance.voice = getDefaultVoice()
|
||||
|
||||
// 使用指定的音色(如果有),否则用默认
|
||||
if (options.voiceName) {
|
||||
const voice = availableVoices.value.find(v => v.name === options.voiceName)
|
||||
if (voice) {
|
||||
utterance.voice = voice
|
||||
}
|
||||
}
|
||||
if (!utterance.voice) {
|
||||
utterance.voice = getDefaultVoice()
|
||||
}
|
||||
|
||||
if (options.lang) {
|
||||
utterance.lang = options.lang
|
||||
@@ -218,6 +242,115 @@ export function useSpeech() {
|
||||
synth.speak(utterance)
|
||||
}
|
||||
|
||||
// ─── OpenAI-compatible TTS Engine ────────────────────────────
|
||||
|
||||
let customAudio: HTMLAudioElement | null = null
// Object URL backing the most recent custom-TTS audio. Tracked separately so it
// can still be revoked after `customAudio` has been cleared by other code.
let customAudioUrl: string | null = null

/**
 * Build the request URL for an OpenAI-compatible speech endpoint.
 *
 * Trailing slashes are removed and `/audio/speech` is appended, unless the
 * configured URL already ends with that path (so a full endpoint URL and a
 * base URL both work).
 */
function resolveSpeechEndpoint(baseUrl: string): string {
  const trimmed = baseUrl.trim().replace(/\/+$/, '')
  return /\/audio\/speech$/i.test(trimmed) ? trimmed : `${trimmed}/audio/speech`
}

/** Stop the previous custom-TTS audio element, detach its handlers and free its object URL. */
function releaseCustomAudio() {
  if (customAudio) {
    customAudio.onended = null
    customAudio.onerror = null
    customAudio.pause()
    customAudio = null
  }
  if (customAudioUrl) {
    URL.revokeObjectURL(customAudioUrl)
    customAudioUrl = null
  }
}

/**
 * Synthesize `content` through an OpenAI-compatible TTS endpoint and play it.
 *
 * The readable text is extracted first; nothing happens when it is empty.
 * Starting a playback supersedes any earlier one: the previous audio is
 * stopped and results of older in-flight requests are discarded.
 *
 * @param messageId id recorded in `currentCustomMessageId` while playing
 * @param content   raw message content
 * @param opts      endpoint, credentials, model and voice
 * @throws rethrows request/playback errors of the current (non-superseded) playback
 */
async function openaiPlay(
  messageId: string,
  content: string,
  opts: OpenaiTtsOptions,
): Promise<void> {
  const text = extractReadableText(content)
  if (!text) return

  // Every playback gets its own token; a mismatch later means a newer playback took over.
  const token = ++playbackToken
  releaseCustomAudio()

  isCustomPlaying.value = true
  isCustomPaused.value = false
  currentCustomMessageId.value = messageId

  const clearPlaybackState = () => {
    isCustomPlaying.value = false
    isCustomPaused.value = false
    currentCustomMessageId.value = null
  }
  const revoke = (objectUrl: string) => {
    URL.revokeObjectURL(objectUrl)
    if (customAudioUrl === objectUrl) customAudioUrl = null
  }

  const body: Record<string, string> = {
    model: opts.model || 'tts-1',
    input: text,
    voice: opts.voice || 'alloy',
  }

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  if (opts.apiKey) {
    headers['Authorization'] = `Bearer ${opts.apiKey}`
  }

  let audio: HTMLAudioElement | null = null
  let audioUrl: string | null = null

  try {
    const res = await fetch(resolveSpeechEndpoint(opts.baseUrl), {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
    })

    if (token !== playbackToken) return

    if (!res.ok) {
      const errText = await res.text().catch(() => '')
      throw new Error(`OpenAI TTS 返回 ${res.status}: ${errText || res.statusText}`)
    }

    const audioBlob = await res.blob()
    if (token !== playbackToken) return

    const objectUrl = URL.createObjectURL(audioBlob)
    const player = new Audio(objectUrl)
    audioUrl = objectUrl
    audio = player
    customAudio = player
    customAudioUrl = objectUrl

    const finish = (failed: boolean) => {
      // Always free the blob URL, even for a superseded playback.
      revoke(objectUrl)
      if (token !== playbackToken) return
      if (failed) console.warn('[useSpeech] Custom TTS audio playback error')
      clearPlaybackState()
      customAudio = null
    }
    player.onended = () => finish(false)
    player.onerror = () => finish(true)

    await player.play()
  } catch (err) {
    // play() can reject after the object URL was created; do not leak it.
    if (audioUrl) revoke(audioUrl)
    if (token !== playbackToken) return
    console.error('[useSpeech] OpenAI TTS 请求失败:', err)
    if (audio && customAudio === audio) customAudio = null
    clearPlaybackState()
    throw err
  }
}
|
||||
|
||||
/**
 * Toggle custom-TTS playback for a message.
 *
 * - If `messageId` is the message currently playing: pause it, or resume it
 *   when it is already paused.
 * - Otherwise: stop other speech and start playing this message.
 *
 * Playback promises are handled here so a failed request or a rejected
 * `play()` never surfaces as an unhandled rejection.
 */
function openaiToggle(messageId: string, content: string, opts: OpenaiTtsOptions) {
  const isCurrent = currentCustomMessageId.value === messageId && isCustomPlaying.value

  if (!isCurrent) {
    // Stop other speech and start new
    stop(false)
    if (customAudio) {
      customAudio.pause()
      customAudio = null
    }
    // openaiPlay logs its own failures before rethrowing; nothing more to do here.
    openaiPlay(messageId, content, opts).catch(() => {})
    return
  }

  if (isCustomPaused.value) {
    // Resume
    isCustomPaused.value = false
    const audio = customAudio
    if (audio) {
      audio.play().catch((err: unknown) => {
        console.warn('[useSpeech] Failed to resume custom TTS audio:', err)
        // Still the same audio and it did not resume: show it as paused again.
        if (customAudio === audio) isCustomPaused.value = true
      })
    }
  } else {
    // Pause
    if (customAudio) {
      customAudio.pause()
    }
    isCustomPaused.value = true
  }
}
|
||||
|
||||
// ─── Unified speak ──────────────────────────────────────────
|
||||
|
||||
function speak(messageId: string, text: string, options: SpeechOptions = {}) {
|
||||
@@ -317,6 +450,11 @@ export function useSpeech() {
|
||||
progress: computed(() => state.value.progress),
|
||||
engine: computed(() => state.value.engine),
|
||||
|
||||
// Custom TTS state
|
||||
isCustomPlaying,
|
||||
isCustomPaused,
|
||||
currentCustomMessageId,
|
||||
|
||||
play,
|
||||
pause,
|
||||
resume,
|
||||
@@ -325,6 +463,13 @@ export function useSpeech() {
|
||||
enqueue,
|
||||
getDefaultVoice,
|
||||
extractReadableText,
|
||||
|
||||
// OpenAI-compatible TTS
|
||||
openaiPlay,
|
||||
openaiToggle,
|
||||
|
||||
// Browser WebSpeech (直接调用避免 Rolldown 树摇)
|
||||
speakViaBrowser,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
import { ref, watch } from 'vue'
|
||||
|
||||
/** Identifier of a supported text-to-speech provider. */
export type TtsProvider = 'webspeech' | 'openai' | 'custom' | 'edge'

/** Complete set of voice settings, as stored under the settings key in localStorage. */
export interface VoiceSettingsData {
  /** Currently selected provider. */
  provider: TtsProvider

  // WebSpeech
  /** Name of the browser voice; empty means "use the default voice". */
  webspeechVoice: string

  // OpenAI
  openaiApiKey: string
  openaiBaseUrl: string
  openaiModel: string
  openaiVoice: string

  // Custom endpoint (OpenAI-compatible)
  customUrl: string
  customApiKey: string

  // Edge TTS
  edgeUrl: string
  edgeVoice: string
}
|
||||
|
||||
const STORAGE_KEY = 'hermes-tts-settings-v2'
|
||||
|
||||
/**
 * One-time migration of settings stored under the legacy
 * 'hermes-tts-settings' key into the current storage key.
 *
 * - The legacy 'gptsovits' provider becomes 'custom', and its `gptsovitsUrl`
 *   is carried over to `customUrl` when no custom URL is set.
 * - Settings already saved under the current key are never overwritten.
 * - Legacy data that is not a JSON object is ignored.
 *
 * Best-effort: storage or parse errors are swallowed so that importing the
 * module can never fail because of stale data.
 */
function migrateOldKeys() {
  const oldKey = 'hermes-tts-settings'
  try {
    const old = localStorage.getItem(oldKey)
    if (!old) return

    const parsed: unknown = JSON.parse(old)
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return

    const legacy: Record<string, unknown> = { ...(parsed as Record<string, unknown>) }

    // Old 'custom' provider maps to new 'custom'
    // Old 'gptsovits' provider maps to new 'custom'
    if (legacy.provider === 'gptsovits') {
      legacy.provider = 'custom'
      // old gptsovitsUrl -> customUrl
      if (legacy.gptsovitsUrl && !legacy.customUrl) {
        legacy.customUrl = legacy.gptsovitsUrl
      }
    }

    // Store as new format, unless newer settings already exist.
    if (localStorage.getItem(STORAGE_KEY) === null) {
      localStorage.setItem(STORAGE_KEY, JSON.stringify({ ...DEFAULT, ...legacy }))
    }
    localStorage.removeItem(oldKey)
  } catch { /* ignore: migration is best-effort */ }
}
|
||||
|
||||
/** Settings used when nothing has been stored yet, and restored by `reset()`. */
const DEFAULT: VoiceSettingsData = {
  provider: 'webspeech',

  // WebSpeech
  webspeechVoice: '',

  // OpenAI
  openaiApiKey: '',
  openaiBaseUrl: '',
  openaiModel: 'tts-1',
  openaiVoice: 'alloy',

  // Custom endpoint
  customUrl: '',
  customApiKey: '',

  // Edge TTS
  edgeUrl: '',
  edgeVoice: 'zh-CN-XiaoxiaoNeural',
}
|
||||
|
||||
/**
 * Normalize loaded settings.
 *
 * Any stored Edge TTS adapter URL is cleared, because Edge TTS now uses the
 * internal node-edge-tts backend. The input object is not modified; a copy is
 * returned when something had to change.
 */
function sanitize(data: VoiceSettingsData): VoiceSettingsData {
  // Clear old Edge TTS adapter URLs — now uses internal node-edge-tts
  return data.edgeUrl ? { ...data, edgeUrl: '' } : data
}
|
||||
|
||||
/**
 * Read the persisted voice settings.
 *
 * Stored values are layered over `DEFAULT` and passed through `sanitize`.
 * Missing, unreadable or non-object data yields a fresh copy of `DEFAULT`;
 * an unrecognized provider value is replaced by the default provider.
 */
function load(): VoiceSettingsData {
  try {
    const raw = localStorage.getItem(STORAGE_KEY)
    if (raw) {
      const parsed: unknown = JSON.parse(raw)
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        const merged = sanitize({ ...DEFAULT, ...(parsed as Partial<VoiceSettingsData>) })
        const knownProviders: readonly string[] = ['webspeech', 'openai', 'custom', 'edge']
        return knownProviders.includes(merged.provider)
          ? merged
          : { ...merged, provider: DEFAULT.provider }
      }
    }
  } catch { /* ignore: fall back to defaults */ }
  return { ...DEFAULT }
}
|
||||
|
||||
// Run migration once on import
migrateOldKeys()

// ── Reactive state ──
// Module-level refs shared by every caller of useVoiceSettings().
// Persisted settings are read once and used to seed all refs.
const initialSettings = load()

const provider = ref<TtsProvider>(initialSettings.provider)

// WebSpeech
const webspeechVoice = ref<string>(initialSettings.webspeechVoice)

// OpenAI
const openaiApiKey = ref<string>(initialSettings.openaiApiKey)
const openaiBaseUrl = ref<string>(initialSettings.openaiBaseUrl)
const openaiModel = ref<string>(initialSettings.openaiModel)
const openaiVoice = ref<string>(initialSettings.openaiVoice)

// Custom
const customUrl = ref<string>(initialSettings.customUrl)
const customApiKey = ref<string>(initialSettings.customApiKey)

// Edge TTS
const edgeUrl = ref<string>(initialSettings.edgeUrl)
const edgeVoice = ref<string>(initialSettings.edgeVoice)

// Auto-persist on change.
// NOTE: this writes the API keys to localStorage as plain JSON.
watch(
  [provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
    customUrl, customApiKey, edgeUrl, edgeVoice],
  () => {
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify({
        provider: provider.value,
        webspeechVoice: webspeechVoice.value,
        openaiApiKey: openaiApiKey.value,
        openaiBaseUrl: openaiBaseUrl.value,
        openaiModel: openaiModel.value,
        openaiVoice: openaiVoice.value,
        customUrl: customUrl.value,
        customApiKey: customApiKey.value,
        edgeUrl: edgeUrl.value,
        edgeVoice: edgeVoice.value,
      }))
    } catch (err) {
      // Storage can be unavailable or full; keep the in-memory settings usable.
      console.warn('[useVoiceSettings] Failed to persist voice settings:', err)
    }
  },
)
|
||||
|
||||
/**
 * Access the shared voice settings.
 *
 * Every call returns the same module-level refs, so all consumers observe the
 * same state. The setters exist so templates can bind them directly to
 * `@update:value`; `reset()` restores every field to its default.
 */
export function useVoiceSettings() {
  return {
    // State
    provider,
    webspeechVoice,
    openaiApiKey,
    openaiBaseUrl,
    openaiModel,
    openaiVoice,
    customUrl,
    customApiKey,
    edgeUrl,
    edgeVoice,

    // Setters
    setProvider(v: TtsProvider) { provider.value = v },
    setWebSpeechVoice(v: string) { webspeechVoice.value = v },
    setOpenaiApiKey(v: string) { openaiApiKey.value = v },
    setOpenaiBaseUrl(v: string) { openaiBaseUrl.value = v },
    setOpenaiModel(v: string) { openaiModel.value = v },
    setOpenaiVoice(v: string) { openaiVoice.value = v },
    setCustomUrl(v: string) { customUrl.value = v },
    setCustomApiKey(v: string) { customApiKey.value = v },
    setEdgeUrl(v: string) { edgeUrl.value = v },
    setEdgeVoice(v: string) { edgeVoice.value = v },

    /** Restore every setting to its default value. */
    reset() {
      provider.value = DEFAULT.provider
      webspeechVoice.value = DEFAULT.webspeechVoice
      openaiApiKey.value = DEFAULT.openaiApiKey
      openaiBaseUrl.value = DEFAULT.openaiBaseUrl
      openaiModel.value = DEFAULT.openaiModel
      openaiVoice.value = DEFAULT.openaiVoice
      customUrl.value = DEFAULT.customUrl
      customApiKey.value = DEFAULT.customApiKey
      edgeUrl.value = DEFAULT.edgeUrl
      edgeVoice.value = DEFAULT.edgeVoice
    },
  }
}
|
||||
@@ -511,6 +511,8 @@ jobTriggered: 'Job ausgelost',
|
||||
session: 'Sitzung',
|
||||
privacy: 'Datenschutz',
|
||||
apiServer: 'API-Server',
|
||||
models: 'Modelle',
|
||||
voice: 'Sprache',
|
||||
},
|
||||
display: {
|
||||
streaming: 'Streaming-Antworten',
|
||||
@@ -589,6 +591,55 @@ jobTriggered: 'Job ausgelost',
|
||||
cors: 'CORS-Ursprunge',
|
||||
corsHint: 'Erlaubte Cross-Origin-Quellen',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'TTS-Anbieter',
|
||||
ttsProviderHint: 'Waehlen Sie die Sprachsynthese-Engine fuer die Nachrichtenwiedergabe',
|
||||
providerWebSpeech: 'WebSpeech API (Browser)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: 'Benutzerdefinierter Endpunkt (OpenAI-kompatibel)',
|
||||
providerEdge: 'Edge TTS (Kostenlos, kein API-Key erforderlich)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: 'Stimme',
|
||||
webspeechVoiceHint: 'Waehlen Sie eine Stimme aus Ihrem Browser oder Betriebssystem',
|
||||
webspeechVoicePlaceholder: 'Auto (Standardstimme)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'API-Key',
|
||||
openaiKeyHint: 'Ihr OpenAI API-Key mit TTS-Zugriff',
|
||||
openaiUrl: 'API-Basis-URL',
|
||||
openaiUrlHint: 'z.B. https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'Modell',
|
||||
openaiModelHint: 'tts-1 (schneller) / tts-1-hd (hoehere Qualitaet)',
|
||||
openaiVoice: 'Stimme',
|
||||
openaiVoiceHint: 'Stimme fuer die Synthese',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'Jede OpenAI-kompatible TTS-API verwenden — funktioniert mit GPT-SoVITS, CosyVoice, usw.',
|
||||
customUrl: 'API-URL',
|
||||
customUrlHint: 'Basis-URL Ihres TTS-Dienstes',
|
||||
customUrlPlaceholder: 'Die im lokalen Adapter konfigurierte Adresse, z.B. http://127.0.0.1:9880',
|
||||
customApiKey: 'API-Key (optional)',
|
||||
customApiKeyHint: 'Einige benutzerdefinierte Endpunkte erfordern Authentifizierung',
|
||||
customApiKeyPlaceholder: 'Leer lassen wenn nicht benoetigt',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Angetrieben von Microsoft Edge TTS (node-edge-tts).',
|
||||
edgeUrl: 'Adapter-URL',
|
||||
edgeUrlHint: 'Adresse des Edge TTS-Adapters, z.B. http://127.0.0.1:9882',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Stimme',
|
||||
edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese',
|
||||
|
||||
// Test
|
||||
testTitle: 'Sprachtest',
|
||||
testText: 'Testtext',
|
||||
testTextPlaceholder: 'Text zum Testen eingeben...',
|
||||
testTextDefault: 'Hallo, dies ist ein Sprachtest.',
|
||||
testButton: 'Testen',
|
||||
testButtonPlaying: 'Wiedergabe...',
|
||||
testFailed: 'Test fehlgeschlagen: {error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: 'Gesperrte IPs',
|
||||
count: '{count} gesperrt',
|
||||
|
||||
@@ -651,6 +651,7 @@ export default {
|
||||
privacy: 'Privacy',
|
||||
apiServer: 'API Server',
|
||||
models: 'Models',
|
||||
voice: 'Voice',
|
||||
},
|
||||
models: {
|
||||
apiKey: 'API Key',
|
||||
@@ -747,6 +748,54 @@ export default {
|
||||
unlocked: 'IP unlocked',
|
||||
allUnlocked: '{count} IPs unlocked',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'TTS Provider',
|
||||
ttsProviderHint: 'Choose the text-to-speech engine for message playback',
|
||||
providerWebSpeech: 'WebSpeech API (Browser)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: 'Custom Endpoint (OpenAI-compatible)',
|
||||
providerEdge: 'Edge TTS (Free, no API Key)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: 'Voice',
|
||||
webspeechVoiceHint: 'Select a voice from your browser or OS',
|
||||
webspeechVoicePlaceholder: 'Auto (default voice)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'API Key',
|
||||
openaiKeyHint: 'Your OpenAI API key with TTS access',
|
||||
openaiUrl: 'API Base URL',
|
||||
openaiUrlHint: 'e.g. https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'Model',
|
||||
openaiModelHint: 'tts-1 (faster) / tts-1-hd (higher quality)',
|
||||
openaiVoice: 'Voice',
|
||||
openaiVoiceHint: 'Voice to use for synthesis',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'Use any OpenAI-compatible TTS API — works with GPT-SoVITS, CosyVoice, etc.',
|
||||
customUrl: 'API URL',
|
||||
customUrlHint: 'Base URL of your TTS service',
|
||||
customUrlPlaceholder: 'The address configured in the local adapter, e.g. http://127.0.0.1:9880',
|
||||
customApiKey: 'API Key (Optional)',
|
||||
customApiKeyHint: 'Some custom endpoints require authentication',
|
||||
customApiKeyPlaceholder: 'Leave blank if not needed',
|
||||
// Edge TTS
|
||||
edgeHint: 'Powered by Microsoft Edge TTS (node-edge-tts).',
|
||||
edgeUrl: 'Adapter URL',
|
||||
edgeUrlHint: 'Address of your Edge TTS adapter, e.g. http://127.0.0.1:9882',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voice',
|
||||
edgeVoiceHint: 'Select a voice for speech synthesis',
|
||||
|
||||
// Test
|
||||
testTitle: 'Test Voice',
|
||||
testText: 'Test Text',
|
||||
testTextPlaceholder: 'Enter text to test...',
|
||||
testTextDefault: 'Hello, this is a voice test.',
|
||||
testButton: 'Test',
|
||||
testButtonPlaying: 'Playing...',
|
||||
testFailed: 'Test failed: {error}',
|
||||
},
|
||||
},
|
||||
|
||||
// Platform channel settings
|
||||
|
||||
@@ -511,6 +511,8 @@ jobTriggered: 'Job ejecutado',
|
||||
session: 'Sesion',
|
||||
privacy: 'Privacidad',
|
||||
apiServer: 'Servidor API',
|
||||
models: 'Modelos',
|
||||
voice: 'Voz',
|
||||
},
|
||||
display: {
|
||||
streaming: 'Respuestas en streaming',
|
||||
@@ -589,6 +591,55 @@ jobTriggered: 'Job ejecutado',
|
||||
cors: 'Origenes CORS',
|
||||
corsHint: 'Fuentes cross-origin permitidas',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'Proveedor TTS',
|
||||
ttsProviderHint: 'Elija el motor de texto a voz para la reproduccion de mensajes',
|
||||
providerWebSpeech: 'WebSpeech API (Navegador)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: 'Endpoint personalizado (compatible con OpenAI)',
|
||||
providerEdge: 'Edge TTS (Gratuito, sin clave API)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: 'Voz',
|
||||
webspeechVoiceHint: 'Seleccione una voz de su navegador o sistema operativo',
|
||||
webspeechVoicePlaceholder: 'Auto (voz predeterminada)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'Clave API',
|
||||
openaiKeyHint: 'Su clave API de OpenAI con acceso TTS',
|
||||
openaiUrl: 'URL base de API',
|
||||
openaiUrlHint: 'ej. https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'Modelo',
|
||||
openaiModelHint: 'tts-1 (mas rapido) / tts-1-hd (mayor calidad)',
|
||||
openaiVoice: 'Voz',
|
||||
openaiVoiceHint: 'Voz a utilizar para la sintesis',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'Utilice cualquier API TTS compatible con OpenAI — funciona con GPT-SoVITS, CosyVoice, etc.',
|
||||
customUrl: 'URL de API',
|
||||
customUrlHint: 'URL base de su servicio TTS',
|
||||
customUrlPlaceholder: 'Direccion configurada en el adaptador local, ej. http://127.0.0.1:9880',
|
||||
customApiKey: 'Clave API (opcional)',
|
||||
customApiKeyHint: 'Algunos endpoints personalizados requieren autenticacion',
|
||||
customApiKeyPlaceholder: 'Dejar en blanco si no es necesario',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Impulsado por Microsoft Edge TTS (node-edge-tts).',
|
||||
edgeUrl: 'URL del adaptador',
|
||||
edgeUrlHint: 'Direccion del adaptador Edge TTS, ej. http://127.0.0.1:9882',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voz',
|
||||
edgeVoiceHint: 'Seleccione una voz para la sintesis de voz',
|
||||
|
||||
// Test
|
||||
testTitle: 'Prueba de voz',
|
||||
testText: 'Texto de prueba',
|
||||
testTextPlaceholder: 'Ingrese texto para probar...',
|
||||
testTextDefault: 'Hola, esta es una prueba de voz.',
|
||||
testButton: 'Probar',
|
||||
testButtonPlaying: 'Reproduciendo...',
|
||||
testFailed: 'Prueba fallida: {error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: 'IPs bloqueadas',
|
||||
count: '{count} bloqueadas',
|
||||
|
||||
@@ -511,6 +511,8 @@ jobTriggered: 'Job declenche',
|
||||
session: 'Session',
|
||||
privacy: 'Confidentialite',
|
||||
apiServer: 'Serveur API',
|
||||
models: 'Modèles',
|
||||
voice: 'Voix',
|
||||
},
|
||||
display: {
|
||||
streaming: 'Reponses en continu',
|
||||
@@ -589,6 +591,55 @@ jobTriggered: 'Job declenche',
|
||||
cors: 'Origines CORS',
|
||||
corsHint: 'Sources cross-origin autorisees',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'Fournisseur TTS',
|
||||
ttsProviderHint: 'Choisir le moteur de synthese vocale pour la lecture des messages',
|
||||
providerWebSpeech: 'WebSpeech API (Navigateur)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: "Point d'acces personnalise (compatible OpenAI)",
|
||||
providerEdge: 'Edge TTS (Gratuit, sans cle API)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: 'Voix',
|
||||
webspeechVoiceHint: "Choisir une voix depuis le navigateur ou l'OS",
|
||||
webspeechVoicePlaceholder: 'Auto (voix par defaut)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'Cle API',
|
||||
openaiKeyHint: 'Votre cle API OpenAI avec acces TTS',
|
||||
openaiUrl: 'URL de base API',
|
||||
openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'Modele',
|
||||
openaiModelHint: 'tts-1 (rapide) / tts-1-hd (haute qualite)',
|
||||
openaiVoice: 'Voix',
|
||||
openaiVoiceHint: 'Voix a utiliser pour la synthese',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'Utilisez toute API TTS compatible OpenAI — fonctionne avec GPT-SoVITS, CosyVoice, etc.',
|
||||
customUrl: 'URL API',
|
||||
customUrlHint: 'URL de base de votre service TTS',
|
||||
customUrlPlaceholder: "Adresse configuree dans l'adaptateur local, ex. http://127.0.0.1:9880",
|
||||
customApiKey: 'Cle API (optionnelle)',
|
||||
customApiKeyHint: "Certains points d'acces personnalises necessitent une authentification",
|
||||
customApiKeyPlaceholder: 'Laisser vide si inutile',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Propulse par Microsoft Edge TTS (node-edge-tts).',
|
||||
edgeUrl: "URL de l'adaptateur",
|
||||
edgeUrlHint: "Adresse de l'adaptateur Edge TTS, ex. http://127.0.0.1:9882",
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voix',
|
||||
edgeVoiceHint: 'Choisir une voix pour la synthese vocale',
|
||||
|
||||
// Test
|
||||
testTitle: 'Test vocal',
|
||||
testText: 'Texte de test',
|
||||
testTextPlaceholder: 'Entrez le texte a tester...',
|
||||
testTextDefault: 'Bonjour, ceci est un test vocal.',
|
||||
testButton: 'Tester',
|
||||
testButtonPlaying: 'Lecture...',
|
||||
testFailed: 'Echec du test : {error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: 'IPs bloquees',
|
||||
count: '{count} bloquees',
|
||||
|
||||
@@ -511,6 +511,8 @@ export default {
|
||||
session: 'セッション',
|
||||
privacy: 'プライバシー',
|
||||
apiServer: 'API サーバー',
|
||||
models: 'モデル',
|
||||
voice: '音声',
|
||||
},
|
||||
display: {
|
||||
streaming: 'ストリームレスポンス',
|
||||
@@ -589,6 +591,55 @@ export default {
|
||||
cors: 'CORS 許可元',
|
||||
corsHint: '許可するクロスオリジン',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'TTS プロバイダー',
|
||||
ttsProviderHint: 'メッセージ読み上げに使用する音声合成エンジンを選択',
|
||||
providerWebSpeech: 'WebSpeech API(ブラウザ)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: 'カスタムエンドポイント(OpenAI 互換)',
|
||||
providerEdge: 'Edge TTS(無料、API Key 不要)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: '音声',
|
||||
webspeechVoiceHint: 'ブラウザまたは OS から音声を選択',
|
||||
webspeechVoicePlaceholder: '自動(デフォルト音声)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'API キー',
|
||||
openaiKeyHint: 'TTS アクセス権のある OpenAI API キー',
|
||||
openaiUrl: 'API ベース URL',
|
||||
openaiUrlHint: '例: https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'モデル',
|
||||
openaiModelHint: 'tts-1(高速)/ tts-1-hd(高音質)',
|
||||
openaiVoice: '音色',
|
||||
openaiVoiceHint: '合成に使用する音色',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'OpenAI 互換の TTS API を使用可能 — GPT-SoVITS、CosyVoice などに対応',
|
||||
customUrl: 'API URL',
|
||||
customUrlHint: 'TTS サービスのベース URL',
|
||||
customUrlPlaceholder: 'ローカルアダプターで設定したアドレス(例:http://127.0.0.1:9880)',
|
||||
customApiKey: 'API キー(オプション)',
|
||||
customApiKeyHint: '一部のカスタムエンドポイントは認証が必要',
|
||||
customApiKeyPlaceholder: '不要な場合は空欄',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Microsoft Edge TTS を搭載(node-edge-tts)。',
|
||||
edgeUrl: 'アダプター URL',
|
||||
edgeUrlHint: 'Edge TTS アダプターのアドレス(例:http://127.0.0.1:9882)',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '音色',
|
||||
edgeVoiceHint: '音声合成に使用する音色を選択',
|
||||
|
||||
// Test
|
||||
testTitle: '音声テスト',
|
||||
testText: 'テストテキスト',
|
||||
testTextPlaceholder: 'テストするテキストを入力...',
|
||||
testTextDefault: 'こんにちは、これは音声テストです。',
|
||||
testButton: 'テスト',
|
||||
testButtonPlaying: '再生中...',
|
||||
testFailed: 'テスト失敗:{error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: 'ロック済みIP管理',
|
||||
count: '{count}件ロック中',
|
||||
|
||||
@@ -511,6 +511,8 @@ export default {
|
||||
session: '세션',
|
||||
privacy: '개인정보',
|
||||
apiServer: 'API 서버',
|
||||
models: '모델',
|
||||
voice: '음성',
|
||||
},
|
||||
display: {
|
||||
streaming: '스트리밍 응답',
|
||||
@@ -589,6 +591,55 @@ export default {
|
||||
cors: 'CORS 출처',
|
||||
corsHint: '허용된 교차 출처',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'TTS 제공자',
|
||||
ttsProviderHint: '메시지 재생에 사용할 텍스트 음성 변환 엔진 선택',
|
||||
providerWebSpeech: 'WebSpeech API (브라우저)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: '사용자 정의 엔드포인트 (OpenAI 호환)',
|
||||
providerEdge: 'Edge TTS (무료, API Key 불필요)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: '음성',
|
||||
webspeechVoiceHint: '브라우저 또는 OS에서 음성 선택',
|
||||
webspeechVoicePlaceholder: '자동 (기본 음성)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'API 키',
|
||||
openaiKeyHint: 'TTS 접근 권한이 있는 OpenAI API 키',
|
||||
openaiUrl: 'API 기본 URL',
|
||||
openaiUrlHint: '예: https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: '모델',
|
||||
openaiModelHint: 'tts-1 (빠름) / tts-1-hd (고음질)',
|
||||
openaiVoice: '음색',
|
||||
openaiVoiceHint: '합성에 사용할 음색',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: '모든 OpenAI 호환 TTS API 사용 가능 — GPT-SoVITS, CosyVoice 등 지원',
|
||||
customUrl: 'API URL',
|
||||
customUrlHint: 'TTS 서비스의 기본 URL',
|
||||
customUrlPlaceholder: '로컬 어댑터에 설정된 주소 (예: http://127.0.0.1:9880)',
|
||||
customApiKey: 'API 키 (선택사항)',
|
||||
customApiKeyHint: '일부 사용자 정의 엔드포인트는 인증 필요',
|
||||
customApiKeyPlaceholder: '필요하지 않으면 비워둠',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Microsoft Edge TTS 기반 (node-edge-tts).',
|
||||
edgeUrl: '어댑터 URL',
|
||||
edgeUrlHint: 'Edge TTS 어댑터 주소 (예: http://127.0.0.1:9882)',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '음색',
|
||||
edgeVoiceHint: '음성 합성에 사용할 음색 선택',
|
||||
|
||||
// Test
|
||||
testTitle: '음성 테스트',
|
||||
testText: '테스트 텍스트',
|
||||
testTextPlaceholder: '테스트할 텍스트 입력...',
|
||||
testTextDefault: '안녕하세요, 음성 테스트입니다.',
|
||||
testButton: '테스트',
|
||||
testButtonPlaying: '재생 중...',
|
||||
testFailed: '테스트 실패: {error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: '잠긴 IP 관리',
|
||||
count: '{count}개 잠김',
|
||||
|
||||
@@ -511,6 +511,8 @@ jobTriggered: 'Job acionado',
|
||||
session: 'Sessao',
|
||||
privacy: 'Privacidade',
|
||||
apiServer: 'Servidor API',
|
||||
models: 'Modelos',
|
||||
voice: 'Voz',
|
||||
},
|
||||
display: {
|
||||
streaming: 'Respostas em streaming',
|
||||
@@ -589,6 +591,55 @@ jobTriggered: 'Job acionado',
|
||||
cors: 'Origens CORS',
|
||||
corsHint: 'Fontes cross-origin permitidas',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'Provedor TTS',
|
||||
ttsProviderHint: 'Escolha o mecanismo de texto para fala para reproducao de mensagens',
|
||||
providerWebSpeech: 'WebSpeech API (Navegador)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: 'Endpoint personalizado (compativel com OpenAI)',
|
||||
providerEdge: 'Edge TTS (Gratuito, sem chave API)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: 'Voz',
|
||||
webspeechVoiceHint: 'Selecione uma voz do seu navegador ou SO',
|
||||
webspeechVoicePlaceholder: 'Auto (voz padrao)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'Chave API',
|
||||
openaiKeyHint: 'Sua chave API OpenAI com acesso TTS',
|
||||
openaiUrl: 'URL base da API',
|
||||
openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: 'Modelo',
|
||||
openaiModelHint: 'tts-1 (mais rapido) / tts-1-hd (qualidade superior)',
|
||||
openaiVoice: 'Voz',
|
||||
openaiVoiceHint: 'Voz a ser usada para sintese',
|
||||
|
||||
// Custom endpoint
|
||||
customHint: 'Use qualquer API TTS compativel com OpenAI — funciona com GPT-SoVITS, CosyVoice, etc.',
|
||||
customUrl: 'URL da API',
|
||||
customUrlHint: 'URL base do seu servico TTS',
|
||||
customUrlPlaceholder: 'Endereco configurado no adaptador local, ex. http://127.0.0.1:9880',
|
||||
customApiKey: 'Chave API (opcional)',
|
||||
customApiKeyHint: 'Alguns endpoints personalizados exigem autenticacao',
|
||||
customApiKeyPlaceholder: 'Deixe em branco se nao for necessario',
|
||||
|
||||
// Edge TTS
|
||||
edgeHint: 'Desenvolvido por Microsoft Edge TTS (node-edge-tts).',
|
||||
edgeUrl: 'URL do adaptador',
|
||||
edgeUrlHint: 'Endereco do adaptador Edge TTS, ex. http://127.0.0.1:9882',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voz',
|
||||
edgeVoiceHint: 'Selecione uma voz para sintese de fala',
|
||||
|
||||
// Test
|
||||
testTitle: 'Teste de voz',
|
||||
testText: 'Texto de teste',
|
||||
testTextPlaceholder: 'Insira o texto para testar...',
|
||||
testTextDefault: 'Ola, este e um teste de voz.',
|
||||
testButton: 'Testar',
|
||||
testButtonPlaying: 'Reproduzindo...',
|
||||
testFailed: 'Teste falhou: {error}',
|
||||
},
|
||||
lockedIps: {
|
||||
title: 'IPs bloqueadas',
|
||||
count: '{count} bloqueadas',
|
||||
|
||||
@@ -643,6 +643,7 @@ export default {
|
||||
privacy: '隐私',
|
||||
apiServer: 'API 服务器',
|
||||
models: '模型',
|
||||
voice: '语音',
|
||||
},
|
||||
models: {
|
||||
apiKey: 'API Key',
|
||||
@@ -739,6 +740,54 @@ export default {
|
||||
unlocked: 'IP 已解锁',
|
||||
allUnlocked: '已解锁 {count} 个 IP',
|
||||
},
|
||||
voice: {
|
||||
ttsProvider: 'TTS 提供者',
|
||||
ttsProviderHint: '选择消息朗读使用的语音引擎',
|
||||
providerWebSpeech: 'WebSpeech API(浏览器内置)',
|
||||
providerOpenai: 'OpenAI TTS',
|
||||
providerCustom: '自定义端点(兼容 OpenAI)',
|
||||
providerEdge: 'Edge TTS(免费,无需 API Key)',
|
||||
|
||||
// WebSpeech
|
||||
webspeechVoice: '音色',
|
||||
webspeechVoiceHint: '从浏览器或系统提供的语音中选择',
|
||||
webspeechVoicePlaceholder: '自动(默认语音)',
|
||||
|
||||
// OpenAI
|
||||
openaiKey: 'API 密钥',
|
||||
openaiKeyHint: '具有 TTS 权限的 OpenAI API Key',
|
||||
openaiUrl: 'API 基础地址',
|
||||
openaiUrlHint: '例如 https://api.openai.com/v1/audio/speech',
|
||||
openaiModel: '模型',
|
||||
openaiModelHint: 'tts-1(快速)/ tts-1-hd(高音质)',
|
||||
openaiVoice: '音色',
|
||||
openaiVoiceHint: '用于语音合成的音色',
|
||||
|
||||
// 自定义端点
|
||||
customHint: '支持任何 OpenAI 兼容的 TTS 服务——可用于 GPT-SoVITS、CosyVoice 等自部署服务。',
|
||||
customUrl: 'API 地址',
|
||||
customUrlHint: 'TTS 服务的完整基础地址',
|
||||
customUrlPlaceholder: '本地适配器中配置的地址 如:http://127.0.0.1:9880',
|
||||
customApiKey: 'API 密钥(可选)',
|
||||
customApiKeyHint: '部分自部署服务需要身份验证',
|
||||
customApiKeyPlaceholder: '不需要则留空',
|
||||
// Edge TTS
|
||||
edgeHint: '由 Microsoft Edge TTS 驱动(node-edge-tts)。',
|
||||
edgeUrl: '适配器地址',
|
||||
edgeUrlHint: 'Edge TTS 适配器地址,例如 http://127.0.0.1:9882',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '音色',
|
||||
edgeVoiceHint: '选择用于语音合成的音色',
|
||||
|
||||
// 试听
|
||||
testTitle: '试听测试',
|
||||
testText: '测试文本',
|
||||
testTextPlaceholder: '输入测试文本...',
|
||||
testTextDefault: '你好,这是一个语音测试。',
|
||||
testButton: '试听',
|
||||
testButtonPlaying: '播放中...',
|
||||
testFailed: '测试失败:{error}',
|
||||
},
|
||||
},
|
||||
|
||||
// 平台频道设置
|
||||
|
||||
@@ -14,6 +14,7 @@ import SessionSettings from "@/components/hermes/settings/SessionSettings.vue";
|
||||
import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue";
|
||||
import ModelSettings from "@/components/hermes/settings/ModelSettings.vue";
|
||||
import AccountSettings from "@/components/hermes/settings/AccountSettings.vue";
|
||||
import VoiceSettings from "@/components/hermes/settings/VoiceSettings.vue";
|
||||
|
||||
const settingsStore = useSettingsStore();
|
||||
const { t } = useI18n();
|
||||
@@ -57,6 +58,9 @@ onMounted(() => {
|
||||
<NTabPane name="models" :tab="t('settings.tabs.models')">
|
||||
<ModelSettings />
|
||||
</NTabPane>
|
||||
<NTabPane name="voice" :tab="t('settings.tabs.voice')">
|
||||
<VoiceSettings />
|
||||
</NTabPane>
|
||||
</NTabs>
|
||||
</NSpin>
|
||||
</div>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Context } from 'koa'
|
||||
import { textToSpeech } from '../../services/hermes/tts'
|
||||
import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'
|
||||
|
||||
export async function generate(ctx: Context) {
|
||||
const { text, lang } = ctx.request.body as {
|
||||
@@ -26,3 +26,41 @@ export async function generate(ctx: Context) {
|
||||
ctx.set('X-TTS-Engine', engine)
|
||||
ctx.body = audio
|
||||
}
|
||||
|
||||
/**
 * OpenAI-compatible TTS endpoint.
 *
 * Accepts a JSON body shaped like an OpenAI speech request:
 * `{ model, input, voice, speed }`, and answers with `audio/mpeg` bytes.
 * The engine that produced the audio is reported in the `X-TTS-Engine` header.
 *
 * Responds with HTTP 400 when:
 * - `input` is missing, empty, or not a string;
 * - `input` is longer than 5000 characters;
 * - `voice` is present but not a string;
 * - `speed` is present but not a finite number.
 *
 * @param ctx Koa request context; the parsed request body is read from `ctx.request.body`.
 */
export async function openaiProxy(ctx: Context) {
  // A request without a parsed body is treated as an empty object so the
  // validation below answers 400 instead of throwing on property access.
  const body = (ctx.request.body ?? {}) as {
    input?: string
    voice?: string
    speed?: number
    model?: string
  }

  if (!body.input || typeof body.input !== 'string') {
    ctx.status = 400
    ctx.body = { error: 'input is required' }
    return
  }

  if (body.input.length > 5000) {
    ctx.status = 400
    ctx.body = { error: 'input is too long (max 5000 characters)' }
    return
  }

  // The cast above is not a runtime check: the payload is untrusted JSON, so
  // optional fields are verified before they are handed to the TTS service.
  if (body.voice != null && typeof body.voice !== 'string') {
    ctx.status = 400
    ctx.body = { error: 'voice must be a string' }
    return
  }

  // JSON such as `1e999` parses to Infinity, which cannot be turned into a rate.
  if (body.speed != null && (typeof body.speed !== 'number' || !Number.isFinite(body.speed))) {
    ctx.status = 400
    ctx.body = { error: 'speed must be a finite number' }
    return
  }

  const { audio, engine } = await openaiCompatibleTts({
    input: body.input,
    voice: body.voice,
    speed: body.speed,
    model: body.model,
  })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}
|
||||
|
||||
@@ -4,3 +4,4 @@ import * as ctrl from '../../controllers/hermes/tts'
|
||||
export const ttsRoutes = new Router()
|
||||
|
||||
ttsRoutes.post('/api/hermes/tts', ctrl.generate)
|
||||
ttsRoutes.post('/api/tts/proxy/audio/speech', ctrl.openaiProxy)
|
||||
|
||||
@@ -41,6 +41,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
|
||||
app.use(healthRoutes.routes())
|
||||
app.use(webhookRoutes.routes())
|
||||
app.use(authPublicRoutes.routes())
|
||||
app.use(ttsRoutes.routes()) // TTS proxy/generation — must be before auth
|
||||
|
||||
// --- Auth middleware: all routes below require authentication ---
|
||||
app.use(requireAuth)
|
||||
@@ -69,7 +70,6 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
|
||||
app.use(jobRoutes.routes()) // Must be before proxy
|
||||
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
||||
app.use(kanbanRoutes.routes()) // Must be before proxy
|
||||
app.use(ttsRoutes.routes()) // Must be before proxy
|
||||
app.use(proxyRoutes.routes())
|
||||
|
||||
// Proxy catch-all middleware (must be last)
|
||||
|
||||
@@ -12,6 +12,9 @@ const FIXED_PITCH = '+12Hz'
|
||||
/** Options accepted by the Edge TTS helpers in this module. */
export interface TtsOptions {
  /** Text to synthesize. */
  text: string
  /** Language hint supplied by callers. NOTE(review): not read by the code visible here — confirm usage. */
  lang?: string
  /** Voice identifier; the module's fixed default voice is used when empty or omitted. */
  voice?: string
  /** Speaking rate as a signed percentage string such as "+20%"; defaults to the module's fixed rate. */
  rate?: string
  /** Pitch adjustment string; defaults to the module's fixed pitch when empty or omitted. */
  pitch?: string
}
|
||||
|
||||
export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
|
||||
@@ -20,9 +23,9 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
|
||||
|
||||
try {
|
||||
const tts = new EdgeTTS({
|
||||
voice: FIXED_VOICE,
|
||||
rate: FIXED_RATE,
|
||||
pitch: FIXED_PITCH,
|
||||
voice: opts.voice || FIXED_VOICE,
|
||||
rate: opts.rate || FIXED_RATE,
|
||||
pitch: opts.pitch || FIXED_PITCH,
|
||||
timeout: 15000,
|
||||
})
|
||||
|
||||
@@ -35,7 +38,41 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
|
||||
}
|
||||
|
||||
/**
 * Synthesize speech for the given options using Edge TTS.
 *
 * @param opts Text plus optional voice, rate and pitch overrides.
 * @returns The generated audio buffer together with the engine label `'edge'`.
 */
export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> {
  // Resolved here only so the debug entry reports the values actually in
  // effect; `opts` itself is passed on unchanged.
  const voice = opts.voice || FIXED_VOICE
  const rate = opts.rate || FIXED_RATE
  const pitch = opts.pitch || FIXED_PITCH
  const audio = await edgeTts(opts)
  logger.debug({ engine: 'edge', voice, rate, pitch }, 'TTS generated via Edge')
  return { audio, engine: 'edge' }
}
|
||||
|
||||
/**
 * Convert a speed multiplier (nominally 0.5-2.0, where 1 is normal speed)
 * to an Edge TTS rate string of the form "+NN%" / "-NN%".
 *
 * Examples: 1 -> "+0%", 1.5 -> "+50%", 0.5 -> "-50%".
 *
 * @param speed Speed multiplier relative to normal speed.
 * @returns Signed percentage string. A non-finite `speed` (NaN, +/-Infinity)
 *   yields the neutral "+0%" instead of an invalid "NaN%" / "+Infinity%".
 */
export function speedToEdgeRate(speed: number): string {
  // Non-finite values cannot be expressed as a percentage; fall back to "no change".
  if (!Number.isFinite(speed)) return '+0%'
  const percent = Math.round((speed - 1) * 100)
  return percent >= 0 ? `+${percent}%` : `${percent}%`
}
|
||||
|
||||
/**
 * Request payload in the OpenAI TTS format: `{ model, input, voice, speed }`.
 */
export interface OpenaiTtsRequest {
  /** Model name as sent by OpenAI-style clients. */
  model?: string
  /** Text to synthesize. */
  input: string
  /** Requested voice identifier. */
  voice?: string
  /** Speed multiplier, where 1 is normal speed. */
  speed?: number
}
|
||||
|
||||
/**
 * Translate an OpenAI-style TTS request into this module's options and
 * synthesize it via `textToSpeech`.
 *
 * Mapping: `input` becomes the text, `voice` falls back to the fixed default
 * voice, a truthy `speed` is converted with `speedToEdgeRate` (otherwise the
 * fixed default rate is used), and pitch is always the fixed default.
 * `model` is not read.
 *
 * @param body OpenAI-format request.
 * @returns The audio buffer and the engine label reported by `textToSpeech`.
 */
export async function openaiCompatibleTts(
  body: OpenaiTtsRequest,
): Promise<{ audio: Buffer; engine: string }> {
  const { input, voice, speed } = body
  const edgeOptions = {
    text: input,
    voice: voice || FIXED_VOICE,
    rate: speed ? speedToEdgeRate(speed) : FIXED_RATE,
    pitch: FIXED_PITCH,
  }
  return textToSpeech(edgeOptions)
}
|
||||
|
||||
Reference in New Issue
Block a user