feat: add voice playback settings with 4-provider support (#608)

Add WebSpeech, OpenAI TTS, Custom endpoint, and Edge TTS providers.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
memeflyfly
2026-05-10 20:08:38 +08:00
committed by GitHub
parent 838791a740
commit 15195f0795
18 changed files with 1237 additions and 20 deletions
+2 -2
View File
@@ -111,7 +111,7 @@
"vue": "^3.5.32",
"vue-i18n": "^11.3.2",
"vue-router": "^4.6.4",
"vue-tsc": "^3.2.6",
"vue-tsc": "^3.2.8",
"ws": "^8.20.0"
}
}
}
@@ -16,6 +16,7 @@ import {
renderHighlightedCodeBlock,
} from "./highlight";
import { useGlobalSpeech } from "@/composables/useSpeech";
import { useVoiceSettings } from "@/composables/useVoiceSettings";
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
@@ -79,6 +80,7 @@ const previewUrl = ref<string | null>(null);
const chatStore = useChatStore();
const settingsStore = useSettingsStore();
const speech = useGlobalSpeech();
const voiceSettings = useVoiceSettings();
// Copy entire bubble content
const copyableContent = computed(() => {
@@ -351,25 +353,90 @@ const renderedToolResult = computed(() => {
// 语音播放相关
const canPlaySpeech = computed(() => {
// 只有 assistant 消息可以播放,且浏览器支持 Web Speech API
return props.message.role === 'assistant' &&
speech.isSupported &&
copyableContent.value;
});
// 只有 assistant 消息可以播放
if (props.message.role !== 'assistant') return false
if (!copyableContent.value) return false
// OpenAI / Custom / Edge 不依赖浏览器 Web Speech API
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') return true
return speech.isSupported
})
const isPlayingThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value;
});
// OpenAI / Custom / Edge 模式
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPlaying.value
}
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value
})
const isPausedThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPaused.value;
});
// OpenAI / Custom / Edge 模式
if (voiceSettings.provider.value === 'openai' || voiceSettings.provider.value === 'custom' || voiceSettings.provider.value === 'edge') {
return speech.currentCustomMessageId.value === props.message.id && speech.isCustomPaused.value
}
return speech.currentMessageId.value === props.message.id && speech.isPaused.value
})
/**
 * Click handler for this message's speech button.
 *
 * Does nothing when `canPlaySpeech` is false. Otherwise it dispatches on the
 * configured TTS provider: OpenAI, custom and Edge go through
 * `speech.openaiToggle`; WebSpeech stops current speech and starts browser
 * synthesis with the selected voice; any other provider value falls back to
 * `speech.toggle`.
 */
function handleSpeechToggle() {
  if (!canPlaySpeech.value) return

  const messageId = props.message.id
  const content = props.message.content || ''

  switch (voiceSettings.provider.value) {
    case 'openai': {
      // OpenAI TTS: refuse to start without a configured endpoint.
      if (!voiceSettings.openaiBaseUrl.value) {
        console.warn('[MessageItem] OpenAI TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl: voiceSettings.openaiBaseUrl.value,
        apiKey: voiceSettings.openaiApiKey.value,
        model: voiceSettings.openaiModel.value,
        voice: voiceSettings.openaiVoice.value,
      })
      return
    }

    case 'custom': {
      // Custom OpenAI-compatible endpoint (e.g. GPT-SoVITS); the API key is optional.
      if (!voiceSettings.customUrl.value) {
        console.warn('[MessageItem] 自定义 TTS 地址为空')
        return
      }
      speech.openaiToggle(messageId, content, {
        baseUrl: voiceSettings.customUrl.value,
        apiKey: voiceSettings.customApiKey.value || undefined,
      })
      return
    }

    case 'edge': {
      // Edge TTS: an empty URL means "use the built-in backend proxy".
      const edgeEndpoint = voiceSettings.edgeUrl.value || '/api/tts/proxy'
      speech.openaiToggle(messageId, content, {
        baseUrl: edgeEndpoint,
        voice: voiceSettings.edgeVoice.value,
      })
      return
    }

    case 'webspeech': {
      // Web Speech API: speak only when there is readable text left after extraction.
      const readable = speech.extractReadableText(content)
      if (readable) {
        speech.stop(false)
        speech.speakViaBrowser(messageId, readable, {
          voiceName: voiceSettings.webspeechVoice.value || undefined,
        })
      }
      return
    }

    default:
      // Fallback when the stored provider matches none of the known values.
      speech.toggle(messageId, content)
  }
}
@@ -380,7 +447,37 @@ onMounted(() => {
autoPlayHandler = (e: Event) => {
const customEvent = e as CustomEvent<{ messageId: string; content: string }>
if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
speech.enqueue(props.message.id, customEvent.detail.content || props.message.content || '')
const content = customEvent.detail.content || props.message.content || ''
if (voiceSettings.provider.value === 'openai') {
const apiUrl = voiceSettings.openaiBaseUrl.value
if (apiUrl) speech.openaiPlay(props.message.id, content, {
baseUrl: voiceSettings.openaiBaseUrl.value,
apiKey: voiceSettings.openaiApiKey.value,
model: voiceSettings.openaiModel.value,
voice: voiceSettings.openaiVoice.value,
})
} else if (voiceSettings.provider.value === 'custom') {
const apiUrl = voiceSettings.customUrl.value
if (apiUrl) speech.openaiPlay(props.message.id, content, {
baseUrl: voiceSettings.customUrl.value,
apiKey: voiceSettings.customApiKey.value || undefined,
})
} else if (voiceSettings.provider.value === 'edge') {
speech.openaiPlay(props.message.id, content, {
baseUrl: '/api/tts/proxy',
voice: voiceSettings.edgeVoice.value,
})
} else if (voiceSettings.provider.value === 'webspeech') {
const text = speech.extractReadableText(content)
if (text) {
speech.stop(false)
speech.speakViaBrowser(props.message.id, text, {
voiceName: voiceSettings.webspeechVoice.value || undefined,
})
}
} else {
speech.enqueue(props.message.id, content)
}
}
}
window.addEventListener('auto-play-speech', autoPlayHandler)
@@ -0,0 +1,327 @@
<script setup lang="ts">
import { ref, onMounted, onBeforeUnmount } from 'vue'
import { NSelect, NInput, NButton } from 'naive-ui'
import { useI18n } from 'vue-i18n'
import { useVoiceSettings } from '@/composables/useVoiceSettings'
import { useSpeech } from '@/composables/useSpeech'
import SettingRow from './SettingRow.vue'
const { t } = useI18n()
const vs = useVoiceSettings()
const speech = useSpeech()
const testText = ref(t('settings.voice.testTextDefault'))
const testPlaying = ref(false)
const providerOptions = [
{ label: t('settings.voice.providerWebSpeech'), value: 'webspeech' },
{ label: t('settings.voice.providerOpenai'), value: 'openai' },
{ label: t('settings.voice.providerCustom'), value: 'custom' },
{ label: t('settings.voice.providerEdge'), value: 'edge' },
]
const openaiModelOptions = [
{ label: 'tts-1', value: 'tts-1' },
{ label: 'tts-1-hd', value: 'tts-1-hd' },
]
const openaiVoiceOptions = [
{ label: 'Alloy', value: 'alloy' },
{ label: 'Echo', value: 'echo' },
{ label: 'Fable', value: 'fable' },
{ label: 'Nova', value: 'nova' },
{ label: 'Onyx', value: 'onyx' },
{ label: 'Shimmer', value: 'shimmer' },
]
const edgeVoiceOptions = [
{ label: '晓晓 (zh-CN-XiaoxiaoNeural)', value: 'zh-CN-XiaoxiaoNeural' },
{ label: '晓萱 (zh-CN-XiaoxuanNeural)', value: 'zh-CN-XiaoxuanNeural' },
{ label: '云希 (zh-CN-YunxiNeural)', value: 'zh-CN-YunxiNeural' },
{ label: '云健 (zh-CN-YunjianNeural)', value: 'zh-CN-YunjianNeural' },
{ label: '云扬 (zh-CN-YunyangNeural)', value: 'zh-CN-YunyangNeural' },
{ label: 'Jenny (en-US-JennyNeural)', value: 'en-US-JennyNeural' },
{ label: 'Aria (en-US-AriaNeural)', value: 'en-US-AriaNeural' },
{ label: 'Guy (en-US-GuyNeural)', value: 'en-US-GuyNeural' },
{ label: 'Sonia (en-GB-SoniaNeural)', value: 'en-GB-SoniaNeural' },
{ label: 'Ryan (en-GB-RyanNeural)', value: 'en-GB-RyanNeural' },
{ label: 'Nanami (ja-JP-NanamiNeural)', value: 'ja-JP-NanamiNeural' },
{ label: 'Keita (ja-JP-KeitaNeural)', value: 'ja-JP-KeitaNeural' },
{ label: 'Sun-Hi (ko-KR-SunHiNeural)', value: 'ko-KR-SunHiNeural' },
{ label: 'InJoon (ko-KR-InJoonNeural)', value: 'ko-KR-InJoonNeural' },
{ label: 'Denise (fr-FR-DeniseNeural)', value: 'fr-FR-DeniseNeural' },
{ label: 'Henri (fr-FR-HenriNeural)', value: 'fr-FR-HenriNeural' },
{ label: 'Katja (de-DE-KatjaNeural)', value: 'de-DE-KatjaNeural' },
{ label: 'Conrad (de-DE-ConradNeural)', value: 'de-DE-ConradNeural' },
]
// Voices offered by the browser's Web Speech API; stays empty when the API is unavailable.
const webspeechVoices = ref<SpeechSynthesisVoice[]>([])

/** Re-read the voice list from the browser into `webspeechVoices`. */
function refreshWebspeechVoices() {
  webspeechVoices.value = window.speechSynthesis.getVoices()
}

onMounted(() => {
  if (!('speechSynthesis' in window)) return

  // Use whatever is already available; keep the empty list otherwise.
  const voices = window.speechSynthesis.getVoices()
  if (voices.length) {
    webspeechVoices.value = voices
  }

  // Register a listener instead of assigning `onvoiceschanged`, which would
  // replace any handler another part of the app installed on the shared object.
  window.speechSynthesis.addEventListener('voiceschanged', refreshWebspeechVoices)
})

onBeforeUnmount(() => {
  // Detach so the shared speechSynthesis object does not keep this component alive.
  if ('speechSynthesis' in window) {
    window.speechSynthesis.removeEventListener('voiceschanged', refreshWebspeechVoices)
  }
})
/**
 * Speak the trimmed test sentence with the currently selected provider.
 *
 * `testPlaying` is true only while this call is in progress. Failures are
 * logged to the console and never rethrown. OpenAI and custom providers are
 * skipped (with a warning) when their URL is empty.
 */
async function handleTest() {
  const sample = testText.value.trim()
  if (!sample) return

  const TEST_ID = '__test__'
  testPlaying.value = true
  try {
    switch (vs.provider.value) {
      case 'webspeech':
        speech.stop(false)
        speech.speakViaBrowser(TEST_ID, sample, {
          voiceName: vs.webspeechVoice.value || undefined,
        })
        break

      case 'openai':
        if (!vs.openaiBaseUrl.value) {
          console.warn('[VoiceSettings] OpenAI base URL empty')
          return
        }
        await speech.openaiPlay(TEST_ID, sample, {
          baseUrl: vs.openaiBaseUrl.value,
          apiKey: vs.openaiApiKey.value || undefined,
          model: vs.openaiModel.value,
          voice: vs.openaiVoice.value,
        })
        break

      case 'custom':
        if (!vs.customUrl.value) {
          console.warn('[VoiceSettings] Custom URL empty')
          return
        }
        await speech.openaiPlay(TEST_ID, sample, {
          baseUrl: vs.customUrl.value,
          apiKey: vs.customApiKey.value || undefined,
        })
        break

      case 'edge':
        // Edge always goes through the built-in backend proxy here.
        await speech.openaiPlay(TEST_ID, sample, {
          baseUrl: '/api/tts/proxy',
          voice: vs.edgeVoice.value,
        })
        break
    }
  } catch (err) {
    console.error('[VoiceSettings] Test failed:', err)
  } finally {
    // Runs on every exit path, including the early returns above.
    testPlaying.value = false
  }
}
</script>
<template>
<div class="voice-settings">
<SettingRow
:label="t('settings.voice.ttsProvider')"
:hint="t('settings.voice.ttsProviderHint')"
>
<NSelect
:value="vs.provider.value"
:options="providerOptions"
size="small"
style="width: 300px"
@update:value="vs.setProvider"
/>
</SettingRow>
<!-- WebSpeech API -->
<template v-if="vs.provider.value === 'webspeech'">
<SettingRow
:label="t('settings.voice.webspeechVoice')"
:hint="t('settings.voice.webspeechVoiceHint')"
>
<NSelect
:value="vs.webspeechVoice.value"
size="small"
filterable
style="width: 320px"
:placeholder="t('settings.voice.webspeechVoicePlaceholder')"
:consistent-menu-width="false"
:options="webspeechVoices.map(v => ({
label: `${v.name} (${v.lang})`,
value: v.name,
}))"
@update:value="vs.setWebSpeechVoice"
/>
</SettingRow>
</template>
<!-- OpenAI TTS -->
<template v-if="vs.provider.value === 'openai'">
<SettingRow
:label="t('settings.voice.openaiKey')"
:hint="t('settings.voice.openaiKeyHint')"
>
<NInput
:value="vs.openaiApiKey.value"
type="password"
size="small"
show-password-on="click"
style="width: 360px"
placeholder="sk-..."
@update:value="vs.setOpenaiApiKey"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiUrl')"
:hint="t('settings.voice.openaiUrlHint')"
>
<NInput
:value="vs.openaiBaseUrl.value"
size="small"
style="width: 360px"
placeholder="https://api.openai.com/v1/audio/speech"
@update:value="vs.setOpenaiBaseUrl"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiModel')"
:hint="t('settings.voice.openaiModelHint')"
>
<NSelect
:value="vs.openaiModel.value"
:options="openaiModelOptions"
size="small"
style="width: 200px"
@update:value="vs.setOpenaiModel"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.openaiVoice')"
:hint="t('settings.voice.openaiVoiceHint')"
>
<NSelect
:value="vs.openaiVoice.value"
:options="openaiVoiceOptions"
size="small"
style="width: 200px"
@update:value="vs.setOpenaiVoice"
/>
</SettingRow>
</template>
<!-- Custom Endpoint -->
<template v-if="vs.provider.value === 'custom'">
<div class="provider-hint">
{{ t('settings.voice.customHint') }}
</div>
<SettingRow
:label="t('settings.voice.customUrl')"
:hint="t('settings.voice.customUrlHint')"
>
<NInput
:value="vs.customUrl.value"
size="small"
style="width: 360px"
:placeholder="t('settings.voice.customUrlPlaceholder')"
@update:value="vs.setCustomUrl"
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.customApiKey')"
:hint="t('settings.voice.customApiKeyHint')"
>
<NInput
:value="vs.customApiKey.value"
type="password"
size="small"
show-password-on="click"
style="width: 360px"
:placeholder="t('settings.voice.customApiKeyPlaceholder')"
@update:value="vs.setCustomApiKey"
/>
</SettingRow>
</template>
<!-- Edge TTS -->
<template v-if="vs.provider.value === 'edge'">
<div class="provider-hint">
{{ t('settings.voice.edgeHint') }}
</div>
<SettingRow
:label="t('settings.voice.edgeVoice')"
:hint="t('settings.voice.edgeVoiceHint')"
>
<NSelect
:value="vs.edgeVoice.value"
:options="edgeVoiceOptions"
size="small"
filterable
style="width: 320px"
:consistent-menu-width="false"
@update:value="vs.setEdgeVoice"
/>
</SettingRow>
</template>
<!-- Test / Audition -->
<div class="test-section">
<h4 class="test-title">{{ t('settings.voice.testTitle') }}</h4>
<div class="test-row">
<NInput
v-model:value="testText"
size="small"
style="width: 360px"
:placeholder="t('settings.voice.testTextPlaceholder')"
:disabled="testPlaying"
@keyup.enter="handleTest"
/>
<NButton
size="small"
type="primary"
:loading="testPlaying"
:disabled="testPlaying"
@click="handleTest"
>
{{ testPlaying ? t('settings.voice.testButtonPlaying') : t('settings.voice.testButton') }}
</NButton>
</div>
</div>
</div>
</template>
<style scoped lang="scss">
.voice-settings {
display: flex;
flex-direction: column;
gap: 16px;
}
.provider-hint {
font-size: 12px;
color: #888;
line-height: 1.5;
padding: 0 0 4px 0;
}
.test-section {
padding-top: 16px;
.test-title {
margin: 0 0 8px 0;
font-size: 14px;
font-weight: 600;
}
.test-row {
display: flex;
gap: 8px;
align-items: center;
}
}
</style>
+147 -2
View File
@@ -3,6 +3,14 @@ import { generateSpeech, playAudioBlob } from '@/api/hermes/tts'
export interface SpeechOptions {
lang?: string // 语言 'zh-CN', 'en-US' 等
voiceName?: string // 指定 WebSpeech 音色名称
}
/**
 * Request options for the OpenAI-compatible TTS engine
 * (`openaiPlay` / `openaiToggle`).
 */
export interface OpenaiTtsOptions {
  /** Endpoint prefix from which `openaiPlay` builds a request URL ending in `/audio/speech`. */
  baseUrl: string
  /** Optional key; when set it is sent as `Authorization: Bearer <apiKey>`. */
  apiKey?: string
  /** Model name for the request body; `tts-1` is used when omitted or empty. */
  model?: string
  /** Voice name for the request body; `alloy` is used when omitted or empty. */
  voice?: string
}
export interface SpeechState {
@@ -39,6 +47,11 @@ export function useSpeech() {
let playbackToken = 0
const speechQueue: SpeechQueueItem[] = []
// Playback state for the custom TTS engines (OpenAI / Custom / Edge).
// isCustomPlaying: set when openaiPlay starts, cleared when audio ends or fails.
const isCustomPlaying = ref(false)
// isCustomPaused: flipped by openaiToggle on pause/resume.
const isCustomPaused = ref(false)
// currentCustomMessageId: id of the message owning the current playback, or null when idle.
const currentCustomMessageId = ref<string | null>(null)
// 加载可用语音列表
function loadVoices() {
availableVoices.value = synth.getVoices()
@@ -162,14 +175,25 @@ export function useSpeech() {
// ─── Browser Engine (Web Speech API) ────────────────────────
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token: number) {
function speakViaBrowser(messageId: string, text: string, options: SpeechOptions, token?: number) {
token = token || ++playbackToken
utterance = new SpeechSynthesisUtterance(text)
const activeUtterance = utterance
utterance.rate = 1
utterance.pitch = 1
utterance.volume = 1
utterance.voice = getDefaultVoice()
// 使用指定的音色(如果有),否则用默认
if (options.voiceName) {
const voice = availableVoices.value.find(v => v.name === options.voiceName)
if (voice) {
utterance.voice = voice
}
}
if (!utterance.voice) {
utterance.voice = getDefaultVoice()
}
if (options.lang) {
utterance.lang = options.lang
@@ -218,6 +242,115 @@ export function useSpeech() {
synth.speak(utterance)
}
// ─── OpenAI-compatible TTS Engine ────────────────────────────
// Audio element of the current custom-TTS playback; null while idle or still loading.
let customAudio: HTMLAudioElement | null = null
// Object URL backing the current audio, tracked so every exit path can revoke it.
let customAudioUrl: string | null = null

/** Stop the current custom-TTS audio (if any) and revoke its object URL. */
function releaseCustomAudio() {
  if (customAudio) {
    // Detach handlers first so a superseded element can no longer touch shared state.
    customAudio.onended = null
    customAudio.onerror = null
    customAudio.pause()
    customAudio = null
  }
  if (customAudioUrl) {
    URL.revokeObjectURL(customAudioUrl)
    customAudioUrl = null
  }
}

/**
 * Build the speech endpoint from a configured base URL.
 *
 * Trailing slashes are stripped. `/audio/speech` is appended unless the URL
 * already ends with it, so both `https://host/v1` and the full
 * `https://host/v1/audio/speech` form suggested by the settings UI resolve to
 * the same endpoint.
 */
function resolveSpeechEndpoint(baseUrl: string): string {
  const trimmed = baseUrl.replace(/\/+$/, '')
  return /\/audio\/speech$/.test(trimmed) ? trimmed : `${trimmed}/audio/speech`
}

/**
 * Synthesize `content` through an OpenAI-compatible endpoint and play it.
 *
 * Returns without doing anything when no readable text remains. A newer
 * playback (detected via `playbackToken`) silently supersedes this one.
 *
 * @param messageId id stored in `currentCustomMessageId` while playing
 * @param content   raw message content; readable text is extracted before sending
 * @param opts      endpoint, credentials and model/voice selection
 * @throws rethrows request or playback failures after logging and resetting state
 */
async function openaiPlay(
  messageId: string,
  content: string,
  opts: OpenaiTtsOptions,
): Promise<void> {
  const text = extractReadableText(content)
  if (!text) return

  const token = ++playbackToken

  // Callers may invoke openaiPlay directly (not only via openaiToggle), so stop
  // and free any previous custom audio here to avoid overlapping playback.
  releaseCustomAudio()

  isCustomPlaying.value = true
  isCustomPaused.value = false
  currentCustomMessageId.value = messageId

  const resetState = () => {
    isCustomPlaying.value = false
    isCustomPaused.value = false
    currentCustomMessageId.value = null
  }

  const url = resolveSpeechEndpoint(opts.baseUrl)

  const body: Record<string, string> = {
    model: opts.model || 'tts-1',
    input: text,
    voice: opts.voice || 'alloy',
  }

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  }
  if (opts.apiKey) {
    headers['Authorization'] = `Bearer ${opts.apiKey}`
  }

  try {
    const res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
    })

    // A newer playback started while the request was in flight.
    if (token !== playbackToken) return

    if (!res.ok) {
      const errText = await res.text().catch(() => '')
      throw new Error(`OpenAI TTS 返回 ${res.status}: ${errText || res.statusText}`)
    }

    const audioBlob = await res.blob()
    if (token !== playbackToken) return

    const audioUrl = URL.createObjectURL(audioBlob)
    const audio = new Audio(audioUrl)
    customAudio = audio
    customAudioUrl = audioUrl

    audio.onended = () => {
      // Free the element and URL even if the token moved on; only the owner resets state.
      if (customAudio === audio) releaseCustomAudio()
      if (token !== playbackToken) return
      resetState()
    }

    audio.onerror = () => {
      if (customAudio === audio) releaseCustomAudio()
      if (token !== playbackToken) return
      console.warn('[useSpeech] Custom TTS audio playback error')
      resetState()
    }

    await audio.play()
  } catch (err) {
    if (token !== playbackToken) return
    console.error('[useSpeech] OpenAI TTS 请求失败:', err)
    // play() can reject after the element was created; do not leave it or its URL behind.
    releaseCustomAudio()
    resetState()
    throw err
  }
}
/**
 * Toggle custom-TTS playback for a message.
 *
 * - Message is the active one: pause when playing, resume when paused.
 * - Any other message (or nothing active): stop current speech and start
 *   a new `openaiPlay` for this message.
 *
 * @param messageId id of the message whose button was clicked
 * @param content   raw message content forwarded to `openaiPlay`
 * @param opts      endpoint and voice options forwarded to `openaiPlay`
 */
function openaiToggle(messageId: string, content: string, opts: OpenaiTtsOptions) {
  const isActiveMessage =
    currentCustomMessageId.value === messageId && isCustomPlaying.value

  if (!isActiveMessage) {
    // Stop other speech and start new
    stop(false)
    if (customAudio) {
      customAudio.pause()
      customAudio = null
    }
    // openaiPlay logs and resets state before rethrowing; swallow the rejection
    // here so a failed request does not surface as an unhandled promise rejection.
    void openaiPlay(messageId, content, opts).catch(() => {})
    return
  }

  // The request is still in flight and no audio element exists yet. There is
  // nothing to pause or resume, and flipping the flag would desync the UI from
  // the audio that starts once loading finishes.
  if (!customAudio) return

  if (isCustomPaused.value) {
    // Resume
    isCustomPaused.value = false
    void customAudio.play().catch((err: unknown) => {
      console.warn('[useSpeech] Custom TTS resume failed:', err)
      // Playback did not restart, so keep reporting the paused state.
      isCustomPaused.value = true
    })
  } else {
    // Pause
    customAudio.pause()
    isCustomPaused.value = true
  }
}
// ─── Unified speak ──────────────────────────────────────────
function speak(messageId: string, text: string, options: SpeechOptions = {}) {
@@ -317,6 +450,11 @@ export function useSpeech() {
progress: computed(() => state.value.progress),
engine: computed(() => state.value.engine),
// Custom TTS state
isCustomPlaying,
isCustomPaused,
currentCustomMessageId,
play,
pause,
resume,
@@ -325,6 +463,13 @@ export function useSpeech() {
enqueue,
getDefaultVoice,
extractReadableText,
// OpenAI-compatible TTS
openaiPlay,
openaiToggle,
// Browser WebSpeech (直接调用避免 Rolldown 树摇)
speakViaBrowser,
}
}
@@ -0,0 +1,164 @@
import { ref, watch } from 'vue'
export type TtsProvider = 'webspeech' | 'openai' | 'custom' | 'edge'
/**
 * Shape of the voice settings object that is serialized to localStorage
 * and merged over `DEFAULT` when loaded.
 */
export interface VoiceSettingsData {
  /** Which TTS engine is selected. */
  provider: TtsProvider
  // WebSpeech
  /** Name of the selected browser voice; empty string means no explicit choice. */
  webspeechVoice: string
  // OpenAI
  openaiApiKey: string
  openaiBaseUrl: string
  openaiModel: string
  openaiVoice: string
  // Custom endpoint (OpenAI-compatible)
  customUrl: string
  customApiKey: string
  // Edge TTS
  /** Adapter URL; `sanitize` resets any non-empty value to '' on load. */
  edgeUrl: string
  edgeVoice: string
}
const STORAGE_KEY = 'hermes-tts-settings-v2'
/**
 * One-time migration of settings stored under the legacy localStorage key.
 *
 * When legacy data exists it is merged over `DEFAULT`, written under
 * `STORAGE_KEY`, and the legacy entry is removed. The old `gptsovits` provider
 * becomes `custom`, carrying its URL over when no custom URL is set.
 * Any failure (storage access, malformed JSON) is ignored on purpose.
 */
function migrateOldKeys() {
  const LEGACY_KEY = 'hermes-tts-settings'
  try {
    const legacyRaw = localStorage.getItem(LEGACY_KEY)
    if (!legacyRaw) return

    const legacy = JSON.parse(legacyRaw)

    // A legacy 'custom' provider needs no change; only 'gptsovits' is renamed.
    if (legacy.provider === 'gptsovits') {
      legacy.provider = 'custom'
      // gptsovitsUrl -> customUrl, unless a custom URL is already present
      const shouldCopyUrl = legacy.gptsovitsUrl && !legacy.customUrl
      if (shouldCopyUrl) {
        legacy.customUrl = legacy.gptsovitsUrl
      }
    }

    // Persist in the new format, then drop the legacy entry.
    const migrated = JSON.stringify({ ...DEFAULT, ...legacy })
    localStorage.setItem(STORAGE_KEY, migrated)
    localStorage.removeItem(LEGACY_KEY)
  } catch {
    /* ignore: migration is best-effort */
  }
}
const DEFAULT: VoiceSettingsData = {
provider: 'webspeech',
webspeechVoice: '',
openaiApiKey: '',
openaiBaseUrl: '',
openaiModel: 'tts-1',
openaiVoice: 'alloy',
customUrl: '',
customApiKey: '',
edgeUrl: '',
edgeVoice: 'zh-CN-XiaoxiaoNeural',
}
/**
 * Normalize loaded settings in place and return the same object.
 *
 * Any stored Edge TTS adapter URL is discarded: the setting is reset to ''
 * because Edge TTS now runs through the internal node-edge-tts backend.
 */
function sanitize(data: VoiceSettingsData): VoiceSettingsData {
  const hasStoredEdgeUrl = Boolean(data.edgeUrl)
  if (hasStoredEdgeUrl) {
    data.edgeUrl = ''
  }
  return data
}
/**
 * Read the persisted voice settings.
 *
 * Stored values are merged over `DEFAULT` and passed through `sanitize`.
 * When nothing is stored, or reading/parsing fails, a fresh copy of
 * `DEFAULT` is returned instead.
 */
function load(): VoiceSettingsData {
  try {
    const stored = localStorage.getItem(STORAGE_KEY)
    if (stored) {
      const merged: VoiceSettingsData = { ...DEFAULT, ...JSON.parse(stored) }
      return sanitize(merged)
    }
  } catch {
    /* ignore: fall back to defaults below */
  }
  return { ...DEFAULT }
}
// Run migration once on import
migrateOldKeys()
// ── Reactive state ──
// Read and parse the persisted settings once; every ref below is seeded from
// this single snapshot instead of hitting localStorage once per field.
const initialSettings = load()

const provider = ref<TtsProvider>(initialSettings.provider)
// WebSpeech
const webspeechVoice = ref<string>(initialSettings.webspeechVoice)
// OpenAI
const openaiApiKey = ref<string>(initialSettings.openaiApiKey)
const openaiBaseUrl = ref<string>(initialSettings.openaiBaseUrl)
const openaiModel = ref<string>(initialSettings.openaiModel)
const openaiVoice = ref<string>(initialSettings.openaiVoice)
// Custom
const customUrl = ref<string>(initialSettings.customUrl)
const customApiKey = ref<string>(initialSettings.customApiKey)
// Edge TTS
const edgeUrl = ref<string>(initialSettings.edgeUrl)
const edgeVoice = ref<string>(initialSettings.edgeVoice)
// Auto-persist on change: whenever any setting ref changes, the full settings
// object is rewritten under STORAGE_KEY.
// NOTE(review): this stores the API keys as plain JSON in localStorage.
watch(
  [provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
    customUrl, customApiKey, edgeUrl, edgeVoice],
  () => {
    const snapshot: VoiceSettingsData = {
      provider: provider.value,
      webspeechVoice: webspeechVoice.value,
      openaiApiKey: openaiApiKey.value,
      openaiBaseUrl: openaiBaseUrl.value,
      openaiModel: openaiModel.value,
      openaiVoice: openaiVoice.value,
      customUrl: customUrl.value,
      customApiKey: customApiKey.value,
      edgeUrl: edgeUrl.value,
      edgeVoice: edgeVoice.value,
    }
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(snapshot))
    } catch (err) {
      // Storage can be unavailable or full; keep the in-memory settings usable,
      // matching the best-effort handling in load() and migrateOldKeys().
      console.warn('[useVoiceSettings] Failed to persist voice settings:', err)
    }
  },
)
/**
 * Accessor for the module-level voice settings.
 *
 * Every call returns the same shared refs, one setter per field, and
 * `reset()`, which restores each field to its `DEFAULT` value.
 */
export function useVoiceSettings() {
  // Shared reactive fields (module singletons).
  const fields = {
    provider,
    webspeechVoice,
    openaiApiKey,
    openaiBaseUrl,
    openaiModel,
    openaiVoice,
    customUrl,
    customApiKey,
    edgeUrl,
    edgeVoice,
  }

  // One setter per field; they close over the refs, so they can be passed
  // around detached (e.g. as event handlers).
  const setters = {
    setProvider: (v: TtsProvider) => { provider.value = v },
    setWebSpeechVoice: (v: string) => { webspeechVoice.value = v },
    setOpenaiApiKey: (v: string) => { openaiApiKey.value = v },
    setOpenaiBaseUrl: (v: string) => { openaiBaseUrl.value = v },
    setOpenaiModel: (v: string) => { openaiModel.value = v },
    setOpenaiVoice: (v: string) => { openaiVoice.value = v },
    setCustomUrl: (v: string) => { customUrl.value = v },
    setCustomApiKey: (v: string) => { customApiKey.value = v },
    setEdgeUrl: (v: string) => { edgeUrl.value = v },
    setEdgeVoice: (v: string) => { edgeVoice.value = v },
  }

  // Restore every field to its default value.
  const reset = () => {
    provider.value = DEFAULT.provider
    webspeechVoice.value = DEFAULT.webspeechVoice
    openaiApiKey.value = DEFAULT.openaiApiKey
    openaiBaseUrl.value = DEFAULT.openaiBaseUrl
    openaiModel.value = DEFAULT.openaiModel
    openaiVoice.value = DEFAULT.openaiVoice
    customUrl.value = DEFAULT.customUrl
    customApiKey.value = DEFAULT.customApiKey
    edgeUrl.value = DEFAULT.edgeUrl
    edgeVoice.value = DEFAULT.edgeVoice
  }

  return { ...fields, ...setters, reset }
}
+51
View File
@@ -511,6 +511,8 @@ jobTriggered: 'Job ausgelost',
session: 'Sitzung',
privacy: 'Datenschutz',
apiServer: 'API-Server',
models: 'Modelle',
voice: 'Sprache',
},
display: {
streaming: 'Streaming-Antworten',
@@ -589,6 +591,55 @@ jobTriggered: 'Job ausgelost',
cors: 'CORS-Ursprunge',
corsHint: 'Erlaubte Cross-Origin-Quellen',
},
voice: {
ttsProvider: 'TTS-Anbieter',
ttsProviderHint: 'Waehlen Sie die Sprachsynthese-Engine fuer die Nachrichtenwiedergabe',
providerWebSpeech: 'WebSpeech API (Browser)',
providerOpenai: 'OpenAI TTS',
providerCustom: 'Benutzerdefinierter Endpunkt (OpenAI-kompatibel)',
providerEdge: 'Edge TTS (Kostenlos, kein API-Key erforderlich)',
// WebSpeech
webspeechVoice: 'Stimme',
webspeechVoiceHint: 'Waehlen Sie eine Stimme aus Ihrem Browser oder Betriebssystem',
webspeechVoicePlaceholder: 'Auto (Standardstimme)',
// OpenAI
openaiKey: 'API-Key',
openaiKeyHint: 'Ihr OpenAI API-Key mit TTS-Zugriff',
openaiUrl: 'API-Basis-URL',
openaiUrlHint: 'z.B. https://api.openai.com/v1/audio/speech',
openaiModel: 'Modell',
openaiModelHint: 'tts-1 (schneller) / tts-1-hd (hoehere Qualitaet)',
openaiVoice: 'Stimme',
openaiVoiceHint: 'Stimme fuer die Synthese',
// Custom endpoint
customHint: 'Jede OpenAI-kompatible TTS-API verwenden — funktioniert mit GPT-SoVITS, CosyVoice, usw.',
customUrl: 'API-URL',
customUrlHint: 'Basis-URL Ihres TTS-Dienstes',
customUrlPlaceholder: 'Die im lokalen Adapter konfigurierte Adresse, z.B. http://127.0.0.1:9880',
customApiKey: 'API-Key (optional)',
customApiKeyHint: 'Einige benutzerdefinierte Endpunkte erfordern Authentifizierung',
customApiKeyPlaceholder: 'Leer lassen wenn nicht benoetigt',
// Edge TTS
edgeHint: 'Angetrieben von Microsoft Edge TTS (node-edge-tts).',
edgeUrl: 'Adapter-URL',
edgeUrlHint: 'Adresse des Edge TTS-Adapters, z.B. http://127.0.0.1:9882',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Stimme',
edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese',
// Test
testTitle: 'Sprachtest',
testText: 'Testtext',
testTextPlaceholder: 'Text zum Testen eingeben...',
testTextDefault: 'Hallo, dies ist ein Sprachtest.',
testButton: 'Testen',
testButtonPlaying: 'Wiedergabe...',
testFailed: 'Test fehlgeschlagen: {error}',
},
lockedIps: {
title: 'Gesperrte IPs',
count: '{count} gesperrt',
+49
View File
@@ -651,6 +651,7 @@ export default {
privacy: 'Privacy',
apiServer: 'API Server',
models: 'Models',
voice: 'Voice',
},
models: {
apiKey: 'API Key',
@@ -747,6 +748,54 @@ export default {
unlocked: 'IP unlocked',
allUnlocked: '{count} IPs unlocked',
},
voice: {
ttsProvider: 'TTS Provider',
ttsProviderHint: 'Choose the text-to-speech engine for message playback',
providerWebSpeech: 'WebSpeech API (Browser)',
providerOpenai: 'OpenAI TTS',
providerCustom: 'Custom Endpoint (OpenAI-compatible)',
providerEdge: 'Edge TTS (Free, no API Key)',
// WebSpeech
webspeechVoice: 'Voice',
webspeechVoiceHint: 'Select a voice from your browser or OS',
webspeechVoicePlaceholder: 'Auto (default voice)',
// OpenAI
openaiKey: 'API Key',
openaiKeyHint: 'Your OpenAI API key with TTS access',
openaiUrl: 'API Base URL',
openaiUrlHint: 'e.g. https://api.openai.com/v1/audio/speech',
openaiModel: 'Model',
openaiModelHint: 'tts-1 (faster) / tts-1-hd (higher quality)',
openaiVoice: 'Voice',
openaiVoiceHint: 'Voice to use for synthesis',
// Custom endpoint
customHint: 'Use any OpenAI-compatible TTS API — works with GPT-SoVITS, CosyVoice, etc.',
customUrl: 'API URL',
customUrlHint: 'Base URL of your TTS service',
customUrlPlaceholder: 'The address configured in the local adapter, e.g. http://127.0.0.1:9880',
customApiKey: 'API Key (Optional)',
customApiKeyHint: 'Some custom endpoints require authentication',
customApiKeyPlaceholder: 'Leave blank if not needed',
// Edge TTS
edgeHint: 'Powered by Microsoft Edge TTS (node-edge-tts).',
edgeUrl: 'Adapter URL',
edgeUrlHint: 'Address of your Edge TTS adapter, e.g. http://127.0.0.1:9882',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voice',
edgeVoiceHint: 'Select a voice for speech synthesis',
// Test
testTitle: 'Test Voice',
testText: 'Test Text',
testTextPlaceholder: 'Enter text to test...',
testTextDefault: 'Hello, this is a voice test.',
testButton: 'Test',
testButtonPlaying: 'Playing...',
testFailed: 'Test failed: {error}',
},
},
// Platform channel settings
+51
View File
@@ -511,6 +511,8 @@ jobTriggered: 'Job ejecutado',
session: 'Sesion',
privacy: 'Privacidad',
apiServer: 'Servidor API',
models: 'Modelos',
voice: 'Voz',
},
display: {
streaming: 'Respuestas en streaming',
@@ -589,6 +591,55 @@ jobTriggered: 'Job ejecutado',
cors: 'Origenes CORS',
corsHint: 'Fuentes cross-origin permitidas',
},
voice: {
ttsProvider: 'Proveedor TTS',
ttsProviderHint: 'Elija el motor de texto a voz para la reproduccion de mensajes',
providerWebSpeech: 'WebSpeech API (Navegador)',
providerOpenai: 'OpenAI TTS',
providerCustom: 'Endpoint personalizado (compatible con OpenAI)',
providerEdge: 'Edge TTS (Gratuito, sin clave API)',
// WebSpeech
webspeechVoice: 'Voz',
webspeechVoiceHint: 'Seleccione una voz de su navegador o sistema operativo',
webspeechVoicePlaceholder: 'Auto (voz predeterminada)',
// OpenAI
openaiKey: 'Clave API',
openaiKeyHint: 'Su clave API de OpenAI con acceso TTS',
openaiUrl: 'URL base de API',
openaiUrlHint: 'ej. https://api.openai.com/v1/audio/speech',
openaiModel: 'Modelo',
openaiModelHint: 'tts-1 (mas rapido) / tts-1-hd (mayor calidad)',
openaiVoice: 'Voz',
openaiVoiceHint: 'Voz a utilizar para la sintesis',
// Custom endpoint
customHint: 'Utilice cualquier API TTS compatible con OpenAI — funciona con GPT-SoVITS, CosyVoice, etc.',
customUrl: 'URL de API',
customUrlHint: 'URL base de su servicio TTS',
customUrlPlaceholder: 'Direccion configurada en el adaptador local, ej. http://127.0.0.1:9880',
customApiKey: 'Clave API (opcional)',
customApiKeyHint: 'Algunos endpoints personalizados requieren autenticacion',
customApiKeyPlaceholder: 'Dejar en blanco si no es necesario',
// Edge TTS
edgeHint: 'Impulsado por Microsoft Edge TTS (node-edge-tts).',
edgeUrl: 'URL del adaptador',
edgeUrlHint: 'Direccion del adaptador Edge TTS, ej. http://127.0.0.1:9882',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voz',
edgeVoiceHint: 'Seleccione una voz para la sintesis de voz',
// Test
testTitle: 'Prueba de voz',
testText: 'Texto de prueba',
testTextPlaceholder: 'Ingrese texto para probar...',
testTextDefault: 'Hola, esta es una prueba de voz.',
testButton: 'Probar',
testButtonPlaying: 'Reproduciendo...',
testFailed: 'Prueba fallida: {error}',
},
lockedIps: {
title: 'IPs bloqueadas',
count: '{count} bloqueadas',
+51
View File
@@ -511,6 +511,8 @@ jobTriggered: 'Job declenche',
session: 'Session',
privacy: 'Confidentialite',
apiServer: 'Serveur API',
models: 'Modèles',
voice: 'Voix',
},
display: {
streaming: 'Reponses en continu',
@@ -589,6 +591,55 @@ jobTriggered: 'Job declenche',
cors: 'Origines CORS',
corsHint: 'Sources cross-origin autorisees',
},
voice: {
ttsProvider: 'Fournisseur TTS',
ttsProviderHint: 'Choisir le moteur de synthese vocale pour la lecture des messages',
providerWebSpeech: 'WebSpeech API (Navigateur)',
providerOpenai: 'OpenAI TTS',
providerCustom: "Point d'acces personnalise (compatible OpenAI)",
providerEdge: 'Edge TTS (Gratuit, sans cle API)',
// WebSpeech
webspeechVoice: 'Voix',
webspeechVoiceHint: "Choisir une voix depuis le navigateur ou l'OS",
webspeechVoicePlaceholder: 'Auto (voix par defaut)',
// OpenAI
openaiKey: 'Cle API',
openaiKeyHint: 'Votre cle API OpenAI avec acces TTS',
openaiUrl: 'URL de base API',
openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
openaiModel: 'Modele',
openaiModelHint: 'tts-1 (rapide) / tts-1-hd (haute qualite)',
openaiVoice: 'Voix',
openaiVoiceHint: 'Voix a utiliser pour la synthese',
// Custom endpoint
customHint: 'Utilisez toute API TTS compatible OpenAI — fonctionne avec GPT-SoVITS, CosyVoice, etc.',
customUrl: 'URL API',
customUrlHint: 'URL de base de votre service TTS',
customUrlPlaceholder: "Adresse configuree dans l'adaptateur local, ex. http://127.0.0.1:9880",
customApiKey: 'Cle API (optionnelle)',
customApiKeyHint: "Certains points d'acces personnalises necessitent une authentification",
customApiKeyPlaceholder: 'Laisser vide si inutile',
// Edge TTS
edgeHint: 'Propulse par Microsoft Edge TTS (node-edge-tts).',
edgeUrl: "URL de l'adaptateur",
edgeUrlHint: "Adresse de l'adaptateur Edge TTS, ex. http://127.0.0.1:9882",
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voix',
edgeVoiceHint: 'Choisir une voix pour la synthese vocale',
// Test
testTitle: 'Test vocal',
testText: 'Texte de test',
testTextPlaceholder: 'Entrez le texte a tester...',
testTextDefault: 'Bonjour, ceci est un test vocal.',
testButton: 'Tester',
testButtonPlaying: 'Lecture...',
testFailed: 'Echec du test : {error}',
},
lockedIps: {
title: 'IPs bloquees',
count: '{count} bloquees',
+51
View File
@@ -511,6 +511,8 @@ export default {
session: 'セッション',
privacy: 'プライバシー',
apiServer: 'API サーバー',
models: 'モデル',
voice: '音声',
},
display: {
streaming: 'ストリームレスポンス',
@@ -589,6 +591,55 @@ export default {
cors: 'CORS 許可元',
corsHint: '許可するクロスオリジン',
},
voice: {
ttsProvider: 'TTS プロバイダー',
ttsProviderHint: 'メッセージ読み上げに使用する音声合成エンジンを選択',
providerWebSpeech: 'WebSpeech API(ブラウザ)',
providerOpenai: 'OpenAI TTS',
providerCustom: 'カスタムエンドポイント(OpenAI 互換)',
providerEdge: 'Edge TTS(無料、API Key 不要)',
// WebSpeech
webspeechVoice: '音声',
webspeechVoiceHint: 'ブラウザまたは OS から音声を選択',
webspeechVoicePlaceholder: '自動(デフォルト音声)',
// OpenAI
openaiKey: 'API キー',
openaiKeyHint: 'TTS アクセス権のある OpenAI API キー',
openaiUrl: 'API ベース URL',
openaiUrlHint: '例: https://api.openai.com/v1/audio/speech',
openaiModel: 'モデル',
openaiModelHint: 'tts-1(高速)/ tts-1-hd(高音質)',
openaiVoice: '音色',
openaiVoiceHint: '合成に使用する音色',
// Custom endpoint
customHint: 'OpenAI 互換の TTS API を使用可能 — GPT-SoVITS、CosyVoice などに対応',
customUrl: 'API URL',
customUrlHint: 'TTS サービスのベース URL',
customUrlPlaceholder: 'ローカルアダプターで設定したアドレス(例:http://127.0.0.1:9880)',
customApiKey: 'API キー(オプション)',
customApiKeyHint: '一部のカスタムエンドポイントは認証が必要',
customApiKeyPlaceholder: '不要な場合は空欄',
// Edge TTS
edgeHint: 'Microsoft Edge TTS を搭載(node-edge-tts)。',
edgeUrl: 'アダプター URL',
edgeUrlHint: 'Edge TTS アダプターのアドレス(例:http://127.0.0.1:9882)',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '音色',
edgeVoiceHint: '音声合成に使用する音色を選択',
// Test
testTitle: '音声テスト',
testText: 'テストテキスト',
testTextPlaceholder: 'テストするテキストを入力...',
testTextDefault: 'こんにちは、これは音声テストです。',
testButton: 'テスト',
testButtonPlaying: '再生中...',
testFailed: 'テスト失敗:{error}',
},
lockedIps: {
title: 'ロック済みIP管理',
count: '{count}件ロック中',
+51
View File
@@ -511,6 +511,8 @@ export default {
session: '세션',
privacy: '개인정보',
apiServer: 'API 서버',
models: '모델',
voice: '음성',
},
display: {
streaming: '스트리밍 응답',
@@ -589,6 +591,55 @@ export default {
cors: 'CORS 출처',
corsHint: '허용된 교차 출처',
},
voice: {
ttsProvider: 'TTS 제공자',
ttsProviderHint: '메시지 재생에 사용할 텍스트 음성 변환 엔진 선택',
providerWebSpeech: 'WebSpeech API (브라우저)',
providerOpenai: 'OpenAI TTS',
providerCustom: '사용자 정의 엔드포인트 (OpenAI 호환)',
providerEdge: 'Edge TTS (무료, API Key 불필요)',
// WebSpeech
webspeechVoice: '음성',
webspeechVoiceHint: '브라우저 또는 OS에서 음성 선택',
webspeechVoicePlaceholder: '자동 (기본 음성)',
// OpenAI
openaiKey: 'API 키',
openaiKeyHint: 'TTS 접근 권한이 있는 OpenAI API 키',
openaiUrl: 'API 기본 URL',
openaiUrlHint: '예: https://api.openai.com/v1/audio/speech',
openaiModel: '모델',
openaiModelHint: 'tts-1 (빠름) / tts-1-hd (고음질)',
openaiVoice: '음색',
openaiVoiceHint: '합성에 사용할 음색',
// Custom endpoint
customHint: '모든 OpenAI 호환 TTS API 사용 가능 — GPT-SoVITS, CosyVoice 등 지원',
customUrl: 'API URL',
customUrlHint: 'TTS 서비스의 기본 URL',
customUrlPlaceholder: '로컬 어댑터에 설정된 주소 (예: http://127.0.0.1:9880)',
customApiKey: 'API 키 (선택사항)',
customApiKeyHint: '일부 사용자 정의 엔드포인트는 인증 필요',
customApiKeyPlaceholder: '필요하지 않으면 비워둠',
// Edge TTS
edgeHint: 'Microsoft Edge TTS 기반 (node-edge-tts).',
edgeUrl: '어댑터 URL',
edgeUrlHint: 'Edge TTS 어댑터 주소 (예: http://127.0.0.1:9882)',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '음색',
edgeVoiceHint: '음성 합성에 사용할 음색 선택',
// Test
testTitle: '음성 테스트',
testText: '테스트 텍스트',
testTextPlaceholder: '테스트할 텍스트 입력...',
testTextDefault: '안녕하세요, 음성 테스트입니다.',
testButton: '테스트',
testButtonPlaying: '재생 중...',
testFailed: '테스트 실패: {error}',
},
lockedIps: {
title: '잠긴 IP 관리',
count: '{count}개 잠김',
+51
View File
@@ -511,6 +511,8 @@ jobTriggered: 'Job acionado',
session: 'Sessao',
privacy: 'Privacidade',
apiServer: 'Servidor API',
models: 'Modelos',
voice: 'Voz',
},
display: {
streaming: 'Respostas em streaming',
@@ -589,6 +591,55 @@ jobTriggered: 'Job acionado',
cors: 'Origens CORS',
corsHint: 'Fontes cross-origin permitidas',
},
voice: {
ttsProvider: 'Provedor TTS',
ttsProviderHint: 'Escolha o mecanismo de texto para fala para reproducao de mensagens',
providerWebSpeech: 'WebSpeech API (Navegador)',
providerOpenai: 'OpenAI TTS',
providerCustom: 'Endpoint personalizado (compativel com OpenAI)',
providerEdge: 'Edge TTS (Gratuito, sem chave API)',
// WebSpeech
webspeechVoice: 'Voz',
webspeechVoiceHint: 'Selecione uma voz do seu navegador ou SO',
webspeechVoicePlaceholder: 'Auto (voz padrao)',
// OpenAI
openaiKey: 'Chave API',
openaiKeyHint: 'Sua chave API OpenAI com acesso TTS',
openaiUrl: 'URL base da API',
openaiUrlHint: 'ex. https://api.openai.com/v1/audio/speech',
openaiModel: 'Modelo',
openaiModelHint: 'tts-1 (mais rapido) / tts-1-hd (qualidade superior)',
openaiVoice: 'Voz',
openaiVoiceHint: 'Voz a ser usada para sintese',
// Custom endpoint
customHint: 'Use qualquer API TTS compativel com OpenAI — funciona com GPT-SoVITS, CosyVoice, etc.',
customUrl: 'URL da API',
customUrlHint: 'URL base do seu servico TTS',
customUrlPlaceholder: 'Endereco configurado no adaptador local, ex. http://127.0.0.1:9880',
customApiKey: 'Chave API (opcional)',
customApiKeyHint: 'Alguns endpoints personalizados exigem autenticacao',
customApiKeyPlaceholder: 'Deixe em branco se nao for necessario',
// Edge TTS
edgeHint: 'Desenvolvido por Microsoft Edge TTS (node-edge-tts).',
edgeUrl: 'URL do adaptador',
edgeUrlHint: 'Endereco do adaptador Edge TTS, ex. http://127.0.0.1:9882',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voz',
edgeVoiceHint: 'Selecione uma voz para sintese de fala',
// Test
testTitle: 'Teste de voz',
testText: 'Texto de teste',
testTextPlaceholder: 'Insira o texto para testar...',
testTextDefault: 'Ola, este e um teste de voz.',
testButton: 'Testar',
testButtonPlaying: 'Reproduzindo...',
testFailed: 'Teste falhou: {error}',
},
lockedIps: {
title: 'IPs bloqueadas',
count: '{count} bloqueadas',
+49
View File
@@ -643,6 +643,7 @@ export default {
privacy: '隐私',
apiServer: 'API 服务器',
models: '模型',
voice: '语音',
},
models: {
apiKey: 'API Key',
@@ -739,6 +740,54 @@ export default {
unlocked: 'IP 已解锁',
allUnlocked: '已解锁 {count} 个 IP',
},
voice: {
ttsProvider: 'TTS 提供者',
ttsProviderHint: '选择消息朗读使用的语音引擎',
providerWebSpeech: 'WebSpeech API(浏览器内置)',
providerOpenai: 'OpenAI TTS',
providerCustom: '自定义端点(兼容 OpenAI)',
providerEdge: 'Edge TTS(免费,无需 API Key)',
// WebSpeech
webspeechVoice: '音色',
webspeechVoiceHint: '从浏览器或系统提供的语音中选择',
webspeechVoicePlaceholder: '自动(默认语音)',
// OpenAI
openaiKey: 'API 密钥',
openaiKeyHint: '具有 TTS 权限的 OpenAI API Key',
openaiUrl: 'API 基础地址',
openaiUrlHint: '例如 https://api.openai.com/v1/audio/speech',
openaiModel: '模型',
openaiModelHint: 'tts-1(快速)/ tts-1-hd(高音质)',
openaiVoice: '音色',
openaiVoiceHint: '用于语音合成的音色',
// 自定义端点
customHint: '支持任何 OpenAI 兼容的 TTS 服务——可用于 GPT-SoVITS、CosyVoice 等自部署服务。',
customUrl: 'API 地址',
customUrlHint: 'TTS 服务的完整基础地址',
customUrlPlaceholder: '本地适配器中配置的地址 如:http://127.0.0.1:9880',
customApiKey: 'API 密钥(可选)',
customApiKeyHint: '部分自部署服务需要身份验证',
customApiKeyPlaceholder: '不需要则留空',
// Edge TTS
edgeHint: '由 Microsoft Edge TTS 驱动(node-edge-tts)。',
edgeUrl: '适配器地址',
edgeUrlHint: 'Edge TTS 适配器地址,例如 http://127.0.0.1:9882',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '音色',
edgeVoiceHint: '选择用于语音合成的音色',
// 试听
testTitle: '试听测试',
testText: '测试文本',
testTextPlaceholder: '输入测试文本...',
testTextDefault: '你好,这是一个语音测试。',
testButton: '试听',
testButtonPlaying: '播放中...',
testFailed: '测试失败:{error}',
},
},
// 平台频道设置
@@ -14,6 +14,7 @@ import SessionSettings from "@/components/hermes/settings/SessionSettings.vue";
import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue";
import ModelSettings from "@/components/hermes/settings/ModelSettings.vue";
import AccountSettings from "@/components/hermes/settings/AccountSettings.vue";
import VoiceSettings from "@/components/hermes/settings/VoiceSettings.vue";
const settingsStore = useSettingsStore();
const { t } = useI18n();
@@ -57,6 +58,9 @@ onMounted(() => {
<NTabPane name="models" :tab="t('settings.tabs.models')">
<ModelSettings />
</NTabPane>
<NTabPane name="voice" :tab="t('settings.tabs.voice')">
<VoiceSettings />
</NTabPane>
</NTabs>
</NSpin>
</div>
+39 -1
View File
@@ -1,5 +1,5 @@
import type { Context } from 'koa'
import { textToSpeech } from '../../services/hermes/tts'
import { textToSpeech, openaiCompatibleTts, speedToEdgeRate } from '../../services/hermes/tts'
export async function generate(ctx: Context) {
const { text, lang } = ctx.request.body as {
@@ -26,3 +26,41 @@ export async function generate(ctx: Context) {
ctx.set('X-TTS-Engine', engine)
ctx.body = audio
}
/**
 * OpenAI-compatible TTS endpoint.
 *
 * Accepts a JSON body shaped like `{ model, input, voice, speed }` and replies
 * with the synthesized audio as `audio/mpeg`, naming the engine that produced
 * it in the `X-TTS-Engine` header.
 *
 * Responds with 400 when `input` is missing, empty, not a string, or longer
 * than 5000 characters. The optional fields are normalized instead of being
 * rejected, so a malformed value falls back to the service default:
 * - `voice` / `model` are forwarded only when they are strings;
 * - `speed` is forwarded only when it is (or parses to) a finite, positive
 *   number. Numeric strings are still accepted.
 *
 * Errors thrown by the synthesis call are not caught here and propagate to the
 * application's error handling.
 */
export async function openaiProxy(ctx: Context) {
  // A request without a parsed body must end in the 400 below, not a TypeError.
  const raw: unknown = ctx.request.body
  const body = (raw !== null && typeof raw === 'object' ? raw : {}) as {
    input?: unknown
    voice?: unknown
    speed?: unknown
    model?: unknown
  }

  const input = body.input
  if (!input || typeof input !== 'string') {
    ctx.status = 400
    ctx.body = { error: 'input is required' }
    return
  }

  if (input.length > 5000) {
    ctx.status = 400
    ctx.body = { error: 'input is too long (max 5000 characters)' }
    return
  }

  // The body is untrusted JSON: never hand a non-numeric speed to the rate
  // conversion, which would otherwise build a rate string such as "NaN%".
  const parsedSpeed =
    typeof body.speed === 'number' || typeof body.speed === 'string'
      ? Number(body.speed)
      : Number.NaN
  const speed = Number.isFinite(parsedSpeed) && parsedSpeed > 0 ? parsedSpeed : undefined

  const { audio, engine } = await openaiCompatibleTts({
    input,
    voice: typeof body.voice === 'string' ? body.voice : undefined,
    speed,
    model: typeof body.model === 'string' ? body.model : undefined,
  })

  ctx.set('Content-Type', 'audio/mpeg')
  ctx.set('Content-Length', String(audio.length))
  ctx.set('X-TTS-Engine', engine)
  ctx.body = audio
}
+1
View File
@@ -4,3 +4,4 @@ import * as ctrl from '../../controllers/hermes/tts'
export const ttsRoutes = new Router()
ttsRoutes.post('/api/hermes/tts', ctrl.generate)
ttsRoutes.post('/api/tts/proxy/audio/speech', ctrl.openaiProxy)
+1 -1
View File
@@ -41,6 +41,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
app.use(healthRoutes.routes())
app.use(webhookRoutes.routes())
app.use(authPublicRoutes.routes())
app.use(ttsRoutes.routes()) // TTS proxy/generation — must be before auth
// --- Auth middleware: all routes below require authentication ---
app.use(requireAuth)
@@ -69,7 +70,6 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
app.use(jobRoutes.routes()) // Must be before proxy
app.use(cronHistoryRoutes.routes()) // Must be before proxy
app.use(kanbanRoutes.routes()) // Must be before proxy
app.use(ttsRoutes.routes()) // Must be before proxy
app.use(proxyRoutes.routes())
// Proxy catch-all middleware (must be last)
+41 -4
View File
@@ -12,6 +12,9 @@ const FIXED_PITCH = '+12Hz'
/**
 * Options for a single text-to-speech synthesis request.
 *
 * `voice`, `rate` and `pitch` are optional overrides; the synthesis functions
 * in this module fall back to the module-level FIXED_VOICE / FIXED_RATE /
 * FIXED_PITCH constants when an override is missing or empty.
 */
export interface TtsOptions {
// Text to synthesize.
text: string
// Language hint. NOTE(review): how it is consumed is not visible in this part of the file — confirm.
lang?: string
// Edge TTS voice name.
voice?: string
// Speaking-rate adjustment in Edge TTS "+/-NN%" form, e.g. "+50%".
rate?: string
// Pitch adjustment string; the module default is "+12Hz".
pitch?: string
}
export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
@@ -20,9 +23,9 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
try {
const tts = new EdgeTTS({
voice: FIXED_VOICE,
rate: FIXED_RATE,
pitch: FIXED_PITCH,
voice: opts.voice || FIXED_VOICE,
rate: opts.rate || FIXED_RATE,
pitch: opts.pitch || FIXED_PITCH,
timeout: 15000,
})
@@ -35,7 +38,41 @@ export async function edgeTts(opts: TtsOptions): Promise<Buffer> {
}
/**
 * Synthesize `opts.text` with Edge TTS and report which engine produced it.
 *
 * @param opts Text plus optional voice / rate / pitch overrides. Missing or
 *   empty overrides resolve to the module-level FIXED_* defaults.
 * @returns The audio buffer and the engine label (`'edge'`).
 */
export async function textToSpeech(opts: TtsOptions): Promise<{ audio: Buffer; engine: string }> {
  // Resolve the effective settings so the debug log reflects what was actually
  // requested, not just the module defaults.
  const voice = opts.voice || FIXED_VOICE
  const rate = opts.rate || FIXED_RATE
  const pitch = opts.pitch || FIXED_PITCH

  const audio = await edgeTts(opts)
  logger.debug({ engine: 'edge', voice, rate, pitch }, 'TTS generated via Edge')
  return { audio, engine: 'edge' }
}
/**
 * Convert a speed multiplier (nominally 0.5-2.0, where 1 is normal speed) to
 * an Edge TTS rate string.
 *
 * Edge TTS rate format: "+/-NN%", e.g. 1.5 -> "+50%", 0.5 -> "-50%".
 *
 * @param speed Speed multiplier. Non-finite values (NaN, +/-Infinity) cannot be
 *   expressed as a percentage and map to the neutral "+0%".
 * @returns The signed percentage string, rounded to a whole percent.
 */
export function speedToEdgeRate(speed: number): string {
  // Guard against values such as NaN that would otherwise yield "NaN%".
  if (!Number.isFinite(speed)) return '+0%'

  const percent = Math.round((speed - 1) * 100)
  return percent >= 0 ? `+${percent}%` : `${percent}%`
}
/**
 * Request body in the OpenAI TTS format: { model, input, voice, speed }.
 * openaiCompatibleTts maps it onto the internal TtsOptions.
 */
export interface OpenaiTtsRequest {
// Accepted for API compatibility; openaiCompatibleTts does not read it.
model?: string
// Text to synthesize (becomes TtsOptions.text).
input: string
// Voice name; openaiCompatibleTts falls back to FIXED_VOICE when missing or empty.
voice?: string
// Speed multiplier; converted with speedToEdgeRate when truthy, otherwise FIXED_RATE is used.
speed?: number
}
/**
 * Serve an OpenAI-style TTS request through the Edge TTS pipeline.
 *
 * The request's `input` becomes the text to speak; `voice` and `speed` are
 * translated to Edge settings, with the module defaults used when either is
 * absent. Pitch is always the module default.
 *
 * @param body OpenAI-format request.
 * @returns The synthesized audio and the engine label from textToSpeech.
 */
export async function openaiCompatibleTts(
  body: OpenaiTtsRequest,
): Promise<{ audio: Buffer; engine: string }> {
  const { input, voice: requestedVoice, speed } = body

  // A falsy speed (undefined, 0, NaN) keeps the default rate.
  let rate = FIXED_RATE
  if (speed) {
    rate = speedToEdgeRate(speed)
  }

  const options: TtsOptions = {
    text: input,
    voice: requestedVoice || FIXED_VOICE,
    rate,
    pitch: FIXED_PITCH,
  }
  return textToSpeech(options)
}