feat: add Edge TTS rate/pitch sliders to voice settings (#629)
Add speed (rate) and pitch controls for Edge TTS provider: - Frontend: speedToEdgeRate()/hzToEdgePitch() helpers + UI sliders - Backend: rate/pitch passthrough in OpenaiTtsRequest and controller - i18n: add edgeRate/edgePitch keys across all 8 languages - Rate: 0.5x-2.0x slider, Pitch: -20Hz to +20Hz slider
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
/**
 * Argument object accepted by `generateSpeech`.
 *
 * Only `text` is required; every other member may be omitted.
 */
export interface TtsOptions {
  /** The text handed to the speech generator. */
  text: string

  /** Optional language selector (presumably a language tag such as "en-US" — confirm against the backend). */
  lang?: string

  /** Edge TTS rate format: "+NN%" or "-NN%". */
  rate?: string

  /** Edge TTS pitch format: "+NNHz" or "-NNHz". */
  pitch?: string
}
|
||||
|
||||
export async function generateSpeech(opts: TtsOptions): Promise<{ audio: Blob; engine: string }> {
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
} from "./highlight";
|
||||
import { useGlobalSpeech } from "@/composables/useSpeech";
|
||||
import { useVoiceSettings } from "@/composables/useVoiceSettings";
|
||||
import { speedToEdgeRate, hzToEdgePitch } from "@/utils/ttsHelpers";
|
||||
|
||||
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
|
||||
|
||||
@@ -420,6 +421,8 @@ function handleSpeechToggle() {
|
||||
speech.openaiToggle(props.message.id, content, {
|
||||
baseUrl: apiUrl,
|
||||
voice: voiceSettings.edgeVoice.value,
|
||||
rate: speedToEdgeRate(voiceSettings.edgeRate.value),
|
||||
pitch: hzToEdgePitch(voiceSettings.edgePitchHz.value),
|
||||
})
|
||||
return
|
||||
}
|
||||
@@ -466,6 +469,8 @@ onMounted(() => {
|
||||
speech.openaiPlay(props.message.id, content, {
|
||||
baseUrl: '/api/tts/proxy',
|
||||
voice: voiceSettings.edgeVoice.value,
|
||||
rate: speedToEdgeRate(voiceSettings.edgeRate.value),
|
||||
pitch: hzToEdgePitch(voiceSettings.edgePitchHz.value),
|
||||
})
|
||||
} else if (voiceSettings.provider.value === 'webspeech') {
|
||||
const text = speech.extractReadableText(content)
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted } from 'vue'
|
||||
import { NSelect, NInput, NButton } from 'naive-ui'
|
||||
import { NSelect, NInput, NButton, NSlider } from 'naive-ui'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import { useVoiceSettings } from '@/composables/useVoiceSettings'
|
||||
import { useSpeech } from '@/composables/useSpeech'
|
||||
import { speedToEdgeRate, hzToEdgePitch } from '@/utils/ttsHelpers'
|
||||
import SettingRow from './SettingRow.vue'
|
||||
|
||||
const { t } = useI18n()
|
||||
@@ -103,6 +104,8 @@ async function handleTest() {
|
||||
await speech.openaiPlay('__test__', text, {
|
||||
baseUrl: '/api/tts/proxy',
|
||||
voice: vs.edgeVoice.value,
|
||||
rate: speedToEdgeRate(vs.edgeRate.value),
|
||||
pitch: hzToEdgePitch(vs.edgePitchHz.value),
|
||||
})
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -267,6 +270,40 @@ async function handleTest() {
|
||||
/>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.edgeRate')"
|
||||
:hint="t('settings.voice.edgeRateHint')"
|
||||
>
|
||||
<div class="slider-row">
|
||||
<NSlider
|
||||
:value="vs.edgeRate.value"
|
||||
:min="0.5"
|
||||
:max="2.0"
|
||||
:step="0.05"
|
||||
style="width: 200px"
|
||||
@update:value="vs.setEdgeRate"
|
||||
/>
|
||||
<span class="slider-value">{{ vs.edgeRate.value.toFixed(2) }}x ({{ speedToEdgeRate(vs.edgeRate.value) }})</span>
|
||||
</div>
|
||||
</SettingRow>
|
||||
|
||||
<SettingRow
|
||||
:label="t('settings.voice.edgePitch')"
|
||||
:hint="t('settings.voice.edgePitchHint')"
|
||||
>
|
||||
<div class="slider-row">
|
||||
<NSlider
|
||||
:value="vs.edgePitchHz.value"
|
||||
:min="-20"
|
||||
:max="20"
|
||||
:step="1"
|
||||
style="width: 200px"
|
||||
@update:value="vs.setEdgePitchHz"
|
||||
/>
|
||||
<span class="slider-value">{{ vs.edgePitchHz.value > 0 ? '+' : '' }}{{ vs.edgePitchHz.value }} Hz ({{ hzToEdgePitch(vs.edgePitchHz.value) }})</span>
|
||||
</div>
|
||||
</SettingRow>
|
||||
|
||||
</template>
|
||||
|
||||
<!-- ─── Test / Audition ─── -->
|
||||
@@ -324,4 +361,17 @@ async function handleTest() {
|
||||
align-items: center;
|
||||
}
|
||||
}
|
||||
|
||||
.slider-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.slider-value {
|
||||
font-size: 12px;
|
||||
color: #999;
|
||||
white-space: nowrap;
|
||||
min-width: 120px;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -11,6 +11,8 @@ export interface OpenaiTtsOptions {
|
||||
apiKey?: string
|
||||
model?: string
|
||||
voice?: string
|
||||
rate?: string // Edge TTS rate format, e.g. "+20%"
|
||||
pitch?: string // Edge TTS pitch format, e.g. "-8Hz"
|
||||
}
|
||||
|
||||
export interface SpeechState {
|
||||
@@ -266,6 +268,9 @@ export function useSpeech() {
|
||||
input: text,
|
||||
voice: opts.voice || 'alloy',
|
||||
}
|
||||
// Edge TTS proxy 支持 rate/pitch 参数
|
||||
if (opts.rate) body.rate = opts.rate
|
||||
if (opts.pitch) body.pitch = opts.pitch
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
@@ -21,6 +21,8 @@ export interface VoiceSettingsData {
|
||||
// Edge TTS
|
||||
edgeUrl: string
|
||||
edgeVoice: string
|
||||
edgeRate: number // 语速倍率 0.5~2.0,1.0 = 正常
|
||||
edgePitchHz: number // 音调偏移 Hz,-20~20,0 = 正常
|
||||
}
|
||||
|
||||
const STORAGE_KEY = 'hermes-tts-settings-v2'
|
||||
@@ -63,6 +65,8 @@ const DEFAULT: VoiceSettingsData = {
|
||||
|
||||
edgeUrl: '',
|
||||
edgeVoice: 'zh-CN-XiaoxiaoNeural',
|
||||
edgeRate: 1.0,
|
||||
edgePitchHz: 0,
|
||||
}
|
||||
|
||||
function sanitize(data: VoiceSettingsData): VoiceSettingsData {
|
||||
@@ -103,11 +107,13 @@ const customApiKey = ref<string>(load().customApiKey)
|
||||
// Edge TTS
|
||||
const edgeUrl = ref<string>(load().edgeUrl)
|
||||
const edgeVoice = ref<string>(load().edgeVoice)
|
||||
const edgeRate = ref<number>(load().edgeRate)
|
||||
const edgePitchHz = ref<number>(load().edgePitchHz)
|
||||
|
||||
// Auto-persist on change
|
||||
watch(
|
||||
[provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
|
||||
customUrl, customApiKey, edgeUrl, edgeVoice],
|
||||
customUrl, customApiKey, edgeUrl, edgeVoice, edgeRate, edgePitchHz],
|
||||
() => {
|
||||
localStorage.setItem(STORAGE_KEY, JSON.stringify({
|
||||
provider: provider.value,
|
||||
@@ -120,6 +126,8 @@ watch(
|
||||
customApiKey: customApiKey.value,
|
||||
edgeUrl: edgeUrl.value,
|
||||
edgeVoice: edgeVoice.value,
|
||||
edgeRate: edgeRate.value,
|
||||
edgePitchHz: edgePitchHz.value,
|
||||
}))
|
||||
},
|
||||
)
|
||||
@@ -136,6 +144,8 @@ export function useVoiceSettings() {
|
||||
customApiKey,
|
||||
edgeUrl,
|
||||
edgeVoice,
|
||||
edgeRate,
|
||||
edgePitchHz,
|
||||
|
||||
setProvider(v: TtsProvider) { provider.value = v },
|
||||
setWebSpeechVoice(v: string) { webspeechVoice.value = v },
|
||||
@@ -147,6 +157,8 @@ export function useVoiceSettings() {
|
||||
setCustomApiKey(v: string) { customApiKey.value = v },
|
||||
setEdgeUrl(v: string) { edgeUrl.value = v },
|
||||
setEdgeVoice(v: string) { edgeVoice.value = v },
|
||||
setEdgeRate(v: number) { edgeRate.value = v },
|
||||
setEdgePitchHz(v: number) { edgePitchHz.value = v },
|
||||
|
||||
reset() {
|
||||
provider.value = DEFAULT.provider
|
||||
@@ -159,6 +171,8 @@ export function useVoiceSettings() {
|
||||
customApiKey.value = DEFAULT.customApiKey
|
||||
edgeUrl.value = DEFAULT.edgeUrl
|
||||
edgeVoice.value = DEFAULT.edgeVoice
|
||||
edgeRate.value = DEFAULT.edgeRate
|
||||
edgePitchHz.value = DEFAULT.edgePitchHz
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -630,6 +630,11 @@ jobTriggered: 'Job ausgelost',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Stimme',
|
||||
edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese',
|
||||
edgeRate: 'Geschwindigkeit',
|
||||
edgeRateHint: 'Sprachgeschwindigkeit anpassen (0,5x ~ 2,0x)',
|
||||
edgePitch: 'Tonhöhe',
|
||||
edgePitchHint: 'Tonhöhe anpassen (-20 ~ +20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: 'Sprachtest',
|
||||
|
||||
@@ -793,6 +793,11 @@ export default {
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voice',
|
||||
edgeVoiceHint: 'Select a voice for speech synthesis',
|
||||
edgeRate: 'Speed',
|
||||
edgeRateHint: 'Adjust speech speed (0.5x ~ 2.0x)',
|
||||
edgePitch: 'Pitch',
|
||||
edgePitchHint: 'Adjust speech pitch (-20 ~ +20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: 'Test Voice',
|
||||
|
||||
@@ -630,6 +630,11 @@ jobTriggered: 'Job ejecutado',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voz',
|
||||
edgeVoiceHint: 'Seleccione una voz para la sintesis de voz',
|
||||
edgeRate: 'Velocidad',
|
||||
edgeRateHint: 'Ajustar velocidad del habla (0.5x ~ 2.0x)',
|
||||
edgePitch: 'Tono',
|
||||
edgePitchHint: 'Ajustar tono del habla (-20 ~ +20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: 'Prueba de voz',
|
||||
|
||||
@@ -630,6 +630,11 @@ jobTriggered: 'Job declenche',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voix',
|
||||
edgeVoiceHint: 'Choisir une voix pour la synthese vocale',
|
||||
edgeRate: 'Vitesse',
|
||||
edgeRateHint: "Ajuster la vitesse de la voix (0.5x ~ 2.0x)",
|
||||
edgePitch: 'Hauteur',
|
||||
edgePitchHint: "Ajuster la hauteur de la voix (-20 ~ +20 Hz)",
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: 'Test vocal',
|
||||
|
||||
@@ -630,6 +630,11 @@ export default {
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '音色',
|
||||
edgeVoiceHint: '音声合成に使用する音色を選択',
|
||||
edgeRate: '速度',
|
||||
edgeRateHint: '音声の速度を調整(0.5~2.0倍)',
|
||||
edgePitch: 'ピッチ',
|
||||
edgePitchHint: '音声のピッチを調整(-20~+20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: '音声テスト',
|
||||
|
||||
@@ -630,6 +630,11 @@ export default {
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '음색',
|
||||
edgeVoiceHint: '음성 합성에 사용할 음색 선택',
|
||||
edgeRate: '속도',
|
||||
edgeRateHint: '음성 속도 조절 (0.5~2.0배)',
|
||||
edgePitch: '음높이',
|
||||
edgePitchHint: '음성 음높이 조절 (-20~+20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: '음성 테스트',
|
||||
|
||||
@@ -630,6 +630,11 @@ jobTriggered: 'Job acionado',
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: 'Voz',
|
||||
edgeVoiceHint: 'Selecione uma voz para sintese de fala',
|
||||
edgeRate: 'Velocidade',
|
||||
edgeRateHint: 'Ajustar velocidade da fala (0.5x ~ 2.0x)',
|
||||
edgePitch: 'Tom',
|
||||
edgePitchHint: 'Ajustar tom da fala (-20 ~ +20 Hz)',
|
||||
|
||||
|
||||
// Test
|
||||
testTitle: 'Teste de voz',
|
||||
|
||||
@@ -785,6 +785,11 @@ export default {
|
||||
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
|
||||
edgeVoice: '音色',
|
||||
edgeVoiceHint: '选择用于语音合成的音色',
|
||||
edgeRate: '语速',
|
||||
edgeRateHint: '调整语音速度(0.5~2.0 倍)',
|
||||
edgePitch: '音调',
|
||||
edgePitchHint: '调整语音音调(-20~+20 Hz)',
|
||||
|
||||
|
||||
// 试听
|
||||
testTitle: '试听测试',
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
 * Converts a playback-speed multiplier into an Edge TTS rate string.
 *
 * The result is always explicitly signed, including the neutral value:
 * 1.0 → "+0%", 1.2 → "+20%", 0.5 → "-50%".
 *
 * @param speed - Speed multiplier, where 1.0 means normal speed.
 * @returns The offset from normal speed, rounded to a whole percent and
 *   formatted as "+NN%" or "-NN%". Non-finite input (NaN, ±Infinity) yields
 *   the neutral "+0%".
 */
export function speedToEdgeRate(speed: number): string {
  const percent = Math.round((speed - 1) * 100)

  // Without this guard a NaN/Infinity input would be rendered as "NaN%" or
  // "+Infinity%", which is not a valid "+NN%" / "-NN%" rate string.
  if (!Number.isFinite(percent)) return '+0%'

  // Math.round may produce -0; `-0 >= 0` is true and -0 prints as "0",
  // so the neutral value is always emitted as "+0%".
  return percent >= 0 ? `+${percent}%` : `${percent}%`
}
|
||||
|
||||
/**
 * Converts a pitch offset in Hz into an Edge TTS pitch string.
 *
 * The result is always explicitly signed, including the neutral value:
 * 0 → "+0Hz", 12 → "+12Hz", -8 → "-8Hz".
 *
 * @param hz - Pitch offset in Hz, where 0 means unchanged pitch.
 * @returns The offset rounded to a whole number of Hz and formatted as
 *   "+NNHz" or "-NNHz". Non-finite input (NaN, ±Infinity) yields the
 *   neutral "+0Hz".
 */
export function hzToEdgePitch(hz: number): string {
  // Round so a fractional value cannot leak into the "+NNHz" format,
  // mirroring how speedToEdgeRate rounds to a whole percent.
  const offset = Math.round(hz)

  // Without this guard a NaN/Infinity input would be rendered as "NaNHz" or
  // "+InfinityHz", which is not a valid "+NNHz" / "-NNHz" pitch string.
  if (!Number.isFinite(offset)) return '+0Hz'

  // Math.round may produce -0; `-0 >= 0` is true and -0 prints as "0",
  // so the neutral value is always emitted as "+0Hz".
  return offset >= 0 ? `+${offset}Hz` : `${offset}Hz`
}
|
||||
@@ -38,6 +38,8 @@ export async function openaiProxy(ctx: Context) {
|
||||
voice?: string
|
||||
speed?: number
|
||||
model?: string
|
||||
rate?: string
|
||||
pitch?: string
|
||||
}
|
||||
|
||||
if (!body.input || typeof body.input !== 'string') {
|
||||
@@ -57,6 +59,8 @@ export async function openaiProxy(ctx: Context) {
|
||||
voice: body.voice,
|
||||
speed: body.speed,
|
||||
model: body.model,
|
||||
rate: body.rate,
|
||||
pitch: body.pitch,
|
||||
})
|
||||
|
||||
ctx.set('Content-Type', 'audio/mpeg')
|
||||
|
||||
@@ -64,6 +64,8 @@ export interface OpenaiTtsRequest {
|
||||
input: string
|
||||
voice?: string
|
||||
speed?: number
|
||||
rate?: string // Edge TTS rate format, e.g. "+20%". Takes priority over speed.
|
||||
pitch?: string // Edge TTS pitch format, e.g. "-8Hz"
|
||||
}
|
||||
|
||||
export async function openaiCompatibleTts(
|
||||
@@ -72,7 +74,7 @@ export async function openaiCompatibleTts(
|
||||
return textToSpeech({
|
||||
text: body.input,
|
||||
voice: body.voice || FIXED_VOICE,
|
||||
rate: body.speed ? speedToEdgeRate(body.speed) : FIXED_RATE,
|
||||
pitch: FIXED_PITCH,
|
||||
rate: body.rate || (body.speed ? speedToEdgeRate(body.speed) : FIXED_RATE),
|
||||
pitch: body.pitch || FIXED_PITCH,
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user