feat: add Edge TTS rate/pitch sliders to voice settings (#629)

Add speed (rate) and pitch controls for Edge TTS provider:
- Frontend: speedToEdgeRate()/hzToEdgePitch() helpers + UI sliders
- Backend: rate/pitch passthrough in OpenaiTtsRequest and controller
- i18n: add edgeRate/edgePitch keys across all 8 languages
- Rate: 0.5x-2.0x slider, Pitch: -20Hz to +20Hz slider
This commit is contained in:
memeflyfly
2026-05-11 21:56:11 +08:00
committed by GitHub
parent 5e608ea338
commit a68b9bf01f
16 changed files with 142 additions and 4 deletions
+2
View File
@@ -1,6 +1,8 @@
/**
 * Options bag accepted by `generateSpeech`.
 *
 * Only `text` is required; every other field is optional.
 */
export interface TtsOptions {
// Required input string for the speech request.
text: string
// Optional language selector. NOTE(review): expected format (e.g. a BCP-47 tag) is not visible here — confirm.
lang?: string
rate?: string // Edge TTS rate format: "+NN%" or "-NN%"
pitch?: string // Edge TTS pitch format: "+NNHz" or "-NNHz"
}
export async function generateSpeech(opts: TtsOptions): Promise<{ audio: Blob; engine: string }> {
@@ -17,6 +17,7 @@ import {
} from "./highlight";
import { useGlobalSpeech } from "@/composables/useSpeech";
import { useVoiceSettings } from "@/composables/useVoiceSettings";
import { speedToEdgeRate, hzToEdgePitch } from "@/utils/ttsHelpers";
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
@@ -420,6 +421,8 @@ function handleSpeechToggle() {
speech.openaiToggle(props.message.id, content, {
baseUrl: apiUrl,
voice: voiceSettings.edgeVoice.value,
rate: speedToEdgeRate(voiceSettings.edgeRate.value),
pitch: hzToEdgePitch(voiceSettings.edgePitchHz.value),
})
return
}
@@ -466,6 +469,8 @@ onMounted(() => {
speech.openaiPlay(props.message.id, content, {
baseUrl: '/api/tts/proxy',
voice: voiceSettings.edgeVoice.value,
rate: speedToEdgeRate(voiceSettings.edgeRate.value),
pitch: hzToEdgePitch(voiceSettings.edgePitchHz.value),
})
} else if (voiceSettings.provider.value === 'webspeech') {
const text = speech.extractReadableText(content)
@@ -1,9 +1,10 @@
<script setup lang="ts">
import { ref, onMounted } from 'vue'
import { NSelect, NInput, NButton } from 'naive-ui'
import { NSelect, NInput, NButton, NSlider } from 'naive-ui'
import { useI18n } from 'vue-i18n'
import { useVoiceSettings } from '@/composables/useVoiceSettings'
import { useSpeech } from '@/composables/useSpeech'
import { speedToEdgeRate, hzToEdgePitch } from '@/utils/ttsHelpers'
import SettingRow from './SettingRow.vue'
const { t } = useI18n()
@@ -103,6 +104,8 @@ async function handleTest() {
await speech.openaiPlay('__test__', text, {
baseUrl: '/api/tts/proxy',
voice: vs.edgeVoice.value,
rate: speedToEdgeRate(vs.edgeRate.value),
pitch: hzToEdgePitch(vs.edgePitchHz.value),
})
}
} catch (err) {
@@ -267,6 +270,40 @@ async function handleTest() {
/>
</SettingRow>
<SettingRow
:label="t('settings.voice.edgeRate')"
:hint="t('settings.voice.edgeRateHint')"
>
<div class="slider-row">
<NSlider
:value="vs.edgeRate.value"
:min="0.5"
:max="2.0"
:step="0.05"
style="width: 200px"
@update:value="vs.setEdgeRate"
/>
<span class="slider-value">{{ vs.edgeRate.value.toFixed(2) }}x ({{ speedToEdgeRate(vs.edgeRate.value) }})</span>
</div>
</SettingRow>
<SettingRow
:label="t('settings.voice.edgePitch')"
:hint="t('settings.voice.edgePitchHint')"
>
<div class="slider-row">
<NSlider
:value="vs.edgePitchHz.value"
:min="-20"
:max="20"
:step="1"
style="width: 200px"
@update:value="vs.setEdgePitchHz"
/>
<span class="slider-value">{{ vs.edgePitchHz.value > 0 ? '+' : '' }}{{ vs.edgePitchHz.value }} Hz ({{ hzToEdgePitch(vs.edgePitchHz.value) }})</span>
</div>
</SettingRow>
</template>
<!-- Test / Audition -->
@@ -324,4 +361,17 @@ async function handleTest() {
align-items: center;
}
}
.slider-row {
display: flex;
align-items: center;
gap: 12px;
}
.slider-value {
font-size: 12px;
color: #999;
white-space: nowrap;
min-width: 120px;
}
</style>
@@ -11,6 +11,8 @@ export interface OpenaiTtsOptions {
apiKey?: string
model?: string
voice?: string
rate?: string // Edge TTS rate format, e.g. "+20%"
pitch?: string // Edge TTS pitch format, e.g. "-8Hz"
}
export interface SpeechState {
@@ -266,6 +268,9 @@ export function useSpeech() {
input: text,
voice: opts.voice || 'alloy',
}
// Edge TTS proxy 支持 rate/pitch 参数
if (opts.rate) body.rate = opts.rate
if (opts.pitch) body.pitch = opts.pitch
const headers: Record<string, string> = {
'Content-Type': 'application/json',
@@ -21,6 +21,8 @@ export interface VoiceSettingsData {
// Edge TTS
edgeUrl: string
edgeVoice: string
edgeRate: number // Speech-rate multiplier, 0.5~2.0; 1.0 = normal
edgePitchHz: number // Pitch offset in Hz, -20~20; 0 = normal
}
const STORAGE_KEY = 'hermes-tts-settings-v2'
@@ -63,6 +65,8 @@ const DEFAULT: VoiceSettingsData = {
edgeUrl: '',
edgeVoice: 'zh-CN-XiaoxiaoNeural',
edgeRate: 1.0,
edgePitchHz: 0,
}
function sanitize(data: VoiceSettingsData): VoiceSettingsData {
@@ -103,11 +107,13 @@ const customApiKey = ref<string>(load().customApiKey)
// Edge TTS
const edgeUrl = ref<string>(load().edgeUrl)
const edgeVoice = ref<string>(load().edgeVoice)
const edgeRate = ref<number>(load().edgeRate)
const edgePitchHz = ref<number>(load().edgePitchHz)
// Auto-persist on change
watch(
[provider, webspeechVoice, openaiApiKey, openaiBaseUrl, openaiModel, openaiVoice,
customUrl, customApiKey, edgeUrl, edgeVoice],
customUrl, customApiKey, edgeUrl, edgeVoice, edgeRate, edgePitchHz],
() => {
localStorage.setItem(STORAGE_KEY, JSON.stringify({
provider: provider.value,
@@ -120,6 +126,8 @@ watch(
customApiKey: customApiKey.value,
edgeUrl: edgeUrl.value,
edgeVoice: edgeVoice.value,
edgeRate: edgeRate.value,
edgePitchHz: edgePitchHz.value,
}))
},
)
@@ -136,6 +144,8 @@ export function useVoiceSettings() {
customApiKey,
edgeUrl,
edgeVoice,
edgeRate,
edgePitchHz,
setProvider(v: TtsProvider) { provider.value = v },
setWebSpeechVoice(v: string) { webspeechVoice.value = v },
@@ -147,6 +157,8 @@ export function useVoiceSettings() {
setCustomApiKey(v: string) { customApiKey.value = v },
setEdgeUrl(v: string) { edgeUrl.value = v },
setEdgeVoice(v: string) { edgeVoice.value = v },
setEdgeRate(v: number) { edgeRate.value = v },
setEdgePitchHz(v: number) { edgePitchHz.value = v },
reset() {
provider.value = DEFAULT.provider
@@ -159,6 +171,8 @@ export function useVoiceSettings() {
customApiKey.value = DEFAULT.customApiKey
edgeUrl.value = DEFAULT.edgeUrl
edgeVoice.value = DEFAULT.edgeVoice
edgeRate.value = DEFAULT.edgeRate
edgePitchHz.value = DEFAULT.edgePitchHz
},
}
}
+5
View File
@@ -630,6 +630,11 @@ jobTriggered: 'Job ausgelost',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Stimme',
edgeVoiceHint: 'Waehlen Sie eine Stimme fuer die Sprachsynthese',
edgeRate: 'Geschwindigkeit',
edgeRateHint: 'Sprachgeschwindigkeit anpassen (0,5x ~ 2,0x)',
edgePitch: 'Tonhöhe',
edgePitchHint: 'Tonhöhe anpassen (-20 ~ +20 Hz)',
// Test
testTitle: 'Sprachtest',
+5
View File
@@ -793,6 +793,11 @@ export default {
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voice',
edgeVoiceHint: 'Select a voice for speech synthesis',
edgeRate: 'Speed',
edgeRateHint: 'Adjust speech speed (0.5x ~ 2.0x)',
edgePitch: 'Pitch',
edgePitchHint: 'Adjust speech pitch (-20 ~ +20 Hz)',
// Test
testTitle: 'Test Voice',
+5
View File
@@ -630,6 +630,11 @@ jobTriggered: 'Job ejecutado',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voz',
edgeVoiceHint: 'Seleccione una voz para la sintesis de voz',
edgeRate: 'Velocidad',
edgeRateHint: 'Ajustar velocidad del habla (0.5x ~ 2.0x)',
edgePitch: 'Tono',
edgePitchHint: 'Ajustar tono del habla (-20 ~ +20 Hz)',
// Test
testTitle: 'Prueba de voz',
+5
View File
@@ -630,6 +630,11 @@ jobTriggered: 'Job declenche',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voix',
edgeVoiceHint: 'Choisir une voix pour la synthese vocale',
edgeRate: 'Vitesse',
edgeRateHint: "Ajuster la vitesse de la voix (0.5x ~ 2.0x)",
edgePitch: 'Hauteur',
edgePitchHint: "Ajuster la hauteur de la voix (-20 ~ +20 Hz)",
// Test
testTitle: 'Test vocal',
+5
View File
@@ -630,6 +630,11 @@ export default {
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '音色',
edgeVoiceHint: '音声合成に使用する音色を選択',
edgeRate: '速度',
edgeRateHint: '音声の速度を調整(0.5~2.0倍)',
edgePitch: 'ピッチ',
edgePitchHint: '音声のピッチを調整(-20~+20 Hz)',
// Test
testTitle: '音声テスト',
+5
View File
@@ -630,6 +630,11 @@ export default {
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '음색',
edgeVoiceHint: '음성 합성에 사용할 음색 선택',
edgeRate: '속도',
edgeRateHint: '음성 속도 조절 (0.5~2.0배)',
edgePitch: '음높이',
edgePitchHint: '음성 음높이 조절 (-20~+20 Hz)',
// Test
testTitle: '음성 테스트',
+5
View File
@@ -630,6 +630,11 @@ jobTriggered: 'Job acionado',
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: 'Voz',
edgeVoiceHint: 'Selecione uma voz para sintese de fala',
edgeRate: 'Velocidade',
edgeRateHint: 'Ajustar velocidade da fala (0.5x ~ 2.0x)',
edgePitch: 'Tom',
edgePitchHint: 'Ajustar tom da fala (-20 ~ +20 Hz)',
// Test
testTitle: 'Teste de voz',
+5
View File
@@ -785,6 +785,11 @@ export default {
edgeUrlPlaceholder: 'http://127.0.0.1:9882',
edgeVoice: '音色',
edgeVoiceHint: '选择用于语音合成的音色',
edgeRate: '语速',
edgeRateHint: '调整语音速度(0.5~2.0 倍)',
edgePitch: '音调',
edgePitchHint: '调整语音音调(-20~+20 Hz)',
// 试听
testTitle: '试听测试',
+16
View File
@@ -0,0 +1,16 @@
/**
 * Convert a playback-speed multiplier into an Edge TTS rate string.
 *
 * The result is always a signed, whole-number percentage relative to normal
 * speed: 1.0 → "+0%", 1.2 → "+20%", 0.5 → "-50%".
 *
 * @param speed - Speed multiplier, where 1.0 means normal speed.
 * @returns A string of the form "+NN%" or "-NN%". A non-finite input (NaN,
 *   ±Infinity, or an `undefined` that slipped past the type checker, e.g. a
 *   missing persisted setting) yields the neutral "+0%" instead of "NaN%".
 */
export function speedToEdgeRate(speed: number): string {
  // Guard first: Math.round(NaN) is NaN and would otherwise render as "NaN%".
  if (!Number.isFinite(speed)) return '+0%'
  // Rounding also absorbs float noise such as (1.2 - 1) * 100 = 19.999999999999996.
  const percent = Math.round((speed - 1) * 100)
  return percent >= 0 ? `+${percent}%` : `${percent}%`
}
/**
 * Convert a pitch offset in Hz into an Edge TTS pitch string.
 *
 * The result is always a signed, whole-number Hz value:
 * 0 → "+0Hz", 12 → "+12Hz", -8 → "-8Hz".
 *
 * @param hz - Pitch offset in Hz; 0 means no change.
 * @returns A string of the form "+NNHz" or "-NNHz". Fractional input is
 *   rounded to the nearest integer so the output keeps that shape, and a
 *   non-finite input (NaN, ±Infinity, or a stray `undefined`) yields the
 *   neutral "+0Hz" instead of "NaNHz".
 */
export function hzToEdgePitch(hz: number): string {
  // Guard first: a NaN would otherwise be rendered verbatim as "NaNHz".
  if (!Number.isFinite(hz)) return '+0Hz'
  const offset = Math.round(hz)
  // A rounded -0 still satisfies `>= 0` and prints as "0", giving "+0Hz".
  return offset >= 0 ? `+${offset}Hz` : `${offset}Hz`
}
@@ -38,6 +38,8 @@ export async function openaiProxy(ctx: Context) {
voice?: string
speed?: number
model?: string
rate?: string
pitch?: string
}
if (!body.input || typeof body.input !== 'string') {
@@ -57,6 +59,8 @@ export async function openaiProxy(ctx: Context) {
voice: body.voice,
speed: body.speed,
model: body.model,
rate: body.rate,
pitch: body.pitch,
})
ctx.set('Content-Type', 'audio/mpeg')
+4 -2
View File
@@ -64,6 +64,8 @@ export interface OpenaiTtsRequest {
input: string
voice?: string
speed?: number
rate?: string // Edge TTS rate format, e.g. "+20%". Takes priority over speed.
pitch?: string // Edge TTS pitch format, e.g. "-8Hz"
}
export async function openaiCompatibleTts(
@@ -72,7 +74,7 @@ export async function openaiCompatibleTts(
return textToSpeech({
text: body.input,
voice: body.voice || FIXED_VOICE,
rate: body.speed ? speedToEdgeRate(body.speed) : FIXED_RATE,
pitch: FIXED_PITCH,
rate: body.rate || (body.speed ? speedToEdgeRate(body.speed) : FIXED_RATE),
pitch: body.pitch || FIXED_PITCH,
})
}