feat(chat): add voice playback with auto-play and visual effects (#396)

## Features

### Core Functionality
- **Web Speech API Integration**: Add TTS (text-to-speech) playback for assistant messages
- **Manual Playback**: Click-to-play button next to each assistant message (🔊 icon)
- **Auto-play Mode**: Toggle switch in input bar to auto-play responses
- **Playback Controls**: Play/pause/stop with visual feedback

### User Interface
- **Playback Button**: Hover-activated button in message meta area (next to copy button)
- **Auto-play Switch**: Voice icon toggle in input top bar with state persistence
- **Mobile Optimization**: Buttons always visible on mobile (≤768px width)
- **Visual Feedback**:
  - Rainbow glowing border during playback (2px border, 10px/20px glow)
  - 4-second animation cycle through 6 colors
  - Play/pause icon toggle

### Voice Customization
- **Pitch/Rate Control**: Lower pitch (0.5) and faster speaking rate (1.2) to approximate a male-sounding voice
- **Auto Voice Selection**: Attempts to select male voices across platforms (macOS: Yaoyao, Windows: David/Daniel)
- **Platform Compatibility**: Works with system-provided voices on macOS, iOS, Android, Windows

### Content Filtering
- Smart text extraction: filters code blocks, `<thinking>` tags, HTML
- Only assistant messages are eligible for playback
- Tool and system messages are excluded

### Internationalization
- Added 8 language translations (en, zh, de, es, fr, ja, ko, pt)
- New keys: `playSpeech`, `pauseSpeech`, `resumeSpeech`, `stopSpeech`, `autoPlaySpeech`, `speechNotSupported`

## Technical Details

### New Files
- `packages/client/src/composables/useSpeech.ts`: Core speech synthesis composable
  - Voice loading and selection logic
  - Single-instance global speech manager
  - Event handling (onstart, onend, onerror, onboundary)
  - State management (isPlaying, isPaused, currentMessageId, progress)

### Modified Components
- **ChatInput.vue**: Auto-play switch with localStorage persistence
- **MessageItem.vue**:
  - Playback button integration
  - Event listener for auto-play triggers
  - Mobile-first button visibility
  - Rainbow border animation during playback

### Store Changes
- `chat.ts`:
  - Added `autoPlaySpeechEnabled` state
  - `setAutoPlaySpeech()` method
  - `playMessageSpeech()` method for event-based playback
  - Auto-play trigger on `run.completed` event

## Browser Support
- Requires Web Speech API support (all modern browsers)
- Graceful degradation: button hidden if API not supported
- Voice availability varies by platform and OS

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-05-02 13:26:57 +08:00
committed by GitHub
parent 4c8cff2e7c
commit caa9162f28
12 changed files with 624 additions and 13 deletions
@@ -0,0 +1,281 @@
import { ref, computed, onUnmounted } from 'vue'
/**
 * Optional tuning parameters for a single speech request.
 * Every field may be omitted.
 */
export interface SpeechOptions {
  /** Speaking rate, 0.1-10; defaults to 1. */
  rate?: number
  /** Pitch, 0-2; defaults to 1. */
  pitch?: number
  /** Volume, 0-1; defaults to 1. */
  volume?: number
  /** Voice to speak with. */
  voice?: SpeechSynthesisVoice | null
  /** Language, e.g. 'zh-CN' or 'en-US'. */
  lang?: string
}
/**
 * Snapshot of the playback state tracked by the speech composable.
 */
export interface SpeechState {
  /** Whether a message is currently being played. */
  isPlaying: boolean
  /** Whether playback is currently paused. */
  isPaused: boolean
  /** Id of the message being played, or null when none is. */
  currentMessageId: string | null
  /** Current progress, measured in characters. */
  progress: number
}
/**
 * Web Speech API text-to-speech composable.
 *
 * Wraps `window.speechSynthesis` with reactive playback state and play / pause /
 * resume / stop controls keyed by message id. In environments without the
 * Web Speech API the composable can still be created: `isSupported` is false
 * and every control is a no-op.
 *
 * @param config Optional settings.
 *   `autoCleanup` (default `true`): stop playback and remove the
 *   `voiceschanged` listener when the calling component unmounts. Pass `false`
 *   for an instance that must outlive the component that created it.
 * @returns Reactive state (`isSupported`, `availableVoices`, `isPlaying`,
 *   `isPaused`, `currentMessageId`, `progress`) and the playback controls.
 */
export function useSpeech(config: { autoCleanup?: boolean } = {}) {
  const { autoCleanup = true } = config

  // Feature-detect BEFORE touching the API. Dereferencing
  // window.speechSynthesis unconditionally throws where it is missing, which
  // would defeat the "hide the button when unsupported" fallback.
  const supported =
    typeof window !== 'undefined' &&
    'speechSynthesis' in window &&
    'SpeechSynthesisUtterance' in window
  const synth: SpeechSynthesis | null = supported ? window.speechSynthesis : null

  const availableVoices = ref<SpeechSynthesisVoice[]>([])
  const state = ref<SpeechState>({
    isPlaying: false,
    isPaused: false,
    currentMessageId: null,
    progress: 0,
  })

  // The utterance this instance currently owns. Event handlers compare against
  // it so that late events from a cancelled utterance cannot clobber the state
  // of a newer one.
  let utterance: SpeechSynthesisUtterance | null = null

  /** Refresh the list of voices exposed by the browser. */
  function loadVoices() {
    if (synth) {
      availableVoices.value = synth.getVoices()
    }
  }

  // The browser fires `voiceschanged` whenever the voice list changes.
  synth?.addEventListener('voiceschanged', loadVoices)
  loadVoices() // initial load

  /**
   * Reduce message content to the text that should be read aloud.
   *
   * Strips `<thinking>` sections (closed or still open), fenced code blocks,
   * inline code, and HTML tags, then collapses whitespace.
   *
   * @param content Raw message content.
   * @returns The readable text, or an empty string when nothing remains.
   */
  function extractReadableText(content: string): string {
    if (!content) return ''
    let text = content
    // Remove <thinking> sections; the second pattern handles an unclosed tag.
    text = text.replace(/<thinking[^>]*>[\s\S]*?<\/thinking>/gi, '')
    text = text.replace(/<thinking[^>]*>[\s\S]*/gi, '')
    // Remove fenced code blocks, then inline code.
    text = text.replace(/```[\s\S]*?```/g, '')
    text = text.replace(/`[^`]+`/g, '')
    // Remove HTML tags.
    text = text.replace(/<[^>]+>/g, '')
    // Collapse redundant whitespace.
    text = text.replace(/\s+/g, ' ').trim()
    return text
  }

  /** Whether the browser supports the Web Speech API. */
  const isSupported = computed(() => supported)

  /**
   * Pick the default voice: the first Chinese voice, else the first English
   * voice, else the first voice available.
   *
   * @returns The chosen voice, or null when no voices are loaded.
   */
  function getDefaultVoice(): SpeechSynthesisVoice | null {
    const voices = availableVoices.value
    return (
      voices.find(v => v.lang.startsWith('zh')) ??
      voices.find(v => v.lang.startsWith('en')) ??
      voices[0] ??
      null
    )
  }

  /** Return every loaded voice (useful for debugging). */
  function getAllVoices(): SpeechSynthesisVoice[] {
    return availableVoices.value
  }

  /** Stop playback, discard the current utterance, and reset the state. */
  function stop() {
    // Release ownership first so the end/error events that cancel() triggers
    // for the old utterance are recognised as stale and ignored.
    utterance = null
    if (synth) {
      if (synth.speaking || synth.pending) {
        synth.cancel()
      }
      // cancel() does not clear the engine's paused flag; without resume() the
      // next speak() would be queued but never heard.
      if (synth.paused || state.value.isPaused) {
        synth.resume()
      }
    }
    state.value = {
      isPlaying: false,
      isPaused: false,
      currentMessageId: null,
      progress: 0,
    }
  }

  /**
   * Speak a message.
   *
   * If the same message is already active this toggles pause / resume instead.
   * If another message is active it is stopped first.
   *
   * @param messageId Id used to track which message is being played.
   * @param content Raw message content; it is filtered through
   *   `extractReadableText` before being spoken.
   * @param options Rate, pitch, volume, voice and language overrides.
   */
  function play(messageId: string, content: string, options: SpeechOptions = {}) {
    if (!synth) {
      console.warn('[useSpeech] Speech synthesis not supported')
      return
    }
    console.log('[useSpeech] play called:', messageId)

    const activeId = state.value.currentMessageId
    // Already on this message: pause or resume.
    if (activeId === messageId) {
      if (state.value.isPaused) {
        resume()
      } else if (state.value.isPlaying) {
        pause()
      }
      return
    }
    // Another message is playing: stop it first.
    if (activeId) {
      stop()
    }

    const text = extractReadableText(content)
    if (!text) {
      console.warn('[useSpeech] No readable text found')
      return
    }
    console.log('[useSpeech] Playing text:', text.substring(0, 50) + '...')

    // Also clears an utterance that was queued but has not started yet.
    stop()

    const current = new SpeechSynthesisUtterance(text)
    utterance = current

    current.rate = options.rate ?? 1
    current.pitch = options.pitch ?? 1
    current.volume = options.volume ?? 1
    current.voice = options.voice ?? getDefaultVoice()
    console.log('[useSpeech] Selected voice:', current.voice?.name, current.voice?.lang)
    if (options.lang) {
      current.lang = options.lang
    } else if (current.voice) {
      current.lang = current.voice.lang
    }

    // True while `current` is still the utterance this instance owns.
    const isCurrent = () => utterance === current

    current.onstart = () => {
      if (!isCurrent()) return
      console.log('[useSpeech] onstart fired')
      state.value.isPlaying = true
      state.value.isPaused = false
      state.value.currentMessageId = messageId
      state.value.progress = 0
    }
    current.onboundary = (event) => {
      if (isCurrent() && event.name === 'word') {
        state.value.progress = event.charIndex
      }
    }
    current.onend = () => {
      if (!isCurrent()) return
      console.log('[useSpeech] onend fired')
      utterance = null
      state.value.isPlaying = false
      state.value.isPaused = false
      state.value.currentMessageId = null
      state.value.progress = text.length
    }
    current.onerror = (event) => {
      // Errors from an utterance we already cancelled are expected; stop()
      // has reset the state, so do not touch the newer utterance's state.
      if (!isCurrent()) return
      if (event.error !== 'canceled' && event.error !== 'interrupted') {
        console.error('[useSpeech] Speech synthesis error:', event.error)
      }
      utterance = null
      state.value.isPlaying = false
      state.value.isPaused = false
      state.value.currentMessageId = null
    }

    console.log('[useSpeech] Calling synth.speak()')
    synth.speak(current)
  }

  /** Pause playback if something is being spoken and it is not paused yet. */
  function pause() {
    if (synth && synth.speaking && !state.value.isPaused) {
      synth.pause()
      state.value.isPaused = true
    }
  }

  /** Resume playback if it is currently paused. */
  function resume() {
    if (synth && state.value.isPaused) {
      synth.resume()
      state.value.isPaused = false
    }
  }

  /**
   * Toggle between pause and resume for the active message, or start playing
   * the given message when it is not the active one.
   */
  function toggle(messageId: string, content: string, options: SpeechOptions = {}) {
    if (state.value.currentMessageId === messageId && state.value.isPlaying) {
      if (state.value.isPaused) {
        resume()
      } else {
        pause()
      }
    } else {
      play(messageId, content, options)
    }
  }

  // Tie cleanup to the calling component unless the caller opted out.
  if (autoCleanup) {
    onUnmounted(() => {
      stop()
      synth?.removeEventListener('voiceschanged', loadVoices)
    })
  }

  return {
    // State
    isSupported,
    availableVoices,
    isPlaying: computed(() => state.value.isPlaying),
    isPaused: computed(() => state.value.isPaused),
    currentMessageId: computed(() => state.value.currentMessageId),
    progress: computed(() => state.value.progress),
    // Methods
    play,
    pause,
    resume,
    stop,
    toggle,
    getDefaultVoice,
    getAllVoices,
    extractReadableText,
  }
}

// Singleton: one speech instance shared by the whole application.
let globalSpeech: ReturnType<typeof useSpeech> | null = null

/**
 * Return the application-wide speech instance, creating it on first use.
 *
 * The instance is created with `autoCleanup: false`. Otherwise it would be
 * torn down when the first component that happened to request it unmounts,
 * stopping playback for every other consumer.
 */
export function useGlobalSpeech() {
  if (!globalSpeech) {
    globalSpeech = useSpeech({ autoCleanup: false })
  }
  return globalSpeech
}