feat(chat): add voice playback with auto-play and visual effects (#396)

## Features

### Core Functionality
- **Web Speech API Integration**: Add TTS (text-to-speech) playback for assistant messages
- **Manual Playback**: Click-to-play button next to each assistant message (🔊 icon)
- **Auto-play Mode**: Toggle switch in input bar to auto-play responses
- **Playback Controls**: Play/pause/stop with visual feedback

### User Interface
- **Playback Button**: Hover-activated button in message meta area (next to copy button)
- **Auto-play Switch**: Voice icon toggle in input top bar with state persistence
- **Mobile Optimization**: Buttons always visible on mobile (≤768px width)
- **Visual Feedback**:
  - Rainbow glowing border during playback (2px border, 10px/20px glow)
  - 4-second animation cycle through 6 colors
  - Play/pause icon toggle

### Voice Customization
- **Pitch/Rate Control**: Low-pitched (0.5) fast-speaking (1.2) "male voice"
- **Auto Voice Selection**: Attempts to select male voices across platforms (macOS: Yaoyao, Windows: David/Daniel)
- **Platform Compatibility**: Works with system-provided voices on macOS, iOS, Android, Windows

### Content Filtering
- Smart text extraction: filters code blocks, `<thinking>` tags, HTML
- Only assistant messages are eligible for playback
- Tool and system messages are excluded

### Internationalization
- Added 8 language translations (en, zh, de, es, fr, ja, ko, pt)
- New keys: `playSpeech`, `pauseSpeech`, `resumeSpeech`, `stopSpeech`, `autoPlaySpeech`, `speechNotSupported`

## Technical Details

### New Files
- `packages/client/src/composables/useSpeech.ts`: Core speech synthesis composable
  - Voice loading and selection logic
  - Single-instance global speech manager
  - Event handling (onstart, onend, onerror, onboundary)
  - State management (isPlaying, isPaused, currentMessageId)

### Modified Components
- **ChatInput.vue**: Auto-play switch with localStorage persistence
- **MessageItem.vue**:
  - Playback button integration
  - Event listener for auto-play triggers
  - Mobile-first button visibility
  - Rainbow border animation during playback

### Store Changes
- `chat.ts`:
  - Added `autoPlaySpeechEnabled` state
  - `setAutoPlaySpeech()` method
  - `playMessageSpeech()` method for event-based playback
  - Auto-play trigger on `run.completed` event

## Browser Support
- Requires Web Speech API support (all modern browsers)
- Graceful degradation: button hidden if API not supported
- Voice availability varies by platform and OS

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-05-02 13:26:57 +08:00
committed by GitHub
parent 4c8cff2e7c
commit caa9162f28
12 changed files with 624 additions and 13 deletions
@@ -4,7 +4,7 @@ import { useChatStore } from '@/stores/hermes/chat'
import { useAppStore } from '@/stores/hermes/app'
import { useProfilesStore } from '@/stores/hermes/profiles'
import { fetchContextLength } from '@/api/hermes/sessions'
import { NButton, NTooltip } from 'naive-ui'
import { NButton, NTooltip, NSwitch } from 'naive-ui'
import { computed, ref, onMounted, watch } from 'vue'
import { useI18n } from 'vue-i18n'
@@ -18,6 +18,26 @@ const isDragging = ref(false)
const dragCounter = ref(0)
const isComposing = ref(false)
// 自动播放语音开关
const autoPlaySpeech = ref(false)
// 从 localStorage 读取设置
onMounted(() => {
const saved = localStorage.getItem('autoPlaySpeech')
if (saved !== null) {
autoPlaySpeech.value = saved === 'true'
// 同步到 chat store
chatStore.setAutoPlaySpeech(autoPlaySpeech.value)
}
})
// 监听变化并保存
watch(autoPlaySpeech, (value) => {
localStorage.setItem('autoPlaySpeech', String(value))
// 通知 chat store
chatStore.setAutoPlaySpeech(value)
})
const canSend = computed(() => inputText.value.trim() || attachments.value.length > 0)
// --- Context info ---
@@ -195,7 +215,7 @@ function isImage(type: string): boolean {
<template>
<div class="chat-input-area">
<!-- Top bar: attach + context info -->
<!-- Top bar: attach + auto play speech + context info -->
<div class="input-top-bar">
<NTooltip trigger="hover">
<template #trigger>
@@ -207,6 +227,25 @@ function isImage(type: string): boolean {
</template>
{{ t('chat.attachFiles') }}
</NTooltip>
<div class="auto-play-speech-switch">
<NTooltip trigger="hover">
<template #trigger>
<div class="switch-label">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="5 3 19 12 5 21 5 3"/>
</svg>
</div>
</template>
{{ t('chat.autoPlaySpeech') }}
</NTooltip>
<NSwitch
size="small"
v-model:value="autoPlaySpeech"
:round="false"
/>
</div>
<span v-if="totalTokens > 0" class="context-info" :class="{ 'context-warning': usagePercent > 80 }">
{{ formatTokens(totalTokens) }} / {{ formatTokens(contextLength) }} · {{ t('chat.contextRemaining') }} {{ formatTokens(remainingTokens) }}
</span>
@@ -314,6 +353,26 @@ function isImage(type: string): boolean {
padding: 0 0 6px;
}
.auto-play-speech-switch {
display: flex;
align-items: center;
gap: 6px;
padding: 0 8px;
border-left: 1px solid $border-light;
margin-left: 4px;
.switch-label {
display: flex;
align-items: center;
color: $text-muted;
font-size: 12px;
svg {
opacity: 0.7;
}
}
}
.context-info {
font-size: 11px;
color: $text-muted;
@@ -1,6 +1,6 @@
<script setup lang="ts">
import type { Message } from "@/stores/hermes/chat";
import { computed, onBeforeUnmount, ref, watchEffect } from "vue";
import { computed, onBeforeUnmount, onMounted, ref, watchEffect } from "vue";
import { useI18n } from "vue-i18n";
import { useMessage } from "naive-ui";
import { downloadFile } from "@/api/hermes/download";
@@ -14,6 +14,7 @@ import {
handleCodeBlockCopyClick,
renderHighlightedCodeBlock,
} from "./highlight";
import { useGlobalSpeech } from "@/composables/useSpeech";
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
@@ -27,6 +28,7 @@ const previewUrl = ref<string | null>(null);
const chatStore = useChatStore();
const settingsStore = useSettingsStore();
const speech = useGlobalSpeech();
// Copy entire bubble content
const copyableContent = computed(() => {
@@ -278,6 +280,88 @@ const renderedToolResult = computed(() => {
toolResultPayload.value.language,
);
});
// 语音播放相关
const canPlaySpeech = computed(() => {
// 只有 assistant 消息可以播放,且浏览器支持 Web Speech API
return props.message.role === 'assistant' &&
speech.isSupported &&
copyableContent.value;
});
const isPlayingThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPlaying.value;
});
const isPausedThisMessage = computed(() => {
return speech.currentMessageId.value === props.message.id && speech.isPaused.value;
});
function handleSpeechToggle() {
if (!canPlaySpeech.value) {
console.log('Speech not supported or no content')
return
}
const content = props.message.content || ''
console.log('Toggling speech for message:', props.message.id)
console.log('Current playing:', speech.currentMessageId.value, speech.isPlaying.value)
console.log('Call stack:', new Error().stack)
// 尝试获取男声语音包
const allVoices = speech.getAllVoices()
let maleVoice = null
// 查找可能的男声语音包
for (const voice of allVoices) {
const name = voice.name.toLowerCase()
// 常见男声关键词
if (name.includes('male') || name.includes('david') || name.includes('daniel') ||
name.includes('mark') || name.includes('yaoyao') || name.includes('google')) {
// 优先选择中文男声
if (voice.lang.startsWith('zh')) {
maleVoice = voice
break
}
// 如果没有找到中文男声,记住第一个男声
if (!maleVoice) {
maleVoice = voice
}
}
}
console.log('Selected male voice:', maleVoice?.name, maleVoice?.lang)
// 快速男声:语速快、音调低
speech.toggle(props.message.id, content, {
pitch: 0.5, // 低沉
rate: 1.2, // 快速
voice: maleVoice || undefined, // 使用男声,如果没有就用默认
})
}
// 监听自动播放事件
let autoPlayHandler: ((e: Event) => void) | null = null
onMounted(() => {
autoPlayHandler = (e: Event) => {
const customEvent = e as CustomEvent<{ messageId: string; content: string }>
if (customEvent.detail.messageId === props.message.id && canPlaySpeech.value) {
console.log('Auto-play triggered for message:', props.message.id)
handleSpeechToggle()
}
}
window.addEventListener('auto-play-speech', autoPlayHandler)
})
// 组件卸载时停止播放并清理事件监听
onBeforeUnmount(() => {
if (autoPlayHandler) {
window.removeEventListener('auto-play-speech', autoPlayHandler)
}
if (speech.currentMessageId.value === props.message.id) {
speech.stop();
}
});
</script>
<template>
@@ -353,7 +437,7 @@ const renderedToolResult = computed(() => {
class="msg-avatar"
/>
<div class="msg-content" :class="message.role">
<div class="message-bubble" :class="{ system: isSystem }">
<div class="message-bubble" :class="{ system: isSystem, 'speech-playing': isPlayingThisMessage && !isPausedThisMessage }">
<div v-if="hasAttachments" class="msg-attachments">
<div
v-for="att in message.attachments"
@@ -442,6 +526,21 @@ const renderedToolResult = computed(() => {
</span>
</div>
<div class="message-meta">
<button
v-if="canPlaySpeech"
class="speech-bubble-btn"
:class="{ playing: isPlayingThisMessage, paused: isPausedThisMessage }"
@click="handleSpeechToggle"
:title="isPlayingThisMessage ? (isPausedThisMessage ? t('chat.resumeSpeech') : t('chat.pauseSpeech')) : t('chat.playSpeech')"
>
<svg v-if="!isPlayingThisMessage || isPausedThisMessage" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="5 3 19 12 5 21 5 3"/>
</svg>
<svg v-else width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<rect x="6" y="4" width="4" height="16"/>
<rect x="14" y="4" width="4" height="16"/>
</svg>
</button>
<button
v-if="copyableContent"
class="copy-bubble-btn"
@@ -472,12 +571,15 @@ const renderedToolResult = computed(() => {
.message {
display: flex;
flex-direction: column;
position: relative;
&.user {
align-items: flex-end;
.msg-body {
max-width: 75%;
position: relative;
z-index: 1;
}
.msg-content.user {
@@ -497,6 +599,8 @@ const renderedToolResult = computed(() => {
.msg-body {
max-width: 80%;
position: relative;
z-index: 1;
}
.msg-avatar {
@@ -518,13 +622,6 @@ const renderedToolResult = computed(() => {
&.system {
align-items: flex-start;
.message-bubble.system {
border-left: 3px solid $warning;
border-radius: $radius-sm;
max-width: 80%;
background-color: rgba(var(--warning-rgb), 0.06);
}
}
&.highlight {
@@ -534,6 +631,18 @@ const renderedToolResult = computed(() => {
}
}
@keyframes gradient-flow {
0% {
background-position: 0% 50%;
}
50% {
background-position: 100% 50%;
}
100% {
background-position: 0% 50%;
}
}
.msg-body {
display: flex;
align-items: flex-start;
@@ -554,6 +663,68 @@ const renderedToolResult = computed(() => {
word-break: break-word;
border-radius: 10px;
max-width: 100%;
position: relative;
box-sizing: border-box;
&.system {
border-left: 3px solid $warning;
border-radius: $radius-sm;
max-width: 80%;
background-color: rgba(var(--warning-rgb), 0.06);
}
&.speech-playing {
box-shadow:
0 0 0 2px #ff6b6b,
0 0 10px rgba(255, 107, 107, 0.4),
0 0 20px rgba(255, 107, 107, 0.2);
animation: rainbow-glow 4s linear infinite;
}
}
@keyframes rainbow-glow {
0% {
box-shadow:
0 0 0 2px #ff6b6b,
0 0 10px rgba(255, 107, 107, 0.4),
0 0 20px rgba(255, 107, 107, 0.2);
}
16.66% {
box-shadow:
0 0 0 2px #feca57,
0 0 10px rgba(254, 202, 87, 0.4),
0 0 20px rgba(254, 202, 87, 0.2);
}
33.33% {
box-shadow:
0 0 0 2px #48dbfb,
0 0 10px rgba(72, 219, 251, 0.4),
0 0 20px rgba(72, 219, 251, 0.2);
}
50% {
box-shadow:
0 0 0 2px #ff9ff3,
0 0 10px rgba(255, 159, 243, 0.4),
0 0 20px rgba(255, 159, 243, 0.2);
}
66.66% {
box-shadow:
0 0 0 2px #54a0ff,
0 0 10px rgba(84, 160, 255, 0.4),
0 0 20px rgba(84, 160, 255, 0.2);
}
83.33% {
box-shadow:
0 0 0 2px #5f27cd,
0 0 10px rgba(95, 39, 205, 0.4),
0 0 20px rgba(95, 39, 205, 0.2);
}
100% {
box-shadow:
0 0 0 2px #ff6b6b,
0 0 10px rgba(255, 107, 107, 0.4),
0 0 20px rgba(255, 107, 107, 0.2);
}
}
.msg-attachments {
@@ -673,9 +844,15 @@ const renderedToolResult = computed(() => {
.message:hover & {
opacity: 1;
}
// 移动端一直显示按钮
@media (max-width: 768px) {
opacity: 1;
}
}
.copy-bubble-btn {
.copy-bubble-btn,
.speech-bubble-btn {
display: flex;
align-items: center;
justify-content: center;
@@ -699,11 +876,32 @@ const renderedToolResult = computed(() => {
&:hover {
color: #cccccc;
background: rgba(255, 255, 255, 0.08);
background: rgba(255, 255, 255, 0.1);
}
}
}
.speech-bubble-btn {
&.playing {
color: var(--accent-primary);
animation: pulse 1.5s ease-in-out infinite;
&.paused {
animation: none;
opacity: 0.6;
}
}
}
@keyframes pulse {
0%, 100% {
opacity: 1;
}
50% {
opacity: 0.5;
}
}
.message-time {
font-size: 11px;
color: $text-muted;