align bridge multimodal handling (#755)

This commit is contained in:
ekko
2026-05-15 14:47:29 +08:00
committed by GitHub
parent 84e98cfb5e
commit 94f1061734
13 changed files with 53 additions and 4 deletions
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "hermes-web-ui",
"version": "0.5.23",
"version": "0.5.24",
"description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model web UI with multi-platform integration",
"repository": {
"type": "git",
+7
View File
@@ -5,6 +5,13 @@ export interface ChangelogEntry {
}
export const changelog: ChangelogEntry[] = [
{
version: '0.5.24',
date: '2026-05-15',
changes: [
'changelog.new_0_5_24_1',
],
},
{
version: '0.5.23',
date: '2026-05-15',
+1
View File
@@ -876,6 +876,7 @@ jobTriggered: 'Job ausgelost',
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis',
new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt',
new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats',
+1
View File
@@ -1153,6 +1153,7 @@ export default {
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization',
new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events',
+1
View File
@@ -872,6 +872,7 @@ jobTriggered: 'Job ejecutado',
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión',
new_0_5_5_2: 'Añadida página de historial para sesiones Hermes',
new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente',
+1
View File
@@ -871,6 +871,7 @@ jobTriggered: 'Job declenche',
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension',
new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes',
new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante',
+1
View File
@@ -872,6 +872,7 @@ export default {
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 労働者の日!今日はお休みです、何卒ご理解ください',
new_0_5_5_2: 'Hermesセッション履歴ページを追加',
new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理',
+1
View File
@@ -872,6 +872,7 @@ export default {
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다',
new_0_5_5_2: 'Hermes 세션 기록 페이지 추가',
new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리',
+1
View File
@@ -872,6 +872,7 @@ jobTriggered: 'Job acionado',
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão',
new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes',
new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente',
@@ -1155,6 +1155,7 @@ export default {
new_0_5_23_3: '隔離 gateway profile 環境變數,防止憑證在不同 profile 之間串用',
new_0_5_23_4: 'gateway 分配連接埠時保留 Web UI 連接埠,避免啟動連接埠衝突',
new_0_5_23_5: '修復自更新重啟邏輯,避免將 restart helper 的成功退出誤報為失敗',
new_0_5_24_1: '對齊 Bridge 聊天與 API Server 的多模態輸入、系統提示詞和工作區上下文處理',
new_0_5_6_1: '新增語音播放功能:使用 Web Speech API,支援手動播放按鈕、自動播放開關、彩虹邊框動畫和行動端最佳化',
new_0_5_6_2: '新增強健的 LLM JSON 解析器,相容 Python 格式並從串流事件中擷取文字',
new_0_5_6_3: 'Skills 功能增強:使用統計、來源過濾、封存技能、來源追溯和釘選切換',
+1
View File
@@ -1155,6 +1155,7 @@ export default {
new_0_5_23_3: '隔离 gateway profile 环境变量,防止凭据在不同 profile 之间串用',
new_0_5_23_4: 'gateway 分配端口时保留 Web UI 端口,避免启动端口冲突',
new_0_5_23_5: '修复自更新重启逻辑,避免将 restart helper 的成功退出误报为失败',
new_0_5_24_1: '对齐 Bridge 聊天与 API Server 的多模态输入、系统提示词和工作区上下文处理',
new_0_5_6_1: '新增语音播放功能:使用 Web Speech API,支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化',
new_0_5_6_2: '新增健壮的 LLM JSON 解析器,兼容 Python 格式并从流式事件中提取文本',
@@ -54,3 +54,20 @@ export async function convertContentBlocks(blocks: ContentBlock[]): Promise<Arra
return parts
}
/**
* Convert ContentBlock[] to the normalized multimodal shape Hermes agent
* receives after /v1/responses input normalization.
*/
export async function convertContentBlocksForAgent(blocks: ContentBlock[]): Promise<Array<{ type: string; text?: string; image_url?: { url: string } }>> {
const responseParts = await convertContentBlocks(blocks)
return responseParts.map((part) => {
if (part.type === 'input_text') {
return { type: 'text', text: part.text || '' }
}
if (part.type === 'input_image') {
return { type: 'image_url', image_url: { url: part.image_url || '' } }
}
return { type: 'text', text: part.text || '' }
})
}
@@ -4,11 +4,12 @@
*/
import type { Server, Socket } from 'socket.io'
import { getSystemPrompt } from '../../../lib/llm-prompt'
import { getSession, createSession, addMessage, updateSessionStats } from '../../../db/hermes/session-store'
import { updateUsage } from '../../../db/hermes/usage-store'
import { logger, bridgeLogger } from '../../logger'
import { AgentBridgeClient, type AgentBridgeMessage, type AgentBridgeOutput } from '../agent-bridge'
import { contentBlocksToString, extractTextForPreview } from './content-blocks'
import { contentBlocksToString, convertContentBlocksForAgent, extractTextForPreview, isContentBlockArray } from './content-blocks'
import { buildCompressedHistory } from './compression'
import { pushState, replaceState } from './compression'
import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
@@ -22,6 +23,7 @@ import {
import { forceCompressBridgeHistory } from './compression'
import { summarizeToolArguments } from './response-utils'
import { buildDbHistory } from './compression'
import { convertHistoryFormat } from './message-format'
import type { ContentBlock, SessionState } from './types'
import type { ChatMessage } from '../../../lib/context-compressor'
@@ -45,6 +47,15 @@ export async function handleBridgeRun(
return
}
let fullInstructions = instructions
? `${getSystemPrompt()}\n${instructions}`
: getSystemPrompt()
const sessionRow = getSession(session_id)
if (sessionRow?.workspace) {
const workspaceCtx = `[Current working directory: ${sessionRow.workspace}]`
fullInstructions = `\n${workspaceCtx}\n${fullInstructions}`
}
const runMarker = `cli_run_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`
const now = Math.floor(Date.now() / 1000)
let state = sessionMap.get(session_id)
@@ -107,17 +118,22 @@ export async function handleBridgeRun(
emit,
sessionMap,
)
const bridgeHistory = history.length > 0 ? convertHistoryFormat(history) : history
try {
const bridgeInput = isContentBlockArray(input)
? await convertContentBlocksForAgent(input)
: input
logger.info('[chat-run-socket] starting CLI bridge run for session %s', session_id)
bridgeLogger.info({
sessionId: session_id,
profile,
inputChars: inputStr.length,
historyMessages: history.length,
hasInstructions: Boolean(instructions),
hasInstructions: Boolean(fullInstructions),
multimodalInput: isContentBlockArray(input),
}, '[chat-run-socket] starting CLI bridge run')
const started = await bridge.chat(session_id, input as AgentBridgeMessage, history, instructions, profile)
const started = await bridge.chat(session_id, bridgeInput as AgentBridgeMessage, bridgeHistory, fullInstructions, profile)
state.runId = started.run_id
bridgeLogger.info({
sessionId: session_id,