align bridge multimodal handling (#755)
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "hermes-web-ui",
|
||||
"version": "0.5.23",
|
||||
"version": "0.5.24",
|
||||
"description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model web UI with multi-platform integration",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
@@ -5,6 +5,13 @@ export interface ChangelogEntry {
|
||||
}
|
||||
|
||||
export const changelog: ChangelogEntry[] = [
|
||||
{
|
||||
version: '0.5.24',
|
||||
date: '2026-05-15',
|
||||
changes: [
|
||||
'changelog.new_0_5_24_1',
|
||||
],
|
||||
},
|
||||
{
|
||||
version: '0.5.23',
|
||||
date: '2026-05-15',
|
||||
|
||||
@@ -876,6 +876,7 @@ jobTriggered: 'Job ausgelost',
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis',
|
||||
new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt',
|
||||
new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats',
|
||||
|
||||
@@ -1153,6 +1153,7 @@ export default {
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
|
||||
new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization',
|
||||
new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events',
|
||||
|
||||
@@ -872,6 +872,7 @@ jobTriggered: 'Job ejecutado',
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión',
|
||||
new_0_5_5_2: 'Añadida página de historial para sesiones Hermes',
|
||||
new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente',
|
||||
|
||||
@@ -871,6 +871,7 @@ jobTriggered: 'Job declenche',
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension',
|
||||
new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes',
|
||||
new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante',
|
||||
|
||||
@@ -872,6 +872,7 @@ export default {
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 労働者の日!今日はお休みです、何卒ご理解ください',
|
||||
new_0_5_5_2: 'Hermesセッション履歴ページを追加',
|
||||
new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理',
|
||||
|
||||
@@ -872,6 +872,7 @@ export default {
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다',
|
||||
new_0_5_5_2: 'Hermes 세션 기록 페이지 추가',
|
||||
new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리',
|
||||
|
||||
@@ -872,6 +872,7 @@ jobTriggered: 'Job acionado',
|
||||
new_0_5_23_3: 'Isolate gateway profile environment variables to prevent credentials leaking across profiles',
|
||||
new_0_5_23_4: 'Reserve the Web UI port during gateway allocation to avoid startup conflicts',
|
||||
new_0_5_23_5: 'Fix self-update restart handling so successful helper exits are not reported as failures',
|
||||
new_0_5_24_1: 'Align Bridge chat with API Server handling for multimodal input, system prompt, and workspace context',
|
||||
new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão',
|
||||
new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes',
|
||||
new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente',
|
||||
|
||||
@@ -1155,6 +1155,7 @@ export default {
|
||||
new_0_5_23_3: '隔離 gateway profile 環境變數,防止憑證在不同 profile 之間串用',
|
||||
new_0_5_23_4: 'gateway 分配連接埠時保留 Web UI 連接埠,避免啟動連接埠衝突',
|
||||
new_0_5_23_5: '修復自更新重啟邏輯,避免將 restart helper 的成功退出誤報為失敗',
|
||||
new_0_5_24_1: '對齊 Bridge 聊天與 API Server 的多模態輸入、系統提示詞和工作區上下文處理',
|
||||
new_0_5_6_1: '新增語音播放功能:使用 Web Speech API,支援手動播放按鈕、自動播放開關、彩虹邊框動畫和行動端最佳化',
|
||||
new_0_5_6_2: '新增強健的 LLM JSON 解析器,相容 Python 格式並從串流事件中擷取文字',
|
||||
new_0_5_6_3: 'Skills 功能增強:使用統計、來源過濾、封存技能、來源追溯和釘選切換',
|
||||
|
||||
@@ -1155,6 +1155,7 @@ export default {
|
||||
new_0_5_23_3: '隔离 gateway profile 环境变量,防止凭据在不同 profile 之间串用',
|
||||
new_0_5_23_4: 'gateway 分配端口时保留 Web UI 端口,避免启动端口冲突',
|
||||
new_0_5_23_5: '修复自更新重启逻辑,避免将 restart helper 的成功退出误报为失败',
|
||||
new_0_5_24_1: '对齐 Bridge 聊天与 API Server 的多模态输入、系统提示词和工作区上下文处理',
|
||||
|
||||
new_0_5_6_1: '新增语音播放功能:使用 Web Speech API,支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化',
|
||||
new_0_5_6_2: '新增健壮的 LLM JSON 解析器,兼容 Python 格式并从流式事件中提取文本',
|
||||
|
||||
@@ -54,3 +54,20 @@ export async function convertContentBlocks(blocks: ContentBlock[]): Promise<Arra
|
||||
|
||||
return parts
|
||||
}
|
||||
|
||||
/**
 * Translate ContentBlock[] into the normalized multimodal part list the
 * Hermes agent receives after /v1/responses input normalization.
 *
 * The blocks are first passed through convertContentBlocks, then every
 * resulting part is re-tagged:
 *   - `input_image` parts become `{ type: 'image_url', image_url: { url } }`
 *   - every other part (including `input_text`) becomes `{ type: 'text', text }`
 * A missing `text` or `image_url` value falls back to an empty string.
 *
 * @param blocks Content blocks to convert.
 * @returns The re-tagged parts, in the order convertContentBlocks produced them.
 */
export async function convertContentBlocksForAgent(blocks: ContentBlock[]): Promise<Array<{ type: string; text?: string; image_url?: { url: string } }>> {
  const agentParts: Array<{ type: string; text?: string; image_url?: { url: string } }> = []

  for (const part of await convertContentBlocks(blocks)) {
    if (part.type === 'input_image') {
      // Wrap the image reference in an object under `url`.
      agentParts.push({ type: 'image_url', image_url: { url: part.image_url || '' } })
    } else {
      // `input_text` and any unrecognized part type are both emitted as text.
      agentParts.push({ type: 'text', text: part.text || '' })
    }
  }

  return agentParts
}
|
||||
|
||||
@@ -4,11 +4,12 @@
|
||||
*/
|
||||
|
||||
import type { Server, Socket } from 'socket.io'
|
||||
import { getSystemPrompt } from '../../../lib/llm-prompt'
|
||||
import { getSession, createSession, addMessage, updateSessionStats } from '../../../db/hermes/session-store'
|
||||
import { updateUsage } from '../../../db/hermes/usage-store'
|
||||
import { logger, bridgeLogger } from '../../logger'
|
||||
import { AgentBridgeClient, type AgentBridgeMessage, type AgentBridgeOutput } from '../agent-bridge'
|
||||
import { contentBlocksToString, extractTextForPreview } from './content-blocks'
|
||||
import { contentBlocksToString, convertContentBlocksForAgent, extractTextForPreview, isContentBlockArray } from './content-blocks'
|
||||
import { buildCompressedHistory } from './compression'
|
||||
import { pushState, replaceState } from './compression'
|
||||
import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
|
||||
@@ -22,6 +23,7 @@ import {
|
||||
import { forceCompressBridgeHistory } from './compression'
|
||||
import { summarizeToolArguments } from './response-utils'
|
||||
import { buildDbHistory } from './compression'
|
||||
import { convertHistoryFormat } from './message-format'
|
||||
import type { ContentBlock, SessionState } from './types'
|
||||
import type { ChatMessage } from '../../../lib/context-compressor'
|
||||
|
||||
@@ -45,6 +47,15 @@ export async function handleBridgeRun(
|
||||
return
|
||||
}
|
||||
|
||||
let fullInstructions = instructions
|
||||
? `${getSystemPrompt()}\n${instructions}`
|
||||
: getSystemPrompt()
|
||||
const sessionRow = getSession(session_id)
|
||||
if (sessionRow?.workspace) {
|
||||
const workspaceCtx = `[Current working directory: ${sessionRow.workspace}]`
|
||||
fullInstructions = `\n${workspaceCtx}\n${fullInstructions}`
|
||||
}
|
||||
|
||||
const runMarker = `cli_run_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`
|
||||
const now = Math.floor(Date.now() / 1000)
|
||||
let state = sessionMap.get(session_id)
|
||||
@@ -107,17 +118,22 @@ export async function handleBridgeRun(
|
||||
emit,
|
||||
sessionMap,
|
||||
)
|
||||
const bridgeHistory = history.length > 0 ? convertHistoryFormat(history) : history
|
||||
|
||||
try {
|
||||
const bridgeInput = isContentBlockArray(input)
|
||||
? await convertContentBlocksForAgent(input)
|
||||
: input
|
||||
logger.info('[chat-run-socket] starting CLI bridge run for session %s', session_id)
|
||||
bridgeLogger.info({
|
||||
sessionId: session_id,
|
||||
profile,
|
||||
inputChars: inputStr.length,
|
||||
historyMessages: history.length,
|
||||
hasInstructions: Boolean(instructions),
|
||||
hasInstructions: Boolean(fullInstructions),
|
||||
multimodalInput: isContentBlockArray(input),
|
||||
}, '[chat-run-socket] starting CLI bridge run')
|
||||
const started = await bridge.chat(session_id, input as AgentBridgeMessage, history, instructions, profile)
|
||||
const started = await bridge.chat(session_id, bridgeInput as AgentBridgeMessage, bridgeHistory, fullInstructions, profile)
|
||||
state.runId = started.run_id
|
||||
bridgeLogger.info({
|
||||
sessionId: session_id,
|
||||
|
||||
Reference in New Issue
Block a user