diff --git a/package.json b/package.json index e4d03e7..1cf4a53 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hermes-web-ui", - "version": "0.5.6", + "version": "0.5.7", "description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration", "repository": { "type": "git", diff --git a/packages/client/src/data/changelog.ts b/packages/client/src/data/changelog.ts index 9e1bed7..bc55254 100644 --- a/packages/client/src/data/changelog.ts +++ b/packages/client/src/data/changelog.ts @@ -5,6 +5,15 @@ export interface ChangelogEntry { } export const changelog: ChangelogEntry[] = [ + { + version: '0.5.7', + date: '2026-05-02', + changes: [ + 'changelog.new_0_5_7_1', + 'changelog.new_0_5_7_2', + 'changelog.new_0_5_7_3', + ], + }, { version: '0.5.6', date: '2026-05-02', diff --git a/packages/client/src/i18n/locales/en.ts b/packages/client/src/i18n/locales/en.ts index c606fbc..77754dc 100644 --- a/packages/client/src/i18n/locales/en.ts +++ b/packages/client/src/i18n/locales/en.ts @@ -767,6 +767,9 @@ export default { new_0_5_6_6: 'Redesigned attachment handling using Anthropic-style ContentBlock array format with type discriminated unions (text, image, file)', new_0_5_6_7: 'Added frontend file download functionality supporting both ContentBlock and Markdown formats with authentication', new_0_5_6_8: 'Fixed multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances', + new_0_5_7_1: 'Optimize context compression to support rich content (images, files) with improved tool message handling', + new_0_5_7_2: 'Improve session sync with batch inserts and transaction protection for data consistency', + new_0_5_7_3: 'Fix usage.updated event reception to ensure accurate token tracking across runs', new_0_5_5_1: '🎉 Happy Labor Day! No work this Labor Day, please bear with us if there are any issues', new_0_5_5_2: 'Add History page for browsing Hermes session history', new_0_5_5_3: 'History page manages session state independently without interfering with active chat', diff --git a/packages/server/src/lib/context-compressor/index.ts b/packages/server/src/lib/context-compressor/index.ts index 2ebfa6e..d37f2d8 100644 --- a/packages/server/src/lib/context-compressor/index.ts +++ b/packages/server/src/lib/context-compressor/index.ts @@ -25,9 +25,16 @@ import { getDb } from '../../db/index' // ─── Types ─────────────────────────────────────────────── +export interface ContentBlock { + type: 'text' | 'image' | 'file' + text?: string + path?: string + source?: { type: string; media_type?: string; data?: string } +} + export interface ChatMessage { role: string - content: string + content: string | ContentBlock[] tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }> tool_call_id?: string name?: string @@ -224,9 +231,23 @@ Write only the summary body. Do not include any preamble or prefix.` function serializeForSummary(messages: ChatMessage[]): string { const parts: string[] = [] + + function contentToString(content: string | ContentBlock[]): string { + if (typeof content === 'string') return content + if (Array.isArray(content)) { + return content.map(block => { + if (block.type === 'text') return block.text || '' + if (block.type === 'image') return `[Image: ${block.path || ''}]` + if (block.type === 'file') return `[File: ${block.path || ''}]` + return '' + }).join('') + } + return '' + } + for (const msg of messages) { const role = msg.role === 'tool' ? `[tool:${msg.name || 'unknown'}]` : msg.role - let content = msg.content || '' + let content = contentToString(msg.content || '') if (msg.role === 'tool' && content.length > 5500) { content = content.slice(0, 4000) + '\n... [truncated]\n...' + content.slice(-1500) @@ -275,8 +296,41 @@ function buildConversationHistory(messages: ChatMessage[]): Array<{ role: string }).join('\n') const content = msg.content ? `${msg.content}\n\n${toolsInfo}` : toolsInfo result.push({ role: msg.role, content }) - } else if (msg.role === 'user' || msg.role === 'assistant' || msg.role === 'system') { - result.push({ role: msg.role, content: msg.content || '' }) + } else if (msg.role === 'user') { + // Handle ContentBlock[] format: { type: 'text', text: '...' } or { type: 'image', path: '...' } + let contentStr = '' + const content = msg.content || '' + if (typeof content === 'string') { + contentStr = content + } else if (Array.isArray(content)) { + for (const block of content) { + if (block.type === 'text') { + contentStr += block.text || '' + } else if (block.type === 'image') { + contentStr += `[Image: ${block.path || ''}]` + } else if (block.type === 'file') { + contentStr += `[File: ${block.path || ''}]` + } + } + } + result.push({ role: 'user', content: contentStr }) + } else if (msg.role === 'assistant' || msg.role === 'system') { + let contentStr = '' + const content = msg.content + if (typeof content === 'string') { + contentStr = content + } else if (Array.isArray(content)) { + for (const block of content) { + if (block.type === 'text') { + contentStr += block.text || '' + } else if (block.type === 'image') { + contentStr += `[Image: ${block.path || ''}]` + } else if (block.type === 'file') { + contentStr += `[File: ${block.path || ''}]` + } + } + } + result.push({ role: msg.role, content: contentStr }) } // Skip other roles } @@ -292,7 +346,15 @@ function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number): const pruned = head.map(msg => { if (msg.role !== 'tool') return msg - const content = msg.content || '' + let content = '' + if (typeof msg.content === 'string') { + content = msg.content + } else if (Array.isArray(msg.content)) { + content = msg.content.map(block => { + if (block.type === 'text') return block.text || '' + return `[${block.type}]` + }).join('') + } const preview = content.slice(0, 100).replace(/\n/g, ' ') const truncated = content.length > 100 ? '...' : '' return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${truncated}` } @@ -512,7 +574,7 @@ export class ChatContextCompressor { } const result: ChatMessage[] = [ - { role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary }, + { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary }, ...tail, ] @@ -575,7 +637,7 @@ export class ChatContextCompressor { const result: ChatMessage[] = [] if (summary) { - result.push({ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary }) + result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary }) if (sessionId) { saveCompressionSnapshot(sessionId, summary, tailStart - 1, total) } diff --git a/packages/server/src/services/hermes/chat-run-socket.ts b/packages/server/src/services/hermes/chat-run-socket.ts index 4a8e907..6e366dd 100644 --- a/packages/server/src/services/hermes/chat-run-socket.ts +++ b/packages/server/src/services/hermes/chat-run-socket.ts @@ -18,6 +18,7 @@ import { getSessionDetailPaginated, createSession, addMessage, + addMessages, updateSessionStats, useLocalSessionStore, } from '../../db/hermes/session-store' @@ -91,108 +92,20 @@ async function convertContentBlocks(blocks: ContentBlock[]): Promise { const compressor = new ChatContextCompressor() // --- Helper: Convert OpenAI format to Anthropic format --- -function convertToAnthropicFormat(messages: any[]): any[] { +function convertHistoryFormat(messages: any[]): any[] { const result: any[] = [] for (const m of messages) { const role = m.role const content = m.content || '' - - if (role === 'assistant') { - const blocks: any[] = [] - - // Add thinking block if reasoning_content exists - if (m.reasoning) { - blocks.push({ type: 'thinking', thinking: m.reasoning }) - } - - // Add text content - if (content) { - if (typeof content === 'string') { - blocks.push({ type: 'text', text: content }) - } else if (Array.isArray(content)) { - blocks.push(...content) - } - } - - // Add tool_use blocks - if (m.tool_calls && Array.isArray(m.tool_calls)) { - for (const tc of m.tool_calls) { - if (tc.id && tc.function) { - try { - const args = parseToolArguments(tc.function.arguments || '{}') - blocks.push({ - type: 'tool_use', - id: tc.id, - name: tc.function.name, - input: args - }) - } catch (e) { - logger.warn(e, '[chat-run-socket] failed to parse tool arguments for tool %s', tc.id) - blocks.push({ - type: 'tool_use', - id: tc.id, - name: tc.function.name, - input: {} - }) - } - } - } - } - - // Handle empty content - if (blocks.length === 0) { - blocks.push({ type: 'text', text: '' }) - } - - result.push({ role: 'assistant', content: blocks }) - continue - } - + delete m.reasoning_content if (role === 'tool') { // Convert tool message to tool_result in user message // Follow Hermes official format: content is a string (not array) - const toolContent = content || '(no output)' - - // Normalize tool_result content to string format - // Use robust LLM JSON parser if content looks like JSON - let resultContent: string - if (typeof toolContent === 'string') { - try { - // Try to parse as JSON first (handles Python format, single quotes, etc.) - const parsed = parseLLMJSON(toolContent, 2) - // Re-serialize to ensure clean JSON string - resultContent = JSON.stringify(parsed) - } catch { - // Not valid JSON, use as-is - resultContent = toolContent - } - } else if (typeof toolContent === 'object' && toolContent !== null) { - // Object or array, serialize to JSON string - resultContent = JSON.stringify(toolContent) - } else { - // Primitive type (null, undefined, number, boolean) - resultContent = String(toolContent !== null && toolContent !== undefined ? toolContent : '(no output)') - } - - const toolResult = { - type: 'tool_result', - tool_use_id: m.tool_call_id || '', - content: resultContent - } - - // Merge with previous user message if it ends with tool_result - if ( - result.length > 0 && - result[result.length - 1].role === 'user' && - Array.isArray(result[result.length - 1].content) && - result[result.length - 1].content.length > 0 && - result[result.length - 1].content[result[result.length - 1].content.length - 1].type === 'tool_result' - ) { - result[result.length - 1].content.push(toolResult) - } else { - result.push({ role: 'user', content: [toolResult] }) - } + let pushItem = { ...m } + pushItem.role = 'user' + pushItem.content = `[Tool result: ${content}]` + result.push(pushItem) continue } @@ -200,16 +113,17 @@ function convertToAnthropicFormat(messages: any[]): any[] { if (role === 'user') { // Format: { role: 'user', content: [{ type: 'text', text: '...' }] } if (typeof content === 'string') { - result.push({ role: 'user', content: [{ type: 'text', text: content || '' }] }) + result.push({ role: 'user', content: content }) } else if (Array.isArray(content)) { // Already in array format, assume it's correct - result.push({ role: 'user', content }) - } else if (content) { - // Fallback for other types - result.push({ role: 'user', content: [{ type: 'text', text: String(content) }] }) + result.push({ role: 'user', content: convertContentBlocks(content) }) } continue } + if (role === 'assistant') { + result.push({ ...m }) + continue + } } return result } @@ -455,18 +369,23 @@ export class ChatRunSocket { : await getSessionDetailFromDb(sid) const messages = detail?.messages ? this.handleMessage(detail.messages, sid) : [] // Calculate context tokens — aware of compression snapshot + let inputTokens: number + let outputTokens: number const snapshot = getCompressionSnapshot(sid) if (snapshot) { const newMessages = messages.slice(snapshot.lastMessageIndex + 1) inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) + - newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0) + newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0) + outputTokens = newMessages + .filter(m => m.role === 'assistant' || m.role === 'tool') + .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0) } else { - inputTokens = messages.reduce((sum, m) => sum + countTokens(m.content || ''), 0) + inputTokens = messages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0) + outputTokens = messages + .filter(m => m.role === 'assistant' || m.role === 'tool') + .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0) } - const outputTokens = messages - .filter(m => m.role === 'assistant') - .reduce((sum, m) => sum + countTokens(m.content || ''), 0) state = { messages, isWorking: false, @@ -650,6 +569,7 @@ export class ChatRunSocket { const newMessages = history.slice(snapshot.lastMessageIndex + 1) logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)', session_id, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens) + // triggerTokens if (totalTokens <= triggerTokens) { // Under threshold — use assembled context directly, no LLM call needed history = [ @@ -879,7 +799,7 @@ export class ChatRunSocket { // Convert conversation_history from OpenAI format to Anthropic format if (body.conversation_history && Array.isArray(body.conversation_history)) { - body.conversation_history = convertToAnthropicFormat(body.conversation_history) + body.conversation_history = convertHistoryFormat(body.conversation_history) } const res = await fetch(`${upstream}/v1/runs`, { method: 'POST', @@ -1188,17 +1108,20 @@ export class ChatRunSocket { const snapshot = getCompressionSnapshot(sid) let inputTokens: number + let outputTokens: number if (snapshot && msgs.length) { const newMessages = msgs.slice(snapshot.lastMessageIndex + 1) inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) + - newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0) + newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0) + outputTokens = newMessages + .filter(m => m.role === 'assistant' || m.role === 'tool') + .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0) } else { - inputTokens = msgs.reduce((sum, m) => sum + countTokens(m.content || ''), 0) + inputTokens = msgs.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0) + outputTokens = msgs + .filter(m => m.role === 'assistant' || m.role === 'tool') + .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0) } - - const outputTokens = msgs - .filter(m => m.role === 'assistant') - .reduce((sum, m) => sum + countTokens(m.content || ''), 0) state.inputTokens = inputTokens state.outputTokens = outputTokens emit('usage.updated', { @@ -1277,13 +1200,14 @@ export class ChatRunSocket { logger.info('[chat-run-socket] syncFromHermes: merged reasoning for %d messages', mergedCount) } - for (const msg of toInsert) { + // Batch insert with transaction for atomicity + addMessages(toInsert.map(msg => { // Resolve tool_name from assistant's tool_calls if missing let toolName = msg.tool_name || null if (!toolName && msg.tool_call_id) { toolName = toolNameMap.get(msg.tool_call_id) || null } - addMessage({ + return { session_id: localSessionId, role: msg.role, content: msg.content || '', @@ -1293,12 +1217,13 @@ export class ChatRunSocket { timestamp: msg.timestamp || Math.floor(Date.now() / 1000), token_count: msg.token_count || null, finish_reason: msg.finish_reason || null, - reasoning: msg.reasoning || null, // Now includes merged reasoning + reasoning: msg.reasoning || null, reasoning_details: msg.reasoning_details || null, reasoning_content: msg.reasoning_content || null, codex_reasoning_items: msg.codex_reasoning_items || null, - }) - } + } + })) + logger.info('[chat-run-socket] syncFromHermes: synced %d messages to local session %s', toInsert.length, localSessionId) } @@ -1324,7 +1249,12 @@ export class ChatRunSocket { this.replaceByHermesSessionId(localSessionId, hermesSessionId, messages) } const emit = (event: string, payload: any) => { - socket.emit(event, { ...payload, session_id: localSessionId }) + const tagged = localSessionId ? { ...payload, localSessionId } : payload + if (localSessionId) { + this.nsp.to(`session:${localSessionId}`).emit(event, tagged) + } else if (socket.connected) { + socket.emit(event, tagged) + } } this.calcAndUpdateUsage(localSessionId, state, emit) }