feat: optimize context compression and session sync (#402)

* feat: optimize context compression and session sync Context Compressor: - Add ContentBlock type support for user messages with images/files - Enhance buildConversationHistory to handle ContentBlock[] format - Update serializeForSummary to convert ContentBlock[] to text - Support mixed content (text, images, files) in history compression Session Sync: - Replace loop of addMessage with batch addMessages (transaction-protected) - Use addMessages for atomic bulk insert with BEGIN/COMMIT - Ensure data consistency before calcAndUpdateUsage reads DB Chat Run Socket: - Remove convertHistoryFormat before compression (compressor handles raw format) - Compressor now processes original messages for better tool message handling - Improve compression quality by preserving original message structure Benefits: - Better compression quality with proper tool message recognition - Improved performance with batch DB inserts - Strong data consistency with transaction protection - Support for rich content (images/files) in conversation history Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore: bump version to 0.5.7 - Prepare for release with context compression and session sync optimizations Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore: add 0.5.7 changelog entries - Optimize context compression to support rich content (images, files) - Improve session sync with batch inserts and transaction protection - Fix usage.updated event reception for accurate token tracking Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 20:12:58 +08:00
parent 8af1951f13
commit 4a9967ebdd
5 changed files with 129 additions and 125 deletions
@@ -1,6 +1,6 @@
 {
  "name": "hermes-web-ui",
-  "version": "0.5.6",
+  "version": "0.5.7",
  "description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration",
  "repository": {
    "type": "git",
@@ -5,6 +5,15 @@ export interface ChangelogEntry {
 }

 export const changelog: ChangelogEntry[] = [
+  {
+    version: '0.5.7',
+    date: '2026-05-02',
+    changes: [
+      'changelog.new_0_5_7_1',
+      'changelog.new_0_5_7_2',
+      'changelog.new_0_5_7_3',
+    ],
+  },
  {
    version: '0.5.6',
    date: '2026-05-02',
@@ -767,6 +767,9 @@ export default {
    new_0_5_6_6: 'Redesigned attachment handling using Anthropic-style ContentBlock array format with type discriminated unions (text, image, file)',
    new_0_5_6_7: 'Added frontend file download functionality supporting both ContentBlock and Markdown formats with authentication',
    new_0_5_6_8: 'Fixed multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances',
+    new_0_5_7_1: 'Optimize context compression to support rich content (images, files) with improved tool message handling',
+    new_0_5_7_2: 'Improve session sync with batch inserts and transaction protection for data consistency',
+    new_0_5_7_3: 'Fix usage.updated event reception to ensure accurate token tracking across runs',
    new_0_5_5_1: '🎉 Happy Labor Day! No work this Labor Day, please bear with us if there are any issues',
    new_0_5_5_2: 'Add History page for browsing Hermes session history',
    new_0_5_5_3: 'History page manages session state independently without interfering with active chat',
@@ -25,9 +25,16 @@ import { getDb } from '../../db/index'

 // ─── Types ───────────────────────────────────────────────

+export interface ContentBlock {
+  type: 'text' | 'image' | 'file'
+  text?: string
+  path?: string
+  source?: { type: string; media_type?: string; data?: string }
+}
+
 export interface ChatMessage {
  role: string
-  content: string
+  content: string | ContentBlock[]
  tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }>
  tool_call_id?: string
  name?: string
@@ -224,9 +231,23 @@ Write only the summary body. Do not include any preamble or prefix.`

 function serializeForSummary(messages: ChatMessage[]): string {
  const parts: string[] = []
+
+  function contentToString(content: string | ContentBlock[]): string {
+    if (typeof content === 'string') return content
+    if (Array.isArray(content)) {
+      return content.map(block => {
+        if (block.type === 'text') return block.text || ''
+        if (block.type === 'image') return `[Image: ${block.path || ''}]`
+        if (block.type === 'file') return `[File: ${block.path || ''}]`
+        return ''
+      }).join('')
+    }
+    return ''
+  }
+
  for (const msg of messages) {
    const role = msg.role === 'tool' ? `[tool:${msg.name || 'unknown'}]` : msg.role
-    let content = msg.content || ''
+    let content = contentToString(msg.content || '')

    if (msg.role === 'tool' && content.length > 5500) {
      content = content.slice(0, 4000) + '\n... [truncated]\n...' + content.slice(-1500)
@@ -275,8 +296,41 @@ function buildConversationHistory(messages: ChatMessage[]): Array<{ role: string
      }).join('\n')
      const content = msg.content ? `${msg.content}\n\n${toolsInfo}` : toolsInfo
      result.push({ role: msg.role, content })
-    } else if (msg.role === 'user' || msg.role === 'assistant' || msg.role === 'system') {
-      result.push({ role: msg.role, content: msg.content || '' })
+    } else if (msg.role === 'user') {
+      // Handle ContentBlock[] format: { type: 'text', text: '...' } or { type: 'image', path: '...' }
+      let contentStr = ''
+      const content = msg.content || ''
+      if (typeof content === 'string') {
+        contentStr = content
+      } else if (Array.isArray(content)) {
+        for (const block of content) {
+          if (block.type === 'text') {
+            contentStr += block.text || ''
+          } else if (block.type === 'image') {
+            contentStr += `[Image: ${block.path || ''}]`
+          } else if (block.type === 'file') {
+            contentStr += `[File: ${block.path || ''}]`
+          }
+        }
+      }
+      result.push({ role: 'user', content: contentStr })
+    } else if (msg.role === 'assistant' || msg.role === 'system') {
+      let contentStr = ''
+      const content = msg.content
+      if (typeof content === 'string') {
+        contentStr = content
+      } else if (Array.isArray(content)) {
+        for (const block of content) {
+          if (block.type === 'text') {
+            contentStr += block.text || ''
+          } else if (block.type === 'image') {
+            contentStr += `[Image: ${block.path || ''}]`
+          } else if (block.type === 'file') {
+            contentStr += `[File: ${block.path || ''}]`
+          }
+        }
+      }
+      result.push({ role: msg.role, content: contentStr })
    }
    // Skip other roles
  }
@@ -292,7 +346,15 @@ function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number):

  const pruned = head.map(msg => {
    if (msg.role !== 'tool') return msg
-    const content = msg.content || ''
+    let content = ''
+    if (typeof msg.content === 'string') {
+      content = msg.content
+    } else if (Array.isArray(msg.content)) {
+      content = msg.content.map(block => {
+        if (block.type === 'text') return block.text || ''
+        return `[${block.type}]`
+      }).join('')
+    }
    const preview = content.slice(0, 100).replace(/\n/g, ' ')
    const truncated = content.length > 100 ? '...' : ''
    return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${truncated}` }
@@ -512,7 +574,7 @@ export class ChatContextCompressor {
    }

    const result: ChatMessage[] = [
-      { role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary },
+      { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
      ...tail,
    ]

@@ -575,7 +637,7 @@ export class ChatContextCompressor {
    const result: ChatMessage[] = []

    if (summary) {
-      result.push({ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary })
+      result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
      if (sessionId) {
        saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
      }
@@ -18,6 +18,7 @@ import {
  getSessionDetailPaginated,
  createSession,
  addMessage,
+  addMessages,
  updateSessionStats,
  useLocalSessionStore,
 } from '../../db/hermes/session-store'
@@ -91,108 +92,20 @@ async function convertContentBlocks(blocks: ContentBlock[]): Promise<string> {
 const compressor = new ChatContextCompressor()

 // --- Helper: Convert OpenAI format to Anthropic format ---
-function convertToAnthropicFormat(messages: any[]): any[] {
+function convertHistoryFormat(messages: any[]): any[] {
  const result: any[] = []

  for (const m of messages) {
    const role = m.role
    const content = m.content || ''
-
-    if (role === 'assistant') {
-      const blocks: any[] = []
-
-      // Add thinking block if reasoning_content exists
-      if (m.reasoning) {
-        blocks.push({ type: 'thinking', thinking: m.reasoning })
-      }
-
-      // Add text content
-      if (content) {
-        if (typeof content === 'string') {
-          blocks.push({ type: 'text', text: content })
-        } else if (Array.isArray(content)) {
-          blocks.push(...content)
-        }
-      }
-
-      // Add tool_use blocks
-      if (m.tool_calls && Array.isArray(m.tool_calls)) {
-        for (const tc of m.tool_calls) {
-          if (tc.id && tc.function) {
-            try {
-              const args = parseToolArguments(tc.function.arguments || '{}')
-              blocks.push({
-                type: 'tool_use',
-                id: tc.id,
-                name: tc.function.name,
-                input: args
-              })
-            } catch (e) {
-              logger.warn(e, '[chat-run-socket] failed to parse tool arguments for tool %s', tc.id)
-              blocks.push({
-                type: 'tool_use',
-                id: tc.id,
-                name: tc.function.name,
-                input: {}
-              })
-            }
-          }
-        }
-      }
-
-      // Handle empty content
-      if (blocks.length === 0) {
-        blocks.push({ type: 'text', text: '' })
-      }
-
-      result.push({ role: 'assistant', content: blocks })
-      continue
-    }
-
+    delete m.reasoning_content
    if (role === 'tool') {
      // Convert tool message to tool_result in user message
      // Follow Hermes official format: content is a string (not array)
-      const toolContent = content || '(no output)'
-
-      // Normalize tool_result content to string format
-      // Use robust LLM JSON parser if content looks like JSON
-      let resultContent: string
-      if (typeof toolContent === 'string') {
-        try {
-          // Try to parse as JSON first (handles Python format, single quotes, etc.)
-          const parsed = parseLLMJSON(toolContent, 2)
-          // Re-serialize to ensure clean JSON string
-          resultContent = JSON.stringify(parsed)
-        } catch {
-          // Not valid JSON, use as-is
-          resultContent = toolContent
-        }
-      } else if (typeof toolContent === 'object' && toolContent !== null) {
-        // Object or array, serialize to JSON string
-        resultContent = JSON.stringify(toolContent)
-      } else {
-        // Primitive type (null, undefined, number, boolean)
-        resultContent = String(toolContent !== null && toolContent !== undefined ? toolContent : '(no output)')
-      }
-
-      const toolResult = {
-        type: 'tool_result',
-        tool_use_id: m.tool_call_id || '',
-        content: resultContent
-      }
-
-      // Merge with previous user message if it ends with tool_result
-      if (
-        result.length > 0 &&
-        result[result.length - 1].role === 'user' &&
-        Array.isArray(result[result.length - 1].content) &&
-        result[result.length - 1].content.length > 0 &&
-        result[result.length - 1].content[result[result.length - 1].content.length - 1].type === 'tool_result'
-      ) {
-        result[result.length - 1].content.push(toolResult)
-      } else {
-        result.push({ role: 'user', content: [toolResult] })
-      }
+      let pushItem = { ...m }
+      pushItem.role = 'user'
+      pushItem.content = `[Tool result: ${content}]`
+      result.push(pushItem)
      continue
    }

@@ -200,16 +113,17 @@ function convertToAnthropicFormat(messages: any[]): any[] {
    if (role === 'user') {
      // Format: { role: 'user', content: [{ type: 'text', text: '...' }] }
      if (typeof content === 'string') {
-        result.push({ role: 'user', content: [{ type: 'text', text: content || '' }] })
+        result.push({ role: 'user', content: content })
      } else if (Array.isArray(content)) {
        // Already in array format, assume it's correct
-        result.push({ role: 'user', content })
-      } else if (content) {
-        // Fallback for other types
-        result.push({ role: 'user', content: [{ type: 'text', text: String(content) }] })
+        result.push({ role: 'user', content: convertContentBlocks(content) })
      }
      continue
    }
+    if (role === 'assistant') {
+      result.push({ ...m })
+      continue
+    }
  }
  return result
 }
@@ -455,18 +369,23 @@ export class ChatRunSocket {
          : await getSessionDetailFromDb(sid)
        const messages = detail?.messages ? this.handleMessage(detail.messages, sid) : []
        // Calculate context tokens — aware of compression snapshot
+
        let inputTokens: number
+        let outputTokens: number
        const snapshot = getCompressionSnapshot(sid)
        if (snapshot) {
          const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
          inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
-            newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+            newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+          outputTokens = newMessages
+            .filter(m => m.role === 'assistant' || m.role === 'tool')
+            .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
        } else {
-          inputTokens = messages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+          inputTokens = messages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+          outputTokens = messages
+            .filter(m => m.role === 'assistant' || m.role === 'tool')
+            .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
        }
-        const outputTokens = messages
-          .filter(m => m.role === 'assistant')
-          .reduce((sum, m) => sum + countTokens(m.content || ''), 0)
        state = {
          messages,
          isWorking: false,
@@ -650,6 +569,7 @@ export class ChatRunSocket {
              const newMessages = history.slice(snapshot.lastMessageIndex + 1)
              logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
                session_id, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
+              // Compare the assembled context's token estimate against the trigger threshold (triggerTokens)
              if (totalTokens <= triggerTokens) {
                // Under threshold — use assembled context directly, no LLM call needed
                history = [
@@ -879,7 +799,7 @@ export class ChatRunSocket {

      // Convert conversation_history from OpenAI format to Anthropic format
      if (body.conversation_history && Array.isArray(body.conversation_history)) {
-        body.conversation_history = convertToAnthropicFormat(body.conversation_history)
+        body.conversation_history = convertHistoryFormat(body.conversation_history)
      }
      const res = await fetch(`${upstream}/v1/runs`, {
        method: 'POST',
@@ -1188,17 +1108,20 @@ export class ChatRunSocket {

      const snapshot = getCompressionSnapshot(sid)
      let inputTokens: number
+      let outputTokens: number
      if (snapshot && msgs.length) {
        const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
        inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
-          newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+          newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+        outputTokens = newMessages
+          .filter(m => m.role === 'assistant' || m.role === 'tool')
+          .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
      } else {
-        inputTokens = msgs.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+        inputTokens = msgs.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
+        outputTokens = msgs
+          .filter(m => m.role === 'assistant' || m.role === 'tool')
+          .reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
      }
-
-      const outputTokens = msgs
-        .filter(m => m.role === 'assistant')
-        .reduce((sum, m) => sum + countTokens(m.content || ''), 0)
      state.inputTokens = inputTokens
      state.outputTokens = outputTokens
      emit('usage.updated', {
@@ -1277,13 +1200,14 @@ export class ChatRunSocket {
            logger.info('[chat-run-socket] syncFromHermes: merged reasoning for %d messages', mergedCount)
          }

-          for (const msg of toInsert) {
+          // Batch insert with transaction for atomicity
+          addMessages(toInsert.map(msg => {
            // Resolve tool_name from assistant's tool_calls if missing
            let toolName = msg.tool_name || null
            if (!toolName && msg.tool_call_id) {
              toolName = toolNameMap.get(msg.tool_call_id) || null
            }
-            addMessage({
+            return {
              session_id: localSessionId,
              role: msg.role,
              content: msg.content || '',
@@ -1293,12 +1217,13 @@ export class ChatRunSocket {
              timestamp: msg.timestamp || Math.floor(Date.now() / 1000),
              token_count: msg.token_count || null,
              finish_reason: msg.finish_reason || null,
-              reasoning: msg.reasoning || null,  // Now includes merged reasoning
+              reasoning: msg.reasoning || null,
              reasoning_details: msg.reasoning_details || null,
              reasoning_content: msg.reasoning_content || null,
              codex_reasoning_items: msg.codex_reasoning_items || null,
-            })
-          }
+            }
+          }))
+
          logger.info('[chat-run-socket] syncFromHermes: synced %d messages to local session %s', toInsert.length, localSessionId)
        }

@@ -1324,7 +1249,12 @@ export class ChatRunSocket {
            this.replaceByHermesSessionId(localSessionId, hermesSessionId, messages)
          }
          const emit = (event: string, payload: any) => {
-            socket.emit(event, { ...payload, session_id: localSessionId })
+            const tagged = localSessionId ? { ...payload, localSessionId } : payload
+            if (localSessionId) {
+              this.nsp.to(`session:${localSessionId}`).emit(event, tagged)
+            } else if (socket.connected) {
+              socket.emit(event, tagged)
+            }
          }
          this.calcAndUpdateUsage(localSessionId, state, emit)
        }