feat: optimize context compression and session sync (#402)

* feat: optimize context compression and session sync

Context Compressor:
- Add ContentBlock type support for user messages with images/files
- Enhance buildConversationHistory to handle ContentBlock[] format
- Update serializeForSummary to convert ContentBlock[] to text
- Support mixed content (text, images, files) in history compression

Session Sync:
- Replace loop of addMessage with batch addMessages (transaction-protected)
- Use addMessages for atomic bulk insert with BEGIN/COMMIT
- Ensure data consistency before calcAndUpdateUsage reads DB

Chat Run Socket:
- Remove the convertHistoryFormat call before compression (the compressor now handles the raw message format)
- Compressor now processes original messages for better tool message handling
- Improve compression quality by preserving original message structure

Benefits:
- Better compression quality with proper tool message recognition
- Improved performance with batch DB inserts
- Strong data consistency with transaction protection
- Support for rich content (images/files) in conversation history

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore: bump version to 0.5.7

- Prepare for release with context compression and session sync optimizations

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore: add 0.5.7 changelog entries

- Optimize context compression to support rich content (images, files)
- Improve session sync with batch inserts and transaction protection
- Fix usage.updated event reception for accurate token tracking

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-05-02 20:12:58 +08:00
committed by GitHub
parent 8af1951f13
commit 4a9967ebdd
5 changed files with 129 additions and 125 deletions
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "hermes-web-ui",
"version": "0.5.6",
"version": "0.5.7",
"description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration",
"repository": {
"type": "git",
+9
View File
@@ -5,6 +5,15 @@ export interface ChangelogEntry {
}
export const changelog: ChangelogEntry[] = [
{
version: '0.5.7',
date: '2026-05-02',
changes: [
'changelog.new_0_5_7_1',
'changelog.new_0_5_7_2',
'changelog.new_0_5_7_3',
],
},
{
version: '0.5.6',
date: '2026-05-02',
+3
View File
@@ -767,6 +767,9 @@ export default {
new_0_5_6_6: 'Redesigned attachment handling using Anthropic-style ContentBlock array format with type discriminated unions (text, image, file)',
new_0_5_6_7: 'Added frontend file download functionality supporting both ContentBlock and Markdown formats with authentication',
new_0_5_6_8: 'Fixed multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances',
new_0_5_7_1: 'Optimize context compression to support rich content (images, files) with improved tool message handling',
new_0_5_7_2: 'Improve session sync with batch inserts and transaction protection for data consistency',
new_0_5_7_3: 'Fix usage.updated event reception to ensure accurate token tracking across runs',
new_0_5_5_1: '🎉 Happy Labor Day! No work this Labor Day, please bear with us if there are any issues',
new_0_5_5_2: 'Add History page for browsing Hermes session history',
new_0_5_5_3: 'History page manages session state independently without interfering with active chat',
@@ -25,9 +25,16 @@ import { getDb } from '../../db/index'
// ─── Types ───────────────────────────────────────────────
/**
 * One element of a structured (multi-part) message body.
 *
 * `type` is the discriminator; the remaining fields are optional and only
 * some of them are meaningful for a given `type`.
 */
export interface ContentBlock {
// Kind of block; consumers in this file branch on this value.
type: 'text' | 'image' | 'file'
// Text payload; read for `type: 'text'` blocks (treated as '' when absent).
text?: string
// Location of the attachment; read for `image` and `file` blocks when they
// are rendered as `[Image: …]` / `[File: …]` placeholders.
path?: string
// Not read by the code visible here. NOTE(review): presumably an inline
// media payload (`media_type` + `data`) — confirm against the producer.
source?: { type: string; media_type?: string; data?: string }
}
export interface ChatMessage {
role: string
content: string
content: string | ContentBlock[]
tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }>
tool_call_id?: string
name?: string
@@ -224,9 +231,23 @@ Write only the summary body. Do not include any preamble or prefix.`
function serializeForSummary(messages: ChatMessage[]): string {
const parts: string[] = []
/**
 * Flattens a message body into a single plain-text string.
 *
 * - A string is returned unchanged.
 * - A ContentBlock[] is concatenated in order with no separator: text blocks
 *   contribute their text, image/file blocks contribute a bracketed
 *   placeholder containing their path, and any other block contributes ''.
 * - Anything else (e.g. a null that slipped past the type) yields ''.
 */
function contentToString(content: string | ContentBlock[]): string {
  if (typeof content === 'string') {
    return content
  }
  if (!Array.isArray(content)) {
    return ''
  }
  return content.reduce((flattened, part) => {
    switch (part.type) {
      case 'text':
        return flattened + (part.text || '')
      case 'image':
        return flattened + `[Image: ${part.path || ''}]`
      case 'file':
        return flattened + `[File: ${part.path || ''}]`
      default:
        // Unknown block kinds are dropped from the text rendering.
        return flattened
    }
  }, '')
}
for (const msg of messages) {
const role = msg.role === 'tool' ? `[tool:${msg.name || 'unknown'}]` : msg.role
let content = msg.content || ''
let content = contentToString(msg.content || '')
if (msg.role === 'tool' && content.length > 5500) {
content = content.slice(0, 4000) + '\n... [truncated]\n...' + content.slice(-1500)
@@ -275,8 +296,41 @@ function buildConversationHistory(messages: ChatMessage[]): Array<{ role: string
}).join('\n')
const content = msg.content ? `${msg.content}\n\n${toolsInfo}` : toolsInfo
result.push({ role: msg.role, content })
} else if (msg.role === 'user' || msg.role === 'assistant' || msg.role === 'system') {
result.push({ role: msg.role, content: msg.content || '' })
} else if (msg.role === 'user') {
// Handle ContentBlock[] format: { type: 'text', text: '...' } or { type: 'image', path: '...' }
let contentStr = ''
const content = msg.content || ''
if (typeof content === 'string') {
contentStr = content
} else if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') {
contentStr += block.text || ''
} else if (block.type === 'image') {
contentStr += `[Image: ${block.path || ''}]`
} else if (block.type === 'file') {
contentStr += `[File: ${block.path || ''}]`
}
}
}
result.push({ role: 'user', content: contentStr })
} else if (msg.role === 'assistant' || msg.role === 'system') {
let contentStr = ''
const content = msg.content
if (typeof content === 'string') {
contentStr = content
} else if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') {
contentStr += block.text || ''
} else if (block.type === 'image') {
contentStr += `[Image: ${block.path || ''}]`
} else if (block.type === 'file') {
contentStr += `[File: ${block.path || ''}]`
}
}
}
result.push({ role: msg.role, content: contentStr })
}
// Skip other roles
}
@@ -292,7 +346,15 @@ function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number):
const pruned = head.map(msg => {
if (msg.role !== 'tool') return msg
const content = msg.content || ''
let content = ''
if (typeof msg.content === 'string') {
content = msg.content
} else if (Array.isArray(msg.content)) {
content = msg.content.map(block => {
if (block.type === 'text') return block.text || ''
return `[${block.type}]`
}).join('')
}
const preview = content.slice(0, 100).replace(/\n/g, ' ')
const truncated = content.length > 100 ? '...' : ''
return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${truncated}` }
@@ -512,7 +574,7 @@ export class ChatContextCompressor {
}
const result: ChatMessage[] = [
{ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary },
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
...tail,
]
@@ -575,7 +637,7 @@ export class ChatContextCompressor {
const result: ChatMessage[] = []
if (summary) {
result.push({ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary })
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
if (sessionId) {
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
}
@@ -18,6 +18,7 @@ import {
getSessionDetailPaginated,
createSession,
addMessage,
addMessages,
updateSessionStats,
useLocalSessionStore,
} from '../../db/hermes/session-store'
@@ -91,108 +92,20 @@ async function convertContentBlocks(blocks: ContentBlock[]): Promise<string> {
const compressor = new ChatContextCompressor()
// --- Helper: Normalize conversation history for the upstream run request ---
function convertToAnthropicFormat(messages: any[]): any[] {
function convertHistoryFormat(messages: any[]): any[] {
const result: any[] = []
for (const m of messages) {
const role = m.role
const content = m.content || ''
if (role === 'assistant') {
const blocks: any[] = []
// Add thinking block if reasoning_content exists
if (m.reasoning) {
blocks.push({ type: 'thinking', thinking: m.reasoning })
}
// Add text content
if (content) {
if (typeof content === 'string') {
blocks.push({ type: 'text', text: content })
} else if (Array.isArray(content)) {
blocks.push(...content)
}
}
// Add tool_use blocks
if (m.tool_calls && Array.isArray(m.tool_calls)) {
for (const tc of m.tool_calls) {
if (tc.id && tc.function) {
try {
const args = parseToolArguments(tc.function.arguments || '{}')
blocks.push({
type: 'tool_use',
id: tc.id,
name: tc.function.name,
input: args
})
} catch (e) {
logger.warn(e, '[chat-run-socket] failed to parse tool arguments for tool %s', tc.id)
blocks.push({
type: 'tool_use',
id: tc.id,
name: tc.function.name,
input: {}
})
}
}
}
}
// Handle empty content
if (blocks.length === 0) {
blocks.push({ type: 'text', text: '' })
}
result.push({ role: 'assistant', content: blocks })
continue
}
delete m.reasoning_content
if (role === 'tool') {
// Convert tool message to tool_result in user message
// Follow Hermes official format: content is a string (not array)
const toolContent = content || '(no output)'
// Normalize tool_result content to string format
// Use robust LLM JSON parser if content looks like JSON
let resultContent: string
if (typeof toolContent === 'string') {
try {
// Try to parse as JSON first (handles Python format, single quotes, etc.)
const parsed = parseLLMJSON(toolContent, 2)
// Re-serialize to ensure clean JSON string
resultContent = JSON.stringify(parsed)
} catch {
// Not valid JSON, use as-is
resultContent = toolContent
}
} else if (typeof toolContent === 'object' && toolContent !== null) {
// Object or array, serialize to JSON string
resultContent = JSON.stringify(toolContent)
} else {
// Primitive type (null, undefined, number, boolean)
resultContent = String(toolContent !== null && toolContent !== undefined ? toolContent : '(no output)')
}
const toolResult = {
type: 'tool_result',
tool_use_id: m.tool_call_id || '',
content: resultContent
}
// Merge with previous user message if it ends with tool_result
if (
result.length > 0 &&
result[result.length - 1].role === 'user' &&
Array.isArray(result[result.length - 1].content) &&
result[result.length - 1].content.length > 0 &&
result[result.length - 1].content[result[result.length - 1].content.length - 1].type === 'tool_result'
) {
result[result.length - 1].content.push(toolResult)
} else {
result.push({ role: 'user', content: [toolResult] })
}
let pushItem = { ...m }
pushItem.role = 'user'
pushItem.content = `[Tool result: ${content}]`
result.push(pushItem)
continue
}
@@ -200,16 +113,17 @@ function convertToAnthropicFormat(messages: any[]): any[] {
if (role === 'user') {
// Format: { role: 'user', content: [{ type: 'text', text: '...' }] }
if (typeof content === 'string') {
result.push({ role: 'user', content: [{ type: 'text', text: content || '' }] })
result.push({ role: 'user', content: content })
} else if (Array.isArray(content)) {
// Already in array format, assume it's correct
result.push({ role: 'user', content })
} else if (content) {
// Fallback for other types
result.push({ role: 'user', content: [{ type: 'text', text: String(content) }] })
result.push({ role: 'user', content: convertContentBlocks(content) })
}
continue
}
if (role === 'assistant') {
result.push({ ...m })
continue
}
}
return result
}
@@ -455,18 +369,23 @@ export class ChatRunSocket {
: await getSessionDetailFromDb(sid)
const messages = detail?.messages ? this.handleMessage(detail.messages, sid) : []
// Calculate context tokens — aware of compression snapshot
let inputTokens: number
let outputTokens: number
const snapshot = getCompressionSnapshot(sid)
if (snapshot) {
const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
outputTokens = newMessages
.filter(m => m.role === 'assistant' || m.role === 'tool')
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
} else {
inputTokens = messages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
inputTokens = messages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
outputTokens = messages
.filter(m => m.role === 'assistant' || m.role === 'tool')
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
}
const outputTokens = messages
.filter(m => m.role === 'assistant')
.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
state = {
messages,
isWorking: false,
@@ -650,6 +569,7 @@ export class ChatRunSocket {
const newMessages = history.slice(snapshot.lastMessageIndex + 1)
logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
session_id, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
// Compare the assembled token estimate against triggerTokens
if (totalTokens <= triggerTokens) {
// Under threshold — use assembled context directly, no LLM call needed
history = [
@@ -879,7 +799,7 @@ export class ChatRunSocket {
// Normalize conversation_history before sending it upstream (tool results are folded into user messages)
if (body.conversation_history && Array.isArray(body.conversation_history)) {
body.conversation_history = convertToAnthropicFormat(body.conversation_history)
body.conversation_history = convertHistoryFormat(body.conversation_history)
}
const res = await fetch(`${upstream}/v1/runs`, {
method: 'POST',
@@ -1188,17 +1108,20 @@ export class ChatRunSocket {
const snapshot = getCompressionSnapshot(sid)
let inputTokens: number
let outputTokens: number
if (snapshot && msgs.length) {
const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
outputTokens = newMessages
.filter(m => m.role === 'assistant' || m.role === 'tool')
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
} else {
inputTokens = msgs.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
inputTokens = msgs.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
outputTokens = msgs
.filter(m => m.role === 'assistant' || m.role === 'tool')
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
}
const outputTokens = msgs
.filter(m => m.role === 'assistant')
.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
state.inputTokens = inputTokens
state.outputTokens = outputTokens
emit('usage.updated', {
@@ -1277,13 +1200,14 @@ export class ChatRunSocket {
logger.info('[chat-run-socket] syncFromHermes: merged reasoning for %d messages', mergedCount)
}
for (const msg of toInsert) {
// Batch insert with transaction for atomicity
addMessages(toInsert.map(msg => {
// Resolve tool_name from assistant's tool_calls if missing
let toolName = msg.tool_name || null
if (!toolName && msg.tool_call_id) {
toolName = toolNameMap.get(msg.tool_call_id) || null
}
addMessage({
return {
session_id: localSessionId,
role: msg.role,
content: msg.content || '',
@@ -1293,12 +1217,13 @@ export class ChatRunSocket {
timestamp: msg.timestamp || Math.floor(Date.now() / 1000),
token_count: msg.token_count || null,
finish_reason: msg.finish_reason || null,
reasoning: msg.reasoning || null, // Now includes merged reasoning
reasoning: msg.reasoning || null,
reasoning_details: msg.reasoning_details || null,
reasoning_content: msg.reasoning_content || null,
codex_reasoning_items: msg.codex_reasoning_items || null,
})
}
}
}))
logger.info('[chat-run-socket] syncFromHermes: synced %d messages to local session %s', toInsert.length, localSessionId)
}
@@ -1324,7 +1249,12 @@ export class ChatRunSocket {
this.replaceByHermesSessionId(localSessionId, hermesSessionId, messages)
}
const emit = (event: string, payload: any) => {
socket.emit(event, { ...payload, session_id: localSessionId })
const tagged = localSessionId ? { ...payload, localSessionId } : payload
if (localSessionId) {
this.nsp.to(`session:${localSessionId}`).emit(event, tagged)
} else if (socket.connected) {
socket.emit(event, tagged)
}
}
this.calcAndUpdateUsage(localSessionId, state, emit)
}