feat: optimize context compression and session sync (#402)
* feat: optimize context compression and session sync Context Compressor: - Add ContentBlock type support for user messages with images/files - Enhance buildConversationHistory to handle ContentBlock[] format - Update serializeForSummary to convert ContentBlock[] to text - Support mixed content (text, images, files) in history compression Session Sync: - Replace loop of addMessage with batch addMessages (transaction-protected) - Use addMessages for atomic bulk insert with BEGIN/COMMIT - Ensure data consistency before calcAndUpdateUsage reads DB Chat Run Socket: - Remove convertHistoryFormat before compression (compressor handles raw format) - Compressor now processes original messages for better tool message handling - Improve compression quality by preserving original message structure Benefits: - Better compression quality with proper tool message recognition - Improved performance with batch DB inserts - Strong data consistency with transaction protection - Support for rich content (images/files) in conversation history Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore: bump version to 0.5.7 - Prepare for release with context compression and session sync optimizations Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore: add 0.5.7 changelog entries - Optimize context compression to support rich content (images, files) - Improve session sync with batch inserts and transaction protection - Fix usage.updated event reception for accurate token tracking Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "hermes-web-ui",
|
||||
"version": "0.5.6",
|
||||
"version": "0.5.7",
|
||||
"description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
@@ -5,6 +5,15 @@ export interface ChangelogEntry {
|
||||
}
|
||||
|
||||
export const changelog: ChangelogEntry[] = [
|
||||
{
|
||||
version: '0.5.7',
|
||||
date: '2026-05-02',
|
||||
changes: [
|
||||
'changelog.new_0_5_7_1',
|
||||
'changelog.new_0_5_7_2',
|
||||
'changelog.new_0_5_7_3',
|
||||
],
|
||||
},
|
||||
{
|
||||
version: '0.5.6',
|
||||
date: '2026-05-02',
|
||||
|
||||
@@ -767,6 +767,9 @@ export default {
|
||||
new_0_5_6_6: 'Redesigned attachment handling using Anthropic-style ContentBlock array format with type discriminated unions (text, image, file)',
|
||||
new_0_5_6_7: 'Added frontend file download functionality supporting both ContentBlock and Markdown formats with authentication',
|
||||
new_0_5_6_8: 'Fixed multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances',
|
||||
new_0_5_7_1: 'Optimize context compression to support rich content (images, files) with improved tool message handling',
|
||||
new_0_5_7_2: 'Improve session sync with batch inserts and transaction protection for data consistency',
|
||||
new_0_5_7_3: 'Fix usage.updated event reception to ensure accurate token tracking across runs',
|
||||
new_0_5_5_1: '🎉 Happy Labor Day! No work this Labor Day, please bear with us if there are any issues',
|
||||
new_0_5_5_2: 'Add History page for browsing Hermes session history',
|
||||
new_0_5_5_3: 'History page manages session state independently without interfering with active chat',
|
||||
|
||||
@@ -25,9 +25,16 @@ import { getDb } from '../../db/index'
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────
|
||||
|
||||
export interface ContentBlock {
|
||||
type: 'text' | 'image' | 'file'
|
||||
text?: string
|
||||
path?: string
|
||||
source?: { type: string; media_type?: string; data?: string }
|
||||
}
|
||||
|
||||
export interface ChatMessage {
|
||||
role: string
|
||||
content: string
|
||||
content: string | ContentBlock[]
|
||||
tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }>
|
||||
tool_call_id?: string
|
||||
name?: string
|
||||
@@ -224,9 +231,23 @@ Write only the summary body. Do not include any preamble or prefix.`
|
||||
|
||||
function serializeForSummary(messages: ChatMessage[]): string {
|
||||
const parts: string[] = []
|
||||
|
||||
function contentToString(content: string | ContentBlock[]): string {
|
||||
if (typeof content === 'string') return content
|
||||
if (Array.isArray(content)) {
|
||||
return content.map(block => {
|
||||
if (block.type === 'text') return block.text || ''
|
||||
if (block.type === 'image') return `[Image: ${block.path || ''}]`
|
||||
if (block.type === 'file') return `[File: ${block.path || ''}]`
|
||||
return ''
|
||||
}).join('')
|
||||
}
|
||||
return ''
|
||||
}
|
||||
|
||||
for (const msg of messages) {
|
||||
const role = msg.role === 'tool' ? `[tool:${msg.name || 'unknown'}]` : msg.role
|
||||
let content = msg.content || ''
|
||||
let content = contentToString(msg.content || '')
|
||||
|
||||
if (msg.role === 'tool' && content.length > 5500) {
|
||||
content = content.slice(0, 4000) + '\n... [truncated]\n...' + content.slice(-1500)
|
||||
@@ -275,8 +296,41 @@ function buildConversationHistory(messages: ChatMessage[]): Array<{ role: string
|
||||
}).join('\n')
|
||||
const content = msg.content ? `${msg.content}\n\n${toolsInfo}` : toolsInfo
|
||||
result.push({ role: msg.role, content })
|
||||
} else if (msg.role === 'user' || msg.role === 'assistant' || msg.role === 'system') {
|
||||
result.push({ role: msg.role, content: msg.content || '' })
|
||||
} else if (msg.role === 'user') {
|
||||
// Handle ContentBlock[] format: { type: 'text', text: '...' } or { type: 'image', path: '...' }
|
||||
let contentStr = ''
|
||||
const content = msg.content || ''
|
||||
if (typeof content === 'string') {
|
||||
contentStr = content
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'text') {
|
||||
contentStr += block.text || ''
|
||||
} else if (block.type === 'image') {
|
||||
contentStr += `[Image: ${block.path || ''}]`
|
||||
} else if (block.type === 'file') {
|
||||
contentStr += `[File: ${block.path || ''}]`
|
||||
}
|
||||
}
|
||||
}
|
||||
result.push({ role: 'user', content: contentStr })
|
||||
} else if (msg.role === 'assistant' || msg.role === 'system') {
|
||||
let contentStr = ''
|
||||
const content = msg.content
|
||||
if (typeof content === 'string') {
|
||||
contentStr = content
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'text') {
|
||||
contentStr += block.text || ''
|
||||
} else if (block.type === 'image') {
|
||||
contentStr += `[Image: ${block.path || ''}]`
|
||||
} else if (block.type === 'file') {
|
||||
contentStr += `[File: ${block.path || ''}]`
|
||||
}
|
||||
}
|
||||
}
|
||||
result.push({ role: msg.role, content: contentStr })
|
||||
}
|
||||
// Skip other roles
|
||||
}
|
||||
@@ -292,7 +346,15 @@ function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number):
|
||||
|
||||
const pruned = head.map(msg => {
|
||||
if (msg.role !== 'tool') return msg
|
||||
const content = msg.content || ''
|
||||
let content = ''
|
||||
if (typeof msg.content === 'string') {
|
||||
content = msg.content
|
||||
} else if (Array.isArray(msg.content)) {
|
||||
content = msg.content.map(block => {
|
||||
if (block.type === 'text') return block.text || ''
|
||||
return `[${block.type}]`
|
||||
}).join('')
|
||||
}
|
||||
const preview = content.slice(0, 100).replace(/\n/g, ' ')
|
||||
const truncated = content.length > 100 ? '...' : ''
|
||||
return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${truncated}` }
|
||||
@@ -512,7 +574,7 @@ export class ChatContextCompressor {
|
||||
}
|
||||
|
||||
const result: ChatMessage[] = [
|
||||
{ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary },
|
||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
|
||||
...tail,
|
||||
]
|
||||
|
||||
@@ -575,7 +637,7 @@ export class ChatContextCompressor {
|
||||
const result: ChatMessage[] = []
|
||||
|
||||
if (summary) {
|
||||
result.push({ role: 'system', content: SUMMARY_PREFIX + '\n\n' + summary })
|
||||
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
|
||||
if (sessionId) {
|
||||
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
getSessionDetailPaginated,
|
||||
createSession,
|
||||
addMessage,
|
||||
addMessages,
|
||||
updateSessionStats,
|
||||
useLocalSessionStore,
|
||||
} from '../../db/hermes/session-store'
|
||||
@@ -91,108 +92,20 @@ async function convertContentBlocks(blocks: ContentBlock[]): Promise<string> {
|
||||
const compressor = new ChatContextCompressor()
|
||||
|
||||
// --- Helper: Convert OpenAI format to Anthropic format ---
|
||||
function convertToAnthropicFormat(messages: any[]): any[] {
|
||||
function convertHistoryFormat(messages: any[]): any[] {
|
||||
const result: any[] = []
|
||||
|
||||
for (const m of messages) {
|
||||
const role = m.role
|
||||
const content = m.content || ''
|
||||
|
||||
if (role === 'assistant') {
|
||||
const blocks: any[] = []
|
||||
|
||||
// Add thinking block if reasoning_content exists
|
||||
if (m.reasoning) {
|
||||
blocks.push({ type: 'thinking', thinking: m.reasoning })
|
||||
}
|
||||
|
||||
// Add text content
|
||||
if (content) {
|
||||
if (typeof content === 'string') {
|
||||
blocks.push({ type: 'text', text: content })
|
||||
} else if (Array.isArray(content)) {
|
||||
blocks.push(...content)
|
||||
}
|
||||
}
|
||||
|
||||
// Add tool_use blocks
|
||||
if (m.tool_calls && Array.isArray(m.tool_calls)) {
|
||||
for (const tc of m.tool_calls) {
|
||||
if (tc.id && tc.function) {
|
||||
try {
|
||||
const args = parseToolArguments(tc.function.arguments || '{}')
|
||||
blocks.push({
|
||||
type: 'tool_use',
|
||||
id: tc.id,
|
||||
name: tc.function.name,
|
||||
input: args
|
||||
})
|
||||
} catch (e) {
|
||||
logger.warn(e, '[chat-run-socket] failed to parse tool arguments for tool %s', tc.id)
|
||||
blocks.push({
|
||||
type: 'tool_use',
|
||||
id: tc.id,
|
||||
name: tc.function.name,
|
||||
input: {}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle empty content
|
||||
if (blocks.length === 0) {
|
||||
blocks.push({ type: 'text', text: '' })
|
||||
}
|
||||
|
||||
result.push({ role: 'assistant', content: blocks })
|
||||
continue
|
||||
}
|
||||
|
||||
delete m.reasoning_content
|
||||
if (role === 'tool') {
|
||||
// Convert tool message to tool_result in user message
|
||||
// Follow Hermes official format: content is a string (not array)
|
||||
const toolContent = content || '(no output)'
|
||||
|
||||
// Normalize tool_result content to string format
|
||||
// Use robust LLM JSON parser if content looks like JSON
|
||||
let resultContent: string
|
||||
if (typeof toolContent === 'string') {
|
||||
try {
|
||||
// Try to parse as JSON first (handles Python format, single quotes, etc.)
|
||||
const parsed = parseLLMJSON(toolContent, 2)
|
||||
// Re-serialize to ensure clean JSON string
|
||||
resultContent = JSON.stringify(parsed)
|
||||
} catch {
|
||||
// Not valid JSON, use as-is
|
||||
resultContent = toolContent
|
||||
}
|
||||
} else if (typeof toolContent === 'object' && toolContent !== null) {
|
||||
// Object or array, serialize to JSON string
|
||||
resultContent = JSON.stringify(toolContent)
|
||||
} else {
|
||||
// Primitive type (null, undefined, number, boolean)
|
||||
resultContent = String(toolContent !== null && toolContent !== undefined ? toolContent : '(no output)')
|
||||
}
|
||||
|
||||
const toolResult = {
|
||||
type: 'tool_result',
|
||||
tool_use_id: m.tool_call_id || '',
|
||||
content: resultContent
|
||||
}
|
||||
|
||||
// Merge with previous user message if it ends with tool_result
|
||||
if (
|
||||
result.length > 0 &&
|
||||
result[result.length - 1].role === 'user' &&
|
||||
Array.isArray(result[result.length - 1].content) &&
|
||||
result[result.length - 1].content.length > 0 &&
|
||||
result[result.length - 1].content[result[result.length - 1].content.length - 1].type === 'tool_result'
|
||||
) {
|
||||
result[result.length - 1].content.push(toolResult)
|
||||
} else {
|
||||
result.push({ role: 'user', content: [toolResult] })
|
||||
}
|
||||
let pushItem = { ...m }
|
||||
pushItem.role = 'user'
|
||||
pushItem.content = `[Tool result: ${content}]`
|
||||
result.push(pushItem)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -200,16 +113,17 @@ function convertToAnthropicFormat(messages: any[]): any[] {
|
||||
if (role === 'user') {
|
||||
// Format: { role: 'user', content: [{ type: 'text', text: '...' }] }
|
||||
if (typeof content === 'string') {
|
||||
result.push({ role: 'user', content: [{ type: 'text', text: content || '' }] })
|
||||
result.push({ role: 'user', content: content })
|
||||
} else if (Array.isArray(content)) {
|
||||
// Already in array format, assume it's correct
|
||||
result.push({ role: 'user', content })
|
||||
} else if (content) {
|
||||
// Fallback for other types
|
||||
result.push({ role: 'user', content: [{ type: 'text', text: String(content) }] })
|
||||
result.push({ role: 'user', content: convertContentBlocks(content) })
|
||||
}
|
||||
continue
|
||||
}
|
||||
if (role === 'assistant') {
|
||||
result.push({ ...m })
|
||||
continue
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
@@ -455,18 +369,23 @@ export class ChatRunSocket {
|
||||
: await getSessionDetailFromDb(sid)
|
||||
const messages = detail?.messages ? this.handleMessage(detail.messages, sid) : []
|
||||
// Calculate context tokens — aware of compression snapshot
|
||||
|
||||
let inputTokens: number
|
||||
let outputTokens: number
|
||||
const snapshot = getCompressionSnapshot(sid)
|
||||
if (snapshot) {
|
||||
const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
|
||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||
newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
outputTokens = newMessages
|
||||
.filter(m => m.role === 'assistant' || m.role === 'tool')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
|
||||
} else {
|
||||
inputTokens = messages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
inputTokens = messages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
outputTokens = messages
|
||||
.filter(m => m.role === 'assistant' || m.role === 'tool')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
|
||||
}
|
||||
const outputTokens = messages
|
||||
.filter(m => m.role === 'assistant')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
state = {
|
||||
messages,
|
||||
isWorking: false,
|
||||
@@ -650,6 +569,7 @@ export class ChatRunSocket {
|
||||
const newMessages = history.slice(snapshot.lastMessageIndex + 1)
|
||||
logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
|
||||
session_id, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
|
||||
// triggerTokens
|
||||
if (totalTokens <= triggerTokens) {
|
||||
// Under threshold — use assembled context directly, no LLM call needed
|
||||
history = [
|
||||
@@ -879,7 +799,7 @@ export class ChatRunSocket {
|
||||
|
||||
// Convert conversation_history from OpenAI format to Anthropic format
|
||||
if (body.conversation_history && Array.isArray(body.conversation_history)) {
|
||||
body.conversation_history = convertToAnthropicFormat(body.conversation_history)
|
||||
body.conversation_history = convertHistoryFormat(body.conversation_history)
|
||||
}
|
||||
const res = await fetch(`${upstream}/v1/runs`, {
|
||||
method: 'POST',
|
||||
@@ -1188,17 +1108,20 @@ export class ChatRunSocket {
|
||||
|
||||
const snapshot = getCompressionSnapshot(sid)
|
||||
let inputTokens: number
|
||||
let outputTokens: number
|
||||
if (snapshot && msgs.length) {
|
||||
const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
|
||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||
newMessages.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
newMessages.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
outputTokens = newMessages
|
||||
.filter(m => m.role === 'assistant' || m.role === 'tool')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
|
||||
} else {
|
||||
inputTokens = msgs.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
inputTokens = msgs.filter(m => m.role === 'user').reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
outputTokens = msgs
|
||||
.filter(m => m.role === 'assistant' || m.role === 'tool')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || '') + countTokens(m.tool_calls + '' || ''), 0)
|
||||
}
|
||||
|
||||
const outputTokens = msgs
|
||||
.filter(m => m.role === 'assistant')
|
||||
.reduce((sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
state.inputTokens = inputTokens
|
||||
state.outputTokens = outputTokens
|
||||
emit('usage.updated', {
|
||||
@@ -1277,13 +1200,14 @@ export class ChatRunSocket {
|
||||
logger.info('[chat-run-socket] syncFromHermes: merged reasoning for %d messages', mergedCount)
|
||||
}
|
||||
|
||||
for (const msg of toInsert) {
|
||||
// Batch insert with transaction for atomicity
|
||||
addMessages(toInsert.map(msg => {
|
||||
// Resolve tool_name from assistant's tool_calls if missing
|
||||
let toolName = msg.tool_name || null
|
||||
if (!toolName && msg.tool_call_id) {
|
||||
toolName = toolNameMap.get(msg.tool_call_id) || null
|
||||
}
|
||||
addMessage({
|
||||
return {
|
||||
session_id: localSessionId,
|
||||
role: msg.role,
|
||||
content: msg.content || '',
|
||||
@@ -1293,12 +1217,13 @@ export class ChatRunSocket {
|
||||
timestamp: msg.timestamp || Math.floor(Date.now() / 1000),
|
||||
token_count: msg.token_count || null,
|
||||
finish_reason: msg.finish_reason || null,
|
||||
reasoning: msg.reasoning || null, // Now includes merged reasoning
|
||||
reasoning: msg.reasoning || null,
|
||||
reasoning_details: msg.reasoning_details || null,
|
||||
reasoning_content: msg.reasoning_content || null,
|
||||
codex_reasoning_items: msg.codex_reasoning_items || null,
|
||||
})
|
||||
}
|
||||
}
|
||||
}))
|
||||
|
||||
logger.info('[chat-run-socket] syncFromHermes: synced %d messages to local session %s', toInsert.length, localSessionId)
|
||||
}
|
||||
|
||||
@@ -1324,7 +1249,12 @@ export class ChatRunSocket {
|
||||
this.replaceByHermesSessionId(localSessionId, hermesSessionId, messages)
|
||||
}
|
||||
const emit = (event: string, payload: any) => {
|
||||
socket.emit(event, { ...payload, session_id: localSessionId })
|
||||
const tagged = localSessionId ? { ...payload, localSessionId } : payload
|
||||
if (localSessionId) {
|
||||
this.nsp.to(`session:${localSessionId}`).emit(event, tagged)
|
||||
} else if (socket.connected) {
|
||||
socket.emit(event, tagged)
|
||||
}
|
||||
}
|
||||
this.calcAndUpdateUsage(localSessionId, state, emit)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user