// Review preamble for this patch (commentary only; it sits ahead of the first `diff --git` header).
//
// compression.ts
//   * Adds the exported helper estimateSnapshotAwareHistoryUsage(sessionId, history).
//     It looks up getCompressionSnapshot(sessionId). When a snapshot exists, the estimate is taken
//     over one synthetic 'user' message (SUMMARY_PREFIX + snapshot.summary) followed by
//     history.slice(snapshot.lastMessageIndex + 1); otherwise it is taken over `history` as passed.
//     It returns { messageCount, tokenCount }, where messageCount is the length of that list and
//     tokenCount is inputTokens + outputTokens from estimateUsageTokensFromMessages.
//   * forceCompressBridgeHistory now takes totalTokens from the helper's tokenCount and adds a
//     snapshotAwareMessages field (the helper's messageCount) to the bridgeLogger.info payload.
//
// session-command.ts
//   * Imports the helper from './compression' and drops estimateUsageTokensFromMessages from the
//     './usage' import.
//   * In handleSessionCommand, the 'compression.started' event now reports message_count and
//     token_count from the helper instead of history.length and a whole-history token estimate.
//
// NOTE(review): in handleSessionCommand `history` comes from buildDbHistory(sessionId, { excludeLastUser: true });
//   how snapshot.lastMessageIndex is defined is not visible here - confirm it indexes into this same list.
// NOTE(review): the `{ role: 'user', ... }` literal has no type annotation; if ChatMessage.role is a
//   literal union, confirm the array still type-checks against estimateUsageTokensFromMessages.
// NOTE(review): line breaks and indentation appear to have been collapsed in this copy of the patch;
//   presumably it will not apply as-is - regenerate it from the commit before use.
diff --git a/packages/server/src/services/hermes/run-chat/compression.ts b/packages/server/src/services/hermes/run-chat/compression.ts index a98602e..c0802c5 100644 --- a/packages/server/src/services/hermes/run-chat/compression.ts +++ b/packages/server/src/services/hermes/run-chat/compression.ts @@ -66,6 +66,24 @@ export async function buildDbHistory( }).filter((m): m is ChatMessage => m !== null) } +export function estimateSnapshotAwareHistoryUsage( + sessionId: string, + history: ChatMessage[], +): { messageCount: number; tokenCount: number } { + const snapshot = getCompressionSnapshot(sessionId) + const messages = snapshot + ? [ + { role: 'user', content: SUMMARY_PREFIX + snapshot.summary }, + ...history.slice(snapshot.lastMessageIndex + 1), + ] + : history + const usage = estimateUsageTokensFromMessages(messages) + return { + messageCount: messages.length, + tokenCount: usage.inputTokens + usage.outputTokens, + } +} + export async function buildCompressedHistory( sessionId: string, profile: string, @@ -210,12 +228,13 @@ export async function forceCompressBridgeHistory( const upstream = getUpstream(profile).replace(/\/$/, '') const apiKey = getApiKey(profile) || undefined - const beforeUsage = estimateUsageTokensFromMessages(history) - const totalTokens = beforeUsage.inputTokens + beforeUsage.outputTokens + const beforeUsage = estimateSnapshotAwareHistoryUsage(sessionId, history) + const totalTokens = beforeUsage.tokenCount bridgeLogger.info({ sessionId, profile, historyMessages: history.length, + snapshotAwareMessages: beforeUsage.messageCount, bridgeProvidedMessages: Array.isArray(_messages) ? 
_messages.length : 0, tokenEstimate: totalTokens, snapshotAware: true, diff --git a/packages/server/src/services/hermes/run-chat/session-command.ts b/packages/server/src/services/hermes/run-chat/session-command.ts index eb29f95..d2b18cd 100644 --- a/packages/server/src/services/hermes/run-chat/session-command.ts +++ b/packages/server/src/services/hermes/run-chat/session-command.ts @@ -3,9 +3,9 @@ import { addMessage, clearSessionMessages, createSession, getSession, renameSess import { logger } from '../../logger' import type { AgentBridgeClient } from '../agent-bridge' import { flushBridgePendingToDb } from './bridge-message' -import { buildDbHistory, forceCompressBridgeHistory, getOrCreateSession, replaceState } from './compression' +import { buildDbHistory, estimateSnapshotAwareHistoryUsage, forceCompressBridgeHistory, getOrCreateSession, replaceState } from './compression' import { handleAbort } from './abort' -import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage' +import { calcAndUpdateUsage } from './usage' import type { ContentBlock, QueuedRun, SessionState } from './types' type CommandName = @@ -232,12 +232,11 @@ export async function handleSessionCommand( const emit = (event: string, payload: any) => emitToSession(ctx.nsp, ctx.socket, sessionId, event, payload) try { const history = await buildDbHistory(sessionId, { excludeLastUser: true }) - const usageEstimate = estimateUsageTokensFromMessages(history) - const tokenEstimate = usageEstimate.inputTokens + usageEstimate.outputTokens + const usageEstimate = estimateSnapshotAwareHistoryUsage(sessionId, history) emit('compression.started', { event: 'compression.started', - message_count: history.length, - token_count: tokenEstimate, + message_count: usageEstimate.messageCount, + token_count: usageEstimate.tokenCount, source: 'command', }) const result = await forceCompressBridgeHistory(