fix compression context usage accounting (#924)
This commit is contained in:
@@ -9,11 +9,26 @@ const updateSessionStatsMock = vi.fn()
|
||||
// Standalone vitest spies. Several of them are handed to the vi.mock() factories
// later in this file as the exports of the mocked run-chat modules.
const updateUsageMock = vi.fn()
|
||||
const buildCompressedHistoryMock = vi.fn()
|
||||
const buildDbHistoryMock = vi.fn()
|
||||
// Default implementation ignores the session id and profile and resolves to the
// `history` argument unchanged.
const buildSnapshotAwareHistoryMock = vi.fn(async (_sessionId: string, _profile: string, history: any[]) => history)
|
||||
const pushStateMock = vi.fn()
|
||||
const replaceStateMock = vi.fn()
|
||||
const forceCompressBridgeHistoryMock = vi.fn()
|
||||
const calcAndUpdateUsageMock = vi.fn()
|
||||
const estimateUsageTokensFromMessagesMock = vi.fn()
|
||||
/**
 * Test double for `updateContextTokenUsage`.
 *
 * Writes `contextTokens` onto `state.contextTokens`, emits a single
 * `usage.updated` event through `emit`, and returns `contextTokens`.
 *
 * @param sid - session id, echoed as `session_id` in the emitted payload
 * @param state - session state object; mutated (`contextTokens` is overwritten)
 * @param emit - emitter called as `emit('usage.updated', payload)`
 * @param contextTokens - value stored on the state and reported in the payload
 * @param usage - optional input/output token counts for the payload
 * @returns the `contextTokens` argument, unchanged
 */
const updateContextTokenUsageMock = vi.fn((sid: string, state: any, emit: any, contextTokens: number, usage?: { inputTokens: number; outputTokens: number }) => {
|
||||
state.contextTokens = contextTokens
|
||||
emit('usage.updated', {
|
||||
event: 'usage.updated',
|
||||
session_id: sid,
|
||||
// Token counts: explicit `usage` first, then whatever is on `state`, then 0.
inputTokens: usage?.inputTokens ?? state.inputTokens ?? 0,
|
||||
outputTokens: usage?.outputTokens ?? state.outputTokens ?? 0,
|
||||
contextTokens,
|
||||
})
|
||||
return contextTokens
|
||||
})
|
||||
// Default: reports no cached bridge overhead (returns undefined).
const getCachedBridgeContextOverheadMock = vi.fn(() => undefined)
|
||||
// Default: ignores the state and returns the message token count as-is (no overhead added).
const contextTokensWithCachedOverheadMock = vi.fn((_state: any, messageTokens: number) => messageTokens)
|
||||
// Default: forwards to updateContextTokenUsageMock, passing `messageTokens` as the context token total.
const updateMessageContextTokenUsageMock = vi.fn((sid: string, state: any, emit: any, messageTokens: number, usage?: { inputTokens: number; outputTokens: number }) => updateContextTokenUsageMock(sid, state, emit, messageTokens, usage))
|
||||
const flushBridgePendingToDbMock = vi.fn()
|
||||
const ensureOpenBridgeAssistantMessageMock = vi.fn()
|
||||
const syncBridgeReasoningToMessageMock = vi.fn()
|
||||
@@ -45,6 +60,7 @@ vi.mock('../../packages/server/src/services/logger', () => ({
|
||||
vi.mock('../../packages/server/src/services/hermes/run-chat/compression', () => ({
|
||||
buildCompressedHistory: buildCompressedHistoryMock,
|
||||
buildDbHistory: buildDbHistoryMock,
|
||||
buildSnapshotAwareHistory: buildSnapshotAwareHistoryMock,
|
||||
pushState: pushStateMock,
|
||||
replaceState: replaceStateMock,
|
||||
forceCompressBridgeHistory: forceCompressBridgeHistoryMock,
|
||||
@@ -53,6 +69,10 @@ vi.mock('../../packages/server/src/services/hermes/run-chat/compression', () =>
|
||||
// Registers a vitest module mock for run-chat/usage; each listed export is
// one of the spies declared at the top of this file.
vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({
|
||||
calcAndUpdateUsage: calcAndUpdateUsageMock,
|
||||
estimateUsageTokensFromMessages: estimateUsageTokensFromMessagesMock,
|
||||
getCachedBridgeContextOverhead: getCachedBridgeContextOverheadMock,
|
||||
contextTokensWithCachedOverhead: contextTokensWithCachedOverheadMock,
|
||||
updateContextTokenUsage: updateContextTokenUsageMock,
|
||||
updateMessageContextTokenUsage: updateMessageContextTokenUsageMock,
|
||||
}))
|
||||
|
||||
vi.mock('../../packages/server/src/services/hermes/run-chat/bridge-message', () => ({
|
||||
@@ -103,7 +123,11 @@ describe('bridge run final context usage', () => {
|
||||
{ role: 'user', content: 'hello' },
|
||||
{ role: 'assistant', content: 'done' },
|
||||
])
|
||||
buildSnapshotAwareHistoryMock.mockImplementation(async (_sessionId: string, _profile: string, history: any[]) => history)
|
||||
calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 11, outputTokens: 7 })
|
||||
estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 11, outputTokens: 7 })
|
||||
getCachedBridgeContextOverheadMock.mockReturnValue(undefined)
|
||||
contextTokensWithCachedOverheadMock.mockImplementation((_state: any, messageTokens: number) => messageTokens)
|
||||
})
|
||||
|
||||
it('refreshes full context tokens when a bridge run completes', async () => {
|
||||
@@ -161,6 +185,50 @@ describe('bridge run final context usage', () => {
|
||||
}))
|
||||
})
|
||||
|
||||
// Verifies that when a cached fixed-context overhead is reported, a completed
// bridge run does not call `bridge.contextEstimate` and the final context total
// (state + `run.completed` payload) is the cached overhead plus message tokens.
it('uses cached fixed context instead of bridge estimate when available', async () => {
|
||||
const emit = vi.fn()
|
||||
const nsp = makeNamespace(emit)
|
||||
const socket = makeSocket()
|
||||
const state = makeState()
|
||||
// Seed the session state with a cached fixed-context size.
state.bridgeContext = { fixedContextTokens: 20_000 }
|
||||
const sessionMap = new Map([['session-1', state]])
|
||||
getCachedBridgeContextOverheadMock.mockReturnValue(20_000)
|
||||
// Stand in for the real helper by adding the 20_000 overhead on top of the message tokens.
// NOTE(review): this mockImplementation override is not visibly restored in the
// setup lines shown in this diff — confirm it is reset so it cannot leak into later tests.
updateMessageContextTokenUsageMock.mockImplementation((sid: string, targetState: any, targetEmit: any, messageTokens: number, usage?: { inputTokens: number; outputTokens: number }) => updateContextTokenUsageMock(sid, targetState, targetEmit, 20_000 + messageTokens, usage))
|
||||
// Minimal bridge stub: chat() resolves to a started run and streamOutput yields
// one chunk marked done/completed.
const bridge = {
|
||||
chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
|
||||
contextEstimate: vi.fn(),
|
||||
streamOutput: vi.fn(async function* () {
|
||||
yield { run_id: 'run-1', done: true, status: 'completed', output: 'done' }
|
||||
}),
|
||||
} as any
|
||||
|
||||
const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
|
||||
await handleBridgeRun(
|
||||
nsp,
|
||||
socket,
|
||||
{ input: 'hello', session_id: 'session-1' },
|
||||
'default',
|
||||
sessionMap,
|
||||
bridge,
|
||||
false,
|
||||
vi.fn(),
|
||||
vi.fn(),
|
||||
)
|
||||
|
||||
expect(bridge.contextEstimate).not.toHaveBeenCalled()
|
||||
expect(updateMessageContextTokenUsageMock).toHaveBeenCalledWith(
|
||||
'session-1',
|
||||
state,
|
||||
expect.any(Function),
|
||||
// presumably 11 input + 7 output tokens from the mocked usage values — verify against handleBridgeRun
18,
|
||||
{ inputTokens: 11, outputTokens: 7 },
|
||||
)
|
||||
// 20_018 = 20_000 overhead added by the override above + 18 message tokens.
expect(state.contextTokens).toBe(20_018)
|
||||
expect(emit).toHaveBeenCalledWith('run.completed', expect.objectContaining({
|
||||
contextTokens: 20_018,
|
||||
}))
|
||||
})
|
||||
|
||||
it('refreshes full context tokens when a bridge run fails', async () => {
|
||||
const emit = vi.fn()
|
||||
const nsp = makeNamespace(emit)
|
||||
|
||||
@@ -6,6 +6,17 @@ const getCompressionSnapshotMock = vi.fn()
|
||||
const getModelContextLengthMock = vi.fn()
|
||||
const calcAndUpdateUsageMock = vi.fn()
|
||||
const estimateUsageTokensFromMessagesMock = vi.fn()
|
||||
/**
 * Test double for `updateMessageContextTokenUsage`.
 *
 * Stores `messageTokens` directly as `state.contextTokens` (nothing is added
 * to it), emits a single `usage.updated` event through `emit`, and returns
 * `messageTokens`.
 *
 * @param sid - session id, echoed as `session_id` in the emitted payload
 * @param state - session state object; mutated (`contextTokens` is overwritten)
 * @param emit - emitter called as `emit('usage.updated', payload)`
 * @param messageTokens - value stored on the state and reported as `contextTokens`
 * @param usage - optional input/output token counts for the payload
 * @returns the `messageTokens` argument, unchanged
 */
const updateMessageContextTokenUsageMock = vi.fn((sid: string, state: any, emit: any, messageTokens: number, usage?: { inputTokens: number; outputTokens: number }) => {
|
||||
state.contextTokens = messageTokens
|
||||
emit('usage.updated', {
|
||||
event: 'usage.updated',
|
||||
session_id: sid,
|
||||
// Token counts: explicit `usage` first, then whatever is on `state`, then 0.
inputTokens: usage?.inputTokens ?? state.inputTokens ?? 0,
|
||||
outputTokens: usage?.outputTokens ?? state.outputTokens ?? 0,
|
||||
contextTokens: messageTokens,
|
||||
})
|
||||
return messageTokens
|
||||
})
|
||||
const compressorCompressMock = vi.fn()
|
||||
const readConfigYamlForProfileMock = vi.fn()
|
||||
const compressorConstructorMock = vi.fn()
|
||||
@@ -55,6 +66,7 @@ vi.mock('../../packages/server/src/services/logger', () => ({
|
||||
// Registers a vitest module mock for run-chat/usage exposing three spies
// declared earlier in this file as its exports.
vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({
|
||||
calcAndUpdateUsage: calcAndUpdateUsageMock,
|
||||
estimateUsageTokensFromMessages: estimateUsageTokensFromMessagesMock,
|
||||
updateMessageContextTokenUsage: updateMessageContextTokenUsageMock,
|
||||
}))
|
||||
|
||||
vi.mock('../../packages/server/src/services/hermes/run-chat/message-format', () => ({
|
||||
@@ -69,6 +81,7 @@ describe('run chat compression trigger', () => {
|
||||
getModelContextLengthMock.mockReset()
|
||||
calcAndUpdateUsageMock.mockReset()
|
||||
estimateUsageTokensFromMessagesMock.mockReset()
|
||||
updateMessageContextTokenUsageMock.mockClear()
|
||||
compressorCompressMock.mockReset()
|
||||
compressorConstructorMock.mockReset()
|
||||
readConfigYamlForProfileMock.mockReset()
|
||||
@@ -189,13 +202,14 @@ describe('run chat compression trigger', () => {
|
||||
},
|
||||
})
|
||||
|
||||
const emit = vi.fn()
|
||||
const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
|
||||
const history = await buildCompressedHistory(
|
||||
'session-1',
|
||||
'default',
|
||||
'http://upstream',
|
||||
undefined,
|
||||
vi.fn(),
|
||||
emit,
|
||||
new Map(),
|
||||
{},
|
||||
vi.fn(async () => 120_000),
|
||||
@@ -203,6 +217,13 @@ describe('run chat compression trigger', () => {
|
||||
|
||||
expect(history).toEqual([{ role: 'user', content: 'compressed by full context estimate' }])
|
||||
expect(compressorCompressMock).toHaveBeenCalledTimes(1)
|
||||
expect(updateMessageContextTokenUsageMock).toHaveBeenCalledWith(
|
||||
'session-1',
|
||||
expect.any(Object),
|
||||
emit,
|
||||
1_000,
|
||||
{ inputTokens: 1_000, outputTokens: 0 },
|
||||
)
|
||||
})
|
||||
|
||||
it('emits full context token usage when the full estimate is under threshold', async () => {
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { countTokens } from '../../packages/server/src/lib/context-compressor'
|
||||
import { estimateUsageTokensFromMessages } from '../../packages/server/src/services/hermes/run-chat/usage'
|
||||
import {
|
||||
contextTokensWithCachedOverhead,
|
||||
estimateUsageTokensFromMessages,
|
||||
updateMessageContextTokenUsage,
|
||||
} from '../../packages/server/src/services/hermes/run-chat/usage'
|
||||
|
||||
describe('run-chat usage token estimates', () => {
|
||||
it('counts message content instead of serialized message payloads', () => {
|
||||
@@ -30,4 +34,58 @@ describe('run-chat usage token estimates', () => {
|
||||
expect(usage.inputTokens).toBe(0)
|
||||
expect(usage.outputTokens).toBe(countTokens('calling tool') + countTokens(String(messages[0].tool_calls || '')))
|
||||
})
|
||||
|
||||
// Verifies that updateMessageContextTokenUsage reports the message tokens plus the
// fixed context size found on state.bridgeContext, both on the state and in the
// emitted `usage.updated` payload, while passing the usage counts through.
it('adds cached bridge fixed context when updating full context usage', () => {
|
||||
const emit = vi.fn()
|
||||
// Bare session-state literal carrying a cached fixed-context size of 20_000.
const state = {
|
||||
messages: [],
|
||||
isWorking: false,
|
||||
events: [],
|
||||
queue: [],
|
||||
bridgeContext: { fixedContextTokens: 20_000 },
|
||||
} as any
|
||||
|
||||
const contextTokens = updateMessageContextTokenUsage(
|
||||
'session-1',
|
||||
state,
|
||||
emit,
|
||||
1_569,
|
||||
{ inputTokens: 1_200, outputTokens: 369 },
|
||||
)
|
||||
|
||||
// 21_569 = 20_000 fixed context + 1_569 message tokens.
expect(contextTokens).toBe(21_569)
|
||||
expect(state.contextTokens).toBe(21_569)
|
||||
expect(emit).toHaveBeenCalledWith('usage.updated', expect.objectContaining({
|
||||
session_id: 'session-1',
|
||||
inputTokens: 1_200,
|
||||
outputTokens: 369,
|
||||
contextTokens: 21_569,
|
||||
}))
|
||||
})
|
||||
|
||||
// Verifies that with no `bridgeContext` on the state, both
// contextTokensWithCachedOverhead and updateMessageContextTokenUsage report the
// message token count unchanged.
it('falls back to message tokens when bridge fixed context is missing', () => {
|
||||
const emit = vi.fn()
|
||||
// Same bare state literal as the previous test, but without `bridgeContext`.
const state = {
|
||||
messages: [],
|
||||
isWorking: false,
|
||||
events: [],
|
||||
queue: [],
|
||||
} as any
|
||||
|
||||
expect(contextTokensWithCachedOverhead(state, 1_569)).toBe(1_569)
|
||||
|
||||
const contextTokens = updateMessageContextTokenUsage(
|
||||
'session-1',
|
||||
state,
|
||||
emit,
|
||||
1_569,
|
||||
{ inputTokens: 1_200, outputTokens: 369 },
|
||||
)
|
||||
|
||||
// No overhead available, so the total equals the message tokens passed in.
expect(contextTokens).toBe(1_569)
|
||||
expect(state.contextTokens).toBe(1_569)
|
||||
expect(emit).toHaveBeenCalledWith('usage.updated', expect.objectContaining({
|
||||
contextTokens: 1_569,
|
||||
}))
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user