fix context token resume (#1039)

This commit is contained in:
ekko
2026-05-26 16:32:07 +08:00
committed by GitHub
parent e686f0277a
commit ad1cab277a
13 changed files with 959 additions and 203 deletions
+110 -5
View File
@@ -175,7 +175,7 @@ describe('run chat compression trigger', () => {
)
})
it('uses full context estimates for compression threshold decisions', async () => {
it('uses local context estimates for compression threshold decisions', async () => {
const messages = Array.from({ length: 10 }, (_, index) => ({
id: index + 1,
session_id: 'session-1',
@@ -191,7 +191,7 @@ describe('run chat compression trigger', () => {
getSessionDetailMock.mockReturnValue({ messages })
calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 })
compressorCompressMock.mockResolvedValue({
messages: [{ role: 'user', content: 'compressed by full context estimate' }],
messages: [{ role: 'user', content: 'compressed by local context estimate' }],
meta: {
compressed: true,
llmCompressed: true,
@@ -215,7 +215,7 @@ describe('run chat compression trigger', () => {
vi.fn(async () => 120_000),
)
expect(history).toEqual([{ role: 'user', content: 'compressed by full context estimate' }])
expect(history).toEqual([{ role: 'user', content: 'compressed by local context estimate' }])
expect(compressorCompressMock).toHaveBeenCalledTimes(1)
expect(updateMessageContextTokenUsageMock).toHaveBeenCalledWith(
'session-1',
@@ -226,7 +226,7 @@ describe('run chat compression trigger', () => {
)
})
it('emits full context token usage when the full estimate is under threshold', async () => {
it('emits local context token usage when the local estimate is under threshold', async () => {
const messages = Array.from({ length: 10 }, (_, index) => ({
id: index + 1,
session_id: 'session-1',
@@ -257,7 +257,10 @@ describe('run chat compression trigger', () => {
)
expect(history).toHaveLength(9)
expect(contextTokenEstimator).toHaveBeenCalledWith(expect.arrayContaining([{ role: 'user', content: 'message 0' }]))
expect(contextTokenEstimator).toHaveBeenCalledWith(
expect.arrayContaining([{ role: 'user', content: 'message 0' }]),
1_900,
)
expect(emit).toHaveBeenCalledWith('usage.updated', expect.objectContaining({
event: 'usage.updated',
session_id: 'session-1',
@@ -268,6 +271,108 @@ describe('run chat compression trigger', () => {
expect(compressorCompressMock).not.toHaveBeenCalled()
})
it('includes current input tokens when estimating snapshot-aware context', async () => {
const messages = Array.from({ length: 10 }, (_, index) => ({
id: index + 1,
session_id: 'session-1',
role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant',
content: `message ${index}`,
timestamp: index + 1,
tool_call_id: null,
tool_calls: null,
tool_name: null,
finish_reason: null,
reasoning_content: null,
}))
getSessionDetailMock.mockReturnValue({ messages })
getCompressionSnapshotMock.mockReturnValue({
summary: 'previous summary',
lastMessageIndex: 4,
messageCountAtTime: 5,
})
calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 10, outputTokens: 0 })
estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 1_000, outputTokens: 0 })
const emit = vi.fn()
const contextTokenEstimator = vi.fn(async (_messages, messageTokens: number) => 20_000 + messageTokens)
const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
await buildCompressedHistory(
'session-1',
'default',
'http://upstream',
undefined,
emit,
new Map(),
{},
contextTokenEstimator,
700,
)
expect(contextTokenEstimator).toHaveBeenCalledWith(expect.any(Array), 1_700)
expect(emit).toHaveBeenCalledWith('usage.updated', expect.objectContaining({
contextTokens: 21_700,
}))
expect(compressorCompressMock).not.toHaveBeenCalled()
})
it('keeps current input tokens in the compression completed context total', async () => {
const messages = Array.from({ length: 10 }, (_, index) => ({
id: index + 1,
session_id: 'session-1',
role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant',
content: `message ${index}`,
timestamp: index + 1,
tool_call_id: null,
tool_calls: null,
tool_name: null,
finish_reason: null,
reasoning_content: null,
}))
getSessionDetailMock.mockReturnValue({ messages })
calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 100, outputTokens: 0 })
estimateUsageTokensFromMessagesMock.mockImplementation((items: any[]) => {
if (items?.[0]?.content === 'compressed result') return { inputTokens: 1_000, outputTokens: 0 }
return { inputTokens: 100, outputTokens: 0 }
})
compressorCompressMock.mockResolvedValue({
messages: [{ role: 'user', content: 'compressed result' }],
meta: {
compressed: true,
llmCompressed: true,
totalMessages: 9,
summaryTokenEstimate: 1,
verbatimCount: 0,
compressedStartIndex: 0,
},
})
const emit = vi.fn()
const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
await buildCompressedHistory(
'session-1',
'default',
'http://upstream',
undefined,
emit,
new Map(),
{},
vi.fn(async () => 120_000),
700,
)
expect(updateMessageContextTokenUsageMock).toHaveBeenCalledWith(
'session-1',
expect.any(Object),
emit,
1_700,
{ inputTokens: 100, outputTokens: 0 },
)
expect(emit).toHaveBeenCalledWith('compression.completed', expect.objectContaining({
afterTokens: 1_700,
contextTokens: 1_700,
}))
})
it('throws when fixed prompt and tool schemas exceed threshold before any history exists', async () => {
getSessionDetailMock.mockReturnValue({ messages: [] })
const emit = vi.fn()