Account for full context tokens in compression (#908)
* Account for full context tokens in compression
* Fix group chat final context updates

---------

Co-authored-by: Codex <codex@openai.com>
This commit is contained in:
@@ -203,6 +203,114 @@ describe('ContextEngine.buildContext', () => {
|
||||
expect(mockSummarize).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
// Under-threshold case: the value returned by the injected estimator is
// reported on `result.meta`, and the summarizer is never invoked.
it('records full context token estimates without compressing when under threshold', async () => {
  const history = makeMessages(3)
  const estimator = vi.fn().mockResolvedValue(19_379)
  mockFetcher.getMessages = vi.fn().mockReturnValue(history)

  const { meta } = await engine.buildContext({
    roomId: 'room-1',
    agentId: 'agent-1',
    agentName: 'Claude',
    agentDescription: 'Helper',
    agentSocketId: 'agent-socket',
    roomName: 'general',
    memberNames: ['Alice'],
    members: [{ userId: 'u1', name: 'Alice', description: '' }],
    upstream: 'http://localhost:8642',
    apiKey: null,
    currentMessage: history[history.length - 1],
    contextTokenEstimator: estimator,
  })

  // No compression, but both the full-context and message-only estimates are recorded.
  expect(meta.compressed).toBe(false)
  expect(meta.contextTokenEstimate).toBe(19_379)
  expect(meta.messageTokenEstimate).toBeGreaterThan(0)

  // The estimator receives the assembled messages plus a second string
  // argument that mentions the agent name in quotes.
  const assistantEntry = { role: 'assistant', content: expect.stringContaining('[Claude]') }
  expect(estimator).toHaveBeenCalledWith(
    expect.arrayContaining([assistantEntry]),
    expect.stringContaining('"Claude"'),
  )
  expect(mockSummarize).not.toHaveBeenCalled()
})
|
||||
|
||||
// With 20 messages of history and an estimator that reports 120_000 tokens,
// the engine is expected to compress: one summarize call, one snapshot saved.
it('uses full context token estimates to trigger group compression', async () => {
  const history = makeMessages(20)
  const latest = history[history.length - 1]
  const largeEstimate = vi.fn().mockResolvedValue(120_000)
  mockFetcher.getMessages = vi.fn().mockReturnValue(history)

  const { meta } = await engine.buildContext({
    roomId: 'room-1',
    agentId: 'agent-1',
    agentName: 'Claude',
    agentDescription: 'Helper',
    agentSocketId: 'agent-socket',
    roomName: 'general',
    memberNames: [],
    members: [],
    upstream: 'http://localhost:8642',
    apiKey: null,
    currentMessage: latest,
    contextTokenEstimator: largeEstimate,
  })

  expect(meta.compressed).toBe(true)
  expect(meta.contextTokenEstimate).toBe(120_000)
  expect(mockSummarize).toHaveBeenCalledTimes(1)
  expect(mockFetcher.saveContextSnapshot).toHaveBeenCalledTimes(1)
})
|
||||
|
||||
// Only 4 messages of history while the estimator reports 120_000 tokens:
// buildContext must reject rather than summarize or persist a snapshot.
it('throws when group prompt and tools exceed threshold with too little history to compress', async () => {
  const history = makeMessages(4)
  const latest = history[history.length - 1]
  const largeEstimate = vi.fn().mockResolvedValue(120_000)
  mockFetcher.getMessages = vi.fn().mockReturnValue(history)

  await expect(
    engine.buildContext({
      roomId: 'room-1',
      agentId: 'agent-1',
      agentName: 'Claude',
      agentDescription: 'Helper',
      agentSocketId: 'agent-socket',
      roomName: 'general',
      memberNames: [],
      members: [],
      upstream: 'http://localhost:8642',
      apiKey: null,
      currentMessage: latest,
      contextTokenEstimator: largeEstimate,
    }),
  ).rejects.toThrow('Context window is too small')

  // The failure must happen before any summarization or snapshot write.
  expect(mockSummarize).not.toHaveBeenCalled()
  expect(mockFetcher.saveContextSnapshot).not.toHaveBeenCalled()
})
|
||||
|
||||
// Same rejection as the no-snapshot case, but with a stored snapshot present
// (last covered message id 'msg-9', timestamp taken from history[9]).
it('throws on snapshot path when overhead plus new messages exceed threshold without compressible history', async () => {
  const history = makeMessages(12)
  const storedSnapshot = {
    roomId: 'room-1',
    summary: 'Existing summary',
    lastMessageId: 'msg-9',
    lastMessageTimestamp: history[9].timestamp,
    updatedAt: Date.now(),
  }
  const largeEstimate = vi.fn().mockResolvedValue(120_000)
  mockFetcher.getMessages = vi.fn().mockReturnValue(history)
  mockFetcher.getContextSnapshot = vi.fn().mockReturnValue(storedSnapshot)

  await expect(
    engine.buildContext({
      roomId: 'room-1',
      agentId: 'agent-1',
      agentName: 'Claude',
      agentDescription: 'Helper',
      agentSocketId: 'agent-socket',
      roomName: 'general',
      memberNames: [],
      members: [],
      upstream: 'http://localhost:8642',
      apiKey: null,
      currentMessage: history[history.length - 1],
      contextTokenEstimator: largeEstimate,
    }),
  ).rejects.toThrow('Context window is too small')

  // Neither a new summary nor a replacement snapshot may be produced.
  expect(mockSummarize).not.toHaveBeenCalled()
  expect(mockFetcher.saveContextSnapshot).not.toHaveBeenCalled()
})
|
||||
|
||||
it('splits into head/tail and compresses middle when over threshold', async () => {
|
||||
const messages = makeMessages(20)
|
||||
mockFetcher.getMessages = vi.fn().mockReturnValue(messages)
|
||||
|
||||
@@ -27,6 +27,7 @@ vi.mock('../../packages/server/src/services/auth', () => ({
|
||||
}))
|
||||
|
||||
import { AgentClients } from '../../packages/server/src/services/hermes/group-chat/agent-clients'
|
||||
import { GroupChatServer } from '../../packages/server/src/services/hermes/group-chat'
|
||||
import { groupChatRoutes, setGroupChatServer } from '../../packages/server/src/routes/hermes/group-chat'
|
||||
|
||||
function routeHandler(path: string, method: string) {
|
||||
@@ -222,4 +223,37 @@ describe('Group Chat member/agent identity sync', () => {
|
||||
members: [{ id: 'member-1', userId: 'human-1', name: 'Han', description: '', joinedAt: 1 }],
|
||||
})
|
||||
})
|
||||
|
||||
// Builds a GroupChatServer without running its constructor, stubs only the
// collaborators assigned below, and checks that processMentions fires for a
// user message but not for an assistant message sent from the agent socket.
it('routes @mentions only from user messages, not agent replies', () => {
  const emit = vi.fn()
  const server = Object.create(GroupChatServer.prototype) as any

  // Room stub: everyone is online; the socket id decides which member is returned.
  const room = {
    hasOnlineMember: vi.fn(() => true),
    getOnlineMemberBySocketId: vi.fn((socketId: string) => {
      if (socketId === 'agent-socket') {
        return { userId: 'agent-1', name: '丫鬟' }
      }
      return { userId: 'human-1', name: 'Human' }
    }),
  }
  server.rooms = new Map([['room-1', room]])
  server.socketUserMap = new Map<string, string>()
    .set('human-socket', 'human-1')
    .set('agent-socket', 'agent-1')
  server.userInfoMap = new Map<string, { name: string; description: string }>()
    .set('human-1', { name: 'Human', description: '' })
    .set('agent-1', { name: '丫鬟', description: '' })
  server.agentClients = { processMentions: vi.fn(async () => undefined) }
  server.storage = {
    saveMessageAndRefreshRoom: vi.fn((msg: any) => ({ message: msg, totalTokens: 123 })),
  }
  server.nsp = { to: vi.fn(() => ({ emit })) }

  // A human "@all" message is routed to the agents exactly once.
  const humanPayload = { roomId: 'room-1', content: '@all hi', role: 'user' }
  server.handleMessage({ id: 'human-socket' }, humanPayload, vi.fn())
  expect(server.agentClients.processMentions).toHaveBeenCalledTimes(1)

  // Reset the call log, then send the same kind of mention as an agent reply.
  server.agentClients.processMentions.mockClear()
  const agentPayload = { roomId: 'room-1', content: '@all agent says hi', role: 'assistant', mentionDepth: 1 }
  server.handleMessage({ id: 'agent-socket' }, agentPayload, vi.fn())
  expect(server.agentClients.processMentions).not.toHaveBeenCalled()
})
|
||||
})
|
||||
|
||||
@@ -162,6 +162,144 @@ describe('run chat compression trigger', () => {
|
||||
)
|
||||
})
|
||||
|
||||
// The usage mock reports only 1_000 input tokens while the full-context
// estimator reports 120_000; the test expects the compressor to run once and
// its output to be returned as the history.
it('uses full context estimates for compression threshold decisions', async () => {
  // Ten stored rows: even indexes and the final row (index 9) are user turns.
  const messages = Array.from({ length: 10 }, (_, index) => {
    const fromUser = index === 9 || index % 2 === 0
    return {
      id: index + 1,
      session_id: 'session-1',
      role: fromUser ? 'user' : 'assistant',
      content: `message ${index}`,
      timestamp: index + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages })
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 })
  compressorCompressMock.mockResolvedValue({
    messages: [{ role: 'user', content: 'compressed by full context estimate' }],
    meta: {
      compressed: true,
      llmCompressed: true,
      totalMessages: 9,
      summaryTokenEstimate: 1,
      verbatimCount: 0,
      compressedStartIndex: 0,
    },
  })

  const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const fullContextEstimator = vi.fn(async () => 120_000)
  const history = await buildCompressedHistory(
    'session-1',
    'default',
    'http://upstream',
    undefined,
    vi.fn(),
    new Map(),
    {},
    fullContextEstimator,
  )

  expect(history).toEqual([{ role: 'user', content: 'compressed by full context estimate' }])
  expect(compressorCompressMock).toHaveBeenCalledTimes(1)
})
|
||||
|
||||
// When the estimator reports 19_379 tokens, the compressor is not called and
// a 'usage.updated' event carries that figure as `contextTokens`.
it('emits full context token usage when the full estimate is under threshold', async () => {
  const emit = vi.fn()
  const contextTokenEstimator = vi.fn(async () => 19_379)

  // Ten stored rows: even indexes and the final row (index 9) are user turns.
  const messages = Array.from({ length: 10 }, (_, index) => {
    const fromUser = index === 9 || index % 2 === 0
    return {
      id: index + 1,
      session_id: 'session-1',
      role: fromUser ? 'user' : 'assistant',
      content: `message ${index}`,
      timestamp: index + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages })
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 900 })

  const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const history = await buildCompressedHistory(
    'session-1',
    'default',
    'http://upstream',
    undefined,
    emit,
    new Map(),
    {},
    contextTokenEstimator,
  )

  // Nine entries come back uncompressed (presumably all but the latest row; confirm against buildCompressedHistory).
  expect(history).toHaveLength(9)
  const firstTurn = { role: 'user', content: 'message 0' }
  expect(contextTokenEstimator).toHaveBeenCalledWith(expect.arrayContaining([firstTurn]))

  const expectedUsage = expect.objectContaining({
    event: 'usage.updated',
    session_id: 'session-1',
    inputTokens: 1_000,
    outputTokens: 900,
    contextTokens: 19_379,
  })
  expect(emit).toHaveBeenCalledWith('usage.updated', expectedUsage)
  expect(compressorCompressMock).not.toHaveBeenCalled()
})
|
||||
|
||||
// Empty session history with an estimator reporting 120_000 tokens: the call
// must reject with ContextWindowTooSmallError, emit no usage update, and
// never reach the compressor.
it('throws when fixed prompt and tool schemas exceed threshold before any history exists', async () => {
  const emit = vi.fn()
  getSessionDetailMock.mockReturnValue({ messages: [] })

  const { buildCompressedHistory, ContextWindowTooSmallError } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const fullContextEstimator = vi.fn(async () => 120_000)

  await expect(
    buildCompressedHistory(
      'session-1',
      'default',
      'http://upstream',
      undefined,
      emit,
      new Map(),
      {},
      fullContextEstimator,
    ),
  ).rejects.toBeInstanceOf(ContextWindowTooSmallError)

  expect(emit).not.toHaveBeenCalledWith('usage.updated', expect.anything())
  expect(compressorCompressMock).not.toHaveBeenCalled()
})
|
||||
|
||||
// Five stored rows with an estimator reporting 120_000 tokens: the call must
// reject with ContextWindowTooSmallError instead of invoking the compressor.
it('throws instead of compressing when full context is over threshold but history is too short', async () => {
  // Even indexes and the final row (index 4) are user turns.
  const messages = Array.from({ length: 5 }, (_, index) => {
    const fromUser = index === 4 || index % 2 === 0
    return {
      id: index + 1,
      session_id: 'session-1',
      role: fromUser ? 'user' : 'assistant',
      content: `message ${index}`,
      timestamp: index + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages })
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 })

  const { buildCompressedHistory, ContextWindowTooSmallError } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const fullContextEstimator = vi.fn(async () => 120_000)

  await expect(
    buildCompressedHistory(
      'session-1',
      'default',
      'http://upstream',
      undefined,
      vi.fn(),
      new Map(),
      {},
      fullContextEstimator,
    ),
  ).rejects.toBeInstanceOf(ContextWindowTooSmallError)

  expect(compressorCompressMock).not.toHaveBeenCalled()
})
|
||||
|
||||
it('merges partial compression config with defaults', async () => {
|
||||
const messages = Array.from({ length: 10 }, (_, index) => ({
|
||||
id: index + 1,
|
||||
|
||||
Reference in New Issue
Block a user