Files
Hermes-ui/packages/server/src/lib/context-compressor/index.ts
T

843 lines
30 KiB
TypeScript

/**
* Chat Context Compressor
*
* Compresses 1:1 chat conversation history before sending to upstream.
* Uses the Hermes structured summary prompt for LLM-based compression.
*
* Algorithm:
* 1. If total tokens < trigger threshold → return as-is
* 2. Pre-clean: truncate old tool results (no LLM call)
* 3. Load snapshot from SQLite for incremental update
* 4. Keep last 10 messages verbatim (tail protection by message count)
* 5. Summarize everything before the tail
* 6. Save snapshot: last_message_index = index where compression ends
*/
import { encodingForModel, getEncoding } from 'js-tiktoken'
import { randomUUID } from 'crypto'
import { mkdir, writeFile } from 'fs/promises'
import { resolve } from 'path'
import { logger } from '../../services/logger'
import { AgentBridgeClient, type AgentBridgeRunResult } from '../../services/hermes/agent-bridge'
import {
getCompressionSnapshot,
saveCompressionSnapshot,
deleteCompressionSnapshot,
} from '../../db/hermes/compression-snapshot'
// ─── Types ───────────────────────────────────────────────
/**
 * One element of a multi-part message body.
 *
 * When a message is flattened to text in this module, 'text' blocks contribute
 * their `text`, while 'image' and 'file' blocks are replaced by a
 * `[Image: <path>]` / `[File: <path>]` placeholder.
 */
export interface ContentBlock {
/** Selects how the block is rendered when the message is flattened to text. */
type: 'text' | 'image' | 'file'
/** Text payload; read for `type: 'text'` blocks (missing text is treated as ''). */
text?: string
/** Path shown inside the image/file placeholder (missing path is rendered as ''). */
path?: string
/** Inline source descriptor. Not read by the code visible in this module. */
source?: { type: string; media_type?: string; data?: string }
}
/** A single conversation turn as consumed and produced by the compressor. */
export interface ChatMessage {
/** Speaker role. This module branches on 'user', 'assistant', 'system' and 'tool'. */
role: string
/** Either plain text or a list of content blocks that is flattened to text when needed. */
content: string | ContentBlock[]
/**
 * Tool invocations attached to an assistant turn. Only `function.name` and
 * `function.arguments` are read here; `arguments` is handled as an opaque string
 * (presumably JSON — confirm with the producer).
 */
tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }>
/** NOTE(review): presumably ties a tool result to its originating call; not read in this module. */
tool_call_id?: string
/** Tool name; used as the label of 'tool' messages when they are serialized or pruned. */
name?: string
/** Not read by the code visible in this module. */
reasoning_content?: string | null
}
/**
 * Tunable limits for ChatContextCompressor. Fields that the caller leaves out
 * are taken from DEFAULT_COMPRESSION_CONFIG.
 */
export interface CompressionConfig {
/**
 * Token budget for the assembled context: the incremental path compares the
 * assembled messages against it, and the final result is cut down to it.
 * DEFAULT_COMPRESSION_CONFIG uses 100_000.
 * NOTE(review): an earlier note said "default: contextLength / 2" — presumably
 * applied by callers that know the model's context length; confirm.
 */
triggerTokens: number
/** Summary size, in tokens, that the summarizer prompt asks for (default: 8000) */
summaryBudget: number
/** Number of earliest messages to keep verbatim (default: 0) */
headMessageCount: number
/** Number of recent messages to keep verbatim (default: 10) */
tailMessageCount: number
/** Timeout, in milliseconds, for the LLM summarization call (default: 300_000) */
summarizationTimeoutMs: number
}
/**
 * Baseline configuration. ChatContextCompressor spreads caller overrides on
 * top of this object, so every field here acts as the fallback value.
 */
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
// Token budget for the assembled context.
triggerTokens: 100_000,
// Target size of the generated summary, in tokens.
summaryBudget: 8_000,
// No leading messages are pinned by default.
headMessageCount: 0,
// The ten most recent messages are kept verbatim.
tailMessageCount: 10,
// 300_000 ms = 5 minutes.
summarizationTimeoutMs: 300_000,
}
/** Outcome of a compression pass: the messages to send plus bookkeeping. */
export interface CompressedResult {
/** Messages to forward upstream (head, summary message, tail — or a fallback). */
messages: ChatMessage[]
meta: {
/** Length of the input message array. */
totalMessages: number
/** true when the result contains a summary message (freshly generated or reused from a snapshot). */
compressed: boolean
/** true = actually called LLM to summarize; false = assembled from existing snapshot or returned as-is */
llmCompressed: boolean
/** Token count of the summary prefix concatenated with the summary text; 0 when there is no summary. */
summaryTokenEstimate: number
/**
 * Number of original messages kept verbatim next to the summary. Set to 0
 * when budget enforcement reduced the result to the summary message alone.
 */
verbatimCount: number
/**
 * Index (in the input array) of the last message covered by the summary;
 * -1 when nothing was summarized. Despite the name this is an end index.
 */
compressedStartIndex: number
}
}
/** Optional routing hints for the summarization run issued by callSummarizer. */
export interface SummarizerOptions {
/** Profile used for the bridge request; callSummarizer falls back to 'default'. */
profile?: string
/** Model override; forwarded in the bridge request only when truthy. */
model?: string | null
/** Provider override; forwarded in the bridge request only when truthy. */
provider?: string | null
/** Worker key for the bridge request; when absent one is derived from the profile and a generated session id. */
workerKey?: string
}
/** Text sent as the `message` of the summarization bridge request; the actual prompt travels in the conversation history. */
const SUMMARIZER_TRIGGER_MESSAGE = 'Generate the context checkpoint summary now.'
/** Directory for the development debug dump, resolved against process.cwd(). */
const SUMMARIZER_DEBUG_DIR = 'logs/context-compressor'
/** File name of the development debug dump; the same file is rewritten on every dump. */
const SUMMARIZER_DEBUG_FILE = 'summarizer-debug.json'
/**
 * Development-only aid: stores the given payload as pretty-printed JSON in
 * `<cwd>/logs/context-compressor/summarizer-debug.json`.
 *
 * Does nothing unless NODE_ENV is exactly 'development'. Any failure while
 * creating the directory or writing the file is logged as a warning and
 * otherwise ignored, so the dump can never break summarization.
 *
 * @param payload Arbitrary JSON-serializable data describing the request.
 */
async function writeSummarizerDebugDump(payload: Record<string, unknown>): Promise<void> {
  const isDevelopment = process.env.NODE_ENV === 'development'
  if (!isDevelopment) return

  try {
    const dumpDir = resolve(process.cwd(), SUMMARIZER_DEBUG_DIR)
    const dumpFile = resolve(dumpDir, SUMMARIZER_DEBUG_FILE)
    await mkdir(dumpDir, { recursive: true })
    // Serialize only after the directory exists, then terminate with a newline.
    const serialized = `${JSON.stringify(payload, null, 2)}\n`
    await writeFile(dumpFile, serialized, 'utf8')
  } catch (err) {
    logger.warn(err, '[context-compressor] failed to write summarizer debug dump')
  }
}
// ─── Token counting ─────────────────────────────────────
/** Module-wide cl100k_base encoder; stays null until the first token count is requested. */
let _encoder: ReturnType<typeof getEncoding> | null = null

/** Returns the shared encoder, constructing it on first use. */
function getEncoder() {
  if (_encoder === null) {
    const created = getEncoding('cl100k_base')
    _encoder = created
    return created
  }
  return _encoder
}
/**
 * Counts the tokens in `text` with the shared cl100k_base encoder.
 *
 * If encoding throws, a character heuristic is used instead: characters in
 * the CJK / Hangul / full-width ranges count 1.5 tokens each and every other
 * UTF-16 code unit counts a quarter token; the sum is rounded up.
 *
 * @param text Text to measure.
 * @returns Token count (exact when the encoder works, estimated otherwise).
 */
export function countTokens(text: string): number {
  try {
    const tokenIds = getEncoder().encode(text)
    return tokenIds.length
  } catch {
    const wideMatches = text.match(/[\u2e80-\u9fff\uac00-\ud7af\u3000-\u303f\uff00-\uffef]/g)
    const wideCount = wideMatches ? wideMatches.length : 0
    const narrowCount = text.length - wideCount
    return Math.ceil(wideCount * 1.5 + narrowCount / 4)
  }
}
/** Encoders already built for a model name, so repeated counts do not rebuild them. */
const _modelEncoders = new Map<string, ReturnType<typeof encodingForModel>>()

/**
 * Counts tokens using the encoding that belongs to a specific model.
 *
 * The encoder for each model is created once and reused on later calls.
 * If the model is unknown to the tokenizer library, or encoding throws, the
 * generic `countTokens` is used instead.
 *
 * @param text Text to measure.
 * @param model Model name as understood by `encodingForModel`.
 * @returns Token count for `text`.
 */
export function countTokensForModel(text: string, model: string): number {
  try {
    let encoder = _modelEncoders.get(model)
    if (!encoder) {
      // Throws for unknown model names; only successful lookups are cached.
      encoder = encodingForModel(model as Parameters<typeof encodingForModel>[0])
      _modelEncoders.set(model, encoder)
    }
    return encoder.encode(text).length
  } catch {
    return countTokens(text)
  }
}
/**
 * Estimates the tokens of one message from its `content` only.
 *
 * String content is counted directly. Block content is first flattened:
 * text blocks contribute their text, image/file blocks a `[Image: path]` /
 * `[File: path]` placeholder. Any other content shape counts as 0.
 */
function messageTokenEstimate(message: ChatMessage): number {
  const { content } = message
  if (typeof content === 'string') return countTokens(content)
  if (!Array.isArray(content)) return 0

  let flattened = ''
  for (const block of content) {
    switch (block.type) {
      case 'text':
        flattened += block.text || ''
        break
      case 'image':
        flattened += `[Image: ${block.path || ''}]`
        break
      case 'file':
        flattened += `[File: ${block.path || ''}]`
        break
      default:
        break
    }
  }
  return countTokens(flattened)
}
/** Sums the per-message token estimates of `messages` (0 for an empty list). */
function messagesTokenEstimate(messages: ChatMessage[]): number {
  let total = 0
  messages.forEach(message => {
    total += messageTokenEstimate(message)
  })
  return total
}
/**
 * Shortens `text` so that it fits into `tokenBudget` tokens and marks the cut.
 *
 * Text that already fits, or a non-positive budget, returns `text` unchanged.
 * Otherwise the longest prefix that fits is kept (found by binary search on
 * the prefix length) and a truncation marker is appended. The marker's own
 * tokens are reserved up front so that prefix + marker stays within the
 * budget; when the budget is smaller than the marker itself, only the marker
 * is returned.
 *
 * @param text Text to shorten.
 * @param tokenBudget Maximum number of tokens for the result.
 * @returns `text` itself, or a truncated prefix followed by the marker.
 */
function truncateTextToTokenBudget(text: string, tokenBudget: number): string {
  if (tokenBudget <= 0 || countTokens(text) <= tokenBudget) return text

  const marker = '\n\n[Summary truncated to fit context budget]'
  // Keep room for the marker; otherwise the result overshoots the budget.
  const bodyBudget = Math.max(0, tokenBudget - countTokens(marker))

  let lo = 0
  let hi = text.length
  while (lo < hi) {
    // Round up so the loop always makes progress when lo + 1 === hi.
    const mid = Math.ceil((lo + hi) / 2)
    if (countTokens(text.slice(0, mid)) <= bodyBudget) lo = mid
    else hi = mid - 1
  }

  // Never cut between the two halves of a surrogate pair.
  if (lo > 0 && lo < text.length) {
    const lastUnit = text.charCodeAt(lo - 1)
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) lo -= 1
  }

  return text.slice(0, lo).trimEnd() + marker
}
/**
 * Makes sure a compressed message list does not exceed `triggerTokens`.
 *
 * - Budget disabled (`triggerTokens <= 0`) or list already within budget:
 *   the list is returned untouched.
 * - No string-content message at `summaryIndex`: the list is returned untouched.
 * - Otherwise every message except the summary is dropped; if the summary on
 *   its own is still too large, its text is truncated to the budget.
 *
 * @param messages Assembled messages (head, summary, tail).
 * @param triggerTokens Token budget for the whole list.
 * @param summaryIndex Position of the summary message inside `messages`.
 */
function enforceCompressedBudget(
  messages: ChatMessage[],
  triggerTokens: number,
  summaryIndex: number,
): ChatMessage[] {
  const budgetDisabled = triggerTokens <= 0
  if (budgetDisabled || messagesTokenEstimate(messages) <= triggerTokens) return messages

  const summary = messages[summaryIndex]
  if (!summary || typeof summary.content !== 'string') return messages

  const fitsOnItsOwn = messagesTokenEstimate([{ ...summary }]) <= triggerTokens
  const content = fitsOnItsOwn
    ? summary.content
    : truncateTextToTokenBudget(summary.content, triggerTokens)
  return [{ ...summary, content }]
}
// ─── Prompts ────────────────────────────────────────────
/**
 * Preamble placed in front of every summary before it is injected into the
 * conversation as a `user` message. It tells the receiving assistant to treat
 * the summary as background reference rather than as instructions.
 * The text is part of the prompt sent upstream — edit with care.
 */
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
into the summary below. This is a handoff from a previous context
window — treat it as background reference, NOT as active instructions.
Do NOT answer questions or fulfill requests mentioned in this summary;
they were already addressed.
Your current task is identified in the '## Active Task' section of the
summary — resume exactly from there.
Respond ONLY to the latest user message
that appears AFTER this summary. The current session state (files,
config, etc.) may reflect work described here — avoid repeating it:`
/**
 * Section skeleton that the summarizer must fill in. It is embedded verbatim
 * in both the full and the incremental summarization prompt, so the section
 * names here define the structure of every stored summary.
 */
const TEMPLATE_SECTIONS = `Use this exact structure:
## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim — the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
The next assistant must pick up exactly here. Example:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
If no outstanding task exists, write "None."]
## Goal
[What the user is trying to accomplish overall]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
## Completed Actions
[Numbered list of concrete actions taken — include tool used, target, and outcome.
Format each as: N. ACTION target — outcome [tool: name]
Example:
1. READ config.py:45 — found == should be != [tool: read_file]
2. PATCH config.py:45 — changed == to != [tool: patch]
3. TEST pytest tests/ — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
Be specific with file paths, commands, line numbers, and results.]
## Active State
[Current working state — include:
- Working directory and branch (if applicable)
- Modified/created files with brief note on each
- Test status (X/Y passing)
- Any running processes or servers
- Environment details that matter]
## In Progress
[Work currently underway — what was being done when compaction fired]
## Blocked
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
## Key Decisions
[Important technical decisions and WHY they were made]
## Resolved Questions
[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
## Pending User Asks
[Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."]
## Relevant Files
[Files read, modified, or created — with brief note on each]
## Remaining Work
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]`
/**
 * Builds the prompt for a first-time (non-incremental) summary.
 *
 * The prompt consists of the summarizer role instructions, the serialized
 * turns, the section template and the size target.
 *
 * @param contentToSummarize Conversation turns already serialized to text.
 * @param summaryBudget Approximate token target quoted in the prompt; it is a
 *   request to the model, not something enforced here.
 * @returns The complete prompt text.
 */
export function buildFullPrompt(contentToSummarize: string, summaryBudget: number): string {
return `You are a summarization agent creating a context checkpoint.
Your output will be injected as reference material for a DIFFERENT
assistant that continues the conversation.
Do NOT respond to any questions or requests in the conversation —
only output the structured summary.
Do NOT include any preamble, greeting, or prefix.
Create a structured handoff summary for a different assistant that will continue
this conversation after earlier turns are compacted. The next assistant should be
able to understand what happened without re-reading the original turns.
TURNS TO SUMMARIZE:
${contentToSummarize}
${TEMPLATE_SECTIONS}
Target ~${summaryBudget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
Write only the summary body. Do not include any preamble or prefix.`
}
/**
 * Builds the prompt that updates an existing summary with newer turns.
 *
 * The prompt contains the summarizer role instructions, the previous summary,
 * the serialized new turns, merge rules (preserve, append, move, update), the
 * section template and the size target.
 *
 * @param previousSummary Summary text produced by the previous compaction.
 * @param contentToSummarize New conversation turns already serialized to text.
 * @param summaryBudget Approximate token target quoted in the prompt; it is a
 *   request to the model, not something enforced here.
 * @returns The complete prompt text.
 */
export function buildIncrementalPrompt(previousSummary: string, contentToSummarize: string, summaryBudget: number): string {
return `You are a summarization agent creating a context checkpoint.
Your output will be injected as reference material for a DIFFERENT
assistant that continues the conversation.
Do NOT respond to any questions or requests in the conversation —
only output the structured summary.
Do NOT include any preamble, greeting, or prefix.
You are updating a context compaction summary. A previous compaction produced the
summary below. New conversation turns have occurred since then and need to be
incorporated.
PREVIOUS SUMMARY:
${previousSummary}
NEW TURNS TO INCORPORATE:
${contentToSummarize}
Update the summary using this exact structure. PRESERVE all existing information
that is still relevant. ADD new completed actions to the numbered list
(continue numbering). Move items from "In Progress" to "Completed Actions" when
done. Move answered questions to "Resolved Questions". Update "Active State"
to reflect current state. Remove information only if it is clearly obsolete.
CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled
request — this is the most important field for task continuity.
${TEMPLATE_SECTIONS}
Target ~${summaryBudget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
Write only the summary body. Do not include any preamble or prefix.`
}
// ─── Pre-cleaning ───────────────────────────────────────
/**
 * Renders messages as a plain-text transcript for the summarizer prompt.
 *
 * Each entry has the form `<label>: <text>` and entries are separated by a
 * blank line. The label is the role, except for tool results, which use
 * `[tool:<name>]` (`unknown` when the name is missing).
 *
 * - Block content is flattened: text blocks verbatim, image/file blocks as
 *   `[Image: path]` / `[File: path]`.
 * - Tool results longer than 5500 characters keep the first 4000 and the last
 *   1500 characters around a truncation note.
 * - Assistant turns with tool calls emit one entry listing the calls
 *   (arguments capped at 1500 characters), followed by a second entry with the
 *   turn's text when that text is not blank.
 *
 * @param messages Messages to serialize, in order.
 * @returns The transcript; '' for an empty list.
 */
export function serializeForSummary(messages: ChatMessage[]): string {
  const renderBlock = (block: ContentBlock): string => {
    switch (block.type) {
      case 'text':
        return block.text || ''
      case 'image':
        return `[Image: ${block.path || ''}]`
      case 'file':
        return `[File: ${block.path || ''}]`
      default:
        return ''
    }
  }

  const flatten = (content: string | ContentBlock[]): string => {
    if (typeof content === 'string') return content
    return Array.isArray(content) ? content.map(block => renderBlock(block)).join('') : ''
  }

  const entries: string[] = []
  for (const msg of messages) {
    const isToolResult = msg.role === 'tool'
    const label = isToolResult ? `[tool:${msg.name || 'unknown'}]` : msg.role

    const flattened = flatten(msg.content || '')
    const body = isToolResult && flattened.length > 5500
      ? `${flattened.slice(0, 4000)}\n... [truncated]\n...${flattened.slice(-1500)}`
      : flattened

    if (msg.role !== 'assistant' || !msg.tool_calls?.length) {
      entries.push(`${label}: ${body}`)
      continue
    }

    const renderedCalls = msg.tool_calls.map(call => {
      const rawArgs = call.function.arguments
      const shownArgs = rawArgs.length > 1500 ? rawArgs.slice(0, 1500) + '...' : rawArgs
      return `[tool_call: ${call.function.name}(${shownArgs})]`
    })
    entries.push(`${label}: ${renderedCalls.join('\n')}`)
    if (body.trim()) entries.push(`${label}: ${body}`)
  }
  return entries.join('\n\n')
}
/**
 * Flattens message content to plain text: strings pass through, text blocks
 * contribute their text, image/file blocks become `[Image: path]` /
 * `[File: path]`, anything else yields ''.
 */
function historyContentToText(content: string | ContentBlock[] | null | undefined): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''
  let text = ''
  for (const block of content) {
    if (block.type === 'text') text += block.text || ''
    else if (block.type === 'image') text += `[Image: ${block.path || ''}]`
    else if (block.type === 'file') text += `[File: ${block.path || ''}]`
  }
  return text
}

/** Caps `text` at `limit` characters, appending '...' when something was cut. */
function clipHistoryText(text: string, limit = 4000): string {
  return text.length > limit ? text.slice(0, limit) + '...' : text
}

/**
 * Convert messages to conversation history format for LLM API.
 * Tool calls are converted to text format within assistant messages.
 *
 * - user / assistant / system turns become `{ role, content }` with the
 *   content flattened to text and capped at 4000 characters.
 * - Assistant turns with tool calls get one `[Calling tool: …]` line per call
 *   (arguments capped at 4000 characters) appended after their text.
 * - Tool results are appended, as `[Tool result: <name>]` plus the capped
 *   result text, to the most recent assistant entry; if there is none yet, a
 *   new assistant entry is created for them.
 * - Messages with any other role are skipped.
 *
 * Block-array content is flattened for every role, including tool results and
 * assistant turns with tool calls.
 *
 * @param messages Source messages, in order.
 * @returns Text-only history entries.
 */
export function buildConversationHistory(messages: ChatMessage[]): Array<{ role: string; content: string }> {
  const result: Array<{ role: string; content: string }> = []
  // Most recent assistant entry in `result`; tool results are appended to it.
  let lastAssistant: { role: string; content: string } | undefined

  for (const msg of messages) {
    if (msg.role === 'tool') {
      const toolText = `[Tool result: ${msg.name || 'unknown'}]\n${clipHistoryText(historyContentToText(msg.content))}`
      if (lastAssistant) {
        lastAssistant.content += `\n\n${toolText}`
      } else {
        // No assistant turn yet: carry the result in a synthetic assistant entry.
        lastAssistant = { role: 'assistant', content: toolText }
        result.push(lastAssistant)
      }
    } else if (msg.role === 'assistant' && msg.tool_calls?.length) {
      const toolsInfo = msg.tool_calls.map(tc => {
        let args = tc.function.arguments
        if (args.length > 4000) args = args.slice(0, 4000) + '...'
        return `[Calling tool: ${tc.function.name} with arguments: ${args}]`
      }).join('\n')
      const text = historyContentToText(msg.content)
      const entry = { role: msg.role, content: text ? `${text}\n\n${toolsInfo}` : toolsInfo }
      result.push(entry)
      lastAssistant = entry
    } else if (msg.role === 'user' || msg.role === 'assistant' || msg.role === 'system') {
      const entry = { role: msg.role, content: clipHistoryText(historyContentToText(msg.content)) }
      result.push(entry)
      if (msg.role === 'assistant') lastAssistant = entry
    }
    // Any other role is skipped.
  }
  return result
}
/**
 * Replaces old tool results with a short one-line preview.
 *
 * The last `keepRecentCount` messages are left untouched. In the messages
 * before them, every `tool` message has its content replaced by
 * `[<name>] <first 100 characters>` (newlines turned into spaces, `...`
 * appended when the content was longer); other messages are passed through
 * as-is. The input array and its messages are not modified.
 *
 * @param messages Full message list, oldest first.
 * @param keepRecentCount Number of trailing messages to protect. Zero or a
 *   negative value protects nothing, so every tool result is pruned.
 * @returns The same array when nothing falls outside the protected window,
 *   otherwise a new array.
 */
export function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
  const keep = Math.max(0, Math.trunc(keepRecentCount))
  if (messages.length <= keep) return messages

  // Split by explicit index: slice(-0) would select the whole array as "tail".
  const splitAt = messages.length - keep
  const head = messages.slice(0, splitAt)
  const tail = messages.slice(splitAt)

  const pruned = head.map(msg => {
    if (msg.role !== 'tool') return msg
    let content = ''
    if (typeof msg.content === 'string') {
      content = msg.content
    } else if (Array.isArray(msg.content)) {
      // Non-text blocks are represented by their type only.
      content = msg.content
        .map(block => (block.type === 'text' ? block.text || '' : `[${block.type}]`))
        .join('')
    }
    const preview = content.slice(0, 100).replace(/\n/g, ' ')
    const truncated = content.length > 100 ? '...' : ''
    return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${truncated}` }
  })
  return [...pruned, ...tail]
}
/**
 * Pruning used on the fallback paths (when LLM summarization did not produce
 * a summary). Currently a plain alias for pruneOldToolResults: tool results
 * outside the last `keepRecentCount` messages are reduced to a short preview.
 */
function pruneFallbackToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
return pruneOldToolResults(messages, keepRecentCount)
}
// ─── LLM Summarization ──────────────────────────────────
/**
 * Runs one summarization through the agent bridge and returns the summary text.
 *
 * A throw-away bridge session is created for the call and destroyed again
 * afterwards, whether the call succeeded or not (errors during destruction are
 * ignored). The prompt is delivered as conversation history; the message that
 * triggers the run is the fixed SUMMARIZER_TRIGGER_MESSAGE.
 *
 * @param upstream Unused; kept so existing call sites keep compiling.
 * @param apiKey Unused; kept so existing call sites keep compiling.
 * @param prompt Full summarization prompt.
 * @param history Unused; kept so existing call sites keep compiling.
 * @param timeoutMs Time allowed for the run. The bridge client and request get
 *   15 seconds on top; the request's `timeout` field carries the value rounded
 *   up to whole seconds.
 * @param previousSummary When given, it is placed before the prompt as an
 *   extra user/assistant exchange.
 * @param summarizer A profile name, or an options object (profile, model,
 *   provider, workerKey).
 * @returns The trimmed summary text.
 * @throws Error when the bridge reports status 'error' or the response is empty.
 */
export async function callSummarizer(
  upstream: string,
  apiKey: string | undefined,
  prompt: string,
  history: Array<{ role: string; content: string }>,
  timeoutMs: number,
  previousSummary?: string,
  summarizer?: string | SummarizerOptions,
): Promise<string> {
  // These parameters are intentionally not used by the bridge-based implementation.
  void upstream
  void apiKey
  void history

  let options: SummarizerOptions
  if (typeof summarizer === 'string') options = { profile: summarizer }
  else options = summarizer || {}
  const profile = options.profile || 'default'

  const promptTurn = { role: 'user', content: prompt }
  const convHistory: Array<{ role: string; content: string }> = previousSummary
    ? [
        { role: 'user', content: `[Previous summary]\n${previousSummary}` },
        { role: 'assistant', content: 'Understood, I will update the summary.' },
        promptTurn,
      ]
    : [promptTurn]

  const bridgeTimeoutMs = timeoutMs + 15_000
  const bridge = new AgentBridgeClient({ timeoutMs: bridgeTimeoutMs })
  const uniqueSuffix = randomUUID().replace(/-/g, '').slice(0, 12)
  const sessionId = `compress_${Date.now().toString(36)}_${uniqueSuffix}`
  const workerKey = options.workerKey || `${profile}:compression:${sessionId}`
  const message = SUMMARIZER_TRIGGER_MESSAGE

  await writeSummarizerDebugDump({
    writtenAt: new Date().toISOString(),
    sessionId,
    workerKey,
    profile,
    model: options.model || null,
    provider: options.provider || null,
    message,
    convHistory,
  })

  try {
    const modelOverride = options.model ? { model: options.model } : {}
    const providerOverride = options.provider ? { provider: options.provider } : {}
    const result = await bridge.request<AgentBridgeRunResult>(
      {
        action: 'chat',
        session_id: sessionId,
        message,
        conversation_history: convHistory,
        profile,
        worker_key: workerKey,
        source: 'api_server',
        wait: true,
        timeout: Math.ceil(timeoutMs / 1000),
        ...modelOverride,
        ...providerOverride,
      },
      { timeoutMs: bridgeTimeoutMs },
    )

    if (result.status === 'error') {
      throw new Error(result.error || 'Summarization bridge run failed')
    }

    // Prefer the structured final response, then the raw output.
    const payload = result.result as any
    const output = String(payload?.final_response || result.output || '').trim()
    if (!output) throw new Error('Empty summarization response')
    return output
  } finally {
    await bridge.destroy(sessionId, profile, workerKey).catch(() => undefined)
  }
}
// ─── Main Compressor ────────────────────────────────────
/**
 * Turns a chat history into `[head…, summary message, tail…]`.
 *
 * The summary is produced by an LLM call (callSummarizer) and, when a session
 * id is available, stored as a snapshot so that later calls only have to fold
 * the messages added since then into the existing summary.
 */
export class ChatContextCompressor {
  private config: CompressionConfig

  /**
   * @param opts.config Overrides that are merged over DEFAULT_COMPRESSION_CONFIG.
   */
  constructor(opts?: {
    config?: Partial<CompressionConfig>
  }) {
    this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...opts?.config }
  }

  /**
   * Assemble and compress conversation history.
   *
   * Flow as implemented:
   * 1. Load the stored snapshot for `sessionId` (skipped without a session id).
   * 2. Snapshot whose index lies inside `messages` → incremental compression:
   *    previous summary + messages after the snapshot index.
   * 3. Snapshot whose index is out of range (stale) → its summary is reused and
   *    everything except the last `tailMessageCount` messages is treated as
   *    already summarized.
   * 4. No snapshot → full compression of the whole history.
   *
   * NOTE(review): this method does not itself compare the raw history against
   * `triggerTokens`; presumably callers only invoke it once the history is
   * over the threshold — confirm.
   *
   * @param messages Complete conversation history, oldest first.
   * @param upstream Passed through to callSummarizer (currently unused there).
   * @param apiKey Passed through to callSummarizer (currently unused there).
   * @param sessionId Key for loading/saving the snapshot; without it nothing
   *   is persisted.
   * @param summarizer Profile name or options for the summarization run.
   * @returns The messages to send plus metadata about what was done.
   */
  async compress(
    messages: ChatMessage[],
    upstream: string,
    apiKey: string | undefined,
    sessionId?: string,
    summarizer?: string | SummarizerOptions,
  ): Promise<CompressedResult> {
    const total = messages.length
    const makeMeta = (opts: Partial<CompressedResult['meta']> = {}): CompressedResult['meta'] => ({
      totalMessages: total,
      compressed: false,
      llmCompressed: false,
      summaryTokenEstimate: 0,
      verbatimCount: total,
      compressedStartIndex: -1,
      ...opts,
    })

    const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null

    if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
      // Usable snapshot → merge the old summary with the messages that followed it.
      logger.info(
        '[context-compressor] session=%s: incremental compress with snapshot at index %d',
        sessionId, snapshot.lastMessageIndex,
      )
      return this.incrementalCompress(
        messages, snapshot, upstream, apiKey, sessionId, makeMeta(), summarizer,
      )
    }

    if (snapshot && sessionId) {
      // Snapshot index does not fit this history: keep its summary, but only
      // trust the last `tailMessageCount` messages as "new".
      const fallbackLastMessageIndex = Math.max(-1, messages.length - this.config.tailMessageCount - 1)
      logger.warn(
        '[context-compressor] session=%s: stale snapshot index %d for %d messages; using summary plus tail from index %d',
        sessionId, snapshot.lastMessageIndex, messages.length, fallbackLastMessageIndex,
      )
      return this.incrementalCompress(
        messages,
        { summary: snapshot.summary, lastMessageIndex: fallbackLastMessageIndex },
        upstream,
        apiKey,
        sessionId,
        makeMeta(),
        summarizer,
      )
    }

    // No snapshot → summarize the history from scratch.
    logger.info(
      '[context-compressor] session=%s: full compress %d messages',
      sessionId, total,
    )
    return this.fullCompress(messages, upstream, apiKey, sessionId, makeMeta(), summarizer)
  }

  /**
   * Folds the messages that follow a snapshot into the snapshot's summary.
   *
   * The new messages are split into a part to summarize and a verbatim tail of
   * up to `tailMessageCount` messages. When there is nothing to summarize, the
   * previous summary and the new messages are returned without an LLM call.
   * If the LLM call fails, the previous summary plus all new messages are
   * returned, with old tool results pruned and the budget enforced.
   *
   * NOTE(review): when more than `tailMessageCount` new messages exist, the LLM
   * is called even if the assembled context is still under `triggerTokens` —
   * confirm that this is intended.
   */
  private async incrementalCompress(
    messages: ChatMessage[],
    snapshot: { summary: string; lastMessageIndex: number },
    upstream: string,
    apiKey: string | undefined,
    sessionId: string | undefined,
    meta: CompressedResult['meta'],
    summarizer?: string | SummarizerOptions,
  ): Promise<CompressedResult> {
    const { summary: previousSummary, lastMessageIndex } = snapshot
    const total = messages.length
    // The head can only consist of messages that the snapshot already covers.
    const headCount = Math.min(this.config.headMessageCount, Math.max(0, lastMessageIndex + 1))
    const head = messages.slice(0, headCount)
    const newMessages = messages.slice(lastMessageIndex + 1)
    const tailCount = this.config.tailMessageCount
    const previousSummaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }
    const assembledWithPrevious = [
      ...head,
      previousSummaryMessage,
      ...newMessages,
    ]
    const assembledOverBudget = messagesTokenEstimate(assembledWithPrevious) > this.config.triggerTokens
    const canKeepTailWindow = newMessages.length > tailCount
    // If the new segment itself is too small to split but already over budget,
    // fold all new messages into the existing summary instead of preserving them verbatim.
    const tailStart = assembledOverBudget && !canKeepTailWindow
      ? newMessages.length
      : Math.max(0, newMessages.length - tailCount)
    const toCompress = newMessages.slice(0, tailStart)
    const tail = newMessages.slice(tailStart)

    if (toCompress.length === 0) {
      // Nothing to summarize: reuse the stored summary as-is.
      return {
        messages: assembledWithPrevious,
        meta: {
          ...meta,
          compressed: true,
          llmCompressed: false,
          summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
          verbatimCount: head.length + newMessages.length,
          compressedStartIndex: lastMessageIndex,
        },
      }
    }

    logger.info(
      '[context-compressor] [incremental-llm] compressing %d of %d new messages, keeping %d tail',
      toCompress.length, newMessages.length, tail.length,
    )

    let summary: string
    try {
      const contentToSummarize = serializeForSummary(toCompress)
      const prompt = buildIncrementalPrompt(previousSummary, contentToSummarize, this.config.summaryBudget)
      const t0 = Date.now()
      summary = await callSummarizer(upstream, apiKey, prompt, [], this.config.summarizationTimeoutMs, previousSummary, summarizer)
      logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err)
      logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', reason)
      const fallback = [
        ...head,
        previousSummaryMessage,
        ...newMessages,
      ]
      const prunedFallback = pruneFallbackToolResults(fallback, this.config.tailMessageCount)
      const budgetedFallback = enforceCompressedBudget(prunedFallback, this.config.triggerTokens, head.length)
      return {
        messages: budgetedFallback,
        meta: {
          ...meta,
          compressed: true,
          llmCompressed: false,
          summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
          // A shorter list means budget enforcement kept only the summary.
          verbatimCount: budgetedFallback.length === fallback.length ? head.length + newMessages.length : 0,
          compressedStartIndex: lastMessageIndex,
        },
      }
    }

    let result: ChatMessage[] = [
      ...head,
      { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
      ...tail,
    ]
    result = enforceCompressedBudget(result, this.config.triggerTokens, head.length)

    // Index of the last message that is now covered by the summary.
    const newLastIndex = lastMessageIndex + tailStart
    if (sessionId) {
      saveCompressionSnapshot(sessionId, summary, newLastIndex, total)
    }

    return {
      messages: result,
      meta: {
        ...meta,
        compressed: true,
        llmCompressed: true,
        summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
        verbatimCount: result.length === head.length + 1 + tail.length ? head.length + tail.length : 0,
        compressedStartIndex: newLastIndex,
      },
    }
  }

  /**
   * Summarizes a history for which no snapshot exists.
   *
   * The first `headMessageCount` and last `tailMessageCount` messages are kept
   * verbatim when the history is longer than both windows together; otherwise
   * the whole history is summarized. An empty history is returned untouched
   * without calling the LLM. If the LLM call fails, the original messages are
   * returned with old tool results pruned and `compressed: false`.
   */
  private async fullCompress(
    messages: ChatMessage[],
    upstream: string,
    apiKey: string | undefined,
    sessionId: string | undefined,
    meta: CompressedResult['meta'],
    summarizer?: string | SummarizerOptions,
  ): Promise<CompressedResult> {
    const total = messages.length
    const requestedHeadCount = Math.min(this.config.headMessageCount, total)
    const requestedTailCount = this.config.tailMessageCount
    const canKeepProtectedWindows = total > requestedHeadCount + requestedTailCount
    const headCount = canKeepProtectedWindows ? requestedHeadCount : 0
    const tailCount = canKeepProtectedWindows ? requestedTailCount : 0
    const tailStart = total - tailCount
    const head = messages.slice(0, headCount)
    const toCompress = messages.slice(headCount, tailStart)
    const tail = messages.slice(tailStart)

    if (toCompress.length === 0) {
      // Only possible for an empty history: there is nothing to summarize, so
      // do not spend an LLM call or store a snapshot for it.
      return { messages, meta }
    }

    logger.info(
      '[context-compressor] [full-llm] compressing messages %d-%d, keeping first %d and last %d',
      headCount, tailStart - 1, head.length, tail.length,
    )

    const contentToSummarize = serializeForSummary(toCompress)
    const prompt = buildFullPrompt(contentToSummarize, this.config.summaryBudget)
    let summary: string | null = null
    try {
      const t0 = Date.now()
      summary = await callSummarizer(upstream, apiKey, prompt, [], this.config.summarizationTimeoutMs, undefined, summarizer)
      logger.info('[context-compressor] full-llm done in %dms, %d chars', Date.now() - t0, summary.length)
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err)
      logger.warn('[context-compressor] full-llm failed: %s', reason)
    }

    if (!summary) {
      return { messages: pruneFallbackToolResults(messages, this.config.tailMessageCount), meta }
    }

    const result: ChatMessage[] = [
      ...head,
      { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
      ...tail,
    ]
    if (sessionId) {
      saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
    }

    const budgetedResult = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
    return {
      messages: budgetedResult,
      meta: {
        ...meta,
        compressed: true,
        llmCompressed: true,
        summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
        // A shorter list means budget enforcement kept only the summary.
        verbatimCount: budgetedResult.length === result.length ? head.length + tail.length : 0,
        compressedStartIndex: tailStart - 1,
      },
    }
  }

  /** Remove snapshot for a session (e.g. when session is deleted) */
  static invalidateSnapshot(sessionId: string): void {
    deleteCompressionSnapshot(sessionId)
  }
}
/**
 * Reads a byte stream of server-sent events and yields one object per frame
 * that carries non-empty data.
 *
 * Frames are separated by a blank line; both LF and CRLF line endings are
 * recognised, also when a separator is split across two chunks. Whatever is
 * left in the buffer when the stream ends is parsed as a final frame. The
 * reader lock is released when iteration finishes or is abandoned.
 *
 * @param stream Byte stream containing UTF-8 encoded SSE text.
 */
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
  const decoder = new TextDecoder()
  const reader = stream.getReader()
  // Blank line = two consecutive line endings, each LF or CRLF.
  const frameBoundary = /\r?\n\r?\n/
  let buffer = ''
  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      // stream: true keeps multi-byte characters intact across chunk borders.
      buffer += decoder.decode(value, { stream: true })
      let boundary = frameBoundary.exec(buffer)
      while (boundary) {
        const raw = buffer.slice(0, boundary.index)
        buffer = buffer.slice(boundary.index + boundary[0].length)
        const frame = parseSseFrame(raw)
        if (frame?.data) yield frame
        boundary = frameBoundary.exec(buffer)
      }
    }
    // Flush the decoder and treat the remainder as a last, unterminated frame.
    buffer += decoder.decode()
    const frame = parseSseFrame(buffer)
    if (frame?.data) yield frame
  } finally {
    reader.releaseLock()
  }
}

/**
 * Parses the text of one SSE frame.
 *
 * - Empty lines and comment lines (starting with ':') are ignored.
 * - `event:` sets the event name (surrounding whitespace trimmed).
 * - `data:` adds one data line; exactly one leading space after the colon is
 *   removed, so further leading whitespace of the payload is preserved. A bare
 *   `data` line adds an empty data line.
 * - Other fields are ignored.
 *
 * @param raw Frame text without the terminating blank line.
 * @returns The event name (if any) and the data lines joined by '\n', or null
 *   when the frame has no data line at all.
 */
function parseSseFrame(raw: string): { event?: string; data: string } | null {
  let event: string | undefined
  const data: string[] = []
  for (const line of raw.split(/\r?\n/)) {
    if (!line || line.startsWith(':')) continue
    if (line.startsWith('event:')) {
      event = line.slice(6).trim()
    } else if (line.startsWith('data:')) {
      const value = line.slice(5)
      data.push(value.startsWith(' ') ? value.slice(1) : value)
    } else if (line === 'data') {
      data.push('')
    }
  }
  if (data.length === 0) return null
  return { event, data: data.join('\n') }
}
/**
 * Pulls the assistant text out of a response object.
 *
 * Text is collected from `response.output`: for every item of type 'message',
 * the `text` of each content part of type 'output_text' or 'text' is taken
 * (missing text counts as ''), and all pieces are concatenated. If no such
 * part exists at all, `response.output_text` is returned when it is a string,
 * otherwise ''.
 *
 * @param response Response payload of unknown shape.
 * @returns The extracted text, possibly ''.
 */
function extractResponseText(response: any): string {
  const items: any[] = Array.isArray(response?.output) ? response.output : []

  const pieces: any[] = items
    .filter(item => item.type === 'message')
    .flatMap(item => (Array.isArray(item.content) ? item.content : []))
    .filter(part => part.type === 'output_text' || part.type === 'text')
    .map(part => part.text || '')

  if (pieces.length > 0) return pieces.join('')

  const fallback = response?.output_text
  return typeof fallback === 'string' ? fallback : ''
}