[codex] Harden context compression history handling (#848)

* Use token threshold for chat compression

* Add compression settings controls

* Use config for chat compression

* Cover protected messages in compression tests

* Remove message-count compression limit

* Harden compression window fallback

* Rebuild stale compression snapshots

* Harden stale compression snapshots

* Update changelog for compression hardening

* Prefer local history session details
This commit is contained in:
ekko
2026-05-19 17:58:39 +08:00
committed by GitHub
parent 84e343fc22
commit 0547fd6b6a
22 changed files with 1255 additions and 56 deletions
@@ -46,6 +46,8 @@ export interface CompressionConfig {
triggerTokens: number
/** Summary token target (default: 8000) */
summaryBudget: number
/** Number of earliest messages to keep verbatim (default: 0) */
headMessageCount: number
/** Number of recent messages to keep verbatim (default: 10) */
tailMessageCount: number
/** Timeout for LLM summarization call (default: 60_000ms) */
@@ -55,6 +57,7 @@ export interface CompressionConfig {
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
triggerTokens: 100_000,
summaryBudget: 8_000,
headMessageCount: 0,
tailMessageCount: 10,
summarizationTimeoutMs: 120_000,
}
@@ -108,6 +111,54 @@ export function countTokensForModel(text: string, model: string): number {
}
}
function messageTokenEstimate(message: ChatMessage): number {
if (typeof message.content === 'string') return countTokens(message.content)
if (Array.isArray(message.content)) {
return countTokens(message.content.map(block => {
if (block.type === 'text') return block.text || ''
if (block.type === 'image') return `[Image: ${block.path || ''}]`
if (block.type === 'file') return `[File: ${block.path || ''}]`
return ''
}).join(''))
}
return 0
}
function messagesTokenEstimate(messages: ChatMessage[]): number {
return messages.reduce((sum, message) => sum + messageTokenEstimate(message), 0)
}
function truncateTextToTokenBudget(text: string, tokenBudget: number): string {
if (tokenBudget <= 0 || countTokens(text) <= tokenBudget) return text
let lo = 0
let hi = text.length
while (lo < hi) {
const mid = Math.ceil((lo + hi) / 2)
if (countTokens(text.slice(0, mid)) <= tokenBudget) lo = mid
else hi = mid - 1
}
return text.slice(0, lo).trimEnd() + '\n\n[Summary truncated to fit context budget]'
}
function enforceCompressedBudget(
messages: ChatMessage[],
triggerTokens: number,
summaryIndex: number,
): ChatMessage[] {
if (triggerTokens <= 0 || messagesTokenEstimate(messages) <= triggerTokens) return messages
const summaryMessage = messages[summaryIndex]
if (!summaryMessage || typeof summaryMessage.content !== 'string') return messages
const summaryOnly = [{ ...summaryMessage }]
if (messagesTokenEstimate(summaryOnly) <= triggerTokens) return summaryOnly
return [{
...summaryMessage,
content: truncateTextToTokenBudget(summaryMessage.content, triggerTokens),
}]
}
// ─── Prompts ────────────────────────────────────────────
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
@@ -371,6 +422,10 @@ export function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: nu
return [...pruned, ...tail]
}
function pruneFallbackToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
return pruneOldToolResults(messages, keepRecentCount)
}
// ─── LLM Summarization ──────────────────────────────────
export async function callSummarizer(
@@ -474,7 +529,7 @@ export class ChatContextCompressor {
// Check if we have a previous compression snapshot
const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null
if (snapshot) {
if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
// Has snapshot → incremental compress (merge old summary with new messages)
logger.info(
'[context-compressor] session=%s: incremental compress with snapshot at index %d',
@@ -484,6 +539,22 @@ export class ChatContextCompressor {
messages, snapshot, upstream, apiKey, sessionId!, makeMeta(), summarizer,
)
} else {
if (snapshot && sessionId) {
const fallbackLastMessageIndex = Math.max(-1, messages.length - this.config.tailMessageCount - 1)
logger.warn(
'[context-compressor] session=%s: stale snapshot index %d for %d messages; using summary plus tail from index %d',
sessionId, snapshot.lastMessageIndex, messages.length, fallbackLastMessageIndex,
)
return this.incrementalCompress(
messages,
{ summary: snapshot.summary, lastMessageIndex: fallbackLastMessageIndex },
upstream,
apiKey,
sessionId,
makeMeta(),
summarizer,
)
}
// No snapshot → full compress (compress all messages)
logger.info(
'[context-compressor] session=%s: full compress %d messages',
@@ -504,27 +575,36 @@ export class ChatContextCompressor {
): Promise<CompressedResult> {
const { summary: previousSummary, lastMessageIndex } = snapshot
const total = messages.length
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
const newMessages = cleaned.slice(lastMessageIndex + 1)
const headCount = Math.min(this.config.headMessageCount, Math.max(0, lastMessageIndex + 1))
const head = messages.slice(0, headCount)
const newMessages = messages.slice(lastMessageIndex + 1)
const tailCount = this.config.tailMessageCount
const previousSummaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }
const assembledWithPrevious = [
...head,
previousSummaryMessage,
...newMessages,
]
const assembledOverBudget = messagesTokenEstimate(assembledWithPrevious) > this.config.triggerTokens
const canKeepTailWindow = newMessages.length > tailCount
// Keep last N of new messages, compress the rest
const tailStart = Math.max(0, newMessages.length - tailCount)
// If the new segment itself is too small to split but already over budget,
// fold all new messages into the existing summary instead of preserving them verbatim.
const tailStart = assembledOverBudget && !canKeepTailWindow
? newMessages.length
: Math.max(0, newMessages.length - tailCount)
const toCompress = newMessages.slice(0, tailStart)
const tail = newMessages.slice(tailStart)
if (toCompress.length === 0) {
return {
messages: [
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
...newMessages,
],
messages: assembledWithPrevious,
meta: {
...meta,
compressed: true,
llmCompressed: false,
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
verbatimCount: newMessages.length,
verbatimCount: head.length + newMessages.length,
compressedStartIndex: lastMessageIndex,
},
}
@@ -546,26 +626,32 @@ export class ChatContextCompressor {
logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
} catch (err: any) {
logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', err.message)
const fallback = [
...head,
previousSummaryMessage,
...newMessages,
]
const prunedFallback = pruneFallbackToolResults(fallback, this.config.tailMessageCount)
const budgetedFallback = enforceCompressedBudget(prunedFallback, this.config.triggerTokens, head.length)
return {
messages: [
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
...newMessages,
],
messages: budgetedFallback,
meta: {
...meta,
compressed: true,
llmCompressed: false,
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
verbatimCount: newMessages.length,
verbatimCount: budgetedFallback.length === fallback.length ? head.length + newMessages.length : 0,
compressedStartIndex: lastMessageIndex,
},
}
}
const result: ChatMessage[] = [
let result: ChatMessage[] = [
...head,
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
...tail,
]
result = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
const newLastIndex = lastMessageIndex + tailStart
if (sessionId) {
@@ -579,7 +665,7 @@ export class ChatContextCompressor {
compressed: true,
llmCompressed: true,
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
verbatimCount: tail.length,
verbatimCount: result.length === head.length + 1 + tail.length ? head.length + tail.length : 0,
compressedStartIndex: newLastIndex,
},
}
@@ -594,20 +680,20 @@ export class ChatContextCompressor {
summarizer?: string | SummarizerOptions,
): Promise<CompressedResult> {
const total = messages.length
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
const tailCount = this.config.tailMessageCount
if (total <= tailCount) {
return { messages: cleaned, meta }
}
const requestedHeadCount = Math.min(this.config.headMessageCount, total)
const requestedTailCount = this.config.tailMessageCount
const canKeepProtectedWindows = total > requestedHeadCount + requestedTailCount
const headCount = canKeepProtectedWindows ? requestedHeadCount : 0
const tailCount = canKeepProtectedWindows ? requestedTailCount : 0
const tailStart = total - tailCount
const toCompress = cleaned.slice(0, tailStart)
const tail = cleaned.slice(tailStart)
const head = messages.slice(0, headCount)
const toCompress = messages.slice(headCount, tailStart)
const tail = messages.slice(tailStart)
logger.info(
'[context-compressor] [full-llm] compressing messages 0-%d, keeping %d-%d',
tailStart - 1, tailStart, total - 1,
'[context-compressor] [full-llm] compressing messages %d-%d, keeping first %d and last %d',
headCount, tailStart - 1, head.length, tail.length,
)
const contentToSummarize = serializeForSummary(toCompress)
@@ -624,26 +710,28 @@ export class ChatContextCompressor {
}
if (!summary) {
return { messages: cleaned, meta }
return { messages: pruneFallbackToolResults(messages, this.config.tailMessageCount), meta }
}
const result: ChatMessage[] = []
result.push(...head)
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
if (sessionId) {
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
}
result.push(...tail)
const budgetedResult = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
return {
messages: result,
messages: budgetedResult,
meta: {
...meta,
compressed: true,
llmCompressed: !!summary,
summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0,
verbatimCount: tail.length,
verbatimCount: budgetedResult.length === result.length ? head.length + tail.length : 0,
compressedStartIndex: tailStart - 1,
},
}