[codex] Harden context compression history handling (#848)
* Use token threshold for chat compression * Add compression settings controls * Use config for chat compression * Cover protected messages in compression tests * Remove message-count compression limit * Harden compression window fallback * Rebuild stale compression snapshots * Harden stale compression snapshots * Update changelog for compression hardening * Prefer local history session details
This commit is contained in:
@@ -188,7 +188,16 @@ export async function get(ctx: any) {
|
||||
* GET /api/hermes/sessions/hermes/:id
|
||||
*/
|
||||
export async function getHermesSession(ctx: any) {
|
||||
// Try database first (consistent with listHermesSessions)
|
||||
// Prefer the Web UI local session store. Hermes state.db can lag behind or
|
||||
// miss messages for Bridge-backed runs, while the local store is the source
|
||||
// used by chat rendering and compression.
|
||||
const localSession = localGetSessionDetail(ctx.params.id)
|
||||
if (localSession && localSession.source !== 'api_server') {
|
||||
ctx.body = { session: localSession }
|
||||
return
|
||||
}
|
||||
|
||||
// Try Hermes state.db next (consistent with listHermesSessions)
|
||||
try {
|
||||
const session = await getSessionDetailFromDb(ctx.params.id)
|
||||
if (session && session.source !== 'api_server') {
|
||||
|
||||
@@ -46,6 +46,8 @@ export interface CompressionConfig {
|
||||
triggerTokens: number
|
||||
/** Summary token target (default: 8000) */
|
||||
summaryBudget: number
|
||||
/** Number of earliest messages to keep verbatim (default: 0) */
|
||||
headMessageCount: number
|
||||
/** Number of recent messages to keep verbatim (default: 10) */
|
||||
tailMessageCount: number
|
||||
/** Timeout for LLM summarization call (default: 120_000ms) */
|
||||
@@ -55,6 +57,7 @@ export interface CompressionConfig {
|
||||
/**
 * Baseline compressor settings.
 *
 * - `triggerTokens`: token budget; presumably the size at which compression
 *   kicks in (the budget-enforcement code compares estimates against it) —
 *   TODO confirm against the interface docs.
 * - `summaryBudget`: summary token target.
 * - `headMessageCount`: number of earliest messages kept verbatim (0 = none).
 * - `tailMessageCount`: number of most recent messages kept verbatim.
 * - `summarizationTimeoutMs`: timeout for the LLM summarization call, in ms.
 */
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
|
||||
triggerTokens: 100_000,
|
||||
summaryBudget: 8_000,
|
||||
headMessageCount: 0,
|
||||
tailMessageCount: 10,
|
||||
summarizationTimeoutMs: 120_000,
|
||||
}
|
||||
@@ -108,6 +111,54 @@ export function countTokensForModel(text: string, model: string): number {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Estimates the token cost of a single chat message.
 *
 * String content is counted directly. Array content is flattened into one
 * string first: text blocks contribute their text, image and file blocks
 * contribute a `[Image: <path>]` / `[File: <path>]` placeholder, and any other
 * block type contributes nothing. Content of any other shape counts as 0.
 *
 * @param message - The message to measure.
 * @returns Token count reported by `countTokens` for the flattened content.
 */
function messageTokenEstimate(message: ChatMessage): number {
  const { content } = message
  if (typeof content === 'string') return countTokens(content)
  if (!Array.isArray(content)) return 0

  let flattened = ''
  for (const block of content) {
    switch (block.type) {
      case 'text':
        flattened += block.text || ''
        break
      case 'image':
        flattened += `[Image: ${block.path || ''}]`
        break
      case 'file':
        flattened += `[File: ${block.path || ''}]`
        break
      default:
        // Unknown block types are ignored for estimation purposes.
        break
    }
  }
  return countTokens(flattened)
}
|
||||
|
||||
/**
 * Sums the per-message token estimates for a list of messages.
 *
 * @param messages - Messages to measure; an empty list yields 0.
 * @returns Total of `messageTokenEstimate` over every message.
 */
function messagesTokenEstimate(messages: ChatMessage[]): number {
  let total = 0
  for (const entry of messages) {
    total += messageTokenEstimate(entry)
  }
  return total
}
|
||||
|
||||
/**
 * Shortens `text` so that the returned string fits inside `tokenBudget`.
 *
 * The text is returned unchanged when the budget is non-positive (treated as
 * "no budget") or when it already fits. Otherwise the longest fitting prefix
 * is kept and a truncation marker is appended.
 *
 * The budget is checked against the final string (prefix plus marker), not
 * just the prefix, so appending the marker can no longer push the result back
 * over the budget. If the marker alone does not fit, it is omitted and only
 * the plain prefix is returned.
 *
 * @param text - Text to shorten.
 * @param tokenBudget - Maximum token count for the returned string.
 * @returns `text`, or a trimmed prefix of it (normally followed by the marker).
 */
function truncateTextToTokenBudget(text: string, tokenBudget: number): string {
  if (tokenBudget <= 0 || countTokens(text) <= tokenBudget) return text

  const marker = '\n\n[Summary truncated to fit context budget]'

  // Finds the longest prefix whose decorated form fits the budget, or null
  // when even the empty prefix plus suffix is too large.
  const fitWithSuffix = (suffix: string): string | null => {
    const build = (length: number): string => text.slice(0, length).trimEnd() + suffix
    if (countTokens(build(0)) > tokenBudget) return null

    // Binary search over the prefix length; assumes the token count does not
    // decrease as the prefix grows.
    let lo = 0
    let hi = text.length
    while (lo < hi) {
      const mid = Math.ceil((lo + hi) / 2)
      if (countTokens(build(mid)) <= tokenBudget) lo = mid
      else hi = mid - 1
    }

    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate would leave a malformed character at the end of the prefix.
    if (lo > 0 && lo < text.length) {
      const lastUnit = text.charCodeAt(lo - 1)
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) lo -= 1
    }
    return build(lo)
  }

  return fitWithSuffix(marker) ?? fitWithSuffix('') ?? ''
}
|
||||
|
||||
/**
 * Last-resort guard that keeps an assembled, compressed message list inside
 * the token budget.
 *
 * - Budget non-positive, or the list already fits: the list is returned as is.
 * - No message at `summaryIndex`, or its content is not a string: the list is
 *   returned as is.
 * - Otherwise every message except the summary is dropped. If the summary
 *   alone still exceeds the budget, its text is truncated.
 *
 * @param messages - Assembled messages (head, summary, tail).
 * @param triggerTokens - Token budget the result should respect.
 * @param summaryIndex - Position of the summary message inside `messages`.
 * @returns The original list, or a single-element list holding the summary.
 */
function enforceCompressedBudget(
  messages: ChatMessage[],
  triggerTokens: number,
  summaryIndex: number,
): ChatMessage[] {
  if (triggerTokens <= 0) return messages
  if (messagesTokenEstimate(messages) <= triggerTokens) return messages

  const summary = messages[summaryIndex]
  if (!summary) return messages

  const summaryText = summary.content
  if (typeof summaryText !== 'string') return messages

  const summaryAlone = [{ ...summary }]
  if (messagesTokenEstimate(summaryAlone) <= triggerTokens) return summaryAlone

  const shortened = truncateTextToTokenBudget(summaryText, triggerTokens)
  return [{ ...summary, content: shortened }]
}
|
||||
|
||||
// ─── Prompts ────────────────────────────────────────────
|
||||
|
||||
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
|
||||
@@ -371,6 +422,10 @@ export function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: nu
|
||||
return [...pruned, ...tail]
|
||||
}
|
||||
|
||||
/**
 * Prunes tool results on the fallback paths (e.g. when LLM summarization
 * fails or yields no summary and messages are kept verbatim).
 *
 * Currently a direct delegation to `pruneOldToolResults` with the same
 * arguments; it exists as a named seam for the fallback call sites.
 *
 * @param messages - Messages to prune.
 * @param keepRecentCount - Forwarded unchanged to `pruneOldToolResults`.
 * @returns Whatever `pruneOldToolResults` returns for these arguments.
 */
function pruneFallbackToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
|
||||
return pruneOldToolResults(messages, keepRecentCount)
|
||||
}
|
||||
|
||||
// ─── LLM Summarization ──────────────────────────────────
|
||||
|
||||
export async function callSummarizer(
|
||||
@@ -474,7 +529,7 @@ export class ChatContextCompressor {
|
||||
// Check if we have a previous compression snapshot
|
||||
const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null
|
||||
|
||||
if (snapshot) {
|
||||
if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
|
||||
// Has snapshot → incremental compress (merge old summary with new messages)
|
||||
logger.info(
|
||||
'[context-compressor] session=%s: incremental compress with snapshot at index %d',
|
||||
@@ -484,6 +539,22 @@ export class ChatContextCompressor {
|
||||
messages, snapshot, upstream, apiKey, sessionId!, makeMeta(), summarizer,
|
||||
)
|
||||
} else {
|
||||
if (snapshot && sessionId) {
|
||||
const fallbackLastMessageIndex = Math.max(-1, messages.length - this.config.tailMessageCount - 1)
|
||||
logger.warn(
|
||||
'[context-compressor] session=%s: stale snapshot index %d for %d messages; using summary plus tail from index %d',
|
||||
sessionId, snapshot.lastMessageIndex, messages.length, fallbackLastMessageIndex,
|
||||
)
|
||||
return this.incrementalCompress(
|
||||
messages,
|
||||
{ summary: snapshot.summary, lastMessageIndex: fallbackLastMessageIndex },
|
||||
upstream,
|
||||
apiKey,
|
||||
sessionId,
|
||||
makeMeta(),
|
||||
summarizer,
|
||||
)
|
||||
}
|
||||
// No snapshot → full compress (compress all messages)
|
||||
logger.info(
|
||||
'[context-compressor] session=%s: full compress %d messages',
|
||||
@@ -504,27 +575,36 @@ export class ChatContextCompressor {
|
||||
): Promise<CompressedResult> {
|
||||
const { summary: previousSummary, lastMessageIndex } = snapshot
|
||||
const total = messages.length
|
||||
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
|
||||
const newMessages = cleaned.slice(lastMessageIndex + 1)
|
||||
const headCount = Math.min(this.config.headMessageCount, Math.max(0, lastMessageIndex + 1))
|
||||
const head = messages.slice(0, headCount)
|
||||
const newMessages = messages.slice(lastMessageIndex + 1)
|
||||
const tailCount = this.config.tailMessageCount
|
||||
const previousSummaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }
|
||||
const assembledWithPrevious = [
|
||||
...head,
|
||||
previousSummaryMessage,
|
||||
...newMessages,
|
||||
]
|
||||
const assembledOverBudget = messagesTokenEstimate(assembledWithPrevious) > this.config.triggerTokens
|
||||
const canKeepTailWindow = newMessages.length > tailCount
|
||||
|
||||
// Keep last N of new messages, compress the rest
|
||||
const tailStart = Math.max(0, newMessages.length - tailCount)
|
||||
// If the new segment itself is too small to split but already over budget,
|
||||
// fold all new messages into the existing summary instead of preserving them verbatim.
|
||||
const tailStart = assembledOverBudget && !canKeepTailWindow
|
||||
? newMessages.length
|
||||
: Math.max(0, newMessages.length - tailCount)
|
||||
const toCompress = newMessages.slice(0, tailStart)
|
||||
const tail = newMessages.slice(tailStart)
|
||||
|
||||
if (toCompress.length === 0) {
|
||||
return {
|
||||
messages: [
|
||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
|
||||
...newMessages,
|
||||
],
|
||||
messages: assembledWithPrevious,
|
||||
meta: {
|
||||
...meta,
|
||||
compressed: true,
|
||||
llmCompressed: false,
|
||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
||||
verbatimCount: newMessages.length,
|
||||
verbatimCount: head.length + newMessages.length,
|
||||
compressedStartIndex: lastMessageIndex,
|
||||
},
|
||||
}
|
||||
@@ -546,26 +626,32 @@ export class ChatContextCompressor {
|
||||
logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
|
||||
} catch (err: any) {
|
||||
logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', err.message)
|
||||
const fallback = [
|
||||
...head,
|
||||
previousSummaryMessage,
|
||||
...newMessages,
|
||||
]
|
||||
const prunedFallback = pruneFallbackToolResults(fallback, this.config.tailMessageCount)
|
||||
const budgetedFallback = enforceCompressedBudget(prunedFallback, this.config.triggerTokens, head.length)
|
||||
return {
|
||||
messages: [
|
||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
|
||||
...newMessages,
|
||||
],
|
||||
messages: budgetedFallback,
|
||||
meta: {
|
||||
...meta,
|
||||
compressed: true,
|
||||
llmCompressed: false,
|
||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
||||
verbatimCount: newMessages.length,
|
||||
verbatimCount: budgetedFallback.length === fallback.length ? head.length + newMessages.length : 0,
|
||||
compressedStartIndex: lastMessageIndex,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const result: ChatMessage[] = [
|
||||
let result: ChatMessage[] = [
|
||||
...head,
|
||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
|
||||
...tail,
|
||||
]
|
||||
result = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
|
||||
|
||||
const newLastIndex = lastMessageIndex + tailStart
|
||||
if (sessionId) {
|
||||
@@ -579,7 +665,7 @@ export class ChatContextCompressor {
|
||||
compressed: true,
|
||||
llmCompressed: true,
|
||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
|
||||
verbatimCount: tail.length,
|
||||
verbatimCount: result.length === head.length + 1 + tail.length ? head.length + tail.length : 0,
|
||||
compressedStartIndex: newLastIndex,
|
||||
},
|
||||
}
|
||||
@@ -594,20 +680,20 @@ export class ChatContextCompressor {
|
||||
summarizer?: string | SummarizerOptions,
|
||||
): Promise<CompressedResult> {
|
||||
const total = messages.length
|
||||
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
|
||||
const tailCount = this.config.tailMessageCount
|
||||
|
||||
if (total <= tailCount) {
|
||||
return { messages: cleaned, meta }
|
||||
}
|
||||
const requestedHeadCount = Math.min(this.config.headMessageCount, total)
|
||||
const requestedTailCount = this.config.tailMessageCount
|
||||
const canKeepProtectedWindows = total > requestedHeadCount + requestedTailCount
|
||||
const headCount = canKeepProtectedWindows ? requestedHeadCount : 0
|
||||
const tailCount = canKeepProtectedWindows ? requestedTailCount : 0
|
||||
|
||||
const tailStart = total - tailCount
|
||||
const toCompress = cleaned.slice(0, tailStart)
|
||||
const tail = cleaned.slice(tailStart)
|
||||
const head = messages.slice(0, headCount)
|
||||
const toCompress = messages.slice(headCount, tailStart)
|
||||
const tail = messages.slice(tailStart)
|
||||
|
||||
logger.info(
|
||||
'[context-compressor] [full-llm] compressing messages 0-%d, keeping %d-%d',
|
||||
tailStart - 1, tailStart, total - 1,
|
||||
'[context-compressor] [full-llm] compressing messages %d-%d, keeping first %d and last %d',
|
||||
headCount, tailStart - 1, head.length, tail.length,
|
||||
)
|
||||
|
||||
const contentToSummarize = serializeForSummary(toCompress)
|
||||
@@ -624,26 +710,28 @@ export class ChatContextCompressor {
|
||||
}
|
||||
|
||||
if (!summary) {
|
||||
return { messages: cleaned, meta }
|
||||
return { messages: pruneFallbackToolResults(messages, this.config.tailMessageCount), meta }
|
||||
}
|
||||
|
||||
const result: ChatMessage[] = []
|
||||
|
||||
result.push(...head)
|
||||
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
|
||||
if (sessionId) {
|
||||
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
|
||||
}
|
||||
|
||||
result.push(...tail)
|
||||
const budgetedResult = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
|
||||
|
||||
return {
|
||||
messages: result,
|
||||
messages: budgetedResult,
|
||||
meta: {
|
||||
...meta,
|
||||
compressed: true,
|
||||
llmCompressed: !!summary,
|
||||
summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0,
|
||||
verbatimCount: tail.length,
|
||||
verbatimCount: budgetedResult.length === result.length ? head.length + tail.length : 0,
|
||||
compressedStartIndex: tailStart - 1,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -10,14 +10,88 @@ import {
|
||||
import { getCompressionSnapshot } from '../../../db/hermes/compression-snapshot'
|
||||
import { ChatContextCompressor, SUMMARY_PREFIX } from '../../../lib/context-compressor'
|
||||
import { getModelContextLength } from '../model-context'
|
||||
import { readConfigYamlForProfile } from '../../config-helpers'
|
||||
import { logger } from '../../logger'
|
||||
import { bridgeLogger } from '../../logger'
|
||||
import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
|
||||
import { isAssistantMessageSendable } from './message-format'
|
||||
import type { ChatMessage } from '../../../lib/context-compressor'
|
||||
import type { ChatMessage, CompressionConfig as CompressorConfig } from '../../../lib/context-compressor'
|
||||
import type { SessionState, BridgeCompressionResult } from './types'
|
||||
|
||||
const compressor = new ChatContextCompressor()
|
||||
/**
 * Compression settings resolved for a chat run.
 *
 * - `enabled`: false only when the profile config explicitly disables compression.
 * - `triggerTokens`: assembled-token threshold above which history is compressed.
 * - `compressor`: overrides handed to `ChatContextCompressor` as its config.
 */
interface RunChatCompressionConfig {
|
||||
enabled: boolean
|
||||
triggerTokens: number
|
||||
compressor: Partial<CompressorConfig>
|
||||
}
|
||||
|
||||
/**
 * Reports whether a compression snapshot still points inside the given history.
 *
 * @param snapshot - Stored snapshot, or null when none exists.
 * @param history - Current message history.
 * @returns True when a snapshot exists and `lastMessageIndex` is a valid
 *   index into `history` (0 <= index < history.length).
 */
function isSnapshotUsable(
  snapshot: { lastMessageIndex: number } | null,
  history: ChatMessage[],
): boolean {
  if (!snapshot) return false
  const { lastMessageIndex } = snapshot
  return lastMessageIndex >= 0 && lastMessageIndex < history.length
}
|
||||
|
||||
/**
 * Rebuilds the effective history implied by a compression snapshot:
 * protected head messages, then the summary message, then the messages that
 * follow the summarized range.
 *
 * - Usable snapshot (index inside `history`): everything after
 *   `snapshot.lastMessageIndex` is appended. The protected head is clamped to
 *   the summarized range (at most `lastMessageIndex + 1` messages) so a head
 *   window larger than that range cannot repeat messages that also appear
 *   after the summary.
 * - Stale snapshot (index outside `history`): the summary is kept and only
 *   the last `tailMessageCount` messages are appended, never overlapping the
 *   protected head.
 *
 * @param snapshot - Stored snapshot, or null when none exists.
 * @param history - Full message history.
 * @param compressionConfig - Optional head/tail window sizes; missing,
 *   non-positive or NaN counts are treated as 0.
 * @returns The rebuilt message list, or null when there is no snapshot.
 */
function buildSnapshotHistory(
  snapshot: { summary: string; lastMessageIndex: number } | null,
  history: ChatMessage[],
  compressionConfig?: Partial<CompressorConfig>,
): ChatMessage[] | null {
  if (!snapshot) return null

  // `|| 0` (not `??`) on purpose: it also maps NaN to 0.
  const requestedHeadCount = Math.max(0, compressionConfig?.headMessageCount || 0)
  const tailCount = Math.max(0, compressionConfig?.tailMessageCount || 0)
  const summaryMessage: ChatMessage = {
    role: 'user',
    content: SUMMARY_PREFIX + '\n\n' + snapshot.summary,
  }

  if (isSnapshotUsable(snapshot, history)) {
    const resumeIndex = snapshot.lastMessageIndex + 1
    // Head messages must come from the summarized range only; anything at or
    // after resumeIndex is already included verbatim below.
    const headCount = Math.min(requestedHeadCount, resumeIndex)
    return [
      ...history.slice(0, headCount),
      summaryMessage,
      ...history.slice(resumeIndex),
    ]
  }

  const protectedHead = history.slice(0, requestedHeadCount)
  // Start the tail no earlier than the end of the head to avoid duplicates.
  const tailStart = Math.max(protectedHead.length, history.length - tailCount)
  return [
    ...protectedHead,
    summaryMessage,
    ...history.slice(tailStart),
  ]
}
|
||||
|
||||
/**
 * Coerces an untrusted config value into a number within `[min, max]`.
 *
 * @param value - Raw value; only finite numbers are accepted.
 * @param fallback - Used when `value` is not a finite number.
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @returns The accepted value (or the fallback), clamped to the bounds.
 */
function clampRatio(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = fallback
  if (typeof value === 'number' && Number.isFinite(value)) {
    candidate = value
  }
  const raised = Math.max(min, candidate)
  return Math.min(max, raised)
}
|
||||
|
||||
/**
 * Coerces an untrusted config value into a whole number within `[min, max]`.
 *
 * @param value - Raw value; finite numbers are floored, anything else is rejected.
 * @param fallback - Used as is (not floored) when `value` is rejected.
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @returns The floored value (or the fallback), clamped to the bounds.
 */
function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = fallback
  if (typeof value === 'number' && Number.isFinite(value)) {
    candidate = Math.floor(value)
  }
  const raised = Math.max(min, candidate)
  return Math.min(max, raised)
}
|
||||
|
||||
/**
 * Resolves the compression settings for a chat run from the profile's config
 * YAML (`compression` section), falling back to defaults when the section is
 * missing or the config cannot be read.
 *
 * Recognized keys and their clamped ranges:
 * - `threshold` (default 0.5, range 0.05–0.95): fraction of the context
 *   length at which compression triggers.
 * - `target_ratio` (default 0.2, range 0.01–0.8): fraction of the context
 *   length used as the summary budget (never below 1_000 tokens).
 * - `protect_last_n` (default 20, range 0–500): recent messages kept verbatim.
 * - `protect_first_n` (default 3, range 0–100): earliest messages kept verbatim.
 * - `enabled`: compression stays on unless this is exactly `false`.
 *
 * @param profile - Profile whose config YAML is read.
 * @param contextLength - Model context length in tokens.
 * @returns The resolved run-level compression configuration.
 */
async function getRunChatCompressionConfig(profile: string, contextLength: number): Promise<RunChatCompressionConfig> {
  let settings: Record<string, any> = {}
  try {
    const profileConfig = await readConfigYamlForProfile(profile)
    settings = profileConfig?.compression || {}
  } catch (err) {
    // A broken or unreadable config must not block the chat run.
    logger.warn(err, '[context-compress] failed to read compression config for profile %s, using defaults', profile)
  }

  const triggerRatio = clampRatio(settings.threshold, 0.5, 0.05, 0.95)
  const summaryRatio = clampRatio(settings.target_ratio, 0.2, 0.01, 0.8)
  const keepLast = clampInt(settings.protect_last_n, 20, 0, 500)
  const keepFirst = clampInt(settings.protect_first_n, 3, 0, 100)

  const triggerTokens = Math.floor(contextLength * triggerRatio)
  const summaryBudget = Math.max(1_000, Math.floor(contextLength * summaryRatio))

  return {
    enabled: settings.enabled !== false,
    triggerTokens,
    compressor: {
      triggerTokens,
      summaryBudget,
      headMessageCount: keepFirst,
      tailMessageCount: keepLast,
    },
  }
}
|
||||
|
||||
/**
|
||||
* Load conversation history from DB with full message structure (user/assistant/tool).
|
||||
@@ -77,12 +151,7 @@ export function estimateSnapshotAwareHistoryUsage(
|
||||
history: ChatMessage[],
|
||||
): { messageCount: number; tokenCount: number } {
|
||||
const snapshot = getCompressionSnapshot(sessionId)
|
||||
const messages = snapshot
|
||||
? [
|
||||
{ role: 'user', content: SUMMARY_PREFIX + snapshot.summary },
|
||||
...history.slice(snapshot.lastMessageIndex + 1),
|
||||
]
|
||||
: history
|
||||
const messages = buildSnapshotHistory(snapshot, history) || history
|
||||
const usage = estimateUsageTokensFromMessages(messages)
|
||||
return {
|
||||
messageCount: messages.length,
|
||||
@@ -108,29 +177,45 @@ export async function buildCompressedHistory(
|
||||
model: modelContext.model,
|
||||
provider: modelContext.provider,
|
||||
})
|
||||
const triggerTokens = Math.floor(contextLength / 2)
|
||||
const compressionConfig = await getRunChatCompressionConfig(profile, contextLength)
|
||||
const triggerTokens = compressionConfig.triggerTokens
|
||||
if (!compressionConfig.enabled) {
|
||||
logger.info('[context-compress] session=%s: compression disabled by config', sessionId)
|
||||
return history
|
||||
}
|
||||
const cState = getOrCreateSession(sessionMap, sessionId)
|
||||
const assembledTokens = await calcAndUpdateUsage(sessionId, cState, emit)
|
||||
const totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens
|
||||
let totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens
|
||||
const snapshot = getCompressionSnapshot(sessionId)
|
||||
const staleSnapshot = snapshot && !isSnapshotUsable(snapshot, history)
|
||||
if (staleSnapshot) {
|
||||
logger.warn('[context-compress] session=%s: stale snapshot index %d for %d history messages; using summary plus safe tail',
|
||||
sessionId, snapshot.lastMessageIndex, history.length)
|
||||
const staleHistory = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||
const staleUsage = estimateUsageTokensFromMessages(staleHistory)
|
||||
totalTokens = staleUsage.inputTokens + staleUsage.outputTokens
|
||||
}
|
||||
|
||||
if (snapshot) {
|
||||
if (snapshot && !staleSnapshot) {
|
||||
const newMessages = history.slice(snapshot.lastMessageIndex + 1)
|
||||
logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
|
||||
sessionId, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
|
||||
if (totalTokens <= triggerTokens && newMessages.length <= 150) {
|
||||
history = [
|
||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary },
|
||||
...newMessages,
|
||||
] as ChatMessage[]
|
||||
if (totalTokens <= triggerTokens) {
|
||||
history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||
} else {
|
||||
history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext)
|
||||
history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||
}
|
||||
} else if (snapshot && staleSnapshot) {
|
||||
if (totalTokens <= triggerTokens) {
|
||||
history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||
} else {
|
||||
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||
}
|
||||
} else if (history.length > 4) {
|
||||
if (totalTokens <= triggerTokens && history.length <= 150) {
|
||||
if (totalTokens <= triggerTokens) {
|
||||
logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, totalTokens)
|
||||
} else {
|
||||
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext)
|
||||
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,6 +237,7 @@ export async function compressHistory(
|
||||
emit: (event: string, payload: any) => void,
|
||||
sessionMap: Map<string, SessionState>,
|
||||
modelContext: { model?: string | null; provider?: string | null } = {},
|
||||
compressionConfig?: Partial<CompressorConfig>,
|
||||
): Promise<ChatMessage[]> {
|
||||
const msgCount = newMessagesOnly ? newMessagesOnly.length : history.length
|
||||
pushState(sessionMap, sessionId, 'compression.started', {
|
||||
@@ -163,6 +249,7 @@ export async function compressHistory(
|
||||
|
||||
try {
|
||||
const session = getSession(sessionId)
|
||||
const compressor = new ChatContextCompressor({ config: compressionConfig })
|
||||
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
||||
profile: session?.profile,
|
||||
model: modelContext.model || session?.model,
|
||||
@@ -244,6 +331,8 @@ export async function forceCompressBridgeHistory(
|
||||
const upstream = ''
|
||||
const apiKey = undefined
|
||||
const session = getSession(sessionId)
|
||||
const contextLength = getModelContextLength({ profile, model: session?.model, provider: session?.provider })
|
||||
const compressionConfig = await getRunChatCompressionConfig(session?.profile || profile, contextLength)
|
||||
const beforeUsage = estimateSnapshotAwareHistoryUsage(sessionId, history)
|
||||
const totalTokens = beforeUsage.tokenCount
|
||||
bridgeLogger.info({
|
||||
@@ -256,6 +345,7 @@ export async function forceCompressBridgeHistory(
|
||||
snapshotAware: true,
|
||||
}, '[chat-run-socket] bridge forced compression started')
|
||||
|
||||
const compressor = new ChatContextCompressor({ config: compressionConfig.compressor })
|
||||
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
||||
profile: session?.profile || profile,
|
||||
model: session?.model,
|
||||
|
||||
@@ -38,7 +38,7 @@ export async function loadSessionStateFromDb(sid: string, _sessionMap: Map<strin
|
||||
let inputTokens: number
|
||||
let outputTokens: number
|
||||
const snapshot = getCompressionSnapshot(sid)
|
||||
if (snapshot) {
|
||||
if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
|
||||
const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
|
||||
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||
|
||||
@@ -53,7 +53,7 @@ export async function calcAndUpdateUsage(
|
||||
const snapshot = getCompressionSnapshot(sid)
|
||||
let inputTokens: number
|
||||
let outputTokens: number
|
||||
if (snapshot && msgs.length) {
|
||||
if (snapshot && msgs.length && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < msgs.length) {
|
||||
const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
|
||||
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||
|
||||
Reference in New Issue
Block a user