[codex] Harden context compression history handling (#848)
* Use token threshold for chat compression
* Add compression settings controls
* Use config for chat compression
* Cover protected messages in compression tests
* Remove message-count compression limit
* Harden compression window fallback
* Rebuild stale compression snapshots
* Harden stale compression snapshots
* Update changelog for compression hardening
* Prefer local history session details
This commit is contained in:
@@ -28,6 +28,14 @@ export interface MemoryConfig {
|
|||||||
user_char_limit?: number
|
user_char_limit?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Chat-history compression settings, stored under the `compression` key of the app config.
 * Every field is optional; the settings form substitutes its own fallback when a field is absent.
 */
export interface CompressionConfig {
|
||||||
|
/** Whether long chat history is compressed automatically before it exceeds the model context. */
enabled?: boolean
|
||||||
|
/** Context ratio that the estimated token count must exceed for compression to start. */
threshold?: number
|
||||||
|
/** Target history size after compression, expressed as a context ratio. */
target_ratio?: number
|
||||||
|
/** Number of latest messages kept uncompressed. */
protect_last_n?: number
|
||||||
|
/** Number of earliest messages kept uncompressed. */
protect_first_n?: number
|
||||||
|
}
|
||||||
|
|
||||||
export interface SessionResetConfig {
|
export interface SessionResetConfig {
|
||||||
mode?: string
|
mode?: string
|
||||||
idle_minutes?: number
|
idle_minutes?: number
|
||||||
@@ -47,6 +55,7 @@ export interface AppConfig {
|
|||||||
display?: DisplayConfig
|
display?: DisplayConfig
|
||||||
agent?: AgentConfig
|
agent?: AgentConfig
|
||||||
memory?: MemoryConfig
|
memory?: MemoryConfig
|
||||||
|
compression?: CompressionConfig
|
||||||
session_reset?: SessionResetConfig
|
session_reset?: SessionResetConfig
|
||||||
privacy?: PrivacyConfig
|
privacy?: PrivacyConfig
|
||||||
approvals?: ApprovalConfig
|
approvals?: ApprovalConfig
|
||||||
|
|||||||
@@ -0,0 +1,106 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { NInputNumber, NSwitch, useMessage } from 'naive-ui'
|
||||||
|
import { useI18n } from 'vue-i18n'
|
||||||
|
import { useSettingsStore } from '@/stores/hermes/settings'
|
||||||
|
import SettingRow from './SettingRow.vue'
|
||||||
|
|
||||||
|
const settingsStore = useSettingsStore()
|
||||||
|
const message = useMessage()
|
||||||
|
const { t } = useI18n()
|
||||||
|
|
||||||
|
// Fallback values displayed by the form controls when the settings store holds no
// value (null/undefined) for the corresponding compression field; applied via `??`
// in the template. NOTE(review): whether these match the backend's own defaults is
// not visible from this file — confirm.
const defaults = {
|
||||||
|
enabled: true,
|
||||||
|
threshold: 0.5,
|
||||||
|
target_ratio: 0.2,
|
||||||
|
protect_last_n: 20,
|
||||||
|
protect_first_n: 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pending save timers keyed by setting name; debouncedSave clears and replaces the
// entry for a key each time that key is edited. Entries are not removed after firing.
const debounceTimers: Record<string, ReturnType<typeof setTimeout>> = {}
|
||||||
|
|
||||||
|
/**
 * Applies a patch to the compression settings without debouncing.
 *
 * The patch is first handed to the store's `updateLocal` (presumably so the bound
 * controls reflect it immediately — confirm against the store), then persisted with
 * `saveSection`. A success or failure message is shown once the save settles.
 *
 * @param values - Partial set of compression fields to apply, e.g. `{ enabled: false }`.
 */
function save(values: Record<string, any>) {
  settingsStore.updateLocal('compression', values)

  const notifySaved = () => {
    message.success(t('settings.saved'))
  }
  const notifyFailed = () => {
    message.error(t('settings.saveFailed'))
  }

  // Keep `.catch` chained after `.then` so a failure inside the success handler is
  // reported the same way as a rejected save.
  settingsStore.saveSection('compression', values).then(notifySaved).catch(notifyFailed)
}
|
||||||
|
|
||||||
|
/**
 * Applies a single compression setting locally right away and persists it after a
 * 300 ms quiet period, so a burst of edits to the same field produces one save.
 *
 * @param key - Name of the compression field being edited; also identifies its timer.
 * @param value - New value for that field.
 */
function debouncedSave(key: string, value: any) {
  settingsStore.updateLocal('compression', { [key]: value })

  // Runs once the quiet period elapses: persist the captured value and report the outcome.
  const persist = async () => {
    try {
      await settingsStore.saveSection('compression', { [key]: value })
      message.success(t('settings.saved'))
    } catch {
      message.error(t('settings.saveFailed'))
    }
  }

  // A newer edit of the same key supersedes the save that was still pending for it.
  const pending = debounceTimers[key]
  if (pending) clearTimeout(pending)
  debounceTimers[key] = setTimeout(persist, 300)
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<section class="settings-section">
|
||||||
|
<SettingRow :label="t('settings.compression.enabled')" :hint="t('settings.compression.enabledHint')">
|
||||||
|
<NSwitch
|
||||||
|
:value="settingsStore.compression.enabled ?? defaults.enabled"
|
||||||
|
size="small"
|
||||||
|
@update:value="v => save({ enabled: v })"
|
||||||
|
/>
|
||||||
|
</SettingRow>
|
||||||
|
<SettingRow :label="t('settings.compression.threshold')" :hint="t('settings.compression.thresholdHint')">
|
||||||
|
<NInputNumber
|
||||||
|
:value="settingsStore.compression.threshold ?? defaults.threshold"
|
||||||
|
:min="0.1"
|
||||||
|
:max="0.95"
|
||||||
|
:step="0.05"
|
||||||
|
size="small"
|
||||||
|
class="input-sm"
|
||||||
|
@update:value="v => v != null && debouncedSave('threshold', v)"
|
||||||
|
/>
|
||||||
|
</SettingRow>
|
||||||
|
<SettingRow :label="t('settings.compression.targetRatio')" :hint="t('settings.compression.targetRatioHint')">
|
||||||
|
<NInputNumber
|
||||||
|
:value="settingsStore.compression.target_ratio ?? defaults.target_ratio"
|
||||||
|
:min="0.05"
|
||||||
|
:max="0.8"
|
||||||
|
:step="0.05"
|
||||||
|
size="small"
|
||||||
|
class="input-sm"
|
||||||
|
@update:value="v => v != null && debouncedSave('target_ratio', v)"
|
||||||
|
/>
|
||||||
|
</SettingRow>
|
||||||
|
<SettingRow :label="t('settings.compression.protectLastN')" :hint="t('settings.compression.protectLastNHint')">
|
||||||
|
<NInputNumber
|
||||||
|
:value="settingsStore.compression.protect_last_n ?? defaults.protect_last_n"
|
||||||
|
:min="0"
|
||||||
|
:max="200"
|
||||||
|
:step="1"
|
||||||
|
size="small"
|
||||||
|
class="input-sm"
|
||||||
|
@update:value="v => v != null && debouncedSave('protect_last_n', v)"
|
||||||
|
/>
|
||||||
|
</SettingRow>
|
||||||
|
<SettingRow :label="t('settings.compression.protectFirstN')" :hint="t('settings.compression.protectFirstNHint')">
|
||||||
|
<NInputNumber
|
||||||
|
:value="settingsStore.compression.protect_first_n ?? defaults.protect_first_n"
|
||||||
|
:min="0"
|
||||||
|
:max="50"
|
||||||
|
:step="1"
|
||||||
|
size="small"
|
||||||
|
class="input-sm"
|
||||||
|
@update:value="v => v != null && debouncedSave('protect_first_n', v)"
|
||||||
|
/>
|
||||||
|
</SettingRow>
|
||||||
|
</section>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<style scoped lang="scss">
|
||||||
|
@use '@/styles/variables' as *;
|
||||||
|
|
||||||
|
.settings-section {
|
||||||
|
margin-top: 16px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -17,6 +17,7 @@ export const changelog: ChangelogEntry[] = [
|
|||||||
'changelog.new_0_5_30_6',
|
'changelog.new_0_5_30_6',
|
||||||
'changelog.new_0_5_30_7',
|
'changelog.new_0_5_30_7',
|
||||||
'changelog.new_0_5_30_8',
|
'changelog.new_0_5_30_8',
|
||||||
|
'changelog.new_0_5_30_9',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ jobTriggered: 'Job ausgelost',
|
|||||||
account: 'Konto',
|
account: 'Konto',
|
||||||
agent: 'Agent',
|
agent: 'Agent',
|
||||||
memory: 'Gedachtnis',
|
memory: 'Gedachtnis',
|
||||||
|
compression: 'Komprimierung',
|
||||||
session: 'Sitzung',
|
session: 'Sitzung',
|
||||||
privacy: 'Datenschutz',
|
privacy: 'Datenschutz',
|
||||||
apiServer: 'API-Server',
|
apiServer: 'API-Server',
|
||||||
@@ -599,6 +600,18 @@ jobTriggered: 'Job ausgelost',
|
|||||||
userCharLimit: 'Zeichenlimit fur Benutzerprofil',
|
userCharLimit: 'Zeichenlimit fur Benutzerprofil',
|
||||||
userCharLimitHint: 'Maximale Zeichen fur USER.md',
|
userCharLimitHint: 'Maximale Zeichen fur USER.md',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: 'Komprimierung aktivieren',
|
||||||
|
enabledHint: 'Langen Chatverlauf automatisch komprimieren, bevor der Modellkontext uberschritten wird',
|
||||||
|
threshold: 'Komprimierungsschwelle',
|
||||||
|
thresholdHint: 'Komprimierung starten, wenn geschatzte Token dieses Kontextverhaltnis uberschreiten',
|
||||||
|
targetRatio: 'Zielverhaltnis',
|
||||||
|
targetRatioHint: 'Zielgroße des Verlaufs nach der Komprimierung als Kontextverhaltnis',
|
||||||
|
protectLastN: 'Neueste Nachrichten schutzen',
|
||||||
|
protectLastNHint: 'So viele neueste Nachrichten unkomprimiert lassen',
|
||||||
|
protectFirstN: 'Erste Nachrichten schutzen',
|
||||||
|
protectFirstNHint: 'So viele erste Nachrichten unkomprimiert lassen',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'Zurucksetzungsmodus',
|
mode: 'Zurucksetzungsmodus',
|
||||||
modeHint: 'Ausloser fur Sitzungszurucksetzung',
|
modeHint: 'Ausloser fur Sitzungszurucksetzung',
|
||||||
@@ -957,6 +970,7 @@ jobTriggered: 'Job ausgelost',
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis',
|
new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis',
|
||||||
new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt',
|
new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt',
|
||||||
new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats',
|
new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats',
|
||||||
|
|||||||
@@ -716,6 +716,7 @@ export default {
|
|||||||
account: 'Account',
|
account: 'Account',
|
||||||
agent: 'Agent',
|
agent: 'Agent',
|
||||||
memory: 'Memory',
|
memory: 'Memory',
|
||||||
|
compression: 'Compression',
|
||||||
session: 'Session',
|
session: 'Session',
|
||||||
privacy: 'Privacy',
|
privacy: 'Privacy',
|
||||||
apiServer: 'API Server',
|
apiServer: 'API Server',
|
||||||
@@ -774,6 +775,18 @@ export default {
|
|||||||
userCharLimit: 'User Profile Char Limit',
|
userCharLimit: 'User Profile Char Limit',
|
||||||
userCharLimitHint: 'Max characters for USER.md',
|
userCharLimitHint: 'Max characters for USER.md',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: 'Enable Compression',
|
||||||
|
enabledHint: 'Automatically compress long chat history before it exceeds the model context',
|
||||||
|
threshold: 'Compression Threshold',
|
||||||
|
thresholdHint: 'Start compression when estimated tokens exceed this context ratio',
|
||||||
|
targetRatio: 'Target Ratio',
|
||||||
|
targetRatioHint: 'Target history size after compression as a context ratio',
|
||||||
|
protectLastN: 'Protect Recent Messages',
|
||||||
|
protectLastNHint: 'Keep this many latest messages uncompressed',
|
||||||
|
protectFirstN: 'Protect First Messages',
|
||||||
|
protectFirstNHint: 'Keep this many earliest messages uncompressed',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'Reset Mode',
|
mode: 'Reset Mode',
|
||||||
modeHint: 'Trigger condition for session reset',
|
modeHint: 'Trigger condition for session reset',
|
||||||
@@ -1251,6 +1264,7 @@ export default {
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
|
|
||||||
new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization',
|
new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization',
|
||||||
new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events',
|
new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events',
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ jobTriggered: 'Job ejecutado',
|
|||||||
account: 'Cuenta',
|
account: 'Cuenta',
|
||||||
agent: 'Agente',
|
agent: 'Agente',
|
||||||
memory: 'Memoria',
|
memory: 'Memoria',
|
||||||
|
compression: 'Compresion',
|
||||||
session: 'Sesion',
|
session: 'Sesion',
|
||||||
privacy: 'Privacidad',
|
privacy: 'Privacidad',
|
||||||
apiServer: 'Servidor API',
|
apiServer: 'Servidor API',
|
||||||
@@ -599,6 +600,18 @@ jobTriggered: 'Job ejecutado',
|
|||||||
userCharLimit: 'Limite de caracteres del perfil de usuario',
|
userCharLimit: 'Limite de caracteres del perfil de usuario',
|
||||||
userCharLimitHint: 'Maximo de caracteres para USER.md',
|
userCharLimitHint: 'Maximo de caracteres para USER.md',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: 'Activar compresion',
|
||||||
|
enabledHint: 'Comprimir automaticamente el historial largo antes de superar el contexto del modelo',
|
||||||
|
threshold: 'Umbral de compresion',
|
||||||
|
thresholdHint: 'Iniciar compresion cuando los tokens estimados superen esta proporcion del contexto',
|
||||||
|
targetRatio: 'Proporcion objetivo',
|
||||||
|
targetRatioHint: 'Tamano objetivo del historial tras comprimir como proporcion del contexto',
|
||||||
|
protectLastN: 'Proteger mensajes recientes',
|
||||||
|
protectLastNHint: 'Mantener sin comprimir esta cantidad de mensajes recientes',
|
||||||
|
protectFirstN: 'Proteger primeros mensajes',
|
||||||
|
protectFirstNHint: 'Mantener sin comprimir esta cantidad de mensajes iniciales',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'Modo de reinicio',
|
mode: 'Modo de reinicio',
|
||||||
modeHint: 'Condicion de activacion del reinicio de sesion',
|
modeHint: 'Condicion de activacion del reinicio de sesion',
|
||||||
@@ -953,6 +966,7 @@ jobTriggered: 'Job ejecutado',
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión',
|
new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión',
|
||||||
new_0_5_5_2: 'Añadida página de historial para sesiones Hermes',
|
new_0_5_5_2: 'Añadida página de historial para sesiones Hermes',
|
||||||
new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente',
|
new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente',
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ jobTriggered: 'Job declenche',
|
|||||||
account: 'Compte',
|
account: 'Compte',
|
||||||
agent: 'Agent',
|
agent: 'Agent',
|
||||||
memory: 'Memoire',
|
memory: 'Memoire',
|
||||||
|
compression: 'Compression',
|
||||||
session: 'Session',
|
session: 'Session',
|
||||||
privacy: 'Confidentialite',
|
privacy: 'Confidentialite',
|
||||||
apiServer: 'Serveur API',
|
apiServer: 'Serveur API',
|
||||||
@@ -599,6 +600,18 @@ jobTriggered: 'Job declenche',
|
|||||||
userCharLimit: 'Limite de caracteres du profil utilisateur',
|
userCharLimit: 'Limite de caracteres du profil utilisateur',
|
||||||
userCharLimitHint: 'Nombre maximum de caracteres pour USER.md',
|
userCharLimitHint: 'Nombre maximum de caracteres pour USER.md',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: 'Activer la compression',
|
||||||
|
enabledHint: 'Compresser automatiquement un long historique avant de depasser le contexte du modele',
|
||||||
|
threshold: 'Seuil de compression',
|
||||||
|
thresholdHint: 'Demarrer la compression quand les jetons estimes depassent ce ratio de contexte',
|
||||||
|
targetRatio: 'Ratio cible',
|
||||||
|
targetRatioHint: 'Taille cible de l\'historique apres compression comme ratio du contexte',
|
||||||
|
protectLastN: 'Proteger les messages recents',
|
||||||
|
protectLastNHint: 'Garder autant de messages recents non compresses',
|
||||||
|
protectFirstN: 'Proteger les premiers messages',
|
||||||
|
protectFirstNHint: 'Garder autant de premiers messages non compresses',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'Mode de reinitialisation',
|
mode: 'Mode de reinitialisation',
|
||||||
modeHint: 'Condition de declenchement de la reinitialisation de session',
|
modeHint: 'Condition de declenchement de la reinitialisation de session',
|
||||||
@@ -952,6 +965,7 @@ jobTriggered: 'Job declenche',
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension',
|
new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension',
|
||||||
new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes',
|
new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes',
|
||||||
new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante',
|
new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante',
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ export default {
|
|||||||
account: 'アカウント',
|
account: 'アカウント',
|
||||||
agent: 'エージェント',
|
agent: 'エージェント',
|
||||||
memory: 'メモリ',
|
memory: 'メモリ',
|
||||||
|
compression: '圧縮',
|
||||||
session: 'セッション',
|
session: 'セッション',
|
||||||
privacy: 'プライバシー',
|
privacy: 'プライバシー',
|
||||||
apiServer: 'API サーバー',
|
apiServer: 'API サーバー',
|
||||||
@@ -599,6 +600,18 @@ export default {
|
|||||||
userCharLimit: 'ユーザープロファイル文字数上限',
|
userCharLimit: 'ユーザープロファイル文字数上限',
|
||||||
userCharLimitHint: 'USER.md の最大文字数',
|
userCharLimitHint: 'USER.md の最大文字数',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: '圧縮を有効化',
|
||||||
|
enabledHint: '長いチャット履歴がモデルコンテキストを超える前に自動圧縮',
|
||||||
|
threshold: '圧縮しきい値',
|
||||||
|
thresholdHint: '推定トークンがこのコンテキスト比率を超えたら圧縮を開始',
|
||||||
|
targetRatio: '目標比率',
|
||||||
|
targetRatioHint: '圧縮後の履歴サイズをコンテキスト比率で指定',
|
||||||
|
protectLastN: '直近メッセージを保護',
|
||||||
|
protectLastNHint: 'この数の最新メッセージは圧縮しない',
|
||||||
|
protectFirstN: '先頭メッセージを保護',
|
||||||
|
protectFirstNHint: 'この数の最初のメッセージは圧縮しない',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'リセットモード',
|
mode: 'リセットモード',
|
||||||
modeHint: 'セッションリセットのトリガー条件',
|
modeHint: 'セッションリセットのトリガー条件',
|
||||||
@@ -953,6 +966,7 @@ export default {
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 労働者の日!今日はお休みです、何卒ご理解ください',
|
new_0_5_5_1: '🎉 労働者の日!今日はお休みです、何卒ご理解ください',
|
||||||
new_0_5_5_2: 'Hermesセッション履歴ページを追加',
|
new_0_5_5_2: 'Hermesセッション履歴ページを追加',
|
||||||
new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理',
|
new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理',
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ export default {
|
|||||||
account: '계정',
|
account: '계정',
|
||||||
agent: '에이전트',
|
agent: '에이전트',
|
||||||
memory: '메모리',
|
memory: '메모리',
|
||||||
|
compression: '압축',
|
||||||
session: '세션',
|
session: '세션',
|
||||||
privacy: '개인정보',
|
privacy: '개인정보',
|
||||||
apiServer: 'API 서버',
|
apiServer: 'API 서버',
|
||||||
@@ -599,6 +600,18 @@ export default {
|
|||||||
userCharLimit: '사용자 프로필 문자 제한',
|
userCharLimit: '사용자 프로필 문자 제한',
|
||||||
userCharLimitHint: 'USER.md 최대 문자 수',
|
userCharLimitHint: 'USER.md 최대 문자 수',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: '압축 활성화',
|
||||||
|
enabledHint: '긴 채팅 기록이 모델 컨텍스트를 넘기 전에 자동 압축',
|
||||||
|
threshold: '압축 임계값',
|
||||||
|
thresholdHint: '추정 토큰이 이 컨텍스트 비율을 넘으면 압축 시작',
|
||||||
|
targetRatio: '목표 비율',
|
||||||
|
targetRatioHint: '압축 후 기록 크기를 컨텍스트 비율로 지정',
|
||||||
|
protectLastN: '최근 메시지 보호',
|
||||||
|
protectLastNHint: '이 수만큼 최신 메시지는 압축하지 않음',
|
||||||
|
protectFirstN: '처음 메시지 보호',
|
||||||
|
protectFirstNHint: '이 수만큼 처음 메시지는 압축하지 않음',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: '초기화 모드',
|
mode: '초기화 모드',
|
||||||
modeHint: '세션 초기화 트리거 조건',
|
modeHint: '세션 초기화 트리거 조건',
|
||||||
@@ -953,6 +966,7 @@ export default {
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다',
|
new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다',
|
||||||
new_0_5_5_2: 'Hermes 세션 기록 페이지 추가',
|
new_0_5_5_2: 'Hermes 세션 기록 페이지 추가',
|
||||||
new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리',
|
new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리',
|
||||||
|
|||||||
@@ -549,6 +549,7 @@ jobTriggered: 'Job acionado',
|
|||||||
account: 'Conta',
|
account: 'Conta',
|
||||||
agent: 'Agente',
|
agent: 'Agente',
|
||||||
memory: 'Memoria',
|
memory: 'Memoria',
|
||||||
|
compression: 'Compressao',
|
||||||
session: 'Sessao',
|
session: 'Sessao',
|
||||||
privacy: 'Privacidade',
|
privacy: 'Privacidade',
|
||||||
apiServer: 'Servidor API',
|
apiServer: 'Servidor API',
|
||||||
@@ -599,6 +600,18 @@ jobTriggered: 'Job acionado',
|
|||||||
userCharLimit: 'Limite de caracteres do perfil do usuario',
|
userCharLimit: 'Limite de caracteres do perfil do usuario',
|
||||||
userCharLimitHint: 'Maximo de caracteres para USER.md',
|
userCharLimitHint: 'Maximo de caracteres para USER.md',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: 'Ativar compressao',
|
||||||
|
enabledHint: 'Comprimir automaticamente historico longo antes de exceder o contexto do modelo',
|
||||||
|
threshold: 'Limiar de compressao',
|
||||||
|
thresholdHint: 'Iniciar compressao quando tokens estimados excederem esta proporcao do contexto',
|
||||||
|
targetRatio: 'Proporcao alvo',
|
||||||
|
targetRatioHint: 'Tamanho alvo do historico apos compressao como proporcao do contexto',
|
||||||
|
protectLastN: 'Proteger mensagens recentes',
|
||||||
|
protectLastNHint: 'Manter sem compressao esta quantidade de mensagens recentes',
|
||||||
|
protectFirstN: 'Proteger primeiras mensagens',
|
||||||
|
protectFirstNHint: 'Manter sem compressao esta quantidade de mensagens iniciais',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: 'Modo de reinicializacao',
|
mode: 'Modo de reinicializacao',
|
||||||
modeHint: 'Condicao de acionamento para reinicializacao de sessao',
|
modeHint: 'Condicao de acionamento para reinicializacao de sessao',
|
||||||
@@ -953,6 +966,7 @@ jobTriggered: 'Job acionado',
|
|||||||
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows',
|
||||||
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists',
|
||||||
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling',
|
||||||
|
new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history',
|
||||||
new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão',
|
new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão',
|
||||||
new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes',
|
new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes',
|
||||||
new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente',
|
new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente',
|
||||||
|
|||||||
@@ -696,6 +696,7 @@ export default {
|
|||||||
account: '帳號',
|
account: '帳號',
|
||||||
agent: '代理',
|
agent: '代理',
|
||||||
memory: '記憶',
|
memory: '記憶',
|
||||||
|
compression: '上下文壓縮',
|
||||||
session: '工作階段',
|
session: '工作階段',
|
||||||
privacy: '隱私',
|
privacy: '隱私',
|
||||||
apiServer: 'API 伺服器',
|
apiServer: 'API 伺服器',
|
||||||
@@ -754,6 +755,18 @@ export default {
|
|||||||
userCharLimit: '使用者畫像字元上限',
|
userCharLimit: '使用者畫像字元上限',
|
||||||
userCharLimitHint: 'USER.md 最大字元數',
|
userCharLimitHint: 'USER.md 最大字元數',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: '啟用壓縮',
|
||||||
|
enabledHint: '長對話接近模型上下文上限前自動壓縮歷史',
|
||||||
|
threshold: '壓縮閾值',
|
||||||
|
thresholdHint: '預估 token 超過上下文比例時開始壓縮',
|
||||||
|
targetRatio: '目標比例',
|
||||||
|
targetRatioHint: '壓縮後歷史保留到上下文的目標比例',
|
||||||
|
protectLastN: '保護最近訊息',
|
||||||
|
protectLastNHint: '最近多少則訊息不參與壓縮',
|
||||||
|
protectFirstN: '保護開頭訊息',
|
||||||
|
protectFirstNHint: '最早多少則訊息不參與壓縮',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: '重設模式',
|
mode: '重設模式',
|
||||||
modeHint: '工作階段重設的觸發條件',
|
modeHint: '工作階段重設的觸發條件',
|
||||||
@@ -1243,6 +1256,7 @@ export default {
|
|||||||
new_0_5_30_6: '優化 Windows 相容性:路徑識別、檔案下載、任務/更新子程序不再彈出額外終端視窗',
|
new_0_5_30_6: '優化 Windows 相容性:路徑識別、檔案下載、任務/更新子程序不再彈出額外終端視窗',
|
||||||
new_0_5_30_7: '修復配置寫入和 Provider 預設:加強 .env 校驗,FUN-Codex 改走 Responses API,並更新 Z.AI/GLM 模型列表',
|
new_0_5_30_7: '修復配置寫入和 Provider 預設:加強 .env 校驗,FUN-Codex 改走 Responses API,並更新 Z.AI/GLM 模型列表',
|
||||||
new_0_5_30_8: '前端體驗細節優化:折疊側邊欄布局、分組短標題、側邊欄分隔線和對話大綱樣式更穩定',
|
new_0_5_30_8: '前端體驗細節優化:折疊側邊欄布局、分組短標題、側邊欄分隔線和對話大綱樣式更穩定',
|
||||||
|
new_0_5_30_9: '上下文壓縮現在跟隨 Profile 壓縮配置,並在舊快照失效時複用舊摘要和安全尾部,避免重新壓縮完整歷史',
|
||||||
new_0_5_6_1: '新增語音播放功能:使用 Web Speech API,支援手動播放按鈕、自動播放開關、彩虹邊框動畫和行動端最佳化',
|
new_0_5_6_1: '新增語音播放功能:使用 Web Speech API,支援手動播放按鈕、自動播放開關、彩虹邊框動畫和行動端最佳化',
|
||||||
new_0_5_6_2: '新增強健的 LLM JSON 解析器,相容 Python 格式並從串流事件中擷取文字',
|
new_0_5_6_2: '新增強健的 LLM JSON 解析器,相容 Python 格式並從串流事件中擷取文字',
|
||||||
new_0_5_6_3: 'Skills 功能增強:使用統計、來源過濾、封存技能、來源追溯和釘選切換',
|
new_0_5_6_3: 'Skills 功能增強:使用統計、來源過濾、封存技能、來源追溯和釘選切換',
|
||||||
|
|||||||
@@ -708,6 +708,7 @@ export default {
|
|||||||
account: '账户',
|
account: '账户',
|
||||||
agent: '代理',
|
agent: '代理',
|
||||||
memory: '记忆',
|
memory: '记忆',
|
||||||
|
compression: '上下文压缩',
|
||||||
session: '会话',
|
session: '会话',
|
||||||
privacy: '隐私',
|
privacy: '隐私',
|
||||||
apiServer: 'API 服务器',
|
apiServer: 'API 服务器',
|
||||||
@@ -766,6 +767,18 @@ export default {
|
|||||||
userCharLimit: '用户画像字符上限',
|
userCharLimit: '用户画像字符上限',
|
||||||
userCharLimitHint: 'USER.md 最大字符数',
|
userCharLimitHint: 'USER.md 最大字符数',
|
||||||
},
|
},
|
||||||
|
compression: {
|
||||||
|
enabled: '启用压缩',
|
||||||
|
enabledHint: '长对话接近模型上下文上限前自动压缩历史',
|
||||||
|
threshold: '压缩阈值',
|
||||||
|
thresholdHint: '预计 token 超过上下文比例时开始压缩',
|
||||||
|
targetRatio: '目标比例',
|
||||||
|
targetRatioHint: '压缩后历史保留到上下文的目标比例',
|
||||||
|
protectLastN: '保护最近消息',
|
||||||
|
protectLastNHint: '最近多少条消息不参与压缩',
|
||||||
|
protectFirstN: '保护开头消息',
|
||||||
|
protectFirstNHint: '最早多少条消息不参与压缩',
|
||||||
|
},
|
||||||
session: {
|
session: {
|
||||||
mode: '重置模式',
|
mode: '重置模式',
|
||||||
modeHint: '会话重置的触发条件',
|
modeHint: '会话重置的触发条件',
|
||||||
@@ -1253,6 +1266,7 @@ export default {
|
|||||||
new_0_5_30_6: '优化 Windows 兼容性:路径识别、文件下载、任务/更新子进程不再弹出额外终端窗口',
|
new_0_5_30_6: '优化 Windows 兼容性:路径识别、文件下载、任务/更新子进程不再弹出额外终端窗口',
|
||||||
new_0_5_30_7: '修复配置写入和 Provider 预设:加强 .env 校验,FUN-Codex 改走 Responses API,并更新 Z.AI/GLM 模型列表',
|
new_0_5_30_7: '修复配置写入和 Provider 预设:加强 .env 校验,FUN-Codex 改走 Responses API,并更新 Z.AI/GLM 模型列表',
|
||||||
new_0_5_30_8: '前端体验细节优化:折叠侧边栏布局、分组短标题、侧边栏分隔线和对话大纲样式更稳定',
|
new_0_5_30_8: '前端体验细节优化:折叠侧边栏布局、分组短标题、侧边栏分隔线和对话大纲样式更稳定',
|
||||||
|
new_0_5_30_9: '上下文压缩现在跟随 Profile 压缩配置,并在旧快照失效时复用旧摘要和安全尾部,避免重新压缩完整历史',
|
||||||
|
|
||||||
new_0_5_6_1: '新增语音播放功能:使用 Web Speech API,支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化',
|
new_0_5_6_1: '新增语音播放功能:使用 Web Speech API,支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化',
|
||||||
new_0_5_6_2: '新增健壮的 LLM JSON 解析器,兼容 Python 格式并从流式事件中提取文本',
|
new_0_5_6_2: '新增健壮的 LLM JSON 解析器,兼容 Python 格式并从流式事件中提取文本',
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { defineStore } from 'pinia'
|
import { defineStore } from 'pinia'
|
||||||
import { ref } from 'vue'
|
import { ref } from 'vue'
|
||||||
import * as configApi from '@/api/hermes/config'
|
import * as configApi from '@/api/hermes/config'
|
||||||
import type { DisplayConfig, AgentConfig, MemoryConfig, SessionResetConfig, PrivacyConfig, ApprovalConfig } from '@/api/hermes/config'
|
import type { DisplayConfig, AgentConfig, MemoryConfig, CompressionConfig, SessionResetConfig, PrivacyConfig, ApprovalConfig } from '@/api/hermes/config'
|
||||||
|
|
||||||
export const useSettingsStore = defineStore('settings', () => {
|
export const useSettingsStore = defineStore('settings', () => {
|
||||||
const loading = ref(false)
|
const loading = ref(false)
|
||||||
@@ -10,6 +10,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
|||||||
const display = ref<DisplayConfig>({})
|
const display = ref<DisplayConfig>({})
|
||||||
const agent = ref<AgentConfig>({})
|
const agent = ref<AgentConfig>({})
|
||||||
const memory = ref<MemoryConfig>({})
|
const memory = ref<MemoryConfig>({})
|
||||||
|
const compression = ref<CompressionConfig>({})
|
||||||
const sessionReset = ref<SessionResetConfig>({})
|
const sessionReset = ref<SessionResetConfig>({})
|
||||||
const privacy = ref<PrivacyConfig>({})
|
const privacy = ref<PrivacyConfig>({})
|
||||||
const approvals = ref<ApprovalConfig>({})
|
const approvals = ref<ApprovalConfig>({})
|
||||||
@@ -32,6 +33,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
|||||||
display.value = data.display || {}
|
display.value = data.display || {}
|
||||||
agent.value = data.agent || {}
|
agent.value = data.agent || {}
|
||||||
memory.value = data.memory || {}
|
memory.value = data.memory || {}
|
||||||
|
compression.value = data.compression || {}
|
||||||
sessionReset.value = data.session_reset || {}
|
sessionReset.value = data.session_reset || {}
|
||||||
privacy.value = data.privacy || {}
|
privacy.value = data.privacy || {}
|
||||||
approvals.value = data.approvals || {}
|
approvals.value = data.approvals || {}
|
||||||
@@ -58,6 +60,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
|||||||
case 'display': display.value = { ...display.value, ...values }; break
|
case 'display': display.value = { ...display.value, ...values }; break
|
||||||
case 'agent': agent.value = { ...agent.value, ...values }; break
|
case 'agent': agent.value = { ...agent.value, ...values }; break
|
||||||
case 'memory': memory.value = { ...memory.value, ...values }; break
|
case 'memory': memory.value = { ...memory.value, ...values }; break
|
||||||
|
case 'compression': compression.value = { ...compression.value, ...values }; break
|
||||||
case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break
|
case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break
|
||||||
case 'privacy': privacy.value = { ...privacy.value, ...values }; break
|
case 'privacy': privacy.value = { ...privacy.value, ...values }; break
|
||||||
case 'approvals': approvals.value = { ...approvals.value, ...values }; break
|
case 'approvals': approvals.value = { ...approvals.value, ...values }; break
|
||||||
@@ -91,6 +94,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
|||||||
case 'display': display.value = { ...display.value, ...values }; break
|
case 'display': display.value = { ...display.value, ...values }; break
|
||||||
case 'agent': agent.value = { ...agent.value, ...values }; break
|
case 'agent': agent.value = { ...agent.value, ...values }; break
|
||||||
case 'memory': memory.value = { ...memory.value, ...values }; break
|
case 'memory': memory.value = { ...memory.value, ...values }; break
|
||||||
|
case 'compression': compression.value = { ...compression.value, ...values }; break
|
||||||
case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break
|
case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break
|
||||||
case 'privacy': privacy.value = { ...privacy.value, ...values }; break
|
case 'privacy': privacy.value = { ...privacy.value, ...values }; break
|
||||||
case 'approvals': approvals.value = { ...approvals.value, ...values }; break
|
case 'approvals': approvals.value = { ...approvals.value, ...values }; break
|
||||||
@@ -122,7 +126,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
loading, saving,
|
loading, saving,
|
||||||
display, agent, memory, sessionReset, privacy, approvals,
|
display, agent, memory, compression, sessionReset, privacy, approvals,
|
||||||
telegram, discord, slack, whatsapp, matrix, wecom, feishu, dingtalk, qqbot, weixin, platforms,
|
telegram, discord, slack, whatsapp, matrix, wecom, feishu, dingtalk, qqbot, weixin, platforms,
|
||||||
fetchSettings, saveSection, updateLocal,
|
fetchSettings, saveSection, updateLocal,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import { useSettingsStore } from "@/stores/hermes/settings";
|
|||||||
import DisplaySettings from "@/components/hermes/settings/DisplaySettings.vue";
|
import DisplaySettings from "@/components/hermes/settings/DisplaySettings.vue";
|
||||||
import AgentSettings from "@/components/hermes/settings/AgentSettings.vue";
|
import AgentSettings from "@/components/hermes/settings/AgentSettings.vue";
|
||||||
import MemorySettings from "@/components/hermes/settings/MemorySettings.vue";
|
import MemorySettings from "@/components/hermes/settings/MemorySettings.vue";
|
||||||
|
import CompressionSettings from "@/components/hermes/settings/CompressionSettings.vue";
|
||||||
import SessionSettings from "@/components/hermes/settings/SessionSettings.vue";
|
import SessionSettings from "@/components/hermes/settings/SessionSettings.vue";
|
||||||
import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue";
|
import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue";
|
||||||
import ModelSettings from "@/components/hermes/settings/ModelSettings.vue";
|
import ModelSettings from "@/components/hermes/settings/ModelSettings.vue";
|
||||||
@@ -49,6 +50,9 @@ onMounted(() => {
|
|||||||
<NTabPane name="memory" :tab="t('settings.tabs.memory')">
|
<NTabPane name="memory" :tab="t('settings.tabs.memory')">
|
||||||
<MemorySettings />
|
<MemorySettings />
|
||||||
</NTabPane>
|
</NTabPane>
|
||||||
|
<NTabPane name="compression" :tab="t('settings.tabs.compression')">
|
||||||
|
<CompressionSettings />
|
||||||
|
</NTabPane>
|
||||||
<NTabPane name="session" :tab="t('settings.tabs.session')">
|
<NTabPane name="session" :tab="t('settings.tabs.session')">
|
||||||
<SessionSettings />
|
<SessionSettings />
|
||||||
</NTabPane>
|
</NTabPane>
|
||||||
|
|||||||
@@ -188,7 +188,16 @@ export async function get(ctx: any) {
|
|||||||
* GET /api/hermes/sessions/hermes/:id
|
* GET /api/hermes/sessions/hermes/:id
|
||||||
*/
|
*/
|
||||||
export async function getHermesSession(ctx: any) {
|
export async function getHermesSession(ctx: any) {
|
||||||
// Try database first (consistent with listHermesSessions)
|
// Prefer the Web UI local session store. Hermes state.db can lag behind or
|
||||||
|
// miss messages for Bridge-backed runs, while the local store is the source
|
||||||
|
// used by chat rendering and compression.
|
||||||
|
const localSession = localGetSessionDetail(ctx.params.id)
|
||||||
|
if (localSession && localSession.source !== 'api_server') {
|
||||||
|
ctx.body = { session: localSession }
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try Hermes state.db next (consistent with listHermesSessions)
|
||||||
try {
|
try {
|
||||||
const session = await getSessionDetailFromDb(ctx.params.id)
|
const session = await getSessionDetailFromDb(ctx.params.id)
|
||||||
if (session && session.source !== 'api_server') {
|
if (session && session.source !== 'api_server') {
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ export interface CompressionConfig {
|
|||||||
triggerTokens: number
|
triggerTokens: number
|
||||||
/** Summary token target (default: 8000) */
|
/** Summary token target (default: 8000) */
|
||||||
summaryBudget: number
|
summaryBudget: number
|
||||||
|
/** Number of earliest messages to keep verbatim (default: 0) */
|
||||||
|
headMessageCount: number
|
||||||
/** Number of recent messages to keep verbatim (default: 10) */
|
/** Number of recent messages to keep verbatim (default: 10) */
|
||||||
tailMessageCount: number
|
tailMessageCount: number
|
||||||
/** Timeout for LLM summarization call (default: 60_000ms) */
|
/** Timeout for LLM summarization call (default: 60_000ms) */
|
||||||
@@ -55,6 +57,7 @@ export interface CompressionConfig {
|
|||||||
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
|
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
|
||||||
triggerTokens: 100_000,
|
triggerTokens: 100_000,
|
||||||
summaryBudget: 8_000,
|
summaryBudget: 8_000,
|
||||||
|
headMessageCount: 0,
|
||||||
tailMessageCount: 10,
|
tailMessageCount: 10,
|
||||||
summarizationTimeoutMs: 120_000,
|
summarizationTimeoutMs: 120_000,
|
||||||
}
|
}
|
||||||
@@ -108,6 +111,54 @@ export function countTokensForModel(text: string, model: string): number {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Estimate the token cost of a single chat message.
 *
 * String content is counted directly. Array content is flattened to text
 * first: text blocks contribute their text, image/file blocks contribute a
 * short `[Image: path]` / `[File: path]` placeholder, and any other block
 * type contributes nothing. Content that is neither a string nor an array
 * counts as zero tokens.
 */
function messageTokenEstimate(message: ChatMessage): number {
  const { content } = message
  if (typeof content === 'string') return countTokens(content)
  if (!Array.isArray(content)) return 0

  // Concatenate the textual stand-in of every block without separators.
  let flattened = ''
  for (const block of content) {
    switch (block.type) {
      case 'text':
        flattened += block.text || ''
        break
      case 'image':
        flattened += `[Image: ${block.path || ''}]`
        break
      case 'file':
        flattened += `[File: ${block.path || ''}]`
        break
      default:
        break
    }
  }
  return countTokens(flattened)
}
|
||||||
|
|
||||||
|
/**
 * Sum the per-message token estimates of a message list.
 * Returns 0 for an empty list.
 */
function messagesTokenEstimate(messages: ChatMessage[]): number {
  let total = 0
  for (const entry of messages) {
    total += messageTokenEstimate(entry)
  }
  return total
}
|
||||||
|
|
||||||
|
/**
 * Cut `text` down so that its estimated token count fits `tokenBudget`.
 *
 * The text is returned untouched when the budget is non-positive (treated as
 * "no limit") or when it already fits. Otherwise the longest prefix that fits
 * is kept and a truncation marker is appended.
 *
 * The marker's own token cost is reserved up front so that the returned
 * string (prefix + marker) stays within the budget instead of overshooting
 * it. If the budget is too small to hold the marker at all, the prefix is
 * hard-cut to the budget and the marker is omitted.
 *
 * NOTE(review): the binary search assumes `countTokens` is non-decreasing as
 * the prefix grows and that an empty string costs no more than the budget.
 * Token counts of concatenated strings are only approximately additive, so
 * the final size is an estimate.
 *
 * @param text - Text to shorten.
 * @param tokenBudget - Maximum estimated tokens for the returned string.
 * @returns The original text, or a truncated prefix (normally with marker).
 */
function truncateTextToTokenBudget(text: string, tokenBudget: number): string {
  if (tokenBudget <= 0 || countTokens(text) <= tokenBudget) return text

  const marker = '\n\n[Summary truncated to fit context budget]'
  const markerTokens = countTokens(marker)
  const markerFits = markerTokens < tokenBudget
  // Leave room for the marker so the final string respects the budget.
  const contentBudget = markerFits ? tokenBudget - markerTokens : tokenBudget

  // Binary search for the longest prefix whose estimate fits contentBudget.
  let lo = 0
  let hi = text.length
  while (lo < hi) {
    const mid = Math.ceil((lo + hi) / 2)
    if (countTokens(text.slice(0, mid)) <= contentBudget) lo = mid
    else hi = mid - 1
  }

  // Never cut between the two halves of a surrogate pair: a dangling high
  // surrogate is not valid text once serialized.
  if (lo > 0 && lo < text.length) {
    const lastUnit = text.charCodeAt(lo - 1)
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) lo -= 1
  }

  const kept = text.slice(0, lo).trimEnd()
  return markerFits ? kept + marker : kept
}
|
||||||
|
|
||||||
|
/**
 * Last-resort guard that keeps a compressed history under `triggerTokens`.
 *
 * When the assembled messages are already within budget (or the budget is
 * non-positive) they are returned as-is. Otherwise everything except the
 * summary message at `summaryIndex` is dropped; if the summary alone is still
 * too large its text is truncated to the budget. The input is returned
 * unchanged when no string-content summary exists at `summaryIndex`.
 *
 * @param messages - Assembled history (head, summary, tail).
 * @param triggerTokens - Token ceiling; `<= 0` disables the guard.
 * @param summaryIndex - Position of the summary message inside `messages`.
 * @returns Either `messages` itself or a single-element array holding a copy
 *          of the (possibly truncated) summary message.
 */
function enforceCompressedBudget(
  messages: ChatMessage[],
  triggerTokens: number,
  summaryIndex: number,
): ChatMessage[] {
  const withinBudget = triggerTokens <= 0 || messagesTokenEstimate(messages) <= triggerTokens
  if (withinBudget) return messages

  const summaryMessage = messages[summaryIndex]
  if (!summaryMessage || typeof summaryMessage.content !== 'string') return messages

  // Over budget: fall back to the summary alone, shortening it only if needed.
  const summaryFits = messagesTokenEstimate([summaryMessage]) <= triggerTokens
  const content = summaryFits
    ? summaryMessage.content
    : truncateTextToTokenBudget(summaryMessage.content, triggerTokens)

  return [{ ...summaryMessage, content }]
}
|
||||||
|
|
||||||
// ─── Prompts ────────────────────────────────────────────
|
// ─── Prompts ────────────────────────────────────────────
|
||||||
|
|
||||||
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
|
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
|
||||||
@@ -371,6 +422,10 @@ export function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: nu
|
|||||||
return [...pruned, ...tail]
|
return [...pruned, ...tail]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prune tool results for histories returned on a fallback path.
 * Delegates directly to `pruneOldToolResults` with the same arguments.
 */
function pruneFallbackToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
  const pruned = pruneOldToolResults(messages, keepRecentCount)
  return pruned
}
|
||||||
|
|
||||||
// ─── LLM Summarization ──────────────────────────────────
|
// ─── LLM Summarization ──────────────────────────────────
|
||||||
|
|
||||||
export async function callSummarizer(
|
export async function callSummarizer(
|
||||||
@@ -474,7 +529,7 @@ export class ChatContextCompressor {
|
|||||||
// Check if we have a previous compression snapshot
|
// Check if we have a previous compression snapshot
|
||||||
const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null
|
const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null
|
||||||
|
|
||||||
if (snapshot) {
|
if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
|
||||||
// Has snapshot → incremental compress (merge old summary with new messages)
|
// Has snapshot → incremental compress (merge old summary with new messages)
|
||||||
logger.info(
|
logger.info(
|
||||||
'[context-compressor] session=%s: incremental compress with snapshot at index %d',
|
'[context-compressor] session=%s: incremental compress with snapshot at index %d',
|
||||||
@@ -484,6 +539,22 @@ export class ChatContextCompressor {
|
|||||||
messages, snapshot, upstream, apiKey, sessionId!, makeMeta(), summarizer,
|
messages, snapshot, upstream, apiKey, sessionId!, makeMeta(), summarizer,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
|
if (snapshot && sessionId) {
|
||||||
|
const fallbackLastMessageIndex = Math.max(-1, messages.length - this.config.tailMessageCount - 1)
|
||||||
|
logger.warn(
|
||||||
|
'[context-compressor] session=%s: stale snapshot index %d for %d messages; using summary plus tail from index %d',
|
||||||
|
sessionId, snapshot.lastMessageIndex, messages.length, fallbackLastMessageIndex,
|
||||||
|
)
|
||||||
|
return this.incrementalCompress(
|
||||||
|
messages,
|
||||||
|
{ summary: snapshot.summary, lastMessageIndex: fallbackLastMessageIndex },
|
||||||
|
upstream,
|
||||||
|
apiKey,
|
||||||
|
sessionId,
|
||||||
|
makeMeta(),
|
||||||
|
summarizer,
|
||||||
|
)
|
||||||
|
}
|
||||||
// No snapshot → full compress (compress all messages)
|
// No snapshot → full compress (compress all messages)
|
||||||
logger.info(
|
logger.info(
|
||||||
'[context-compressor] session=%s: full compress %d messages',
|
'[context-compressor] session=%s: full compress %d messages',
|
||||||
@@ -504,27 +575,36 @@ export class ChatContextCompressor {
|
|||||||
): Promise<CompressedResult> {
|
): Promise<CompressedResult> {
|
||||||
const { summary: previousSummary, lastMessageIndex } = snapshot
|
const { summary: previousSummary, lastMessageIndex } = snapshot
|
||||||
const total = messages.length
|
const total = messages.length
|
||||||
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
|
const headCount = Math.min(this.config.headMessageCount, Math.max(0, lastMessageIndex + 1))
|
||||||
const newMessages = cleaned.slice(lastMessageIndex + 1)
|
const head = messages.slice(0, headCount)
|
||||||
|
const newMessages = messages.slice(lastMessageIndex + 1)
|
||||||
const tailCount = this.config.tailMessageCount
|
const tailCount = this.config.tailMessageCount
|
||||||
|
const previousSummaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }
|
||||||
|
const assembledWithPrevious = [
|
||||||
|
...head,
|
||||||
|
previousSummaryMessage,
|
||||||
|
...newMessages,
|
||||||
|
]
|
||||||
|
const assembledOverBudget = messagesTokenEstimate(assembledWithPrevious) > this.config.triggerTokens
|
||||||
|
const canKeepTailWindow = newMessages.length > tailCount
|
||||||
|
|
||||||
// Keep last N of new messages, compress the rest
|
// If the new segment itself is too small to split but already over budget,
|
||||||
const tailStart = Math.max(0, newMessages.length - tailCount)
|
// fold all new messages into the existing summary instead of preserving them verbatim.
|
||||||
|
const tailStart = assembledOverBudget && !canKeepTailWindow
|
||||||
|
? newMessages.length
|
||||||
|
: Math.max(0, newMessages.length - tailCount)
|
||||||
const toCompress = newMessages.slice(0, tailStart)
|
const toCompress = newMessages.slice(0, tailStart)
|
||||||
const tail = newMessages.slice(tailStart)
|
const tail = newMessages.slice(tailStart)
|
||||||
|
|
||||||
if (toCompress.length === 0) {
|
if (toCompress.length === 0) {
|
||||||
return {
|
return {
|
||||||
messages: [
|
messages: assembledWithPrevious,
|
||||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
|
|
||||||
...newMessages,
|
|
||||||
],
|
|
||||||
meta: {
|
meta: {
|
||||||
...meta,
|
...meta,
|
||||||
compressed: true,
|
compressed: true,
|
||||||
llmCompressed: false,
|
llmCompressed: false,
|
||||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
||||||
verbatimCount: newMessages.length,
|
verbatimCount: head.length + newMessages.length,
|
||||||
compressedStartIndex: lastMessageIndex,
|
compressedStartIndex: lastMessageIndex,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -546,26 +626,32 @@ export class ChatContextCompressor {
|
|||||||
logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
|
logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', err.message)
|
logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', err.message)
|
||||||
return {
|
const fallback = [
|
||||||
messages: [
|
...head,
|
||||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary },
|
previousSummaryMessage,
|
||||||
...newMessages,
|
...newMessages,
|
||||||
],
|
]
|
||||||
|
const prunedFallback = pruneFallbackToolResults(fallback, this.config.tailMessageCount)
|
||||||
|
const budgetedFallback = enforceCompressedBudget(prunedFallback, this.config.triggerTokens, head.length)
|
||||||
|
return {
|
||||||
|
messages: budgetedFallback,
|
||||||
meta: {
|
meta: {
|
||||||
...meta,
|
...meta,
|
||||||
compressed: true,
|
compressed: true,
|
||||||
llmCompressed: false,
|
llmCompressed: false,
|
||||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary),
|
||||||
verbatimCount: newMessages.length,
|
verbatimCount: budgetedFallback.length === fallback.length ? head.length + newMessages.length : 0,
|
||||||
compressedStartIndex: lastMessageIndex,
|
compressedStartIndex: lastMessageIndex,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const result: ChatMessage[] = [
|
let result: ChatMessage[] = [
|
||||||
|
...head,
|
||||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
|
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary },
|
||||||
...tail,
|
...tail,
|
||||||
]
|
]
|
||||||
|
result = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
|
||||||
|
|
||||||
const newLastIndex = lastMessageIndex + tailStart
|
const newLastIndex = lastMessageIndex + tailStart
|
||||||
if (sessionId) {
|
if (sessionId) {
|
||||||
@@ -579,7 +665,7 @@ export class ChatContextCompressor {
|
|||||||
compressed: true,
|
compressed: true,
|
||||||
llmCompressed: true,
|
llmCompressed: true,
|
||||||
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
|
summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
|
||||||
verbatimCount: tail.length,
|
verbatimCount: result.length === head.length + 1 + tail.length ? head.length + tail.length : 0,
|
||||||
compressedStartIndex: newLastIndex,
|
compressedStartIndex: newLastIndex,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -594,20 +680,20 @@ export class ChatContextCompressor {
|
|||||||
summarizer?: string | SummarizerOptions,
|
summarizer?: string | SummarizerOptions,
|
||||||
): Promise<CompressedResult> {
|
): Promise<CompressedResult> {
|
||||||
const total = messages.length
|
const total = messages.length
|
||||||
const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount)
|
const requestedHeadCount = Math.min(this.config.headMessageCount, total)
|
||||||
const tailCount = this.config.tailMessageCount
|
const requestedTailCount = this.config.tailMessageCount
|
||||||
|
const canKeepProtectedWindows = total > requestedHeadCount + requestedTailCount
|
||||||
if (total <= tailCount) {
|
const headCount = canKeepProtectedWindows ? requestedHeadCount : 0
|
||||||
return { messages: cleaned, meta }
|
const tailCount = canKeepProtectedWindows ? requestedTailCount : 0
|
||||||
}
|
|
||||||
|
|
||||||
const tailStart = total - tailCount
|
const tailStart = total - tailCount
|
||||||
const toCompress = cleaned.slice(0, tailStart)
|
const head = messages.slice(0, headCount)
|
||||||
const tail = cleaned.slice(tailStart)
|
const toCompress = messages.slice(headCount, tailStart)
|
||||||
|
const tail = messages.slice(tailStart)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
'[context-compressor] [full-llm] compressing messages 0-%d, keeping %d-%d',
|
'[context-compressor] [full-llm] compressing messages %d-%d, keeping first %d and last %d',
|
||||||
tailStart - 1, tailStart, total - 1,
|
headCount, tailStart - 1, head.length, tail.length,
|
||||||
)
|
)
|
||||||
|
|
||||||
const contentToSummarize = serializeForSummary(toCompress)
|
const contentToSummarize = serializeForSummary(toCompress)
|
||||||
@@ -624,26 +710,28 @@ export class ChatContextCompressor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!summary) {
|
if (!summary) {
|
||||||
return { messages: cleaned, meta }
|
return { messages: pruneFallbackToolResults(messages, this.config.tailMessageCount), meta }
|
||||||
}
|
}
|
||||||
|
|
||||||
const result: ChatMessage[] = []
|
const result: ChatMessage[] = []
|
||||||
|
|
||||||
|
result.push(...head)
|
||||||
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
|
result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary })
|
||||||
if (sessionId) {
|
if (sessionId) {
|
||||||
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
|
saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
|
||||||
}
|
}
|
||||||
|
|
||||||
result.push(...tail)
|
result.push(...tail)
|
||||||
|
const budgetedResult = enforceCompressedBudget(result, this.config.triggerTokens, head.length)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
messages: result,
|
messages: budgetedResult,
|
||||||
meta: {
|
meta: {
|
||||||
...meta,
|
...meta,
|
||||||
compressed: true,
|
compressed: true,
|
||||||
llmCompressed: !!summary,
|
llmCompressed: !!summary,
|
||||||
summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0,
|
summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0,
|
||||||
verbatimCount: tail.length,
|
verbatimCount: budgetedResult.length === result.length ? head.length + tail.length : 0,
|
||||||
compressedStartIndex: tailStart - 1,
|
compressedStartIndex: tailStart - 1,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,14 +10,88 @@ import {
|
|||||||
import { getCompressionSnapshot } from '../../../db/hermes/compression-snapshot'
|
import { getCompressionSnapshot } from '../../../db/hermes/compression-snapshot'
|
||||||
import { ChatContextCompressor, SUMMARY_PREFIX } from '../../../lib/context-compressor'
|
import { ChatContextCompressor, SUMMARY_PREFIX } from '../../../lib/context-compressor'
|
||||||
import { getModelContextLength } from '../model-context'
|
import { getModelContextLength } from '../model-context'
|
||||||
|
import { readConfigYamlForProfile } from '../../config-helpers'
|
||||||
import { logger } from '../../logger'
|
import { logger } from '../../logger'
|
||||||
import { bridgeLogger } from '../../logger'
|
import { bridgeLogger } from '../../logger'
|
||||||
import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
|
import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
|
||||||
import { isAssistantMessageSendable } from './message-format'
|
import { isAssistantMessageSendable } from './message-format'
|
||||||
import type { ChatMessage } from '../../../lib/context-compressor'
|
import type { ChatMessage, CompressionConfig as CompressorConfig } from '../../../lib/context-compressor'
|
||||||
import type { SessionState, BridgeCompressionResult } from './types'
|
import type { SessionState, BridgeCompressionResult } from './types'
|
||||||
|
|
||||||
const compressor = new ChatContextCompressor()
|
/** Compression settings for one chat run, built by `getRunChatCompressionConfig`. */
interface RunChatCompressionConfig {
  /** False only when the profile config sets `compression.enabled` to `false`. */
  enabled: boolean
  /** Token count above which the history is compressed. */
  triggerTokens: number
  /** Overrides handed to the context compressor. */
  compressor: Partial<CompressorConfig>
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a compression snapshot still points inside `history`.
 *
 * A snapshot is usable when it exists and its `lastMessageIndex` is a valid
 * index into the current history (0 <= index < history.length).
 */
function isSnapshotUsable(
  snapshot: { lastMessageIndex: number } | null,
  history: ChatMessage[],
): boolean {
  if (!snapshot) return false
  const { lastMessageIndex } = snapshot
  return lastMessageIndex >= 0 && lastMessageIndex < history.length
}
|
||||||
|
|
||||||
|
/**
 * Build the history to send upstream when a compression snapshot exists:
 * protected head messages, then the stored summary as a user message, then
 * the messages that follow the snapshot.
 *
 * - Usable snapshot (index inside `history`): the tail is everything after
 *   `snapshot.lastMessageIndex`. The protected head is clamped to the
 *   snapshot boundary so a message is never emitted both in the head and in
 *   the post-snapshot tail. This can otherwise happen when the snapshot
 *   covers fewer messages than `headMessageCount`.
 * - Stale snapshot (index outside `history`): the tail is the last
 *   `tailMessageCount` messages, never reaching back into the protected head.
 *
 * @param snapshot - Stored summary and the index of the last summarized message.
 * @param history - Full, uncompressed message history.
 * @param compressionConfig - Optional head/tail protection counts; both
 *        default to 0 when omitted.
 * @returns The assembled history, or `null` when there is no snapshot.
 */
function buildSnapshotHistory(
  snapshot: { summary: string; lastMessageIndex: number } | null,
  history: ChatMessage[],
  compressionConfig?: Partial<CompressorConfig>,
): ChatMessage[] | null {
  if (!snapshot) return null

  const headCount = compressionConfig?.headMessageCount || 0
  const tailCount = compressionConfig?.tailMessageCount || 0
  const summaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary }

  if (isSnapshotUsable(snapshot, history)) {
    const resumeAt = snapshot.lastMessageIndex + 1
    // Clamp the head so it cannot overlap the messages replayed after the snapshot.
    const protectedHead = headCount > 0 ? history.slice(0, Math.min(headCount, resumeAt)) : []
    return [
      ...protectedHead,
      summaryMessage,
      ...history.slice(resumeAt),
    ]
  }

  // Stale snapshot: keep the summary plus a safe tail that starts after the head.
  const protectedHead = headCount > 0 ? history.slice(0, headCount) : []
  const tailStart = Math.max(protectedHead.length, history.length - tailCount)
  return [
    ...protectedHead,
    summaryMessage,
    ...history.slice(tailStart),
  ]
}
|
||||||
|
|
||||||
|
/**
 * Coerce a config value into a ratio within `[min, max]`.
 *
 * Anything that is not a finite number is replaced by `fallback`; the chosen
 * value (including the fallback) is then clamped to the given bounds.
 */
function clampRatio(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = fallback
  if (typeof value === 'number' && Number.isFinite(value)) {
    candidate = value
  }
  const lowerBounded = Math.max(min, candidate)
  return Math.min(max, lowerBounded)
}
|
||||||
|
|
||||||
|
/**
 * Coerce a config value into an integer count within `[min, max]`.
 *
 * A finite number is floored; any other value is replaced by `fallback`
 * (used as given, without flooring). The result is clamped to the bounds.
 */
function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = fallback
  if (typeof value === 'number' && Number.isFinite(value)) {
    candidate = Math.floor(value)
  }
  const lowerBounded = Math.max(min, candidate)
  return Math.min(max, lowerBounded)
}
|
||||||
|
|
||||||
|
/**
 * Resolve the compression settings for a chat run from the profile's config.
 *
 * Reads the `compression` section of the profile config; if reading throws,
 * a warning is logged and defaults are used. Raw values are sanitized:
 * - `threshold` is a ratio in [0.05, 0.95], default 0.5
 * - `target_ratio` is a ratio in [0.01, 0.8], default 0.2
 * - `protect_last_n` is an integer in [0, 500], default 20
 * - `protect_first_n` is an integer in [0, 100], default 3
 *
 * @param profile - Profile whose config is read.
 * @param contextLength - Context window size, in tokens, that the ratios scale.
 * @returns The enabled flag, the trigger threshold in tokens and the
 *          overrides for the compressor. The summary budget is never below
 *          1000 tokens.
 */
async function getRunChatCompressionConfig(profile: string, contextLength: number): Promise<RunChatCompressionConfig> {
  let raw: Record<string, any> = {}
  try {
    const profileConfig = await readConfigYamlForProfile(profile)
    raw = profileConfig?.compression || {}
  } catch (err) {
    logger.warn(err, '[context-compress] failed to read compression config for profile %s, using defaults', profile)
  }

  const thresholdRatio = clampRatio(raw.threshold, 0.5, 0.05, 0.95)
  const summaryRatio = clampRatio(raw.target_ratio, 0.2, 0.01, 0.8)
  const tailMessageCount = clampInt(raw.protect_last_n, 20, 0, 500)
  const headMessageCount = clampInt(raw.protect_first_n, 3, 0, 100)

  // The same threshold is reported to the caller and passed to the compressor.
  const triggerTokens = Math.floor(contextLength * thresholdRatio)
  const summaryBudget = Math.max(1_000, Math.floor(contextLength * summaryRatio))

  return {
    // Compression stays on unless it is explicitly switched off.
    enabled: raw.enabled !== false,
    triggerTokens,
    compressor: {
      triggerTokens,
      summaryBudget,
      headMessageCount,
      tailMessageCount,
    },
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load conversation history from DB with full message structure (user/assistant/tool).
|
* Load conversation history from DB with full message structure (user/assistant/tool).
|
||||||
@@ -77,12 +151,7 @@ export function estimateSnapshotAwareHistoryUsage(
|
|||||||
history: ChatMessage[],
|
history: ChatMessage[],
|
||||||
): { messageCount: number; tokenCount: number } {
|
): { messageCount: number; tokenCount: number } {
|
||||||
const snapshot = getCompressionSnapshot(sessionId)
|
const snapshot = getCompressionSnapshot(sessionId)
|
||||||
const messages = snapshot
|
const messages = buildSnapshotHistory(snapshot, history) || history
|
||||||
? [
|
|
||||||
{ role: 'user', content: SUMMARY_PREFIX + snapshot.summary },
|
|
||||||
...history.slice(snapshot.lastMessageIndex + 1),
|
|
||||||
]
|
|
||||||
: history
|
|
||||||
const usage = estimateUsageTokensFromMessages(messages)
|
const usage = estimateUsageTokensFromMessages(messages)
|
||||||
return {
|
return {
|
||||||
messageCount: messages.length,
|
messageCount: messages.length,
|
||||||
@@ -108,29 +177,45 @@ export async function buildCompressedHistory(
|
|||||||
model: modelContext.model,
|
model: modelContext.model,
|
||||||
provider: modelContext.provider,
|
provider: modelContext.provider,
|
||||||
})
|
})
|
||||||
const triggerTokens = Math.floor(contextLength / 2)
|
const compressionConfig = await getRunChatCompressionConfig(profile, contextLength)
|
||||||
|
const triggerTokens = compressionConfig.triggerTokens
|
||||||
|
if (!compressionConfig.enabled) {
|
||||||
|
logger.info('[context-compress] session=%s: compression disabled by config', sessionId)
|
||||||
|
return history
|
||||||
|
}
|
||||||
const cState = getOrCreateSession(sessionMap, sessionId)
|
const cState = getOrCreateSession(sessionMap, sessionId)
|
||||||
const assembledTokens = await calcAndUpdateUsage(sessionId, cState, emit)
|
const assembledTokens = await calcAndUpdateUsage(sessionId, cState, emit)
|
||||||
const totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens
|
let totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens
|
||||||
const snapshot = getCompressionSnapshot(sessionId)
|
const snapshot = getCompressionSnapshot(sessionId)
|
||||||
|
const staleSnapshot = snapshot && !isSnapshotUsable(snapshot, history)
|
||||||
|
if (staleSnapshot) {
|
||||||
|
logger.warn('[context-compress] session=%s: stale snapshot index %d for %d history messages; using summary plus safe tail',
|
||||||
|
sessionId, snapshot.lastMessageIndex, history.length)
|
||||||
|
const staleHistory = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||||
|
const staleUsage = estimateUsageTokensFromMessages(staleHistory)
|
||||||
|
totalTokens = staleUsage.inputTokens + staleUsage.outputTokens
|
||||||
|
}
|
||||||
|
|
||||||
if (snapshot) {
|
if (snapshot && !staleSnapshot) {
|
||||||
const newMessages = history.slice(snapshot.lastMessageIndex + 1)
|
const newMessages = history.slice(snapshot.lastMessageIndex + 1)
|
||||||
logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
|
logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
|
||||||
sessionId, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
|
sessionId, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens)
|
||||||
if (totalTokens <= triggerTokens && newMessages.length <= 150) {
|
if (totalTokens <= triggerTokens) {
|
||||||
history = [
|
history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||||
{ role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary },
|
|
||||||
...newMessages,
|
|
||||||
] as ChatMessage[]
|
|
||||||
} else {
|
} else {
|
||||||
history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext)
|
history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||||
|
}
|
||||||
|
} else if (snapshot && staleSnapshot) {
|
||||||
|
if (totalTokens <= triggerTokens) {
|
||||||
|
history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history
|
||||||
|
} else {
|
||||||
|
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||||
}
|
}
|
||||||
} else if (history.length > 4) {
|
} else if (history.length > 4) {
|
||||||
if (totalTokens <= triggerTokens && history.length <= 150) {
|
if (totalTokens <= triggerTokens) {
|
||||||
logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, totalTokens)
|
logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, totalTokens)
|
||||||
} else {
|
} else {
|
||||||
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext)
|
history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -152,6 +237,7 @@ export async function compressHistory(
|
|||||||
emit: (event: string, payload: any) => void,
|
emit: (event: string, payload: any) => void,
|
||||||
sessionMap: Map<string, SessionState>,
|
sessionMap: Map<string, SessionState>,
|
||||||
modelContext: { model?: string | null; provider?: string | null } = {},
|
modelContext: { model?: string | null; provider?: string | null } = {},
|
||||||
|
compressionConfig?: Partial<CompressorConfig>,
|
||||||
): Promise<ChatMessage[]> {
|
): Promise<ChatMessage[]> {
|
||||||
const msgCount = newMessagesOnly ? newMessagesOnly.length : history.length
|
const msgCount = newMessagesOnly ? newMessagesOnly.length : history.length
|
||||||
pushState(sessionMap, sessionId, 'compression.started', {
|
pushState(sessionMap, sessionId, 'compression.started', {
|
||||||
@@ -163,6 +249,7 @@ export async function compressHistory(
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const session = getSession(sessionId)
|
const session = getSession(sessionId)
|
||||||
|
const compressor = new ChatContextCompressor({ config: compressionConfig })
|
||||||
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
||||||
profile: session?.profile,
|
profile: session?.profile,
|
||||||
model: modelContext.model || session?.model,
|
model: modelContext.model || session?.model,
|
||||||
@@ -244,6 +331,8 @@ export async function forceCompressBridgeHistory(
|
|||||||
const upstream = ''
|
const upstream = ''
|
||||||
const apiKey = undefined
|
const apiKey = undefined
|
||||||
const session = getSession(sessionId)
|
const session = getSession(sessionId)
|
||||||
|
const contextLength = getModelContextLength({ profile, model: session?.model, provider: session?.provider })
|
||||||
|
const compressionConfig = await getRunChatCompressionConfig(session?.profile || profile, contextLength)
|
||||||
const beforeUsage = estimateSnapshotAwareHistoryUsage(sessionId, history)
|
const beforeUsage = estimateSnapshotAwareHistoryUsage(sessionId, history)
|
||||||
const totalTokens = beforeUsage.tokenCount
|
const totalTokens = beforeUsage.tokenCount
|
||||||
bridgeLogger.info({
|
bridgeLogger.info({
|
||||||
@@ -256,6 +345,7 @@ export async function forceCompressBridgeHistory(
|
|||||||
snapshotAware: true,
|
snapshotAware: true,
|
||||||
}, '[chat-run-socket] bridge forced compression started')
|
}, '[chat-run-socket] bridge forced compression started')
|
||||||
|
|
||||||
|
const compressor = new ChatContextCompressor({ config: compressionConfig.compressor })
|
||||||
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
const result = await compressor.compress(history, upstream, apiKey, sessionId, {
|
||||||
profile: session?.profile || profile,
|
profile: session?.profile || profile,
|
||||||
model: session?.model,
|
model: session?.model,
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ export async function loadSessionStateFromDb(sid: string, _sessionMap: Map<strin
|
|||||||
let inputTokens: number
|
let inputTokens: number
|
||||||
let outputTokens: number
|
let outputTokens: number
|
||||||
const snapshot = getCompressionSnapshot(sid)
|
const snapshot = getCompressionSnapshot(sid)
|
||||||
if (snapshot) {
|
if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) {
|
||||||
const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
|
const newMessages = messages.slice(snapshot.lastMessageIndex + 1)
|
||||||
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
||||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ export async function calcAndUpdateUsage(
|
|||||||
const snapshot = getCompressionSnapshot(sid)
|
const snapshot = getCompressionSnapshot(sid)
|
||||||
let inputTokens: number
|
let inputTokens: number
|
||||||
let outputTokens: number
|
let outputTokens: number
|
||||||
if (snapshot && msgs.length) {
|
if (snapshot && msgs.length && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < msgs.length) {
|
||||||
const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
|
const newMessages = msgs.slice(snapshot.lastMessageIndex + 1)
|
||||||
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
const newUsage = estimateUsageTokensFromMessages(newMessages)
|
||||||
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) +
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
|||||||
const getCompressionSnapshotMock = vi.fn()
|
const getCompressionSnapshotMock = vi.fn()
|
||||||
const saveCompressionSnapshotMock = vi.fn()
|
const saveCompressionSnapshotMock = vi.fn()
|
||||||
const deleteCompressionSnapshotMock = vi.fn()
|
const deleteCompressionSnapshotMock = vi.fn()
|
||||||
|
const bridgeRequestMock = vi.fn()
|
||||||
|
const bridgeDestroyMock = vi.fn()
|
||||||
|
|
||||||
vi.mock('../../packages/server/src/services/logger', () => ({
|
vi.mock('../../packages/server/src/services/logger', () => ({
|
||||||
logger: {
|
logger: {
|
||||||
@@ -19,6 +21,13 @@ vi.mock('../../packages/server/src/db/hermes/compression-snapshot', () => ({
|
|||||||
deleteCompressionSnapshot: deleteCompressionSnapshotMock,
|
deleteCompressionSnapshot: deleteCompressionSnapshotMock,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/hermes/agent-bridge', () => ({
|
||||||
|
AgentBridgeClient: class {
|
||||||
|
request = bridgeRequestMock
|
||||||
|
destroy = bridgeDestroyMock
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
describe('ChatContextCompressor', () => {
|
describe('ChatContextCompressor', () => {
|
||||||
let originalFetch: typeof global.fetch
|
let originalFetch: typeof global.fetch
|
||||||
|
|
||||||
@@ -27,6 +36,10 @@ describe('ChatContextCompressor', () => {
|
|||||||
getCompressionSnapshotMock.mockReset()
|
getCompressionSnapshotMock.mockReset()
|
||||||
saveCompressionSnapshotMock.mockReset()
|
saveCompressionSnapshotMock.mockReset()
|
||||||
deleteCompressionSnapshotMock.mockReset()
|
deleteCompressionSnapshotMock.mockReset()
|
||||||
|
bridgeRequestMock.mockReset()
|
||||||
|
bridgeDestroyMock.mockReset()
|
||||||
|
bridgeRequestMock.mockRejectedValue(new Error('summarizer failed'))
|
||||||
|
bridgeDestroyMock.mockResolvedValue(undefined)
|
||||||
})
|
})
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
@@ -42,7 +55,6 @@ describe('ChatContextCompressor', () => {
|
|||||||
}))
|
}))
|
||||||
|
|
||||||
getCompressionSnapshotMock.mockReturnValue(null)
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
global.fetch = vi.fn(async () => ({ ok: false, status: 500 })) as any
|
|
||||||
|
|
||||||
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
@@ -66,7 +78,6 @@ describe('ChatContextCompressor', () => {
|
|||||||
lastMessageIndex: 1,
|
lastMessageIndex: 1,
|
||||||
messageCountAtTime: 2,
|
messageCountAtTime: 2,
|
||||||
})
|
})
|
||||||
global.fetch = vi.fn(async () => ({ ok: false, status: 500 })) as any
|
|
||||||
|
|
||||||
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
@@ -109,4 +120,331 @@ describe('ChatContextCompressor', () => {
|
|||||||
expect(result.meta.compressedStartIndex).toBe(3)
|
expect(result.meta.compressedStartIndex).toBe(3)
|
||||||
expect(saveCompressionSnapshotMock).not.toHaveBeenCalled()
|
expect(saveCompressionSnapshotMock).not.toHaveBeenCalled()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('keeps configured first and last messages during full compression', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const messages = Array.from({ length: 10 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `message ${i}`,
|
||||||
|
}))
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'compressed summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'message 0',
|
||||||
|
'message 1',
|
||||||
|
`${SUMMARY_PREFIX}\n\ncompressed summary`,
|
||||||
|
'message 7',
|
||||||
|
'message 8',
|
||||||
|
'message 9',
|
||||||
|
])
|
||||||
|
expect(result.meta.compressed).toBe(true)
|
||||||
|
expect(result.meta.llmCompressed).toBe(true)
|
||||||
|
expect(result.meta.verbatimCount).toBe(5)
|
||||||
|
expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'compressed summary', 6, 10)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not pre-prune tool results before sending them to the summarizer', async () => {
|
||||||
|
const { ChatContextCompressor } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 0, tailMessageCount: 1, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const longToolOutput = `${'x'.repeat(180)}KEEP_MARKER${'y'.repeat(180)}`
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: 'calling terminal',
|
||||||
|
tool_calls: [{ id: 'call_1', type: 'function', function: { name: 'terminal', arguments: '{}' } }],
|
||||||
|
},
|
||||||
|
{ role: 'tool', name: 'terminal', tool_call_id: 'call_1', content: longToolOutput },
|
||||||
|
{ role: 'user', content: 'tail' },
|
||||||
|
]
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'compressed summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
const request = bridgeRequestMock.mock.calls[0][0]
|
||||||
|
const serializedHistory = JSON.stringify(request.conversation_history)
|
||||||
|
expect(serializedHistory).toContain('KEEP_MARKER')
|
||||||
|
expect(serializedHistory).not.toContain('[terminal] ')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('keeps protected head tool results verbatim after successful full compression', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 2, tailMessageCount: 1, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const longToolOutput = `${'head-tool-output '.repeat(30)}KEEP_HEAD_TOOL`
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
role: 'assistant',
|
||||||
|
content: 'calling terminal',
|
||||||
|
tool_calls: [{ id: 'call_1', type: 'function', function: { name: 'terminal', arguments: '{}' } }],
|
||||||
|
},
|
||||||
|
{ role: 'tool', name: 'terminal', tool_call_id: 'call_1', content: longToolOutput },
|
||||||
|
{ role: 'user', content: 'middle' },
|
||||||
|
{ role: 'assistant', content: 'tail' },
|
||||||
|
]
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'compressed summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'calling terminal',
|
||||||
|
longToolOutput,
|
||||||
|
`${SUMMARY_PREFIX}\n\ncompressed summary`,
|
||||||
|
'tail',
|
||||||
|
])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('uses the previous summary plus a safe tail when an existing snapshot index is stale', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const messages = Array.from({ length: 8 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `message ${i}`,
|
||||||
|
}))
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue({
|
||||||
|
summary: 'stale previous summary',
|
||||||
|
lastMessageIndex: 20,
|
||||||
|
messageCountAtTime: 21,
|
||||||
|
})
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'rebuilt summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(deleteCompressionSnapshotMock).not.toHaveBeenCalled()
|
||||||
|
expect(bridgeRequestMock).not.toHaveBeenCalled()
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'message 0',
|
||||||
|
'message 1',
|
||||||
|
`${SUMMARY_PREFIX}\n\nstale previous summary`,
|
||||||
|
'message 5',
|
||||||
|
'message 6',
|
||||||
|
'message 7',
|
||||||
|
])
|
||||||
|
expect(saveCompressionSnapshotMock).not.toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('folds a stale snapshot safe tail into a new summary when it still exceeds budget', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { triggerTokens: 800, headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const largeTail = 'tail-token '.repeat(200)
|
||||||
|
const messages = [
|
||||||
|
{ role: 'user', content: 'message 0' },
|
||||||
|
{ role: 'assistant', content: 'message 1' },
|
||||||
|
{ role: 'user', content: 'message 2' },
|
||||||
|
{ role: 'assistant', content: 'message 3' },
|
||||||
|
{ role: 'user', content: 'message 4' },
|
||||||
|
{ role: 'assistant', content: largeTail },
|
||||||
|
{ role: 'user', content: largeTail },
|
||||||
|
{ role: 'assistant', content: largeTail },
|
||||||
|
]
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue({
|
||||||
|
summary: 'stale previous summary',
|
||||||
|
lastMessageIndex: 20,
|
||||||
|
messageCountAtTime: 21,
|
||||||
|
})
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'updated stale summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(deleteCompressionSnapshotMock).not.toHaveBeenCalled()
|
||||||
|
expect(bridgeRequestMock).toHaveBeenCalledTimes(1)
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'message 0',
|
||||||
|
'message 1',
|
||||||
|
`${SUMMARY_PREFIX}\n\nupdated stale summary`,
|
||||||
|
])
|
||||||
|
expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'updated stale summary', 7, 8)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('compresses the full history when protected windows cover all messages', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 3, tailMessageCount: 20, summaryBudget: 1000 },
|
||||||
|
})
|
||||||
|
const messages = Array.from({ length: 20 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `message ${i}`,
|
||||||
|
}))
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'compressed all messages' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(bridgeRequestMock).toHaveBeenCalledTimes(1)
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
`${SUMMARY_PREFIX}\n\ncompressed all messages`,
|
||||||
|
])
|
||||||
|
expect(result.meta.compressed).toBe(true)
|
||||||
|
expect(result.meta.llmCompressed).toBe(true)
|
||||||
|
expect(result.meta.verbatimCount).toBe(0)
|
||||||
|
expect(result.meta.compressedStartIndex).toBe(19)
|
||||||
|
expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'compressed all messages', 19, 20)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('drops protected messages when compressed output still exceeds the trigger budget', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { triggerTokens: 200, headMessageCount: 2, tailMessageCount: 2, summaryBudget: 100 },
|
||||||
|
})
|
||||||
|
const largeText = 'tail-token '.repeat(500)
|
||||||
|
const messages = [
|
||||||
|
{ role: 'user', content: 'head 0' },
|
||||||
|
{ role: 'assistant', content: 'head 1' },
|
||||||
|
{ role: 'user', content: 'middle 2' },
|
||||||
|
{ role: 'assistant', content: 'middle 3' },
|
||||||
|
{ role: 'user', content: largeText },
|
||||||
|
{ role: 'assistant', content: largeText },
|
||||||
|
]
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'short summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
`${SUMMARY_PREFIX}\n\nshort summary`,
|
||||||
|
])
|
||||||
|
expect(result.meta.compressed).toBe(true)
|
||||||
|
expect(result.meta.llmCompressed).toBe(true)
|
||||||
|
expect(result.meta.verbatimCount).toBe(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('truncates the summary when the summary alone exceeds the trigger budget', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX, countTokens } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { triggerTokens: 120, headMessageCount: 2, tailMessageCount: 2, summaryBudget: 100 },
|
||||||
|
})
|
||||||
|
const messages = Array.from({ length: 6 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `message ${i}`,
|
||||||
|
}))
|
||||||
|
const longSummary = 'summary-token '.repeat(500)
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: longSummary },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(result.messages).toHaveLength(1)
|
||||||
|
expect(String(result.messages[0].content)).toContain('[Summary truncated to fit context budget]')
|
||||||
|
expect(String(result.messages[0].content).startsWith(SUMMARY_PREFIX)).toBe(true)
|
||||||
|
expect(countTokens(String(result.messages[0].content))).toBeLessThanOrEqual(140)
|
||||||
|
expect(result.meta.verbatimCount).toBe(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('keeps configured first messages when incremental compression reuses an existing snapshot', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { headMessageCount: 2, tailMessageCount: 10 },
|
||||||
|
})
|
||||||
|
const messages = Array.from({ length: 6 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `message ${i}`,
|
||||||
|
}))
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue({
|
||||||
|
summary: 'previous summary',
|
||||||
|
lastMessageIndex: 3,
|
||||||
|
messageCountAtTime: 4,
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(bridgeRequestMock).not.toHaveBeenCalled()
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'message 0',
|
||||||
|
'message 1',
|
||||||
|
`${SUMMARY_PREFIX}\n\nprevious summary`,
|
||||||
|
'message 4',
|
||||||
|
'message 5',
|
||||||
|
])
|
||||||
|
expect(result.meta.verbatimCount).toBe(4)
|
||||||
|
expect(saveCompressionSnapshotMock).not.toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('folds all new messages into the summary when incremental tail protection would exceed budget', async () => {
|
||||||
|
const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor')
|
||||||
|
const compressor = new ChatContextCompressor({
|
||||||
|
config: { triggerTokens: 1000, headMessageCount: 3, tailMessageCount: 20, summaryBudget: 100 },
|
||||||
|
})
|
||||||
|
const largeText = 'new-token '.repeat(80)
|
||||||
|
const messages = [
|
||||||
|
{ role: 'user', content: 'head 0' },
|
||||||
|
{ role: 'assistant', content: 'head 1' },
|
||||||
|
{ role: 'user', content: 'head 2' },
|
||||||
|
...Array.from({ length: 20 }, (_, i) => ({
|
||||||
|
role: i % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `${largeText}${i}`,
|
||||||
|
})),
|
||||||
|
]
|
||||||
|
|
||||||
|
getCompressionSnapshotMock.mockReturnValue({
|
||||||
|
summary: 'previous summary',
|
||||||
|
lastMessageIndex: 2,
|
||||||
|
messageCountAtTime: 3,
|
||||||
|
})
|
||||||
|
bridgeRequestMock.mockResolvedValue({
|
||||||
|
status: 'completed',
|
||||||
|
result: { final_response: 'updated summary' },
|
||||||
|
})
|
||||||
|
|
||||||
|
const result = await compressor.compress(messages, 'http://upstream', undefined, 's1')
|
||||||
|
|
||||||
|
expect(bridgeRequestMock).toHaveBeenCalledTimes(1)
|
||||||
|
expect(result.messages.map(m => m.content)).toEqual([
|
||||||
|
'head 0',
|
||||||
|
'head 1',
|
||||||
|
'head 2',
|
||||||
|
`${SUMMARY_PREFIX}\n\nupdated summary`,
|
||||||
|
])
|
||||||
|
expect(result.meta.compressed).toBe(true)
|
||||||
|
expect(result.meta.llmCompressed).toBe(true)
|
||||||
|
expect(result.meta.verbatimCount).toBe(3)
|
||||||
|
expect(result.meta.compressedStartIndex).toBe(22)
|
||||||
|
expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'updated summary', 22, 23)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,349 @@
|
|||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
|
const getSessionDetailMock = vi.fn()
|
||||||
|
const getSessionMock = vi.fn()
|
||||||
|
const getCompressionSnapshotMock = vi.fn()
|
||||||
|
const getModelContextLengthMock = vi.fn()
|
||||||
|
const calcAndUpdateUsageMock = vi.fn()
|
||||||
|
const estimateUsageTokensFromMessagesMock = vi.fn()
|
||||||
|
const compressorCompressMock = vi.fn()
|
||||||
|
const readConfigYamlForProfileMock = vi.fn()
|
||||||
|
const compressorConstructorMock = vi.fn()
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/db/hermes/session-store', () => ({
|
||||||
|
getSessionDetail: getSessionDetailMock,
|
||||||
|
getSession: getSessionMock,
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/db/hermes/compression-snapshot', () => ({
|
||||||
|
getCompressionSnapshot: getCompressionSnapshotMock,
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/lib/context-compressor', () => ({
|
||||||
|
SUMMARY_PREFIX: '[Previous context summary]',
|
||||||
|
ChatContextCompressor: class {
|
||||||
|
constructor(opts?: any) {
|
||||||
|
compressorConstructorMock(opts)
|
||||||
|
}
|
||||||
|
compress = compressorCompressMock
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/hermes/model-context', () => ({
|
||||||
|
getModelContextLength: getModelContextLengthMock,
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/config-helpers', () => ({
|
||||||
|
readConfigYamlForProfile: readConfigYamlForProfileMock,
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/logger', () => ({
|
||||||
|
logger: {
|
||||||
|
info: vi.fn(),
|
||||||
|
warn: vi.fn(),
|
||||||
|
error: vi.fn(),
|
||||||
|
debug: vi.fn(),
|
||||||
|
},
|
||||||
|
bridgeLogger: {
|
||||||
|
info: vi.fn(),
|
||||||
|
warn: vi.fn(),
|
||||||
|
error: vi.fn(),
|
||||||
|
debug: vi.fn(),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({
|
||||||
|
calcAndUpdateUsage: calcAndUpdateUsageMock,
|
||||||
|
estimateUsageTokensFromMessages: estimateUsageTokensFromMessagesMock,
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../../packages/server/src/services/hermes/run-chat/message-format', () => ({
|
||||||
|
isAssistantMessageSendable: vi.fn(() => true),
|
||||||
|
}))
|
||||||
|
|
||||||
|
describe('run chat compression trigger', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
getSessionDetailMock.mockReset()
|
||||||
|
getSessionMock.mockReset()
|
||||||
|
getCompressionSnapshotMock.mockReset()
|
||||||
|
getModelContextLengthMock.mockReset()
|
||||||
|
calcAndUpdateUsageMock.mockReset()
|
||||||
|
estimateUsageTokensFromMessagesMock.mockReset()
|
||||||
|
compressorCompressMock.mockReset()
|
||||||
|
compressorConstructorMock.mockReset()
|
||||||
|
readConfigYamlForProfileMock.mockReset()
|
||||||
|
|
||||||
|
getSessionMock.mockReturnValue({ id: 'session-1', profile: 'default' })
|
||||||
|
getModelContextLengthMock.mockReturnValue(200_000)
|
||||||
|
calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 })
|
||||||
|
estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 0, outputTokens: 0 })
|
||||||
|
getCompressionSnapshotMock.mockReturnValue(null)
|
||||||
|
readConfigYamlForProfileMock.mockResolvedValue({})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not compress long low-token history just because it has more than 150 messages', async () => {
|
||||||
|
const messages = Array.from({ length: 152 }, (_, index) => ({
|
||||||
|
id: index + 1,
|
||||||
|
session_id: 'session-1',
|
||||||
|
role: index === 151 ? 'user' : index % 2 === 0 ? 'user' : 'assistant',
|
||||||
|
content: `m${index}`,
|
||||||
|
timestamp: index + 1,
|
||||||
|
tool_call_id: null,
|
||||||
|
tool_calls: null,
|
||||||
|
tool_name: null,
|
||||||
|
finish_reason: null,
|
||||||
|
reasoning_content: null,
|
||||||
|
}))
|
||||||
|
getSessionDetailMock.mockReturnValue({ messages })
|
||||||
|
|
||||||
|
const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
|
||||||
|
const history = await buildCompressedHistory(
|
||||||
|
'session-1',
|
||||||
|
'default',
|
||||||
|
'http://upstream',
|
||||||
|
undefined,
|
||||||
|
vi.fn(),
|
||||||
|
new Map(),
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(history).toHaveLength(151)
|
||||||
|
expect(history[0]).toEqual({ role: 'user', content: 'm0' })
|
||||||
|
expect(history.at(-1)).toEqual({ role: 'user', content: 'm150' })
|
||||||
|
expect(compressorCompressMock).not.toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
|
// Scenario: the profile config supplies explicit compression settings (threshold 0.25)
// and usage is stubbed at 60_000 input tokens. The test expects the compressor to be
// invoked with the upstream URL, session id and profile, and its output to be returned.
// NOTE(review): presumably 60_000 tokens crosses 25% of the context window — confirm
// against the compression service, which is not visible from this test.
it('uses configured threshold before triggering compression', async () => {
  // Ten stored rows alternating user/assistant by index parity; index 9 is pinned to 'user'.
  const storedRows = Array.from({ length: 10 }, (_unused, position) => {
    const isUserTurn = position === 9 || position % 2 === 0
    return {
      id: position + 1,
      session_id: 'session-1',
      role: isUserTurn ? 'user' : 'assistant',
      content: `message ${position}`,
      timestamp: position + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages: storedRows })

  const profileConfig = {
    compression: { threshold: 0.25, target_ratio: 0.1, protect_last_n: 7, protect_first_n: 2 },
  }
  readConfigYamlForProfileMock.mockResolvedValue(profileConfig)
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 60_000, outputTokens: 0 })

  // Canned compressor output that the function under test is expected to hand back.
  const compressorResult = {
    messages: [{ role: 'user', content: 'compressed' }],
    meta: {
      compressed: true,
      llmCompressed: true,
      totalMessages: 9,
      summaryTokenEstimate: 1,
      verbatimCount: 0,
      compressedStartIndex: 0,
    },
  }
  compressorCompressMock.mockResolvedValue(compressorResult)

  const { buildCompressedHistory: buildHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const callbackStub = vi.fn()
  const emptyMap = new Map()
  const history = await buildHistory('session-1', 'default', 'http://upstream', undefined, callbackStub, emptyMap)

  expect(history).toEqual([{ role: 'user', content: 'compressed' }])
  expect(compressorCompressMock).toHaveBeenCalledWith(
    expect.any(Array),
    'http://upstream',
    undefined,
    'session-1',
    expect.objectContaining({ profile: 'default' }),
  )
})
|
||||||
|
|
||||||
|
// Scenario: only `protect_last_n` is configured. The test expects the compressor to be
// constructed with triggerTokens 100_000, summaryBudget 40_000, headMessageCount 3 and
// tailMessageCount 5, and to be asked to compress exactly once.
// NOTE(review): the three values not taken from the config are presumably the built-in
// defaults — confirm against the compression service.
it('merges partial compression config with defaults', async () => {
  // Ten stored rows alternating user/assistant by index parity; index 9 is pinned to 'user'.
  const storedRows = Array.from({ length: 10 }, (_unused, position) => {
    const isUserTurn = position === 9 || position % 2 === 0
    return {
      id: position + 1,
      session_id: 'session-1',
      role: isUserTurn ? 'user' : 'assistant',
      content: `message ${position}`,
      timestamp: position + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages: storedRows })

  const partialConfig = { compression: { protect_last_n: 5 } }
  readConfigYamlForProfileMock.mockResolvedValue(partialConfig)
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 120_000, outputTokens: 0 })

  const compressorResult = {
    messages: [{ role: 'user', content: 'compressed' }],
    meta: {
      compressed: true,
      llmCompressed: true,
      totalMessages: 9,
      summaryTokenEstimate: 1,
      verbatimCount: 0,
      compressedStartIndex: 0,
    },
  }
  compressorCompressMock.mockResolvedValue(compressorResult)

  const { buildCompressedHistory: buildHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const callbackStub = vi.fn()
  const emptyMap = new Map()
  await buildHistory('session-1', 'default', 'http://upstream', undefined, callbackStub, emptyMap)

  // Exact-match on the constructor argument: configured tail count plus the remaining values.
  const expectedConstructorArg = {
    config: {
      triggerTokens: 100_000,
      summaryBudget: 40_000,
      headMessageCount: 3,
      tailMessageCount: 5,
    },
  }
  expect(compressorConstructorMock).toHaveBeenCalledWith(expectedConstructorArg)
  expect(compressorCompressMock).toHaveBeenCalledTimes(1)
})
|
||||||
|
|
||||||
|
// Scenario: a stored compression snapshot refers to index 99 of 100 messages while the
// session only holds ten rows, and the usage estimate is stubbed at 1_000 input tokens.
// The test expects the history to be messages 0-1, the prefixed snapshot summary, then
// messages 6-8, with no compressor call.
it('uses stale snapshot summary plus safe tail instead of full history when under threshold', async () => {
  // Ten stored rows alternating user/assistant by index parity; index 9 is pinned to 'user'.
  const storedRows = Array.from({ length: 10 }, (_unused, position) => {
    const isUserTurn = position === 9 || position % 2 === 0
    return {
      id: position + 1,
      session_id: 'session-1',
      role: isUserTurn ? 'user' : 'assistant',
      content: `message ${position}`,
      timestamp: position + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages: storedRows })

  // Snapshot indices deliberately exceed the ten rows available above.
  const staleSnapshot = {
    summary: 'old summary',
    lastMessageIndex: 99,
    messageCountAtTime: 100,
  }
  getCompressionSnapshotMock.mockReturnValue(staleSnapshot)

  const profileConfig = { compression: { protect_first_n: 2, protect_last_n: 3 } }
  readConfigYamlForProfileMock.mockResolvedValue(profileConfig)
  estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 1_000, outputTokens: 0 })

  const { buildCompressedHistory: buildHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const callbackStub = vi.fn()
  const emptyMap = new Map()
  const history = await buildHistory('session-1', 'default', 'http://upstream', undefined, callbackStub, emptyMap)

  const contents = history.map(entry => entry.content)
  expect(contents).toEqual([
    'message 0',
    'message 1',
    '[Previous context summary]\n\nold summary',
    'message 6',
    'message 7',
    'message 8',
  ])
  expect(compressorCompressMock).not.toHaveBeenCalled()
})
|
||||||
|
|
||||||
|
// Scenario: same out-of-range snapshot as the under-threshold case, but the usage
// estimate is stubbed at 120_000 input tokens. The test expects the compressor to be
// called with an array that includes { role: 'user', content: 'message 0' } and the
// compressor's output to be returned as the history.
it('compresses stale snapshot safe tail instead of full history when stale assembly exceeds threshold', async () => {
  // Ten stored rows alternating user/assistant by index parity; index 9 is pinned to 'user'.
  const storedRows = Array.from({ length: 10 }, (_unused, position) => {
    const isUserTurn = position === 9 || position % 2 === 0
    return {
      id: position + 1,
      session_id: 'session-1',
      role: isUserTurn ? 'user' : 'assistant',
      content: `message ${position}`,
      timestamp: position + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages: storedRows })

  // Snapshot indices deliberately exceed the ten rows available above.
  const staleSnapshot = {
    summary: 'old summary',
    lastMessageIndex: 99,
    messageCountAtTime: 100,
  }
  getCompressionSnapshotMock.mockReturnValue(staleSnapshot)

  const profileConfig = { compression: { protect_first_n: 2, protect_last_n: 3 } }
  readConfigYamlForProfileMock.mockResolvedValue(profileConfig)
  estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 120_000, outputTokens: 0 })

  const compressorResult = {
    messages: [{ role: 'user', content: 'updated stale compressed' }],
    meta: {
      compressed: true,
      llmCompressed: true,
      totalMessages: 9,
      summaryTokenEstimate: 1,
      verbatimCount: 0,
      compressedStartIndex: 8,
    },
  }
  compressorCompressMock.mockResolvedValue(compressorResult)

  const { buildCompressedHistory: buildHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const callbackStub = vi.fn()
  const emptyMap = new Map()
  const history = await buildHistory('session-1', 'default', 'http://upstream', undefined, callbackStub, emptyMap)

  expect(history).toEqual([{ role: 'user', content: 'updated stale compressed' }])
  expect(compressorCompressMock).toHaveBeenCalledWith(
    expect.arrayContaining([{ role: 'user', content: 'message 0' }]),
    'http://upstream',
    undefined,
    'session-1',
    expect.objectContaining({ profile: 'default' }),
  )
})
|
||||||
|
|
||||||
|
// Scenario: compression is switched off in the profile config (`enabled: false`) even
// though the threshold is tiny and usage is stubbed at 180_000 input tokens. The test
// expects a nine-entry history and neither the compressor nor the usage calculator to run.
it('does not compress when compression is disabled', async () => {
  // Ten stored rows alternating user/assistant by index parity; index 9 is pinned to 'user'.
  const storedRows = Array.from({ length: 10 }, (_unused, position) => {
    const isUserTurn = position === 9 || position % 2 === 0
    return {
      id: position + 1,
      session_id: 'session-1',
      role: isUserTurn ? 'user' : 'assistant',
      content: `message ${position}`,
      timestamp: position + 1,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      finish_reason: null,
      reasoning_content: null,
    }
  })
  getSessionDetailMock.mockReturnValue({ messages: storedRows })

  const disabledConfig = { compression: { enabled: false, threshold: 0.01 } }
  readConfigYamlForProfileMock.mockResolvedValue(disabledConfig)
  calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 180_000, outputTokens: 0 })

  const { buildCompressedHistory: buildHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression')
  const callbackStub = vi.fn()
  const emptyMap = new Map()
  const history = await buildHistory('session-1', 'default', 'http://upstream', undefined, callbackStub, emptyMap)

  expect(history).toHaveLength(9)
  expect(compressorCompressMock).not.toHaveBeenCalled()
  expect(calcAndUpdateUsageMock).not.toHaveBeenCalled()
})
|
||||||
|
})
|
||||||
@@ -206,6 +206,81 @@ describe('session conversations controller', () => {
|
|||||||
expect(ctx.body).toEqual({ error: 'Conversation not found' })
|
expect(ctx.body).toEqual({ error: 'Conversation not found' })
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Scenario: the local store returns a populated 'cli' session while the state.db lookup
// is stubbed with an empty one. The test expects the response to carry the local detail
// and neither the state.db lookup nor getSessionMock to be called.
it('prefers local session detail for Hermes history detail when available', async () => {
  const localDetail = {
    id: 'cli-1',
    source: 'cli',
    title: 'Local complete',
    messages: [
      { id: 1, session_id: 'cli-1', role: 'user', content: 'local full message', timestamp: 1 },
    ],
  }
  localGetSessionDetailMock.mockReturnValue(localDetail)

  const dbDetail = {
    id: 'cli-1',
    source: 'cli',
    title: 'Hermes incomplete',
    messages: [],
  }
  getSessionDetailFromDbMock.mockResolvedValue(dbDetail)

  const sessionsController = await import('../../packages/server/src/controllers/hermes/sessions')
  const requestCtx: any = { params: { id: 'cli-1' }, body: null }
  await sessionsController.getHermesSession(requestCtx)

  expect(localGetSessionDetailMock).toHaveBeenCalledWith('cli-1')
  expect(getSessionDetailFromDbMock).not.toHaveBeenCalled()
  expect(getSessionMock).not.toHaveBeenCalled()

  const expectedSession = {
    id: 'cli-1',
    title: 'Local complete',
    messages: [{ content: 'local full message' }],
  }
  expect(requestCtx.body.session).toMatchObject(expectedSession)
})
|
||||||
|
|
||||||
|
// Scenario: the local store has no detail (returns null) and the state.db lookup
// resolves a 'cli' session. The test expects both lookups to be made with the session
// id, getSessionMock to stay untouched, and the response to carry the state.db detail.
it('falls back to Hermes state.db when local history detail is missing', async () => {
  localGetSessionDetailMock.mockReturnValue(null)

  const dbDetail = {
    id: 'hermes-1',
    source: 'cli',
    title: 'Hermes detail',
    messages: [
      { id: 1, session_id: 'hermes-1', role: 'user', content: 'from hermes', timestamp: 1 },
    ],
  }
  getSessionDetailFromDbMock.mockResolvedValue(dbDetail)

  const sessionsController = await import('../../packages/server/src/controllers/hermes/sessions')
  const requestCtx: any = { params: { id: 'hermes-1' }, body: null }
  await sessionsController.getHermesSession(requestCtx)

  expect(localGetSessionDetailMock).toHaveBeenCalledWith('hermes-1')
  expect(getSessionDetailFromDbMock).toHaveBeenCalledWith('hermes-1')
  expect(getSessionMock).not.toHaveBeenCalled()

  const expectedSession = {
    id: 'hermes-1',
    title: 'Hermes detail',
    messages: [{ content: 'from hermes' }],
  }
  expect(requestCtx.body.session).toMatchObject(expectedSession)
})
|
||||||
|
|
||||||
|
// Scenario: the local store returns a session whose source is 'api_server', and both the
// state.db lookup and getSessionMock resolve null. The test expects the state.db lookup
// to still be attempted and the endpoint to answer 404 'Session not found'.
it('does not return api_server sessions from the Hermes history detail endpoint', async () => {
  const apiServerDetail = {
    id: 'api-1',
    source: 'api_server',
    title: 'API Server',
    messages: [{ id: 1, session_id: 'api-1', role: 'user', content: 'local api', timestamp: 1 }],
  }
  localGetSessionDetailMock.mockReturnValue(apiServerDetail)
  getSessionDetailFromDbMock.mockResolvedValue(null)
  getSessionMock.mockResolvedValue(null)

  const sessionsController = await import('../../packages/server/src/controllers/hermes/sessions')
  const requestCtx: any = { params: { id: 'api-1' }, body: null }
  await sessionsController.getHermesSession(requestCtx)

  expect(localGetSessionDetailMock).toHaveBeenCalledWith('api-1')
  expect(getSessionDetailFromDbMock).toHaveBeenCalledWith('api-1')
  expect(requestCtx.status).toBe(404)
  expect(requestCtx.body).toEqual({ error: 'Session not found' })
})
|
||||||
|
|
||||||
it('returns native state.db usage analytics for the requested period', async () => {
|
it('returns native state.db usage analytics for the requested period', async () => {
|
||||||
const today = new Date().toISOString().slice(0, 10)
|
const today = new Date().toISOString().slice(0, 10)
|
||||||
getLocalUsageStatsMock.mockReturnValue({
|
getLocalUsageStatsMock.mockReturnValue({
|
||||||
|
|||||||
Reference in New Issue
Block a user