diff --git a/packages/client/src/api/hermes/config.ts b/packages/client/src/api/hermes/config.ts index f381c02..d9f4a72 100644 --- a/packages/client/src/api/hermes/config.ts +++ b/packages/client/src/api/hermes/config.ts @@ -28,6 +28,14 @@ export interface MemoryConfig { user_char_limit?: number } +export interface CompressionConfig { + enabled?: boolean + threshold?: number + target_ratio?: number + protect_last_n?: number + protect_first_n?: number +} + export interface SessionResetConfig { mode?: string idle_minutes?: number @@ -47,6 +55,7 @@ export interface AppConfig { display?: DisplayConfig agent?: AgentConfig memory?: MemoryConfig + compression?: CompressionConfig session_reset?: SessionResetConfig privacy?: PrivacyConfig approvals?: ApprovalConfig diff --git a/packages/client/src/components/hermes/settings/CompressionSettings.vue b/packages/client/src/components/hermes/settings/CompressionSettings.vue new file mode 100644 index 0000000..5c9c180 --- /dev/null +++ b/packages/client/src/components/hermes/settings/CompressionSettings.vue @@ -0,0 +1,106 @@ + + + + + diff --git a/packages/client/src/data/changelog.ts b/packages/client/src/data/changelog.ts index 3ac31b1..e3339ef 100644 --- a/packages/client/src/data/changelog.ts +++ b/packages/client/src/data/changelog.ts @@ -17,6 +17,7 @@ export const changelog: ChangelogEntry[] = [ 'changelog.new_0_5_30_6', 'changelog.new_0_5_30_7', 'changelog.new_0_5_30_8', + 'changelog.new_0_5_30_9', ], }, { diff --git a/packages/client/src/i18n/locales/de.ts b/packages/client/src/i18n/locales/de.ts index 61bed96..28c834c 100644 --- a/packages/client/src/i18n/locales/de.ts +++ b/packages/client/src/i18n/locales/de.ts @@ -549,6 +549,7 @@ jobTriggered: 'Job ausgelost', account: 'Konto', agent: 'Agent', memory: 'Gedachtnis', + compression: 'Komprimierung', session: 'Sitzung', privacy: 'Datenschutz', apiServer: 'API-Server', @@ -599,6 +600,18 @@ jobTriggered: 'Job ausgelost', userCharLimit: 'Zeichenlimit fur Benutzerprofil', userCharLimitHint: 'Maximale Zeichen fur USER.md', }, + compression: { + enabled: 'Komprimierung aktivieren', + enabledHint: 'Langen Chatverlauf automatisch komprimieren, bevor der Modellkontext uberschritten wird', + threshold: 'Komprimierungsschwelle', + thresholdHint: 'Komprimierung starten, wenn geschatzte Token dieses Kontextverhaltnis uberschreiten', + targetRatio: 'Zielverhaltnis', + targetRatioHint: 'Zielgroße des Verlaufs nach der Komprimierung als Kontextverhaltnis', + protectLastN: 'Neueste Nachrichten schutzen', + protectLastNHint: 'So viele neueste Nachrichten unkomprimiert lassen', + protectFirstN: 'Erste Nachrichten schutzen', + protectFirstNHint: 'So viele erste Nachrichten unkomprimiert lassen', + }, session: { mode: 'Zurucksetzungsmodus', modeHint: 'Ausloser fur Sitzungszurucksetzung', @@ -957,6 +970,7 @@ jobTriggered: 'Job ausgelost', new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis', new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt', new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats', diff --git a/packages/client/src/i18n/locales/en.ts b/packages/client/src/i18n/locales/en.ts index 23d6944..be61561 100644 --- a/packages/client/src/i18n/locales/en.ts +++ b/packages/client/src/i18n/locales/en.ts @@ -716,6 +716,7 @@ export default { account: 'Account', agent: 'Agent', memory: 'Memory', + compression: 'Compression', session: 'Session', privacy: 'Privacy', apiServer: 'API Server', @@ -774,6 +775,18 @@ export default { userCharLimit: 'User Profile Char Limit', userCharLimitHint: 'Max characters for USER.md', }, + compression: { + enabled: 'Enable Compression', + enabledHint: 'Automatically compress long chat history before it exceeds the model context', + threshold: 'Compression Threshold', + thresholdHint: 'Start compression when estimated tokens exceed this context ratio', + targetRatio: 'Target Ratio', + targetRatioHint: 'Target history size after compression as a context ratio', + protectLastN: 'Protect Recent Messages', + protectLastNHint: 'Keep this many latest messages uncompressed', + protectFirstN: 'Protect First Messages', + protectFirstNHint: 'Keep this many earliest messages uncompressed', + }, session: { mode: 'Reset Mode', modeHint: 'Trigger condition for session reset', @@ -1251,6 +1264,7 @@ export default { new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization', new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events', diff --git a/packages/client/src/i18n/locales/es.ts b/packages/client/src/i18n/locales/es.ts index 9c9e443..c58af6c 100644 --- a/packages/client/src/i18n/locales/es.ts +++ b/packages/client/src/i18n/locales/es.ts @@ -549,6 +549,7 @@ jobTriggered: 'Job ejecutado', account: 'Cuenta', agent: 'Agente', memory: 'Memoria', + compression: 'Compresion', session: 'Sesion', privacy: 'Privacidad', apiServer: 'Servidor API', @@ -599,6 +600,18 @@ jobTriggered: 'Job ejecutado', userCharLimit: 'Limite de caracteres del perfil de usuario', userCharLimitHint: 'Maximo de caracteres para USER.md', }, + compression: { + enabled: 'Activar compresion', + enabledHint: 'Comprimir automaticamente el historial largo antes de superar el contexto del modelo', + threshold: 'Umbral de compresion', + thresholdHint: 'Iniciar compresion cuando los tokens estimados superen esta proporcion del contexto', + targetRatio: 'Proporcion objetivo', + targetRatioHint: 'Tamano objetivo del historial tras comprimir como proporcion del contexto', + protectLastN: 'Proteger mensajes recientes', + protectLastNHint: 'Mantener sin comprimir esta cantidad de mensajes recientes', + protectFirstN: 'Proteger primeros mensajes', + protectFirstNHint: 'Mantener sin comprimir esta cantidad de mensajes iniciales', + }, session: { mode: 'Modo de reinicio', modeHint: 'Condicion de activacion del reinicio de sesion', @@ -953,6 +966,7 @@ jobTriggered: 'Job ejecutado', new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión', new_0_5_5_2: 'Añadida página de historial para sesiones Hermes', new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente', diff --git a/packages/client/src/i18n/locales/fr.ts b/packages/client/src/i18n/locales/fr.ts index 1df8706..aee6b32 100644 --- a/packages/client/src/i18n/locales/fr.ts +++ b/packages/client/src/i18n/locales/fr.ts @@ -549,6 +549,7 @@ jobTriggered: 'Job declenche', account: 'Compte', agent: 'Agent', memory: 'Memoire', + compression: 'Compression', session: 'Session', privacy: 'Confidentialite', apiServer: 'Serveur API', @@ -599,6 +600,18 @@ jobTriggered: 'Job declenche', userCharLimit: 'Limite de caracteres du profil utilisateur', userCharLimitHint: 'Nombre maximum de caracteres pour USER.md', }, + compression: { + enabled: 'Activer la compression', + enabledHint: 'Compresser automatiquement un long historique avant de depasser le contexte du modele', + threshold: 'Seuil de compression', + thresholdHint: 'Demarrer la compression quand les jetons estimes depassent ce ratio de contexte', + targetRatio: 'Ratio cible', + targetRatioHint: 'Taille cible de l\'historique apres compression comme ratio du contexte', + protectLastN: 'Proteger les messages recents', + protectLastNHint: 'Garder autant de messages recents non compresses', + protectFirstN: 'Proteger les premiers messages', + protectFirstNHint: 'Garder autant de premiers messages non compresses', + }, session: { mode: 'Mode de reinitialisation', modeHint: 'Condition de declenchement de la reinitialisation de session', @@ -952,6 +965,7 @@ jobTriggered: 'Job declenche', new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension', new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes', new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante', diff --git a/packages/client/src/i18n/locales/ja.ts b/packages/client/src/i18n/locales/ja.ts index 887ec0a..e6d1c94 100644 --- a/packages/client/src/i18n/locales/ja.ts +++ b/packages/client/src/i18n/locales/ja.ts @@ -549,6 +549,7 @@ export default { account: 'アカウント', agent: 'エージェント', memory: 'メモリ', + compression: '圧縮', session: 'セッション', privacy: 'プライバシー', apiServer: 'API サーバー', @@ -599,6 +600,18 @@ export default { userCharLimit: 'ユーザープロファイル文字数上限', userCharLimitHint: 'USER.md の最大文字数', }, + compression: { + enabled: '圧縮を有効化', + enabledHint: '長いチャット履歴がモデルコンテキストを超える前に自動圧縮', + threshold: '圧縮しきい値', + thresholdHint: '推定トークンがこのコンテキスト比率を超えたら圧縮を開始', + targetRatio: '目標比率', + targetRatioHint: '圧縮後の履歴サイズをコンテキスト比率で指定', + protectLastN: '直近メッセージを保護', + protectLastNHint: 'この数の最新メッセージは圧縮しない', + protectFirstN: '先頭メッセージを保護', + protectFirstNHint: 'この数の最初のメッセージは圧縮しない', + }, session: { mode: 'リセットモード', modeHint: 'セッションリセットのトリガー条件', @@ -953,6 +966,7 @@ export default { new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 労働者の日!今日はお休みです、何卒ご理解ください', new_0_5_5_2: 'Hermesセッション履歴ページを追加', new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理', diff --git a/packages/client/src/i18n/locales/ko.ts b/packages/client/src/i18n/locales/ko.ts index 8455eba..e2bc37b 100644 --- a/packages/client/src/i18n/locales/ko.ts +++ b/packages/client/src/i18n/locales/ko.ts @@ -549,6 +549,7 @@ export default { account: '계정', agent: '에이전트', memory: '메모리', + compression: '압축', session: '세션', privacy: '개인정보', apiServer: 'API 서버', @@ -599,6 +600,18 @@ export default { userCharLimit: '사용자 프로필 문자 제한', userCharLimitHint: 'USER.md 최대 문자 수', }, + compression: { + enabled: '압축 활성화', + enabledHint: '긴 채팅 기록이 모델 컨텍스트를 넘기 전에 자동 압축', + threshold: '압축 임계값', + thresholdHint: '추정 토큰이 이 컨텍스트 비율을 넘으면 압축 시작', + targetRatio: '목표 비율', + targetRatioHint: '압축 후 기록 크기를 컨텍스트 비율로 지정', + protectLastN: '최근 메시지 보호', + protectLastNHint: '이 수만큼 최신 메시지는 압축하지 않음', + protectFirstN: '처음 메시지 보호', + protectFirstNHint: '이 수만큼 처음 메시지는 압축하지 않음', + }, session: { mode: '초기화 모드', modeHint: '세션 초기화 트리거 조건', @@ -953,6 +966,7 @@ export default { new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다', new_0_5_5_2: 'Hermes 세션 기록 페이지 추가', new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리', diff --git a/packages/client/src/i18n/locales/pt.ts b/packages/client/src/i18n/locales/pt.ts index fd9c7d5..d390051 100644 --- a/packages/client/src/i18n/locales/pt.ts +++ b/packages/client/src/i18n/locales/pt.ts @@ -549,6 +549,7 @@ jobTriggered: 'Job acionado', account: 'Conta', agent: 'Agente', memory: 'Memoria', + compression: 'Compressao', session: 'Sessao', privacy: 'Privacidade', apiServer: 'Servidor API', @@ -599,6 +600,18 @@ jobTriggered: 'Job acionado', userCharLimit: 'Limite de caracteres do perfil do usuario', userCharLimitHint: 'Maximo de caracteres para USER.md', }, + compression: { + enabled: 'Ativar compressao', + enabledHint: 'Comprimir automaticamente historico longo antes de exceder o contexto do modelo', + threshold: 'Limiar de compressao', + thresholdHint: 'Iniciar compressao quando tokens estimados excederem esta proporcao do contexto', + targetRatio: 'Proporcao alvo', + targetRatioHint: 'Tamanho alvo do historico apos compressao como proporcao do contexto', + protectLastN: 'Proteger mensagens recentes', + protectLastNHint: 'Manter sem compressao esta quantidade de mensagens recentes', + protectFirstN: 'Proteger primeiras mensagens', + protectFirstNHint: 'Manter sem compressao esta quantidade de mensagens iniciais', + }, session: { mode: 'Modo de reinicializacao', modeHint: 'Condicao de acionamento para reinicializacao de sessao', @@ -953,6 +966,7 @@ jobTriggered: 'Job acionado', new_0_5_30_6: 'Harden Windows compatibility for path detection, file downloads, and job/update subprocesses so they no longer flash terminal windows', new_0_5_30_7: 'Fix config writes and provider presets: validate .env keys, route FUN-Codex through the Responses API, and refresh Z.AI/GLM model lists', new_0_5_30_8: 'Polish frontend details including collapsed sidebar layout, short group labels, sidebar divider, and conversation outline styling', + new_0_5_30_9: 'Context compression now follows Profile compression settings and hardens stale snapshots by reusing previous summaries with a safe tail instead of recompressing full history', new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão', new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes', new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente', diff --git a/packages/client/src/i18n/locales/zh-TW.ts b/packages/client/src/i18n/locales/zh-TW.ts index 72d26c5..43ef940 100644 --- a/packages/client/src/i18n/locales/zh-TW.ts +++ b/packages/client/src/i18n/locales/zh-TW.ts @@ -696,6 +696,7 @@ export default { account: '帳號', agent: '代理', memory: '記憶', + compression: '上下文壓縮', session: '工作階段', privacy: '隱私', apiServer: 'API 伺服器', @@ -754,6 +755,18 @@ export default { userCharLimit: '使用者畫像字元上限', userCharLimitHint: 'USER.md 最大字元數', }, + compression: { + enabled: '啟用壓縮', + enabledHint: '長對話接近模型上下文上限前自動壓縮歷史', + threshold: '壓縮閾值', + thresholdHint: '預估 token 超過上下文比例時開始壓縮', + targetRatio: '目標比例', + targetRatioHint: '壓縮後歷史保留到上下文的目標比例', + protectLastN: '保護最近訊息', + protectLastNHint: '最近多少則訊息不參與壓縮', + protectFirstN: '保護開頭訊息', + protectFirstNHint: '最早多少則訊息不參與壓縮', + }, session: { mode: '重設模式', modeHint: '工作階段重設的觸發條件', @@ -1243,6 +1256,7 @@ export default { new_0_5_30_6: '優化 Windows 相容性:路徑識別、檔案下載、任務/更新子程序不再彈出額外終端視窗', new_0_5_30_7: '修復配置寫入和 Provider 預設:加強 .env 校驗,FUN-Codex 改走 Responses API,並更新 Z.AI/GLM 模型列表', new_0_5_30_8: '前端體驗細節優化:折疊側邊欄布局、分組短標題、側邊欄分隔線和對話大綱樣式更穩定', + new_0_5_30_9: '上下文壓縮現在跟隨 Profile 壓縮配置,並在舊快照失效時複用舊摘要和安全尾部,避免重新壓縮完整歷史', new_0_5_6_1: '新增語音播放功能:使用 Web Speech API,支援手動播放按鈕、自動播放開關、彩虹邊框動畫和行動端最佳化', new_0_5_6_2: '新增強健的 LLM JSON 解析器,相容 Python 格式並從串流事件中擷取文字', new_0_5_6_3: 'Skills 功能增強:使用統計、來源過濾、封存技能、來源追溯和釘選切換', diff --git a/packages/client/src/i18n/locales/zh.ts b/packages/client/src/i18n/locales/zh.ts index 882c3ea..a9e0fdf 100644 --- a/packages/client/src/i18n/locales/zh.ts +++ b/packages/client/src/i18n/locales/zh.ts @@ -708,6 +708,7 @@ export default { account: '账户', agent: '代理', memory: '记忆', + compression: '上下文压缩', session: '会话', privacy: '隐私', apiServer: 'API 服务器', @@ -766,6 +767,18 @@ export default { userCharLimit: '用户画像字符上限', userCharLimitHint: 'USER.md 最大字符数', }, + compression: { + enabled: '启用压缩', + enabledHint: '长对话接近模型上下文上限前自动压缩历史', + threshold: '压缩阈值', + thresholdHint: '预计 token 超过上下文比例时开始压缩', + targetRatio: '目标比例', + targetRatioHint: '压缩后历史保留到上下文的目标比例', + protectLastN: '保护最近消息', + protectLastNHint: '最近多少条消息不参与压缩', + protectFirstN: '保护开头消息', + protectFirstNHint: '最早多少条消息不参与压缩', + }, session: { mode: '重置模式', modeHint: '会话重置的触发条件', @@ -1253,6 +1266,7 @@ export default { new_0_5_30_6: '优化 Windows 兼容性:路径识别、文件下载、任务/更新子进程不再弹出额外终端窗口', new_0_5_30_7: '修复配置写入和 Provider 预设:加强 .env 校验,FUN-Codex 改走 Responses API,并更新 Z.AI/GLM 模型列表', new_0_5_30_8: '前端体验细节优化:折叠侧边栏布局、分组短标题、侧边栏分隔线和对话大纲样式更稳定', + new_0_5_30_9: '上下文压缩现在跟随 Profile 压缩配置,并在旧快照失效时复用旧摘要和安全尾部,避免重新压缩完整历史', new_0_5_6_1: '新增语音播放功能:使用 Web Speech API,支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化', new_0_5_6_2: '新增健壮的 LLM JSON 解析器,兼容 Python 格式并从流式事件中提取文本', diff --git a/packages/client/src/stores/hermes/settings.ts b/packages/client/src/stores/hermes/settings.ts index 0ed2e83..95f6d56 100644 --- a/packages/client/src/stores/hermes/settings.ts +++ b/packages/client/src/stores/hermes/settings.ts @@ -1,7 +1,7 @@ import { defineStore } from 'pinia' import { ref } from 'vue' import * as configApi from '@/api/hermes/config' -import type { DisplayConfig, AgentConfig, MemoryConfig, SessionResetConfig, PrivacyConfig, ApprovalConfig } from '@/api/hermes/config' +import type { DisplayConfig, AgentConfig, MemoryConfig, CompressionConfig, SessionResetConfig, PrivacyConfig, ApprovalConfig } from '@/api/hermes/config' export const useSettingsStore = defineStore('settings', () => { const loading = ref(false) @@ -10,6 +10,7 @@ export const useSettingsStore = defineStore('settings', () => { const display = ref({}) const agent = ref({}) const memory = ref({}) + const compression = ref({}) const sessionReset = ref({}) const privacy = ref({}) const approvals = ref({}) @@ -32,6 +33,7 @@ export const useSettingsStore = defineStore('settings', () => { display.value = data.display || {} agent.value = data.agent || {} memory.value = data.memory || {} + compression.value = data.compression || {} sessionReset.value = data.session_reset || {} privacy.value = data.privacy || {} approvals.value = data.approvals || {} @@ -58,6 +60,7 @@ export const useSettingsStore = defineStore('settings', () => { case 'display': display.value = { ...display.value, ...values }; break case 'agent': agent.value = { ...agent.value, ...values }; break case 'memory': memory.value = { ...memory.value, ...values }; break + case 'compression': compression.value = { ...compression.value, ...values }; break case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break case 'privacy': privacy.value = { ...privacy.value, ...values }; break case 'approvals': approvals.value = { ...approvals.value, ...values }; break @@ -91,6 +94,7 @@ export const useSettingsStore = defineStore('settings', () => { case 'display': display.value = { ...display.value, ...values }; break case 'agent': agent.value = { ...agent.value, ...values }; break case 'memory': memory.value = { ...memory.value, ...values }; break + case 'compression': compression.value = { ...compression.value, ...values }; break case 'session_reset': sessionReset.value = { ...sessionReset.value, ...values }; break case 'privacy': privacy.value = { ...privacy.value, ...values }; break case 'approvals': approvals.value = { ...approvals.value, ...values }; break @@ -122,7 +126,7 @@ export const useSettingsStore = defineStore('settings', () => { return { loading, saving, - display, agent, memory, sessionReset, privacy, approvals, + display, agent, memory, compression, sessionReset, privacy, approvals, telegram, discord, slack, whatsapp, matrix, wecom, feishu, dingtalk, qqbot, weixin, platforms, fetchSettings, saveSection, updateLocal, } diff --git a/packages/client/src/views/hermes/SettingsView.vue b/packages/client/src/views/hermes/SettingsView.vue index 83e25f8..f4d3a85 100644 --- a/packages/client/src/views/hermes/SettingsView.vue +++ b/packages/client/src/views/hermes/SettingsView.vue @@ -10,6 +10,7 @@ import { useSettingsStore } from "@/stores/hermes/settings"; import DisplaySettings from "@/components/hermes/settings/DisplaySettings.vue"; import AgentSettings from "@/components/hermes/settings/AgentSettings.vue"; import MemorySettings from "@/components/hermes/settings/MemorySettings.vue"; +import CompressionSettings from "@/components/hermes/settings/CompressionSettings.vue"; import SessionSettings from "@/components/hermes/settings/SessionSettings.vue"; import PrivacySettings from "@/components/hermes/settings/PrivacySettings.vue"; import ModelSettings from "@/components/hermes/settings/ModelSettings.vue"; @@ -49,6 +50,9 @@ onMounted(() => { + + + diff --git a/packages/server/src/controllers/hermes/sessions.ts b/packages/server/src/controllers/hermes/sessions.ts index 5dfbdb5..644e1fc 100644 --- a/packages/server/src/controllers/hermes/sessions.ts +++ b/packages/server/src/controllers/hermes/sessions.ts @@ -188,7 +188,16 @@ export async function get(ctx: any) { * GET /api/hermes/sessions/hermes/:id */ export async function getHermesSession(ctx: any) { - // Try database first (consistent with listHermesSessions) + // Prefer the Web UI local session store. Hermes state.db can lag behind or + // miss messages for Bridge-backed runs, while the local store is the source + // used by chat rendering and compression. + const localSession = localGetSessionDetail(ctx.params.id) + if (localSession && localSession.source !== 'api_server') { + ctx.body = { session: localSession } + return + } + + // Try Hermes state.db next (consistent with listHermesSessions) try { const session = await getSessionDetailFromDb(ctx.params.id) if (session && session.source !== 'api_server') { diff --git a/packages/server/src/lib/context-compressor/index.ts b/packages/server/src/lib/context-compressor/index.ts index 3e83430..a535f52 100644 --- a/packages/server/src/lib/context-compressor/index.ts +++ b/packages/server/src/lib/context-compressor/index.ts @@ -46,6 +46,8 @@ export interface CompressionConfig { triggerTokens: number /** Summary token target (default: 8000) */ summaryBudget: number + /** Number of earliest messages to keep verbatim (default: 0) */ + headMessageCount: number /** Number of recent messages to keep verbatim (default: 10) */ tailMessageCount: number /** Timeout for LLM summarization call (default: 60_000ms) */ @@ -55,6 +57,7 @@ export interface CompressionConfig { export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = { triggerTokens: 100_000, summaryBudget: 8_000, + headMessageCount: 0, tailMessageCount: 10, summarizationTimeoutMs: 120_000, } @@ -108,6 +111,54 @@ export function countTokensForModel(text: string, model: string): number { } } +function messageTokenEstimate(message: ChatMessage): number { + if (typeof message.content === 'string') return countTokens(message.content) + if (Array.isArray(message.content)) { + return countTokens(message.content.map(block => { + if (block.type === 'text') return block.text || '' + if (block.type === 'image') return `[Image: ${block.path || ''}]` + if (block.type === 'file') return `[File: ${block.path || ''}]` + return '' + }).join('')) + } + return 0 +} + +function messagesTokenEstimate(messages: ChatMessage[]): number { + return messages.reduce((sum, message) => sum + messageTokenEstimate(message), 0) +} + +function truncateTextToTokenBudget(text: string, tokenBudget: number): string { + if (tokenBudget <= 0 || countTokens(text) <= tokenBudget) return text + let lo = 0 + let hi = text.length + while (lo < hi) { + const mid = Math.ceil((lo + hi) / 2) + if (countTokens(text.slice(0, mid)) <= tokenBudget) lo = mid + else hi = mid - 1 + } + return text.slice(0, lo).trimEnd() + '\n\n[Summary truncated to fit context budget]' +} + +function enforceCompressedBudget( + messages: ChatMessage[], + triggerTokens: number, + summaryIndex: number, +): ChatMessage[] { + if (triggerTokens <= 0 || messagesTokenEstimate(messages) <= triggerTokens) return messages + + const summaryMessage = messages[summaryIndex] + if (!summaryMessage || typeof summaryMessage.content !== 'string') return messages + + const summaryOnly = [{ ...summaryMessage }] + if (messagesTokenEstimate(summaryOnly) <= triggerTokens) return summaryOnly + + return [{ + ...summaryMessage, + content: truncateTextToTokenBudget(summaryMessage.content, triggerTokens), + }] +} + // ─── Prompts ──────────────────────────────────────────── export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted @@ -371,6 +422,10 @@ export function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: nu return [...pruned, ...tail] } +function pruneFallbackToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] { + return pruneOldToolResults(messages, keepRecentCount) +} + // ─── LLM Summarization ────────────────────────────────── export async function callSummarizer( @@ -474,7 +529,7 @@ export class ChatContextCompressor { // Check if we have a previous compression snapshot const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null - if (snapshot) { + if (snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < messages.length) { // Has snapshot → incremental compress (merge old summary with new messages) logger.info( '[context-compressor] session=%s: incremental compress with snapshot at index %d', @@ -484,6 +539,22 @@ export class ChatContextCompressor { messages, snapshot, upstream, apiKey, sessionId!, makeMeta(), summarizer, ) } else { + if (snapshot && sessionId) { + const fallbackLastMessageIndex = Math.max(-1, messages.length - this.config.tailMessageCount - 1) + logger.warn( + '[context-compressor] session=%s: stale snapshot index %d for %d messages; using summary plus tail from index %d', + sessionId, snapshot.lastMessageIndex, messages.length, fallbackLastMessageIndex, + ) + return this.incrementalCompress( + messages, + { summary: snapshot.summary, lastMessageIndex: fallbackLastMessageIndex }, + upstream, + apiKey, + sessionId, + makeMeta(), + summarizer, + ) + } // No snapshot → full compress (compress all messages) logger.info( '[context-compressor] session=%s: full compress %d messages', @@ -504,27 +575,36 @@ export class ChatContextCompressor { ): Promise { const { summary: previousSummary, lastMessageIndex } = snapshot const total = messages.length - const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount) - const newMessages = cleaned.slice(lastMessageIndex + 1) + const headCount = Math.min(this.config.headMessageCount, Math.max(0, lastMessageIndex + 1)) + const head = messages.slice(0, headCount) + const newMessages = messages.slice(lastMessageIndex + 1) const tailCount = this.config.tailMessageCount + const previousSummaryMessage: ChatMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary } + const assembledWithPrevious = [ + ...head, + previousSummaryMessage, + ...newMessages, + ] + const assembledOverBudget = messagesTokenEstimate(assembledWithPrevious) > this.config.triggerTokens + const canKeepTailWindow = newMessages.length > tailCount - // Keep last N of new messages, compress the rest - const tailStart = Math.max(0, newMessages.length - tailCount) + // If the new segment itself is too small to split but already over budget, + // fold all new messages into the existing summary instead of preserving them verbatim. + const tailStart = assembledOverBudget && !canKeepTailWindow + ? newMessages.length + : Math.max(0, newMessages.length - tailCount) const toCompress = newMessages.slice(0, tailStart) const tail = newMessages.slice(tailStart) if (toCompress.length === 0) { return { - messages: [ - { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }, - ...newMessages, - ], + messages: assembledWithPrevious, meta: { ...meta, compressed: true, llmCompressed: false, summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary), - verbatimCount: newMessages.length, + verbatimCount: head.length + newMessages.length, compressedStartIndex: lastMessageIndex, }, } @@ -546,26 +626,32 @@ export class ChatContextCompressor { logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length) } catch (err: any) { logger.warn('[context-compressor] incremental-llm failed: %s — keeping new messages verbatim', err.message) + const fallback = [ + ...head, + previousSummaryMessage, + ...newMessages, + ] + const prunedFallback = pruneFallbackToolResults(fallback, this.config.tailMessageCount) + const budgetedFallback = enforceCompressedBudget(prunedFallback, this.config.triggerTokens, head.length) return { - messages: [ - { role: 'user', content: SUMMARY_PREFIX + '\n\n' + previousSummary }, - ...newMessages, - ], + messages: budgetedFallback, meta: { ...meta, compressed: true, llmCompressed: false, summaryTokenEstimate: countTokens(SUMMARY_PREFIX + previousSummary), - verbatimCount: newMessages.length, + verbatimCount: budgetedFallback.length === fallback.length ? head.length + newMessages.length : 0, compressedStartIndex: lastMessageIndex, }, } } - const result: ChatMessage[] = [ + let result: ChatMessage[] = [ + ...head, { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary }, ...tail, ] + result = enforceCompressedBudget(result, this.config.triggerTokens, head.length) const newLastIndex = lastMessageIndex + tailStart if (sessionId) { @@ -579,7 +665,7 @@ export class ChatContextCompressor { compressed: true, llmCompressed: true, summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary), - verbatimCount: tail.length, + verbatimCount: result.length === head.length + 1 + tail.length ? head.length + tail.length : 0, compressedStartIndex: newLastIndex, }, } @@ -594,20 +680,20 @@ export class ChatContextCompressor { summarizer?: string | SummarizerOptions, ): Promise { const total = messages.length - const cleaned = pruneOldToolResults(messages, this.config.tailMessageCount) - const tailCount = this.config.tailMessageCount - - if (total <= tailCount) { - return { messages: cleaned, meta } - } + const requestedHeadCount = Math.min(this.config.headMessageCount, total) + const requestedTailCount = this.config.tailMessageCount + const canKeepProtectedWindows = total > requestedHeadCount + requestedTailCount + const headCount = canKeepProtectedWindows ? requestedHeadCount : 0 + const tailCount = canKeepProtectedWindows ? requestedTailCount : 0 const tailStart = total - tailCount - const toCompress = cleaned.slice(0, tailStart) - const tail = cleaned.slice(tailStart) + const head = messages.slice(0, headCount) + const toCompress = messages.slice(headCount, tailStart) + const tail = messages.slice(tailStart) logger.info( - '[context-compressor] [full-llm] compressing messages 0-%d, keeping %d-%d', - tailStart - 1, tailStart, total - 1, + '[context-compressor] [full-llm] compressing messages %d-%d, keeping first %d and last %d', + headCount, tailStart - 1, head.length, tail.length, ) const contentToSummarize = serializeForSummary(toCompress) @@ -624,26 +710,28 @@ export class ChatContextCompressor { } if (!summary) { - return { messages: cleaned, meta } + return { messages: pruneFallbackToolResults(messages, this.config.tailMessageCount), meta } } const result: ChatMessage[] = [] + result.push(...head) result.push({ role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary }) if (sessionId) { saveCompressionSnapshot(sessionId, summary, tailStart - 1, total) } result.push(...tail) + const budgetedResult = enforceCompressedBudget(result, this.config.triggerTokens, head.length) return { - messages: result, + messages: budgetedResult, meta: { ...meta, compressed: true, llmCompressed: !!summary, summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0, - verbatimCount: tail.length, + verbatimCount: budgetedResult.length === result.length ? head.length + tail.length : 0, compressedStartIndex: tailStart - 1, }, } diff --git a/packages/server/src/services/hermes/run-chat/compression.ts b/packages/server/src/services/hermes/run-chat/compression.ts index 02a9ab2..f203237 100644 --- a/packages/server/src/services/hermes/run-chat/compression.ts +++ b/packages/server/src/services/hermes/run-chat/compression.ts @@ -10,14 +10,88 @@ import { import { getCompressionSnapshot } from '../../../db/hermes/compression-snapshot' import { ChatContextCompressor, SUMMARY_PREFIX } from '../../../lib/context-compressor' import { getModelContextLength } from '../model-context' +import { readConfigYamlForProfile } from '../../config-helpers' import { logger } from '../../logger' import { bridgeLogger } from '../../logger' import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage' import { isAssistantMessageSendable } from './message-format' -import type { ChatMessage } from '../../../lib/context-compressor' +import type { ChatMessage, CompressionConfig as CompressorConfig } from '../../../lib/context-compressor' import type { SessionState, BridgeCompressionResult } from './types' -const compressor = new ChatContextCompressor() +interface RunChatCompressionConfig { + enabled: boolean + triggerTokens: number + compressor: Partial +} + +function isSnapshotUsable( + snapshot: { lastMessageIndex: number } | null, + history: ChatMessage[], +): boolean { + return !!snapshot && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < history.length +} + +function buildSnapshotHistory( + snapshot: { summary: string; lastMessageIndex: number } | null, + history: ChatMessage[], + compressionConfig?: Partial, +): ChatMessage[] | null { + if (!snapshot) return null + const headCount = compressionConfig?.headMessageCount || 0 + const tailCount = compressionConfig?.tailMessageCount || 0 + const protectedHead = headCount > 0 ? history.slice(0, headCount) : [] + const summaryMessage = { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary } as ChatMessage + + if (isSnapshotUsable(snapshot, history)) { + return [ + ...protectedHead, + summaryMessage, + ...history.slice(snapshot.lastMessageIndex + 1), + ] + } + + const tailStart = Math.max(protectedHead.length, history.length - tailCount) + return [ + ...protectedHead, + summaryMessage, + ...history.slice(tailStart), + ] +} + +function clampRatio(value: unknown, fallback: number, min: number, max: number): number { + const n = typeof value === 'number' && Number.isFinite(value) ? value : fallback + return Math.min(max, Math.max(min, n)) +} + +function clampInt(value: unknown, fallback: number, min: number, max: number): number { + const n = typeof value === 'number' && Number.isFinite(value) ? Math.floor(value) : fallback + return Math.min(max, Math.max(min, n)) +} + +async function getRunChatCompressionConfig(profile: string, contextLength: number): Promise { + let raw: Record = {} + try { + raw = (await readConfigYamlForProfile(profile))?.compression || {} + } catch (err) { + logger.warn(err, '[context-compress] failed to read compression config for profile %s, using defaults', profile) + } + + const threshold = clampRatio(raw.threshold, 0.5, 0.05, 0.95) + const targetRatio = clampRatio(raw.target_ratio, 0.2, 0.01, 0.8) + const protectLastN = clampInt(raw.protect_last_n, 20, 0, 500) + const protectFirstN = clampInt(raw.protect_first_n, 3, 0, 100) + + return { + enabled: raw.enabled !== false, + triggerTokens: Math.floor(contextLength * threshold), + compressor: { + triggerTokens: Math.floor(contextLength * threshold), + summaryBudget: Math.max(1_000, Math.floor(contextLength * targetRatio)), + headMessageCount: protectFirstN, + tailMessageCount: protectLastN, + }, + } +} /** * Load conversation history from DB with full message structure (user/assistant/tool). @@ -77,12 +151,7 @@ export function estimateSnapshotAwareHistoryUsage( history: ChatMessage[], ): { messageCount: number; tokenCount: number } { const snapshot = getCompressionSnapshot(sessionId) - const messages = snapshot - ? [ - { role: 'user', content: SUMMARY_PREFIX + snapshot.summary }, - ...history.slice(snapshot.lastMessageIndex + 1), - ] - : history + const messages = buildSnapshotHistory(snapshot, history) || history const usage = estimateUsageTokensFromMessages(messages) return { messageCount: messages.length, @@ -108,29 +177,45 @@ export async function buildCompressedHistory( model: modelContext.model, provider: modelContext.provider, }) - const triggerTokens = Math.floor(contextLength / 2) + const compressionConfig = await getRunChatCompressionConfig(profile, contextLength) + const triggerTokens = compressionConfig.triggerTokens + if (!compressionConfig.enabled) { + logger.info('[context-compress] session=%s: compression disabled by config', sessionId) + return history + } const cState = getOrCreateSession(sessionMap, sessionId) const assembledTokens = await calcAndUpdateUsage(sessionId, cState, emit) - const totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens + let totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens const snapshot = getCompressionSnapshot(sessionId) + const staleSnapshot = snapshot && !isSnapshotUsable(snapshot, history) + if (staleSnapshot) { + logger.warn('[context-compress] session=%s: stale snapshot index %d for %d history messages; using summary plus safe tail', + sessionId, snapshot.lastMessageIndex, history.length) + const staleHistory = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history + const staleUsage = estimateUsageTokensFromMessages(staleHistory) + totalTokens = staleUsage.inputTokens + staleUsage.outputTokens + } - if (snapshot) { + if (snapshot && !staleSnapshot) { const newMessages = history.slice(snapshot.lastMessageIndex + 1) logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)', sessionId, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens) - if (totalTokens <= triggerTokens && newMessages.length <= 150) { - history = [ - { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary }, - ...newMessages, - ] as ChatMessage[] + if (totalTokens <= triggerTokens) { + history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history } else { - history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext) + history = await compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor) + } + } else if (snapshot && staleSnapshot) { + if (totalTokens <= triggerTokens) { + history = buildSnapshotHistory(snapshot, history, compressionConfig.compressor) || history + } else { + history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor) } } else if (history.length > 4) { - if (totalTokens <= triggerTokens && history.length <= 150) { + if (totalTokens <= triggerTokens) { logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, totalTokens) } else { - history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext) + history = await compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit, sessionMap, modelContext, compressionConfig.compressor) } } @@ -152,6 +237,7 @@ export async function compressHistory( emit: (event: string, payload: any) => void, sessionMap: Map, modelContext: { model?: string | null; provider?: string | null } = {}, + compressionConfig?: Partial, ): Promise { const msgCount = newMessagesOnly ? newMessagesOnly.length : history.length pushState(sessionMap, sessionId, 'compression.started', { @@ -163,6 +249,7 @@ export async function compressHistory( try { const session = getSession(sessionId) + const compressor = new ChatContextCompressor({ config: compressionConfig }) const result = await compressor.compress(history, upstream, apiKey, sessionId, { profile: session?.profile, model: modelContext.model || session?.model, @@ -244,6 +331,8 @@ export async function forceCompressBridgeHistory( const upstream = '' const apiKey = undefined const session = getSession(sessionId) + const contextLength = getModelContextLength({ profile, model: session?.model, provider: session?.provider }) + const compressionConfig = await getRunChatCompressionConfig(session?.profile || profile, contextLength) const beforeUsage = estimateSnapshotAwareHistoryUsage(sessionId, history) const totalTokens = beforeUsage.tokenCount bridgeLogger.info({ @@ -256,6 +345,7 @@ export async function forceCompressBridgeHistory( snapshotAware: true, }, '[chat-run-socket] bridge forced compression started') + const compressor = new ChatContextCompressor({ config: compressionConfig.compressor }) const result = await compressor.compress(history, upstream, apiKey, sessionId, { profile: session?.profile || profile, model: session?.model, diff --git a/packages/server/src/services/hermes/run-chat/handle-api-run.ts b/packages/server/src/services/hermes/run-chat/handle-api-run.ts index 8d4948f..3f8d5b5 100644 --- a/packages/server/src/services/hermes/run-chat/handle-api-run.ts +++ b/packages/server/src/services/hermes/run-chat/handle-api-run.ts @@ -38,7 +38,7 @@ export async function loadSessionStateFromDb(sid: string, _sessionMap: Map= 0 && snapshot.lastMessageIndex < messages.length) { const newMessages = messages.slice(snapshot.lastMessageIndex + 1) const newUsage = estimateUsageTokensFromMessages(newMessages) inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) + diff --git a/packages/server/src/services/hermes/run-chat/usage.ts b/packages/server/src/services/hermes/run-chat/usage.ts index a259389..cb152ce 100644 --- a/packages/server/src/services/hermes/run-chat/usage.ts +++ b/packages/server/src/services/hermes/run-chat/usage.ts @@ -53,7 +53,7 @@ export async function calcAndUpdateUsage( const snapshot = getCompressionSnapshot(sid) let inputTokens: number let outputTokens: number - if (snapshot && msgs.length) { + if (snapshot && msgs.length && snapshot.lastMessageIndex >= 0 && snapshot.lastMessageIndex < msgs.length) { const newMessages = msgs.slice(snapshot.lastMessageIndex + 1) const newUsage = estimateUsageTokensFromMessages(newMessages) inputTokens = countTokens(SUMMARY_PREFIX + snapshot.summary) + diff --git a/tests/server/context-compressor.test.ts b/tests/server/context-compressor.test.ts index 45cce66..8aa096c 100644 --- a/tests/server/context-compressor.test.ts +++ b/tests/server/context-compressor.test.ts @@ -3,6 +3,8 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' const getCompressionSnapshotMock = vi.fn() const saveCompressionSnapshotMock = vi.fn() const deleteCompressionSnapshotMock = vi.fn() +const bridgeRequestMock = vi.fn() +const bridgeDestroyMock = vi.fn() vi.mock('../../packages/server/src/services/logger', () => ({ logger: { @@ -19,6 +21,13 @@ vi.mock('../../packages/server/src/db/hermes/compression-snapshot', () => ({ deleteCompressionSnapshot: deleteCompressionSnapshotMock, })) +vi.mock('../../packages/server/src/services/hermes/agent-bridge', () => ({ + AgentBridgeClient: class { + request = bridgeRequestMock + destroy = bridgeDestroyMock + }, +})) + describe('ChatContextCompressor', () => { let originalFetch: typeof global.fetch @@ -27,6 +36,10 @@ describe('ChatContextCompressor', () => { getCompressionSnapshotMock.mockReset() saveCompressionSnapshotMock.mockReset() deleteCompressionSnapshotMock.mockReset() + bridgeRequestMock.mockReset() + bridgeDestroyMock.mockReset() + bridgeRequestMock.mockRejectedValue(new Error('summarizer failed')) + bridgeDestroyMock.mockResolvedValue(undefined) }) afterEach(() => { @@ -42,7 +55,6 @@ describe('ChatContextCompressor', () => { })) getCompressionSnapshotMock.mockReturnValue(null) - global.fetch = vi.fn(async () => ({ ok: false, status: 500 })) as any const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') @@ -66,7 +78,6 @@ describe('ChatContextCompressor', () => { lastMessageIndex: 1, messageCountAtTime: 2, }) - global.fetch = vi.fn(async () => ({ ok: false, status: 500 })) as any const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') @@ -109,4 +120,331 @@ describe('ChatContextCompressor', () => { expect(result.meta.compressedStartIndex).toBe(3) expect(saveCompressionSnapshotMock).not.toHaveBeenCalled() }) + + it('keeps configured first and last messages during full compression', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 }, + }) + const messages = Array.from({ length: 10 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `message ${i}`, + })) + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'compressed summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(result.messages.map(m => m.content)).toEqual([ + 'message 0', + 'message 1', + `${SUMMARY_PREFIX}\n\ncompressed summary`, + 'message 7', + 'message 8', + 'message 9', + ]) + expect(result.meta.compressed).toBe(true) + expect(result.meta.llmCompressed).toBe(true) + expect(result.meta.verbatimCount).toBe(5) + expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'compressed summary', 6, 10) + }) + + it('does not pre-prune tool results before sending them to the summarizer', async () => { + const { ChatContextCompressor } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 0, tailMessageCount: 1, summaryBudget: 1000 }, + }) + const longToolOutput = `${'x'.repeat(180)}KEEP_MARKER${'y'.repeat(180)}` + const messages = [ + { + role: 'assistant', + content: 'calling terminal', + tool_calls: [{ id: 'call_1', type: 'function', function: { name: 'terminal', arguments: '{}' } }], + }, + { role: 'tool', name: 'terminal', tool_call_id: 'call_1', content: longToolOutput }, + { role: 'user', content: 'tail' }, + ] + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'compressed summary' }, + }) + + await compressor.compress(messages, 'http://upstream', undefined, 's1') + + const request = bridgeRequestMock.mock.calls[0][0] + const serializedHistory = JSON.stringify(request.conversation_history) + expect(serializedHistory).toContain('KEEP_MARKER') + expect(serializedHistory).not.toContain('[terminal] ') + }) + + it('keeps protected head tool results verbatim after successful full compression', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 2, tailMessageCount: 1, summaryBudget: 1000 }, + }) + const longToolOutput = `${'head-tool-output '.repeat(30)}KEEP_HEAD_TOOL` + const messages = [ + { + role: 'assistant', + content: 'calling terminal', + tool_calls: [{ id: 'call_1', type: 'function', function: { name: 'terminal', arguments: '{}' } }], + }, + { role: 'tool', name: 'terminal', tool_call_id: 'call_1', content: longToolOutput }, + { role: 'user', content: 'middle' }, + { role: 'assistant', content: 'tail' }, + ] + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'compressed summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(result.messages.map(m => m.content)).toEqual([ + 'calling terminal', + longToolOutput, + `${SUMMARY_PREFIX}\n\ncompressed summary`, + 'tail', + ]) + }) + + it('uses the previous summary plus a safe tail when an existing snapshot index is stale', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 }, + }) + const messages = Array.from({ length: 8 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `message ${i}`, + })) + + getCompressionSnapshotMock.mockReturnValue({ + summary: 'stale previous summary', + lastMessageIndex: 20, + messageCountAtTime: 21, + }) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'rebuilt summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(deleteCompressionSnapshotMock).not.toHaveBeenCalled() + expect(bridgeRequestMock).not.toHaveBeenCalled() + expect(result.messages.map(m => m.content)).toEqual([ + 'message 0', + 'message 1', + `${SUMMARY_PREFIX}\n\nstale previous summary`, + 'message 5', + 'message 6', + 'message 7', + ]) + expect(saveCompressionSnapshotMock).not.toHaveBeenCalled() + }) + + it('folds a stale snapshot safe tail into a new summary when it still exceeds budget', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { triggerTokens: 800, headMessageCount: 2, tailMessageCount: 3, summaryBudget: 1000 }, + }) + const largeTail = 'tail-token '.repeat(200) + const messages = [ + { role: 'user', content: 'message 0' }, + { role: 'assistant', content: 'message 1' }, + { role: 'user', content: 'message 2' }, + { role: 'assistant', content: 'message 3' }, + { role: 'user', content: 'message 4' }, + { role: 'assistant', content: largeTail }, + { role: 'user', content: largeTail }, + { role: 'assistant', content: largeTail }, + ] + + getCompressionSnapshotMock.mockReturnValue({ + summary: 'stale previous summary', + lastMessageIndex: 20, + messageCountAtTime: 21, + }) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'updated stale summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(deleteCompressionSnapshotMock).not.toHaveBeenCalled() + expect(bridgeRequestMock).toHaveBeenCalledTimes(1) + expect(result.messages.map(m => m.content)).toEqual([ + 'message 0', + 'message 1', + `${SUMMARY_PREFIX}\n\nupdated stale summary`, + ]) + expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'updated stale summary', 7, 8) + }) + + it('compresses the full history when protected windows cover all messages', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 3, tailMessageCount: 20, summaryBudget: 1000 }, + }) + const messages = Array.from({ length: 20 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `message ${i}`, + })) + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'compressed all messages' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(bridgeRequestMock).toHaveBeenCalledTimes(1) + expect(result.messages.map(m => m.content)).toEqual([ + `${SUMMARY_PREFIX}\n\ncompressed all messages`, + ]) + expect(result.meta.compressed).toBe(true) + expect(result.meta.llmCompressed).toBe(true) + expect(result.meta.verbatimCount).toBe(0) + expect(result.meta.compressedStartIndex).toBe(19) + expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'compressed all messages', 19, 20) + }) + + it('drops protected messages when compressed output still exceeds the trigger budget', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { triggerTokens: 200, headMessageCount: 2, tailMessageCount: 2, summaryBudget: 100 }, + }) + const largeText = 'tail-token '.repeat(500) + const messages = [ + { role: 'user', content: 'head 0' }, + { role: 'assistant', content: 'head 1' }, + { role: 'user', content: 'middle 2' }, + { role: 'assistant', content: 'middle 3' }, + { role: 'user', content: largeText }, + { role: 'assistant', content: largeText }, + ] + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'short summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(result.messages.map(m => m.content)).toEqual([ + `${SUMMARY_PREFIX}\n\nshort summary`, + ]) + expect(result.meta.compressed).toBe(true) + expect(result.meta.llmCompressed).toBe(true) + expect(result.meta.verbatimCount).toBe(0) + }) + + it('truncates the summary when the summary alone exceeds the trigger budget', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX, countTokens } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { triggerTokens: 120, headMessageCount: 2, tailMessageCount: 2, summaryBudget: 100 }, + }) + const messages = Array.from({ length: 6 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `message ${i}`, + })) + const longSummary = 'summary-token '.repeat(500) + + getCompressionSnapshotMock.mockReturnValue(null) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: longSummary }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(result.messages).toHaveLength(1) + expect(String(result.messages[0].content)).toContain('[Summary truncated to fit context budget]') + expect(String(result.messages[0].content).startsWith(SUMMARY_PREFIX)).toBe(true) + expect(countTokens(String(result.messages[0].content))).toBeLessThanOrEqual(140) + expect(result.meta.verbatimCount).toBe(0) + }) + + it('keeps configured first messages when incremental compression reuses an existing snapshot', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { headMessageCount: 2, tailMessageCount: 10 }, + }) + const messages = Array.from({ length: 6 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `message ${i}`, + })) + + getCompressionSnapshotMock.mockReturnValue({ + summary: 'previous summary', + lastMessageIndex: 3, + messageCountAtTime: 4, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(bridgeRequestMock).not.toHaveBeenCalled() + expect(result.messages.map(m => m.content)).toEqual([ + 'message 0', + 'message 1', + `${SUMMARY_PREFIX}\n\nprevious summary`, + 'message 4', + 'message 5', + ]) + expect(result.meta.verbatimCount).toBe(4) + expect(saveCompressionSnapshotMock).not.toHaveBeenCalled() + }) + + it('folds all new messages into the summary when incremental tail protection would exceed budget', async () => { + const { ChatContextCompressor, SUMMARY_PREFIX } = await import('../../packages/server/src/lib/context-compressor') + const compressor = new ChatContextCompressor({ + config: { triggerTokens: 1000, headMessageCount: 3, tailMessageCount: 20, summaryBudget: 100 }, + }) + const largeText = 'new-token '.repeat(80) + const messages = [ + { role: 'user', content: 'head 0' }, + { role: 'assistant', content: 'head 1' }, + { role: 'user', content: 'head 2' }, + ...Array.from({ length: 20 }, (_, i) => ({ + role: i % 2 === 0 ? 'user' : 'assistant', + content: `${largeText}${i}`, + })), + ] + + getCompressionSnapshotMock.mockReturnValue({ + summary: 'previous summary', + lastMessageIndex: 2, + messageCountAtTime: 3, + }) + bridgeRequestMock.mockResolvedValue({ + status: 'completed', + result: { final_response: 'updated summary' }, + }) + + const result = await compressor.compress(messages, 'http://upstream', undefined, 's1') + + expect(bridgeRequestMock).toHaveBeenCalledTimes(1) + expect(result.messages.map(m => m.content)).toEqual([ + 'head 0', + 'head 1', + 'head 2', + `${SUMMARY_PREFIX}\n\nupdated summary`, + ]) + expect(result.meta.compressed).toBe(true) + expect(result.meta.llmCompressed).toBe(true) + expect(result.meta.verbatimCount).toBe(3) + expect(result.meta.compressedStartIndex).toBe(22) + expect(saveCompressionSnapshotMock).toHaveBeenCalledWith('s1', 'updated summary', 22, 23) + }) }) diff --git a/tests/server/run-chat-compression.test.ts b/tests/server/run-chat-compression.test.ts new file mode 100644 index 0000000..182b55e --- /dev/null +++ b/tests/server/run-chat-compression.test.ts @@ -0,0 +1,349 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const getSessionDetailMock = vi.fn() +const getSessionMock = vi.fn() +const getCompressionSnapshotMock = vi.fn() +const getModelContextLengthMock = vi.fn() +const calcAndUpdateUsageMock = vi.fn() +const estimateUsageTokensFromMessagesMock = vi.fn() +const compressorCompressMock = vi.fn() +const readConfigYamlForProfileMock = vi.fn() +const compressorConstructorMock = vi.fn() + +vi.mock('../../packages/server/src/db/hermes/session-store', () => ({ + getSessionDetail: getSessionDetailMock, + getSession: getSessionMock, +})) + +vi.mock('../../packages/server/src/db/hermes/compression-snapshot', () => ({ + getCompressionSnapshot: getCompressionSnapshotMock, +})) + +vi.mock('../../packages/server/src/lib/context-compressor', () => ({ + SUMMARY_PREFIX: '[Previous context summary]', + ChatContextCompressor: class { + constructor(opts?: any) { + compressorConstructorMock(opts) + } + compress = compressorCompressMock + }, +})) + +vi.mock('../../packages/server/src/services/hermes/model-context', () => ({ + getModelContextLength: getModelContextLengthMock, +})) + +vi.mock('../../packages/server/src/services/config-helpers', () => ({ + readConfigYamlForProfile: readConfigYamlForProfileMock, +})) + +vi.mock('../../packages/server/src/services/logger', () => ({ + logger: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, + bridgeLogger: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, +})) + +vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({ + calcAndUpdateUsage: calcAndUpdateUsageMock, + estimateUsageTokensFromMessages: estimateUsageTokensFromMessagesMock, +})) + +vi.mock('../../packages/server/src/services/hermes/run-chat/message-format', () => ({ + isAssistantMessageSendable: vi.fn(() => true), +})) + +describe('run chat compression trigger', () => { + beforeEach(() => { + getSessionDetailMock.mockReset() + getSessionMock.mockReset() + getCompressionSnapshotMock.mockReset() + getModelContextLengthMock.mockReset() + calcAndUpdateUsageMock.mockReset() + estimateUsageTokensFromMessagesMock.mockReset() + compressorCompressMock.mockReset() + compressorConstructorMock.mockReset() + readConfigYamlForProfileMock.mockReset() + + getSessionMock.mockReturnValue({ id: 'session-1', profile: 'default' }) + getModelContextLengthMock.mockReturnValue(200_000) + calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 }) + estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 0, outputTokens: 0 }) + getCompressionSnapshotMock.mockReturnValue(null) + readConfigYamlForProfileMock.mockResolvedValue({}) + }) + + it('does not compress long low-token history just because it has more than 150 messages', async () => { + const messages = Array.from({ length: 152 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 151 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `m${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + const history = await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(history).toHaveLength(151) + expect(history[0]).toEqual({ role: 'user', content: 'm0' }) + expect(history.at(-1)).toEqual({ role: 'user', content: 'm150' }) + expect(compressorCompressMock).not.toHaveBeenCalled() + }) + + it('uses configured threshold before triggering compression', async () => { + const messages = Array.from({ length: 10 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `message ${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + readConfigYamlForProfileMock.mockResolvedValue({ + compression: { threshold: 0.25, target_ratio: 0.1, protect_last_n: 7, protect_first_n: 2 }, + }) + calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 60_000, outputTokens: 0 }) + compressorCompressMock.mockResolvedValue({ + messages: [{ role: 'user', content: 'compressed' }], + meta: { + compressed: true, + llmCompressed: true, + totalMessages: 9, + summaryTokenEstimate: 1, + verbatimCount: 0, + compressedStartIndex: 0, + }, + }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + const history = await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(history).toEqual([{ role: 'user', content: 'compressed' }]) + expect(compressorCompressMock).toHaveBeenCalledWith( + expect.any(Array), + 'http://upstream', + undefined, + 'session-1', + expect.objectContaining({ profile: 'default' }), + ) + }) + + it('merges partial compression config with defaults', async () => { + const messages = Array.from({ length: 10 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `message ${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + readConfigYamlForProfileMock.mockResolvedValue({ + compression: { protect_last_n: 5 }, + }) + calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 120_000, outputTokens: 0 }) + compressorCompressMock.mockResolvedValue({ + messages: [{ role: 'user', content: 'compressed' }], + meta: { + compressed: true, + llmCompressed: true, + totalMessages: 9, + summaryTokenEstimate: 1, + verbatimCount: 0, + compressedStartIndex: 0, + }, + }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(compressorConstructorMock).toHaveBeenCalledWith({ + config: { + triggerTokens: 100_000, + summaryBudget: 40_000, + headMessageCount: 3, + tailMessageCount: 5, + }, + }) + expect(compressorCompressMock).toHaveBeenCalledTimes(1) + }) + + it('uses stale snapshot summary plus safe tail instead of full history when under threshold', async () => { + const messages = Array.from({ length: 10 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `message ${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + getCompressionSnapshotMock.mockReturnValue({ + summary: 'old summary', + lastMessageIndex: 99, + messageCountAtTime: 100, + }) + readConfigYamlForProfileMock.mockResolvedValue({ + compression: { protect_first_n: 2, protect_last_n: 3 }, + }) + estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 1_000, outputTokens: 0 }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + const history = await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(history.map(m => m.content)).toEqual([ + 'message 0', + 'message 1', + '[Previous context summary]\n\nold summary', + 'message 6', + 'message 7', + 'message 8', + ]) + expect(compressorCompressMock).not.toHaveBeenCalled() + }) + + it('compresses stale snapshot safe tail instead of full history when stale assembly exceeds threshold', async () => { + const messages = Array.from({ length: 10 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `message ${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + getCompressionSnapshotMock.mockReturnValue({ + summary: 'old summary', + lastMessageIndex: 99, + messageCountAtTime: 100, + }) + readConfigYamlForProfileMock.mockResolvedValue({ + compression: { protect_first_n: 2, protect_last_n: 3 }, + }) + estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 120_000, outputTokens: 0 }) + compressorCompressMock.mockResolvedValue({ + messages: [{ role: 'user', content: 'updated stale compressed' }], + meta: { + compressed: true, + llmCompressed: true, + totalMessages: 9, + summaryTokenEstimate: 1, + verbatimCount: 0, + compressedStartIndex: 8, + }, + }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + const history = await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(history).toEqual([{ role: 'user', content: 'updated stale compressed' }]) + expect(compressorCompressMock).toHaveBeenCalledWith( + expect.arrayContaining([{ role: 'user', content: 'message 0' }]), + 'http://upstream', + undefined, + 'session-1', + expect.objectContaining({ profile: 'default' }), + ) + }) + + it('does not compress when compression is disabled', async () => { + const messages = Array.from({ length: 10 }, (_, index) => ({ + id: index + 1, + session_id: 'session-1', + role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', + content: `message ${index}`, + timestamp: index + 1, + tool_call_id: null, + tool_calls: null, + tool_name: null, + finish_reason: null, + reasoning_content: null, + })) + getSessionDetailMock.mockReturnValue({ messages }) + readConfigYamlForProfileMock.mockResolvedValue({ + compression: { enabled: false, threshold: 0.01 }, + }) + calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 180_000, outputTokens: 0 }) + + const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') + const history = await buildCompressedHistory( + 'session-1', + 'default', + 'http://upstream', + undefined, + vi.fn(), + new Map(), + ) + + expect(history).toHaveLength(9) + expect(compressorCompressMock).not.toHaveBeenCalled() + expect(calcAndUpdateUsageMock).not.toHaveBeenCalled() + }) +}) diff --git a/tests/server/sessions-controller.test.ts b/tests/server/sessions-controller.test.ts index acd2a02..c0ba61a 100644 --- a/tests/server/sessions-controller.test.ts +++ b/tests/server/sessions-controller.test.ts @@ -206,6 +206,81 @@ describe('session conversations controller', () => { expect(ctx.body).toEqual({ error: 'Conversation not found' }) }) + it('prefers local session detail for Hermes history detail when available', async () => { + localGetSessionDetailMock.mockReturnValue({ + id: 'cli-1', + source: 'cli', + title: 'Local complete', + messages: [ + { id: 1, session_id: 'cli-1', role: 'user', content: 'local full message', timestamp: 1 }, + ], + }) + getSessionDetailFromDbMock.mockResolvedValue({ + id: 'cli-1', + source: 'cli', + title: 'Hermes incomplete', + messages: [], + }) + + const mod = await import('../../packages/server/src/controllers/hermes/sessions') + const ctx: any = { params: { id: 'cli-1' }, body: null } + await mod.getHermesSession(ctx) + + expect(localGetSessionDetailMock).toHaveBeenCalledWith('cli-1') + expect(getSessionDetailFromDbMock).not.toHaveBeenCalled() + expect(getSessionMock).not.toHaveBeenCalled() + expect(ctx.body.session).toMatchObject({ + id: 'cli-1', + title: 'Local complete', + messages: [{ content: 'local full message' }], + }) + }) + + it('falls back to Hermes state.db when local history detail is missing', async () => { + localGetSessionDetailMock.mockReturnValue(null) + getSessionDetailFromDbMock.mockResolvedValue({ + id: 'hermes-1', + source: 'cli', + title: 'Hermes detail', + messages: [ + { id: 1, session_id: 'hermes-1', role: 'user', content: 'from hermes', timestamp: 1 }, + ], + }) + + const mod = await import('../../packages/server/src/controllers/hermes/sessions') + const ctx: any = { params: { id: 'hermes-1' }, body: null } + await mod.getHermesSession(ctx) + + expect(localGetSessionDetailMock).toHaveBeenCalledWith('hermes-1') + expect(getSessionDetailFromDbMock).toHaveBeenCalledWith('hermes-1') + expect(getSessionMock).not.toHaveBeenCalled() + expect(ctx.body.session).toMatchObject({ + id: 'hermes-1', + title: 'Hermes detail', + messages: [{ content: 'from hermes' }], + }) + }) + + it('does not return api_server sessions from the Hermes history detail endpoint', async () => { + localGetSessionDetailMock.mockReturnValue({ + id: 'api-1', + source: 'api_server', + title: 'API Server', + messages: [{ id: 1, session_id: 'api-1', role: 'user', content: 'local api', timestamp: 1 }], + }) + getSessionDetailFromDbMock.mockResolvedValue(null) + getSessionMock.mockResolvedValue(null) + + const mod = await import('../../packages/server/src/controllers/hermes/sessions') + const ctx: any = { params: { id: 'api-1' }, body: null } + await mod.getHermesSession(ctx) + + expect(localGetSessionDetailMock).toHaveBeenCalledWith('api-1') + expect(getSessionDetailFromDbMock).toHaveBeenCalledWith('api-1') + expect(ctx.status).toBe(404) + expect(ctx.body).toEqual({ error: 'Session not found' }) + }) + it('returns native state.db usage analytics for the requested period', async () => { const today = new Date().toISOString().slice(0, 10) getLocalUsageStatsMock.mockReturnValue({