feat: v0.5.16 - migrate to Responses API (#586)
* refactor: migrate from /v1/runs to /v1/responses streaming API Replace EventSource-based polling with direct SSE streaming via the /v1/responses endpoint across all server-side callers (chat-run-socket, context-compressor, gateway-client, agent-clients). Messages are now written to DB in real-time during streaming, eliminating post-run sync. Frontend chat store adds tool_call_id tracking for deduplication. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * chore: bump version to 0.5.16 and add changelog - Persist real API usage to usage table on response.completed - Remove unused codex_reasoning_items field from message schema - Fix unused variable warnings in chat-run-socket - Bump version to 0.5.16 - Add changelog entries for 0.5.16 (8 locales) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "hermes-web-ui",
|
||||
"version": "0.5.15",
|
||||
"version": "0.5.16",
|
||||
"description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
@@ -5,6 +5,16 @@ export interface ChangelogEntry {
|
||||
}
|
||||
|
||||
export const changelog: ChangelogEntry[] = [
|
||||
{
|
||||
version: '0.5.16',
|
||||
date: '2026-05-10',
|
||||
changes: [
|
||||
'changelog.new_0_5_16_1',
|
||||
'changelog.new_0_5_16_2',
|
||||
'changelog.new_0_5_16_3',
|
||||
'changelog.new_0_5_16_4',
|
||||
],
|
||||
},
|
||||
{
|
||||
version: '0.5.15',
|
||||
date: '2026-05-09',
|
||||
|
||||
@@ -665,6 +665,10 @@ jobTriggered: 'Job ausgelost',
|
||||
new_0_5_15_9: 'Hermes Markdown-Medien-Rendering und Sync-Wiederholung korrigiert',
|
||||
new_0_5_15_10: 'Upstream-Umgebungsvariablenabhängigkeit entfernt',
|
||||
new_0_5_15_11: 'Wenn die Kanban-Funktion nicht verfügbar ist, updaten Sie bitte hermes-agent',
|
||||
new_0_5_16_1: 'Chat-Streaming von /v1/runs auf /v1/responses API migriert für geringere Latenz',
|
||||
new_0_5_16_2: 'Echte API-Nutzung (Tokens, Cache, Reasoning) in Nutzungsstatistik-Tabelle speichern',
|
||||
new_0_5_16_3: 'QQ-Gruppen-QR-Code zur Website-Navigationsleiste hinzugefügt',
|
||||
new_0_5_16_4: 'Unbenutztes codex_reasoning_items-Feld aus dem Nachrichtenschema entfernt',
|
||||
new_0_5_13_1: 'Nachrichtenwarteschlange für sequenzielle Run-Verarbeitung hinzugefügt, um gleichzeitige Konflikte zu vermeiden',
|
||||
new_0_5_13_2: 'Zwei-Ebenen-Skills-Verzeichnisstruktur mit Sonstige-Kategorie für flache Skills unterstützt',
|
||||
new_0_5_13_3: 'Temporäre Sitzungen (eph_*) beim Start-Sync filtern, um interne Sitzungen nicht zu importieren',
|
||||
|
||||
@@ -912,6 +912,10 @@ export default {
|
||||
new_0_5_15_9: 'Fix Hermes markdown media rendering and sync retry',
|
||||
new_0_5_15_10: 'Refactor to remove upstream env dependency',
|
||||
new_0_5_15_11: 'If the Kanban feature is not available, please upgrade hermes-agent',
|
||||
new_0_5_16_1: 'Migrate chat streaming from /v1/runs to /v1/responses API for lower latency',
|
||||
new_0_5_16_2: 'Persist real API usage (tokens, cache, reasoning) to usage table',
|
||||
new_0_5_16_3: 'Add QQ group QR code to website navigation bar',
|
||||
new_0_5_16_4: 'Remove unused codex_reasoning_items field from message schema',
|
||||
new_0_5_13_1: 'Add message queue for sequential run processing to prevent concurrent request conflicts',
|
||||
new_0_5_13_2: 'Support two-level skills directory structure with misc category for flat skills',
|
||||
new_0_5_13_3: 'Filter out ephemeral sessions during startup sync to avoid importing internal sessions',
|
||||
|
||||
@@ -661,6 +661,10 @@ jobTriggered: 'Job ejecutado',
|
||||
new_0_5_15_9: 'Corregido renderizado de medios Markdown y reintento de sincronización',
|
||||
new_0_5_15_10: 'Eliminada dependencia de variables de entorno upstream',
|
||||
new_0_5_15_11: 'Si la función Kanban no está disponible, actualice hermes-agent',
|
||||
new_0_5_16_1: 'Migrar streaming de chat de /v1/runs a /v1/responses API para menor latencia',
|
||||
new_0_5_16_2: 'Persistir uso real de API (tokens, caché, razonamiento) en tabla de estadísticas',
|
||||
new_0_5_16_3: 'Añadir código QR del grupo QQ a la barra de navegación del sitio web',
|
||||
new_0_5_16_4: 'Eliminar campo codex_reasoning_items no utilizado del esquema de mensajes',
|
||||
new_0_5_13_1: 'Cola de mensajes para procesamiento secuencial de ejecuciones, evitando conflictos concurrentes',
|
||||
new_0_5_13_2: 'Soporte para estructura de directorios de skills de dos niveles con categoría misc',
|
||||
new_0_5_13_3: 'Filtrar sesiones efímeras (eph_*) durante la sincronización de inicio',
|
||||
|
||||
@@ -660,6 +660,10 @@ jobTriggered: 'Job declenche',
|
||||
new_0_5_15_9: 'Correction rendu média Markdown et retry synchronisation',
|
||||
new_0_5_15_10: 'Suppression dépendance variable d\'environnement amont',
|
||||
new_0_5_15_11: 'Si la fonction Kanban n\'est pas disponible, veuillez mettre à niveau hermes-agent',
|
||||
new_0_5_16_1: 'Migration du streaming de chat de /v1/runs vers l\'API /v1/responses pour une latence réduite',
|
||||
new_0_5_16_2: 'Persistance de l\'utilisation réelle de l\'API (tokens, cache, raisonnement) dans la table des statistiques',
|
||||
new_0_5_16_3: 'Ajout du code QR du groupe QQ dans la barre de navigation du site',
|
||||
new_0_5_16_4: 'Suppression du champ codex_reasoning_items inutilisé du schéma de messages',
|
||||
new_0_5_13_1: 'File d\'attente de messages pour le traitement séquentiel des exécutions, évitant les conflits concurrents',
|
||||
new_0_5_13_2: 'Prise en charge de la structure de répertoire de skills à deux niveaux avec catégorie divers',
|
||||
new_0_5_13_3: 'Filtrer les sessions éphémères (eph_*) lors de la synchronisation au démarrage',
|
||||
|
||||
@@ -661,6 +661,10 @@ export default {
|
||||
new_0_5_15_9: 'Hermes Markdownメディアレンダリングと同期リトライを修正',
|
||||
new_0_5_15_10: 'アップストリーム環境変数依存をリファクタリングで削除',
|
||||
new_0_5_15_11: 'カンバン機能が使用できない場合は、hermes-agent をアップグレードしてください',
|
||||
new_0_5_16_1: 'チャットストリーミングを /v1/runs から /v1/responses API に移行し、レイテンシを削減',
|
||||
new_0_5_16_2: '実際の API 使用量(トークン、キャッシュ、推論)を統計テーブルに保存',
|
||||
new_0_5_16_3: 'ウェブサイトのナビゲーションバーにQQグループのQRコードを追加',
|
||||
new_0_5_16_4: 'メッセージスキーマから未使用の codex_reasoning_items フィールドを削除',
|
||||
new_0_5_13_1: 'メッセージキューによる順次実行処理で同時リクエストの競合を防止',
|
||||
new_0_5_13_2: '2階層スキルディレクトリ構造をサポート、フラットスキルは「その他」カテゴリに分類',
|
||||
new_0_5_13_3: '起動同期時に一時セッション(eph_*)をフィルタリング',
|
||||
|
||||
@@ -661,6 +661,10 @@ export default {
|
||||
new_0_5_15_9: 'Hermes Markdown 미디어 렌더링 및 동기화 재시도 수정',
|
||||
new_0_5_15_10: '업스트림 환경 변수 종속성 제거',
|
||||
new_0_5_15_11: '칸반 기능을 사용할 수 없는 경우 hermes-agent를 업그레이드하세요',
|
||||
new_0_5_16_1: '채팅 스트리밍을 /v1/runs에서 /v1/responses API로 마이그레이션하여 지연 시간 단축',
|
||||
new_0_5_16_2: '실제 API 사용량(토큰, 캐시, 추론)을 사용량 통계 테이블에 저장',
|
||||
new_0_5_16_3: '웹사이트 내비게이션 바에 QQ 그룹 QR 코드 추가',
|
||||
new_0_5_16_4: '메시지 스키마에서 사용하지 않는 codex_reasoning_items 필드 제거',
|
||||
new_0_5_13_1: '메시지 큐를 통한 순차 실행 처리로 동시 요청 충돌 방지',
|
||||
new_0_5_13_2: '2단계 스킬 디렉토리 구조 지원, 플랫 스킬은 기타 카테고리로 분류',
|
||||
new_0_5_13_3: '시작 동기화 시 임시 세션(eph_*) 필터링',
|
||||
|
||||
@@ -661,6 +661,10 @@ jobTriggered: 'Job acionado',
|
||||
new_0_5_15_9: 'Corrigido renderização de mídia Markdown e retry de sincronização',
|
||||
new_0_5_15_10: 'Removida dependência de variável de ambiente upstream',
|
||||
new_0_5_15_11: 'Se o recurso Kanban não estiver disponível, atualize o hermes-agent',
|
||||
new_0_5_16_1: 'Migrar streaming de chat de /v1/runs para /v1/responses API para menor latência',
|
||||
new_0_5_16_2: 'Persistir uso real da API (tokens, cache, raciocínio) na tabela de estatísticas',
|
||||
new_0_5_16_3: 'Adicionar código QR do grupo QQ à barra de navegação do site',
|
||||
new_0_5_16_4: 'Remover campo codex_reasoning_items não utilizado do esquema de mensagens',
|
||||
new_0_5_13_1: 'Fila de mensagens para processamento sequencial de execuções, evitando conflitos concorrentes',
|
||||
new_0_5_13_2: 'Suporte à estrutura de diretório de skills de dois níveis com categoria diversos',
|
||||
new_0_5_13_3: 'Filtrar sessões efêmeras (eph_*) durante a sincronização na inicialização',
|
||||
|
||||
@@ -914,6 +914,10 @@ export default {
|
||||
new_0_5_15_9: '修复 Hermes Markdown 媒体渲染与同步重试',
|
||||
new_0_5_15_10: '重构移除上游环境变量依赖',
|
||||
new_0_5_15_11: '如果看板功能无法使用,请升级 hermes-agent',
|
||||
new_0_5_16_1: '聊天流式接口从 /v1/runs 迁移至 /v1/responses,降低延迟',
|
||||
new_0_5_16_2: '持久化真实 API 用量(token、缓存、推理)到用量统计表',
|
||||
new_0_5_16_3: '官网导航栏新增 QQ 群二维码',
|
||||
new_0_5_16_4: '移除消息 schema 中未使用的 codex_reasoning_items 字段',
|
||||
new_0_5_13_1: '新增消息队列,顺序处理运行请求,避免并发冲突',
|
||||
new_0_5_13_2: '支持二级 Skills 目录结构,扁平化 Skill 归入"杂项"分类',
|
||||
new_0_5_13_3: '启动同步时过滤临时会话(eph_*),避免导入内部会话',
|
||||
|
||||
@@ -27,6 +27,7 @@ export interface Message {
|
||||
content: string
|
||||
timestamp: number
|
||||
toolName?: string
|
||||
toolCallId?: string
|
||||
toolPreview?: string
|
||||
toolArgs?: string
|
||||
toolResult?: string
|
||||
@@ -156,6 +157,7 @@ function mapHermesMessages(msgs: HermesMessage[]): Message[] {
|
||||
content: '',
|
||||
timestamp: Math.round(msg.timestamp * 1000),
|
||||
toolName: tc.function?.name || 'tool',
|
||||
toolCallId: tc.id,
|
||||
toolArgs: tc.function?.arguments || undefined,
|
||||
toolStatus: 'done',
|
||||
})
|
||||
@@ -191,6 +193,7 @@ function mapHermesMessages(msgs: HermesMessage[]): Message[] {
|
||||
content: '',
|
||||
timestamp: Math.round(msg.timestamp * 1000),
|
||||
toolName,
|
||||
toolCallId: tcId || undefined,
|
||||
toolArgs,
|
||||
toolPreview: typeof preview === 'string' ? preview.slice(0, 100) || undefined : undefined,
|
||||
toolResult: msg.content || undefined,
|
||||
@@ -910,6 +913,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'tool.started': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const toolCallId = (evt as any).tool_call_id as string | undefined
|
||||
const last = activeAssistantMessageId
|
||||
? msgs.find(m => m.id === activeAssistantMessageId)
|
||||
: msgs[msgs.length - 1]
|
||||
@@ -917,13 +921,27 @@ export const useChatStore = defineStore('chat', () => {
|
||||
updateMessage(sid, last.id, { isStreaming: false })
|
||||
}
|
||||
activeAssistantMessageId = null
|
||||
const existingTool = toolCallId
|
||||
? msgs.find(m => m.role === 'tool' && m.toolCallId === toolCallId)
|
||||
: null
|
||||
if (existingTool) {
|
||||
updateMessage(sid, existingTool.id, {
|
||||
toolName: evt.tool || evt.name,
|
||||
toolArgs: typeof (evt as any).arguments === 'string' ? (evt as any).arguments : existingTool.toolArgs,
|
||||
toolPreview: evt.preview || existingTool.toolPreview,
|
||||
toolStatus: existingTool.toolStatus || 'running',
|
||||
})
|
||||
break
|
||||
}
|
||||
addMessage(sid, {
|
||||
id: uid(),
|
||||
role: 'tool',
|
||||
content: '',
|
||||
timestamp: Date.now(),
|
||||
toolName: evt.tool || evt.name,
|
||||
toolCallId,
|
||||
toolPreview: evt.preview,
|
||||
toolArgs: typeof (evt as any).arguments === 'string' ? (evt as any).arguments : undefined,
|
||||
toolStatus: 'running',
|
||||
})
|
||||
|
||||
@@ -933,9 +951,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'tool.completed': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const toolMsgs = msgs.filter(
|
||||
m => m.role === 'tool' && m.toolStatus === 'running',
|
||||
)
|
||||
const toolCallId = (evt as any).tool_call_id as string | undefined
|
||||
const toolMsgs = toolCallId
|
||||
? msgs.filter(m => m.role === 'tool' && m.toolCallId === toolCallId)
|
||||
: msgs.filter(m => m.role === 'tool' && m.toolStatus === 'running')
|
||||
if (toolMsgs.length > 0) {
|
||||
const last = toolMsgs[toolMsgs.length - 1]
|
||||
// Check if tool errored
|
||||
@@ -944,6 +963,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
updateMessage(sid, last.id, {
|
||||
toolStatus: hasError ? 'error' : 'done',
|
||||
toolDuration: duration,
|
||||
toolResult: typeof (evt as any).output === 'string' ? (evt as any).output : undefined,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1326,6 +1346,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'tool.started': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const toolCallId = (evt as any).tool_call_id as string | undefined
|
||||
const last = activeAssistantMessageId
|
||||
? msgs.find(m => m.id === activeAssistantMessageId)
|
||||
: msgs[msgs.length - 1]
|
||||
@@ -1333,13 +1354,27 @@ export const useChatStore = defineStore('chat', () => {
|
||||
updateMessage(sid, last.id, { isStreaming: false })
|
||||
}
|
||||
activeAssistantMessageId = null
|
||||
const existingTool = toolCallId
|
||||
? msgs.find(m => m.role === 'tool' && m.toolCallId === toolCallId)
|
||||
: null
|
||||
if (existingTool) {
|
||||
updateMessage(sid, existingTool.id, {
|
||||
toolName: evt.tool || evt.name,
|
||||
toolArgs: typeof (evt as any).arguments === 'string' ? (evt as any).arguments : existingTool.toolArgs,
|
||||
toolPreview: evt.preview || existingTool.toolPreview,
|
||||
toolStatus: existingTool.toolStatus || 'running',
|
||||
})
|
||||
break
|
||||
}
|
||||
addMessage(sid, {
|
||||
id: uid(),
|
||||
role: 'tool',
|
||||
content: '',
|
||||
timestamp: Date.now(),
|
||||
toolName: evt.tool || evt.name,
|
||||
toolCallId,
|
||||
toolPreview: evt.preview,
|
||||
toolArgs: typeof (evt as any).arguments === 'string' ? (evt as any).arguments : undefined,
|
||||
toolStatus: 'running',
|
||||
})
|
||||
|
||||
@@ -1349,12 +1384,16 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'tool.completed': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const toolMsgs = msgs.filter(m => m.role === 'tool' && m.toolStatus === 'running')
|
||||
const toolCallId = (evt as any).tool_call_id as string | undefined
|
||||
const toolMsgs = toolCallId
|
||||
? msgs.filter(m => m.role === 'tool' && m.toolCallId === toolCallId)
|
||||
: msgs.filter(m => m.role === 'tool' && m.toolStatus === 'running')
|
||||
if (toolMsgs.length > 0) {
|
||||
const hasError = (evt as any).error === true
|
||||
updateMessage(sid, toolMsgs[toolMsgs.length - 1].id, {
|
||||
toolStatus: hasError ? 'error' : 'done',
|
||||
toolDuration: (evt as any).duration,
|
||||
toolResult: typeof (evt as any).output === 'string' ? (evt as any).output : undefined,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -70,7 +70,6 @@ export const MESSAGES_SCHEMA: Record<string, string> = {
|
||||
reasoning: 'TEXT',
|
||||
reasoning_details: 'TEXT',
|
||||
reasoning_content: 'TEXT',
|
||||
codex_reasoning_items: 'TEXT',
|
||||
}
|
||||
|
||||
export const MESSAGES_INDEX = 'CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id)'
|
||||
|
||||
@@ -45,7 +45,6 @@ export interface HermesMessageRow {
|
||||
finish_reason: string | null
|
||||
reasoning: string | null
|
||||
reasoning_details?: string | null
|
||||
codex_reasoning_items?: string | null
|
||||
reasoning_content?: string | null
|
||||
}
|
||||
|
||||
@@ -121,7 +120,6 @@ function mapMessageRow(row: Record<string, unknown>): HermesMessageRow {
|
||||
finish_reason: row.finish_reason != null ? String(row.finish_reason) : null,
|
||||
reasoning: row.reasoning != null ? String(row.reasoning) : null,
|
||||
reasoning_details: row.reasoning_details != null ? String(row.reasoning_details) : null,
|
||||
codex_reasoning_items: row.codex_reasoning_items != null ? String(row.codex_reasoning_items) : null,
|
||||
reasoning_content: row.reasoning_content != null ? String(row.reasoning_content) : null,
|
||||
}
|
||||
}
|
||||
@@ -343,21 +341,20 @@ export function addMessage(msg: {
|
||||
reasoning?: string | null
|
||||
reasoning_details?: string | null
|
||||
reasoning_content?: string | null
|
||||
codex_reasoning_items?: string | null
|
||||
}): number | undefined {
|
||||
if (!isSqliteAvailable()) return undefined
|
||||
const db = getDb()!
|
||||
const toolCallsJson = msg.tool_calls ? JSON.stringify(msg.tool_calls) : null
|
||||
const result = db.prepare(
|
||||
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
).run(
|
||||
msg.session_id, msg.role, msg.content,
|
||||
msg.tool_call_id ?? null, toolCallsJson, msg.tool_name ?? null,
|
||||
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
||||
msg.token_count ?? null, msg.finish_reason ?? null,
|
||||
msg.reasoning ?? null, msg.reasoning_details ?? null,
|
||||
msg.reasoning_content ?? null, msg.codex_reasoning_items ?? null,
|
||||
msg.reasoning_content ?? null,
|
||||
)
|
||||
return result.lastInsertRowid as number
|
||||
}
|
||||
@@ -375,13 +372,12 @@ export function addMessages(msgs: Array<{
|
||||
reasoning?: string | null
|
||||
reasoning_details?: string | null
|
||||
reasoning_content?: string | null
|
||||
codex_reasoning_items?: string | null
|
||||
}>): void {
|
||||
if (!isSqliteAvailable() || msgs.length === 0) return
|
||||
const db = getDb()!
|
||||
const insert = db.prepare(
|
||||
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
)
|
||||
db.exec('BEGIN')
|
||||
try {
|
||||
@@ -393,7 +389,7 @@ export function addMessages(msgs: Array<{
|
||||
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
||||
msg.token_count ?? null, msg.finish_reason ?? null,
|
||||
msg.reasoning ?? null, msg.reasoning_details ?? null,
|
||||
msg.reasoning_content ?? null, msg.codex_reasoning_items ?? null,
|
||||
msg.reasoning_content ?? null,
|
||||
)
|
||||
}
|
||||
db.exec('COMMIT')
|
||||
|
||||
@@ -53,7 +53,6 @@ export interface HermesMessageRow {
|
||||
finish_reason: string | null
|
||||
reasoning: string | null
|
||||
reasoning_details?: string | null
|
||||
codex_reasoning_items?: string | null
|
||||
reasoning_content?: string | null
|
||||
}
|
||||
|
||||
@@ -350,7 +349,6 @@ function mapMessageRow(row: Record<string, unknown>): HermesMessageRow {
|
||||
finish_reason: normalizeNullableString(row.finish_reason),
|
||||
reasoning,
|
||||
reasoning_details: normalizeNullableString(row.reasoning_details),
|
||||
codex_reasoning_items: normalizeNullableString(row.codex_reasoning_items),
|
||||
reasoning_content: normalizeNullableString(row.reasoning_content),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
* 6. Save snapshot: last_message_index = index where compression ends
|
||||
*/
|
||||
|
||||
import { EventSource } from 'eventsource'
|
||||
import { encodingForModel, getEncoding } from 'js-tiktoken'
|
||||
import { logger } from '../../services/logger'
|
||||
import {
|
||||
@@ -21,7 +20,6 @@ import {
|
||||
saveCompressionSnapshot,
|
||||
deleteCompressionSnapshot,
|
||||
} from '../../db/hermes/compression-snapshot'
|
||||
import { getDb } from '../../db/index'
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────
|
||||
|
||||
@@ -376,8 +374,6 @@ export async function callSummarizer(
|
||||
previousSummary?: string,
|
||||
profile?: string,
|
||||
): Promise<string> {
|
||||
const sessionId = `compress_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`
|
||||
|
||||
const convHistory: Array<{ role: string; content: string }> = [...history]
|
||||
|
||||
if (previousSummary) {
|
||||
@@ -390,88 +386,57 @@ export async function callSummarizer(
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`
|
||||
|
||||
const res = await fetch(`${upstream}/v1/runs`, {
|
||||
const res = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
input: prompt,
|
||||
conversation_history: convHistory,
|
||||
session_id: sessionId,
|
||||
stream: true,
|
||||
store: false,
|
||||
}),
|
||||
signal: AbortSignal.timeout(timeoutMs),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Summarization run failed: ${res.status}`)
|
||||
throw new Error(`Summarization response failed: ${res.status}`)
|
||||
}
|
||||
|
||||
const { run_id } = await res.json() as { run_id: string }
|
||||
if (!res.body) {
|
||||
throw new Error('Summarization response stream missing')
|
||||
}
|
||||
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
const timer = setTimeout(() => {
|
||||
source.close()
|
||||
reject(new Error('Summarization timed out'))
|
||||
}, timeoutMs)
|
||||
let output = ''
|
||||
for await (const frame of readSseFrames(res.body)) {
|
||||
let parsed: any
|
||||
try {
|
||||
parsed = JSON.parse(frame.data)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
const eventType = parsed.type || frame.event || parsed.event
|
||||
|
||||
const eventsUrl = new URL(`${upstream}/v1/runs/${run_id}/events`)
|
||||
|
||||
// Use Authorization header instead of query parameter for better compatibility
|
||||
const eventSourceInit: any = apiKey ? {
|
||||
fetch: (url: string, init: any = {}) => fetch(url, {
|
||||
...init,
|
||||
headers: {
|
||||
...(init.headers || {}),
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
}),
|
||||
} : {}
|
||||
|
||||
// @ts-ignore - eventsource library types are too strict
|
||||
const source = new EventSource(eventsUrl.toString(), eventSourceInit)
|
||||
|
||||
source.onmessage = (event: MessageEvent) => {
|
||||
try {
|
||||
const parsed = JSON.parse(event.data)
|
||||
if (parsed.event === 'run.completed') {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
const output = parsed.output
|
||||
if (!output || typeof output !== 'string' || output.trim() === '') {
|
||||
reject(new Error('Empty summarization response'))
|
||||
return
|
||||
}
|
||||
resolve(output.trim())
|
||||
} else if (parsed.event === 'run.failed') {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
reject(new Error(parsed.error || 'Summarization run failed'))
|
||||
}
|
||||
} catch { /* ignore parse errors */ }
|
||||
if (eventType === 'response.output_text.delta' && parsed.delta) {
|
||||
output += parsed.delta
|
||||
continue
|
||||
}
|
||||
|
||||
source.onerror = () => {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
reject(new Error('Summarization SSE connection error'))
|
||||
if (eventType === 'response.completed') {
|
||||
const response = parsed.response || parsed
|
||||
const finalText = extractResponseText(response)
|
||||
if (!output && finalText) output = finalText
|
||||
if (!output || output.trim() === '') {
|
||||
throw new Error('Empty summarization response')
|
||||
}
|
||||
return output.trim()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/** Enqueue compression session for later deletion instead of deleting immediately */
|
||||
async function deleteCompressSession(sessionId: string, profile?: string): Promise<void> {
|
||||
try {
|
||||
const db = getDb()
|
||||
if (!db) return
|
||||
const now = Date.now()
|
||||
db.prepare(
|
||||
`INSERT INTO gc_pending_session_deletes (session_id, profile_name, status, attempt_count, last_error, created_at, updated_at, next_attempt_at)
|
||||
VALUES (?, ?, 'pending', 0, NULL, ?, ?, 0)
|
||||
ON CONFLICT(session_id) DO NOTHING`,
|
||||
).run(sessionId, profile || 'default', now, now)
|
||||
} catch { /* best-effort */ }
|
||||
if (eventType === 'response.failed') {
|
||||
throw new Error(parsed.error?.message || parsed.error || 'Summarization response failed')
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Summarization response stream ended without a terminal event')
|
||||
}
|
||||
|
||||
// ─── Main Compressor ────────────────────────────────────
|
||||
@@ -665,3 +630,63 @@ export class ChatContextCompressor {
|
||||
deleteCompressionSnapshot(sessionId)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Incrementally decodes a byte stream as Server-Sent Events and yields one
 * parsed frame per blank-line-terminated event.
 *
 * - Frames are separated by an empty line; both LF ("\n\n") and CRLF
 *   ("\r\n\r\n") line endings are recognised.
 * - Frames that carry no data (comments, keep-alives, empty `data:`) are skipped.
 * - Whatever text is still buffered when the stream ends is parsed as one
 *   final frame.
 *
 * @param stream - Byte stream of an SSE response body (e.g. `fetch().body`).
 * @returns Async generator of `{ event?, data }` frames in arrival order.
 */
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
  // Blank line between frames, tolerating CRLF as well as bare LF endings.
  const frameBoundary = /\r?\n\r?\n/
  const decoder = new TextDecoder()
  const reader = stream.getReader()
  let buffer = ''
  let exhausted = false

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) {
        exhausted = true
        break
      }
      // `stream: true` keeps multi-byte characters split across chunks intact.
      buffer += decoder.decode(value, { stream: true })

      let boundary = frameBoundary.exec(buffer)
      while (boundary) {
        const raw = buffer.slice(0, boundary.index)
        buffer = buffer.slice(boundary.index + boundary[0].length)
        const frame = parseSseFrame(raw)
        if (frame?.data) yield frame
        boundary = frameBoundary.exec(buffer)
      }
    }

    // Flush any bytes the decoder was still holding, then parse the tail.
    buffer += decoder.decode()
    const frame = parseSseFrame(buffer)
    if (frame?.data) yield frame
  } finally {
    if (!exhausted) {
      // The consumer stopped early (return/throw) or reading failed: tell the
      // source no more data is wanted instead of leaving the body dangling.
      try {
        await reader.cancel()
      } catch {
        /* best-effort: the stream may already be errored or aborted */
      }
    }
    reader.releaseLock()
  }
}
|
||||
|
||||
/**
 * Parses the text of a single Server-Sent Events frame (the lines between two
 * blank lines) into its event name and data payload.
 *
 * - Empty lines and comment lines (starting with `:`) are ignored.
 * - `event:` sets the event name (surrounding whitespace trimmed).
 * - Each `data:` line contributes one line of payload; multiple `data:` lines
 *   are joined with "\n". Only the single optional space after the colon is
 *   removed, so leading indentation inside the payload is preserved.
 * - Any other field is ignored.
 *
 * @param raw - Raw frame text; lines may end in LF or CRLF.
 * @returns The parsed frame, or `null` when the frame has no `data:` line.
 */
function parseSseFrame(raw: string): { event?: string; data: string } | null {
  let event: string | undefined
  const data: string[] = []

  for (const line of raw.split(/\r?\n/)) {
    if (!line || line.startsWith(':')) continue

    if (line.startsWith('event:')) {
      event = line.slice(6).trim()
    } else if (line.startsWith('data:')) {
      const value = line.slice(5)
      // Strip exactly one leading space after the colon, nothing more.
      data.push(value.startsWith(' ') ? value.slice(1) : value)
    }
  }

  if (data.length === 0) return null
  return { event, data: data.join('\n') }
}
|
||||
|
||||
/**
 * Collects the assistant text from a Responses-API style response object.
 *
 * Walks `response.output`, and for every item of type `message` concatenates
 * the `text` of its `output_text` / `text` content parts. If no such part is
 * found, falls back to `response.output_text` when that is a string.
 *
 * The input comes from parsed upstream JSON, so malformed shapes (missing
 * arrays, `null` or primitive entries) are skipped rather than throwing.
 *
 * @param response - Parsed response payload; any shape is tolerated.
 * @returns The concatenated text, or an empty string when none is present.
 */
function extractResponseText(response: any): string {
  const items = Array.isArray(response?.output) ? response.output : []
  const parts: string[] = []

  for (const item of items) {
    // Optional chaining guards against null/primitive entries in the array.
    if (item?.type !== 'message') continue
    const content = Array.isArray(item.content) ? item.content : []
    for (const part of content) {
      if (part?.type === 'output_text' || part?.type === 'text') {
        parts.push(part.text || '')
      }
    }
  }

  if (parts.length > 0) return parts.join('')
  return typeof response?.output_text === 'string' ? response.output_text : ''
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,3 @@
|
||||
import { EventSource } from 'eventsource'
|
||||
import type { StoredMessage, GatewayCaller } from './types'
|
||||
import {
|
||||
buildSummarizationSystemPrompt,
|
||||
@@ -6,12 +5,11 @@ import {
|
||||
buildIncrementalUpdatePrompt,
|
||||
} from './prompt'
|
||||
import { updateUsage } from '../../../db/hermes/usage-store'
|
||||
import { getSessionDetailFromDbWithProfile } from '../../../db/hermes/sessions-db'
|
||||
import { logger } from '../../logger'
|
||||
|
||||
/**
|
||||
* Calls Hermes /v1/runs to produce LLM-generated summaries.
|
||||
* Uses non-streaming EventSource to wait for run.completed.
|
||||
* Calls Hermes /v1/responses to produce LLM-generated summaries.
|
||||
* The context engine owns history assembly; Responses storage/chaining is not used.
|
||||
*/
|
||||
export class GatewaySummarizer implements GatewayCaller {
|
||||
private timeoutMs: number
|
||||
@@ -29,13 +27,11 @@ export class GatewaySummarizer implements GatewayCaller {
|
||||
profile: string,
|
||||
previousSummary?: string,
|
||||
): Promise<{ summary: string; sessionId: string }> {
|
||||
// Build conversation_history from messages
|
||||
const history: Array<{ role: string; content: string }> = messages.map(m => ({
|
||||
role: 'user',
|
||||
content: `[${m.senderName}]: ${m.content}`,
|
||||
}))
|
||||
|
||||
// Inject previous summary for incremental update
|
||||
if (previousSummary) {
|
||||
history.unshift(
|
||||
{ role: 'user', content: `[Previous summary]\n${previousSummary}` },
|
||||
@@ -47,10 +43,7 @@ export class GatewaySummarizer implements GatewayCaller {
|
||||
? buildIncrementalUpdatePrompt()
|
||||
: buildFullSummaryPrompt()
|
||||
|
||||
const sessionId = Date.now().toString(36) + Math.random().toString(36).slice(2, 8)
|
||||
|
||||
// POST /v1/runs
|
||||
const res = await fetch(`${upstream}/v1/runs`, {
|
||||
const res = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -60,98 +53,122 @@ export class GatewaySummarizer implements GatewayCaller {
|
||||
input: userPrompt,
|
||||
instructions: systemPrompt || buildSummarizationSystemPrompt(),
|
||||
conversation_history: history,
|
||||
session_id: sessionId,
|
||||
stream: true,
|
||||
store: false,
|
||||
}),
|
||||
signal: AbortSignal.timeout(this.timeoutMs),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Summarization run failed: ${res.status}`)
|
||||
throw new Error(`Summarization response failed: ${res.status}`)
|
||||
}
|
||||
if (!res.body) {
|
||||
throw new Error('Summarization response stream missing')
|
||||
}
|
||||
|
||||
const { run_id } = await res.json() as { run_id: string }
|
||||
let output = ''
|
||||
for await (const frame of readSseFrames(res.body)) {
|
||||
let parsed: any
|
||||
try {
|
||||
parsed = JSON.parse(frame.data)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
const eventType = parsed.type || frame.event || parsed.event
|
||||
|
||||
try {
|
||||
const output = await this.pollForResult(upstream, apiKey, run_id, sessionId, roomId, profile)
|
||||
return { summary: output, sessionId }
|
||||
} finally {
|
||||
// Note: session cleanup is handled by the caller (compressor.ts)
|
||||
if (eventType === 'response.output_text.delta' && parsed.delta) {
|
||||
output += parsed.delta
|
||||
continue
|
||||
}
|
||||
|
||||
if (eventType === 'response.completed') {
|
||||
const response = parsed.response || parsed
|
||||
const finalText = extractResponseText(response)
|
||||
if (!output && finalText) output = finalText
|
||||
|
||||
const usage = response.usage || {}
|
||||
updateUsage(roomId, {
|
||||
inputTokens: usage.input_tokens ?? usage.inputTokens ?? 0,
|
||||
outputTokens: usage.output_tokens ?? usage.outputTokens ?? 0,
|
||||
cacheReadTokens: usage.cache_read_tokens ?? usage.cacheReadTokens ?? 0,
|
||||
cacheWriteTokens: usage.cache_write_tokens ?? usage.cacheWriteTokens ?? 0,
|
||||
reasoningTokens: usage.reasoning_tokens ?? usage.reasoningTokens ?? 0,
|
||||
model: response.model || '',
|
||||
profile,
|
||||
})
|
||||
logger.debug(`[GatewaySummarizer] Recorded response usage for compression room ${roomId} (profile=${profile}): input=${usage.input_tokens ?? 0}, output=${usage.output_tokens ?? 0}`)
|
||||
|
||||
if (!output || output.trim() === '') {
|
||||
throw new Error('Empty summarization response')
|
||||
}
|
||||
return { summary: output.trim(), sessionId: '' }
|
||||
}
|
||||
|
||||
if (eventType === 'response.failed') {
|
||||
throw new Error(parsed.error?.message || parsed.error || 'Summarization response failed')
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Summarization response stream ended without a terminal event')
|
||||
}
|
||||
|
||||
/**
 * Waits for a gateway run to finish by listening to its `/v1/runs/{runId}/events` SSE feed.
 *
 * On `run.completed` the token usage read via `getSessionDetailFromDbWithProfile` is recorded
 * for `roomId` (failures there are only logged), then the promise resolves with the trimmed
 * `output` of the event.
 *
 * @param upstream  Gateway base URL the events path is appended to.
 * @param apiKey    Bearer token; when null the request is sent without an Authorization header.
 * @param runId     Identifier of the run whose events are streamed.
 * @param sessionId Session whose usage counters are looked up once the run completes.
 * @param roomId    Room the usage is attributed to.
 * @param profile   Profile used for the session lookup and stored with the usage record.
 * @returns The trimmed summary text.
 * @throws Rejects with an Error on timeout (`this.timeoutMs`), on `run.failed`, on an empty or
 *         non-string output, or when the SSE connection reports an error.
 */
private pollForResult(upstream: string, apiKey: string | null, runId: string, sessionId: string, roomId: string, profile: string): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const eventsUrl = new URL(`${upstream}/v1/runs/${runId}/events`)

    // Use Authorization header instead of query parameter for better compatibility
    const eventSourceInit: any = apiKey ? {
      fetch: (url: string, init: any = {}) => fetch(url, {
        ...init,
        headers: {
          ...(init.headers || {}),
          Authorization: `Bearer ${apiKey}`,
        },
      }),
    } : {}

    // @ts-ignore - eventsource library types are too strict
    const source = new EventSource(eventsUrl.toString(), eventSourceInit)

    // Armed only after `source` exists. If URL/EventSource construction throws, no timer is
    // left behind to fire later and touch `source` while it is still uninitialised.
    const timer = setTimeout(() => {
      source.close()
      reject(new Error('Summarization timed out'))
    }, this.timeoutMs)

    source.onmessage = async (event: MessageEvent) => {
      try {
        const parsed = JSON.parse(event.data)
        if (parsed.event === 'run.completed') {
          clearTimeout(timer)

          // Record usage data from Hermes state.db BEFORE closing source
          // This ensures we fetch usage before sessionCleaner can delete it
          try {
            const detail = await getSessionDetailFromDbWithProfile(sessionId, profile)
            if (detail) {
              updateUsage(roomId, {
                inputTokens: detail.input_tokens,
                outputTokens: detail.output_tokens,
                cacheReadTokens: detail.cache_read_tokens,
                cacheWriteTokens: detail.cache_write_tokens,
                reasoningTokens: detail.reasoning_tokens,
                model: detail.model,
                profile,
              })
              logger.debug(`[GatewaySummarizer] Recorded usage for compression room ${roomId} (session ${sessionId}, profile=${profile}): input=${detail.input_tokens}, output=${detail.output_tokens}`)
            } else {
              logger.warn(`[GatewaySummarizer] Failed to get session detail for ${sessionId} (profile=${profile})`)
            }
          } catch (err: any) {
            // Usage accounting is best-effort; it must not fail the summarization itself
            logger.warn(err, '[GatewaySummarizer] Failed to record usage from DB')
          }

          source.close()

          const output = parsed.output
          if (!output || typeof output !== 'string' || output.trim() === '') {
            reject(new Error('Empty summarization response'))
            return
          }
          resolve(output.trim())
        } else if (parsed.event === 'run.failed') {
          clearTimeout(timer)
          source.close()
          // The error may be a plain string or a structured object; never surface "[object Object]"
          const reason = typeof parsed.error === 'string' ? parsed.error : parsed.error?.message
          reject(new Error(reason || 'Summarization run failed'))
        }
      } catch { /* ignore parse errors for non-JSON events */ }
    }

    source.onerror = () => {
      // Close explicitly so the EventSource does not keep the connection alive after we reject
      clearTimeout(timer)
      source.close()
      reject(new Error('Summarization SSE connection error'))
    }
  })
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Incrementally decodes a byte stream as Server-Sent Events and yields every event that
 * carries a non-empty `data` payload.
 *
 * Events are separated by a blank line. Both LF and CRLF line endings are recognised, so a
 * CRLF-terminated stream is split event by event instead of being merged into one frame at
 * end of stream. Whatever is left in the buffer when the stream ends is parsed as a final event.
 *
 * If the consumer stops iterating early (`break` / `return` inside `for await`) or reading
 * fails, the underlying stream is cancelled so the response body is not left open.
 *
 * @param stream Byte stream of an SSE response body.
 * @returns Async generator of `{ event?, data }` frames as produced by `parseSseFrame`.
 */
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
  const decoder = new TextDecoder()
  const reader = stream.getReader()
  // Blank line between events, tolerating LF, CRLF or a mix of both
  const frameBoundary = /\r?\n\r?\n/
  let buffer = ''
  let exhausted = false

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) {
        exhausted = true
        break
      }
      // `stream: true` keeps multi-byte characters that straddle chunks intact
      buffer += decoder.decode(value, { stream: true })

      let match = frameBoundary.exec(buffer)
      while (match) {
        const raw = buffer.slice(0, match.index)
        buffer = buffer.slice(match.index + match[0].length)
        const frame = parseSseFrame(raw)
        if (frame?.data) yield frame
        match = frameBoundary.exec(buffer)
      }
    }

    // Flush decoder state and treat the unterminated remainder as the last event
    buffer += decoder.decode()
    const frame = parseSseFrame(buffer)
    if (frame?.data) yield frame
  } finally {
    if (!exhausted) {
      try {
        await reader.cancel()
      } catch {
        // The stream had already errored; there is nothing left to cancel
      }
    }
    reader.releaseLock()
  }
}
|
||||
|
||||
/**
 * Parses the text of a single Server-Sent Event (the lines between two blank lines).
 *
 * Only the `event` and `data` fields are interpreted; comment lines (leading `:`) and all
 * other fields are ignored. Lines may end in LF, CRLF or a bare CR. Following the SSE field
 * rules, exactly one space after the colon is dropped from a value, so indentation inside a
 * payload survives; a line without a colon is a field with an empty value. Multiple `data`
 * lines are joined with `\n`.
 *
 * @param raw Text of one event block.
 * @returns The parsed frame, or `null` when the block contains no `data` field.
 */
function parseSseFrame(raw: string): { event?: string; data: string } | null {
  let event: string | undefined
  const data: string[] = []

  for (const line of raw.split(/\r\n|\n|\r/)) {
    if (!line || line.startsWith(':')) continue

    const colon = line.indexOf(':')
    const field = colon === -1 ? line : line.slice(0, colon)
    let value = colon === -1 ? '' : line.slice(colon + 1)
    if (value.startsWith(' ')) value = value.slice(1)

    if (field === 'event') {
      // Event names are compared verbatim by callers, so stay lenient about stray whitespace
      event = value.trim()
    } else if (field === 'data') {
      data.push(value)
    }
  }

  if (data.length === 0) return null
  return { event, data: data.join('\n') }
}
|
||||
|
||||
/**
 * Collects the assistant text from a Responses-API style response object.
 *
 * Text is taken from every `output` item of type `message`, concatenating the `text` of its
 * `output_text` / `text` content parts in order. When no such part exists, the top-level
 * `output_text` string is used instead.
 *
 * The payload comes straight from parsed JSON, so malformed entries (null items or parts,
 * non-array `content`, non-string `text`) are skipped or treated as empty rather than throwing.
 *
 * @param response Parsed response object; may be null/undefined or of any shape.
 * @returns The concatenated text, or `''` when none is present.
 */
function extractResponseText(response: any): string {
  const output = Array.isArray(response?.output) ? response.output : []
  const parts: string[] = []

  for (const item of output) {
    if (item?.type !== 'message' || !Array.isArray(item.content)) continue
    for (const part of item.content) {
      const isTextPart = part?.type === 'output_text' || part?.type === 'text'
      if (isTextPart) parts.push(typeof part.text === 'string' ? part.text : '')
    }
  }

  if (parts.length > 0) return parts.join('')
  return typeof response?.output_text === 'string' ? response.output_text : ''
}
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
import { io, Socket } from 'socket.io-client'
|
||||
import { EventSource } from 'eventsource'
|
||||
import { getToken } from '../../../services/auth'
|
||||
import type { GatewayManager } from '../gateway-manager'
|
||||
import { deleteSession as hermesDeleteSession } from '../hermes-cli'
|
||||
import { getActiveProfileName } from '../hermes-profile'
|
||||
import { logger } from '../../../services/logger'
|
||||
import { updateUsage } from '../../../db/hermes/usage-store'
|
||||
import { getSessionDetailFromDbWithProfile } from '../../../db/hermes/sessions-db'
|
||||
|
||||
// ─── Types ────────────────────────────────────────────────────
|
||||
|
||||
@@ -186,29 +182,6 @@ class AgentClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort removal of a gateway session.
 *
 * When the stored session-to-profile mapping names a profile other than the active one, the
 * deletion is queued through `storage.enqueuePendingSessionDelete` instead of being executed.
 * Otherwise the session is deleted directly and, on success, its mapping is dropped.
 * Never throws: any failure is logged as a warning.
 *
 * @param sessionId Identifier of the session to delete.
 */
private async deleteSession(sessionId: string): Promise<void> {
  try {
    const mapping = this.storage?.getSessionProfile?.(sessionId)
    const activeProfile = getActiveProfileName()
    const ownerProfile = mapping ? mapping.profile_name : undefined

    if (mapping && ownerProfile !== activeProfile) {
      // Session belongs to another profile: defer the delete rather than switching profiles
      this.storage?.enqueuePendingSessionDelete?.(sessionId, ownerProfile)
      logger.info(`[AgentClients] ${this.name}: cross-profile deferred delete session ${sessionId} (session=${ownerProfile}, active=${activeProfile})`)
      return
    }

    // Same profile, or no mapping recorded: delete right away
    const removed = await hermesDeleteSession(sessionId)
    if (removed) this.storage?.deleteSessionProfile?.(sessionId)

    const outcome = removed ? 'ok' : 'failed'
    logger.debug(`[AgentClients] ${this.name}: delete session ${sessionId} (profile=${this.profile}) → ${outcome}`)
  } catch (error: any) {
    logger.warn(`[AgentClients] ${this.name}: failed to delete session ${sessionId}: ${error.message}`)
  }
}
|
||||
|
||||
// ─── Hermes Gateway Integration ────────────────────────────
|
||||
|
||||
/**
|
||||
@@ -235,8 +208,6 @@ class AgentClient {
|
||||
return
|
||||
}
|
||||
|
||||
const sessionId = Date.now().toString(36) + Math.random().toString(36).slice(2, 8)
|
||||
|
||||
try {
|
||||
// Notify room that agent is typing
|
||||
this.startTyping(roomId)
|
||||
@@ -290,8 +261,7 @@ class AgentClient {
|
||||
|
||||
// Strip @mention from input — agent already knows it was mentioned
|
||||
const input = msg.content.replace(new RegExp(`@${this.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*`, 'gi'), '').trim() || msg.content
|
||||
// Start a run on Hermes gateway
|
||||
const runRes = await fetch(`${upstream}/v1/runs`, {
|
||||
const responseRes = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -299,126 +269,81 @@ class AgentClient {
|
||||
},
|
||||
body: JSON.stringify({
|
||||
input,
|
||||
session_id: sessionId,
|
||||
...(conversationHistory.length > 0 ? { conversation_history: conversationHistory } : {}),
|
||||
...(instructions ? { instructions } : {}),
|
||||
stream: true,
|
||||
store: false,
|
||||
}),
|
||||
signal: AbortSignal.timeout(120000),
|
||||
})
|
||||
|
||||
if (!runRes.ok) {
|
||||
const text = await runRes.text().catch(() => '')
|
||||
logger.error(`[AgentClients] ${this.name}: gateway run failed (${runRes.status}): ${text}`)
|
||||
if (!responseRes.ok) {
|
||||
const text = await responseRes.text().catch(() => '')
|
||||
logger.error(`[AgentClients] ${this.name}: gateway response failed (${responseRes.status}): ${text}`)
|
||||
this.stopTyping(roomId)
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
|
||||
const runData = await runRes.json() as any
|
||||
const run_id = runData.run_id
|
||||
logger.debug(`[AgentClients] ${this.name}: run started, response=%j`, runData)
|
||||
if (!run_id) {
|
||||
logger.error(`[AgentClients] ${this.name}: no run_id in response`)
|
||||
if (!responseRes.body) {
|
||||
logger.error(`[AgentClients] ${this.name}: gateway response stream missing`)
|
||||
this.stopTyping(roomId)
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
|
||||
// Save session-to-profile mapping after gateway confirms the run
|
||||
const actualSessionId = runData.session_id || sessionId
|
||||
if (!this.storage) {
|
||||
logger.warn(`[AgentClients] ${this.name}: storage is null, cannot save session profile for ${actualSessionId}`)
|
||||
} else {
|
||||
this.storage.saveSessionProfile(actualSessionId, roomId, this.agentId, this.profile)
|
||||
logger.debug(`[AgentClients] ${this.name}: saved session profile ${actualSessionId} → profile=${this.profile}`)
|
||||
}
|
||||
|
||||
// Stream events from Hermes
|
||||
const eventsUrl = new URL(`${upstream}/v1/runs/${run_id}/events`)
|
||||
logger.debug(`[AgentClients] ${this.name}: streaming events from ${eventsUrl}`)
|
||||
|
||||
// Use Authorization header instead of query parameter for better compatibility
|
||||
const eventSourceInit: any = apiKey ? {
|
||||
fetch: (url: string, init: any = {}) => fetch(url, {
|
||||
...init,
|
||||
headers: {
|
||||
...(init.headers || {}),
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
}),
|
||||
} : {}
|
||||
|
||||
// @ts-ignore - eventsource library types are too strict
|
||||
const source = new EventSource(eventsUrl.toString(), eventSourceInit)
|
||||
|
||||
let fullContent = ''
|
||||
|
||||
source.onmessage = async (e: any) => {
|
||||
for await (const frame of readSseFrames(responseRes.body)) {
|
||||
let parsed: any
|
||||
try {
|
||||
const parsed = JSON.parse(e.data)
|
||||
logger.debug(`[AgentClients] ${this.name}: event=${parsed.event}`)
|
||||
|
||||
if (parsed.event === 'run.completed') {
|
||||
// Record usage data from Hermes state.db BEFORE closing source
|
||||
// This ensures we fetch usage before deleteSession can delete it
|
||||
try {
|
||||
const detail = await getSessionDetailFromDbWithProfile(actualSessionId, this.profile)
|
||||
if (detail) {
|
||||
updateUsage(roomId, {
|
||||
inputTokens: detail.input_tokens,
|
||||
outputTokens: detail.output_tokens,
|
||||
cacheReadTokens: detail.cache_read_tokens,
|
||||
cacheWriteTokens: detail.cache_write_tokens,
|
||||
reasoningTokens: detail.reasoning_tokens,
|
||||
model: detail.model,
|
||||
profile: this.profile,
|
||||
})
|
||||
logger.debug(`[AgentClients] Recorded usage for room ${roomId} (session ${actualSessionId}, profile=${this.profile}): input=${detail.input_tokens}, output=${detail.output_tokens}`)
|
||||
} else {
|
||||
logger.warn(`[AgentClients] Failed to get session detail for ${actualSessionId} (profile=${this.profile})`)
|
||||
}
|
||||
} catch (err: any) {
|
||||
logger.warn(err, '[AgentClients] Failed to record usage from DB')
|
||||
}
|
||||
|
||||
source.close()
|
||||
logger.debug(`[AgentClients] ${this.name}: run completed, content length=${fullContent.length}`)
|
||||
if (fullContent) {
|
||||
this.stopTyping(roomId)
|
||||
this.sendMessage(roomId, fullContent)
|
||||
}
|
||||
this.deleteSession(actualSessionId).catch(() => { })
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
|
||||
if (parsed.event === 'run.failed') {
|
||||
source.close()
|
||||
logger.error(`[AgentClients] ${this.name}: run failed`)
|
||||
this.stopTyping(roomId)
|
||||
this.deleteSession(actualSessionId).catch(() => { })
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
|
||||
// Accumulate message deltas
|
||||
if (parsed.event === 'message.delta' && parsed.delta) {
|
||||
fullContent += parsed.delta
|
||||
}
|
||||
parsed = JSON.parse(frame.data)
|
||||
} catch {
|
||||
// ignore parse errors
|
||||
continue
|
||||
}
|
||||
const eventType = parsed.type || frame.event || parsed.event
|
||||
logger.debug(`[AgentClients] ${this.name}: event=${eventType}`)
|
||||
|
||||
if (eventType === 'response.output_text.delta' && parsed.delta) {
|
||||
fullContent += parsed.delta
|
||||
continue
|
||||
}
|
||||
|
||||
if (eventType === 'response.completed') {
|
||||
const response = parsed.response || parsed
|
||||
const finalText = extractResponseText(response)
|
||||
if (!fullContent && finalText) fullContent = finalText
|
||||
const usage = response.usage || {}
|
||||
updateUsage(roomId, {
|
||||
inputTokens: usage.input_tokens ?? usage.inputTokens ?? 0,
|
||||
outputTokens: usage.output_tokens ?? usage.outputTokens ?? 0,
|
||||
cacheReadTokens: usage.cache_read_tokens ?? usage.cacheReadTokens ?? 0,
|
||||
cacheWriteTokens: usage.cache_write_tokens ?? usage.cacheWriteTokens ?? 0,
|
||||
reasoningTokens: usage.reasoning_tokens ?? usage.reasoningTokens ?? 0,
|
||||
model: response.model || '',
|
||||
profile: this.profile,
|
||||
})
|
||||
logger.debug(`[AgentClients] ${this.name}: response completed, content length=${fullContent.length}`)
|
||||
if (fullContent) {
|
||||
this.stopTyping(roomId)
|
||||
this.sendMessage(roomId, fullContent)
|
||||
}
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
|
||||
if (eventType === 'response.failed') {
|
||||
logger.error(`[AgentClients] ${this.name}: response failed`)
|
||||
this.stopTyping(roomId)
|
||||
onStatus?.('ready')
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
source.onerror = (err: any) => {
|
||||
logger.error(err, `[AgentClients] ${this.name}: EventSource error`)
|
||||
source.close()
|
||||
this.stopTyping(roomId)
|
||||
this.deleteSession(actualSessionId).catch(() => { })
|
||||
onStatus?.('ready')
|
||||
}
|
||||
logger.warn(`[AgentClients] ${this.name}: response stream ended without terminal event`)
|
||||
this.stopTyping(roomId)
|
||||
onStatus?.('ready')
|
||||
} catch (err: any) {
|
||||
logger.error(`[AgentClients] ${this.name}: error handling message: ${err.message}`)
|
||||
this.stopTyping(roomId)
|
||||
this.deleteSession(sessionId).catch(() => { })
|
||||
onStatus?.('ready')
|
||||
}
|
||||
}
|
||||
@@ -460,6 +385,66 @@ class AgentClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads a Server-Sent Events byte stream and yields each event that has a non-empty `data`
 * payload, in arrival order.
 *
 * A blank line terminates an event. LF and CRLF line endings are both accepted, so events in
 * a CRLF-terminated stream are delivered one by one rather than merged into a single frame
 * when the stream closes. Any unterminated remainder at end of stream is parsed as a last event.
 *
 * When iteration is abandoned early (`break` / `return` inside `for await`) or a read fails,
 * the underlying stream is cancelled so the HTTP response body is released.
 *
 * @param stream Byte stream of an SSE response body.
 * @returns Async generator of `{ event?, data }` frames as produced by `parseSseFrame`.
 */
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
  const decoder = new TextDecoder()
  const reader = stream.getReader()
  // Blank line between events, tolerating LF, CRLF or a mix of both
  const frameBoundary = /\r?\n\r?\n/
  let buffer = ''
  let exhausted = false

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) {
        exhausted = true
        break
      }
      // `stream: true` keeps multi-byte characters that straddle chunks intact
      buffer += decoder.decode(value, { stream: true })

      let match = frameBoundary.exec(buffer)
      while (match) {
        const raw = buffer.slice(0, match.index)
        buffer = buffer.slice(match.index + match[0].length)
        const frame = parseSseFrame(raw)
        if (frame?.data) yield frame
        match = frameBoundary.exec(buffer)
      }
    }

    // Flush decoder state and treat the unterminated remainder as the last event
    buffer += decoder.decode()
    const frame = parseSseFrame(buffer)
    if (frame?.data) yield frame
  } finally {
    if (!exhausted) {
      try {
        await reader.cancel()
      } catch {
        // The stream had already errored; there is nothing left to cancel
      }
    }
    reader.releaseLock()
  }
}
|
||||
|
||||
/**
 * Turns the text of one Server-Sent Event block into an `{ event, data }` frame.
 *
 * Only `event` and `data` fields are read; comment lines (leading `:`) and other fields are
 * skipped. Line endings may be LF, CRLF or a bare CR. As the SSE field rules require, a single
 * space after the colon is removed from the value (further whitespace belongs to the payload),
 * and a line without a colon counts as a field with an empty value. Several `data` lines are
 * joined with `\n`.
 *
 * @param raw Text of one event block.
 * @returns The parsed frame, or `null` when the block contains no `data` field.
 */
function parseSseFrame(raw: string): { event?: string; data: string } | null {
  let event: string | undefined
  const data: string[] = []

  for (const line of raw.split(/\r\n|\n|\r/)) {
    if (!line || line.startsWith(':')) continue

    const colon = line.indexOf(':')
    const field = colon === -1 ? line : line.slice(0, colon)
    let value = colon === -1 ? '' : line.slice(colon + 1)
    if (value.startsWith(' ')) value = value.slice(1)

    if (field === 'event') {
      // Event names are compared verbatim by callers, so stay lenient about stray whitespace
      event = value.trim()
    } else if (field === 'data') {
      data.push(value)
    }
  }

  if (data.length === 0) return null
  return { event, data: data.join('\n') }
}
|
||||
|
||||
/**
 * Extracts the assistant text from a Responses-API style response object.
 *
 * Walks every `output` item of type `message` and concatenates, in order, the `text` of its
 * `output_text` / `text` content parts. If no such part is found, falls back to the top-level
 * `output_text` string.
 *
 * Input is untrusted parsed JSON: null items or parts, a non-array `content`, and non-string
 * `text` values are tolerated (skipped or treated as empty) instead of raising a TypeError.
 *
 * @param response Parsed response object; may be null/undefined or of any shape.
 * @returns The concatenated text, or `''` when none is present.
 */
function extractResponseText(response: any): string {
  const output = Array.isArray(response?.output) ? response.output : []
  const parts: string[] = []

  for (const item of output) {
    if (item?.type !== 'message' || !Array.isArray(item.content)) continue
    for (const part of item.content) {
      const isTextPart = part?.type === 'output_text' || part?.type === 'text'
      if (isTextPart) parts.push(typeof part.text === 'string' ? part.text : '')
    }
  }

  if (parts.length > 0) return parts.join('')
  return typeof response?.output_text === 'string' ? response.output_text : ''
}
|
||||
|
||||
// ─── AgentClients (roomId -> agents) ──────────────────────────
|
||||
|
||||
export class AgentClients {
|
||||
|
||||
@@ -107,7 +107,6 @@ async function syncProfileSessions(profile: string): Promise<{
|
||||
reasoning: msg.reasoning,
|
||||
reasoning_details: msg.reasoning_details,
|
||||
reasoning_content: msg.reasoning_content,
|
||||
codex_reasoning_items: msg.codex_reasoning_items,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user