From eae7195ba8f9ed8ec5a27fe772aae09d22f8c1f9 Mon Sep 17 00:00:00 2001 From: ekko <152005280+EKKOLearnAI@users.noreply.github.com> Date: Thu, 14 May 2026 09:03:57 +0800 Subject: [PATCH] Update CLI chat session bridge (#697) * feat: add CLI chat sessions with Python agent bridge Introduce a new CLI chat mode that connects Web UI directly to Hermes Agent's AIAgent via a Python bridge subprocess and Socket.IO, bypassing the API Server /v1/responses path. Supports streaming, slash commands (/new, /undo, /retry, /branch, /compress, /save, /title), interrupt, and steer. Co-Authored-By: Claude Opus 4.7 * feat: update CLI chat session bridge * fix: extend agent bridge startup timeouts * docs: update bridge chat session design * feat: align bridge compression and provider registry * chore: bump version to 0.5.20 --------- Co-authored-by: Claude Opus 4.7 --- .gitignore | 4 +- Dockerfile | 1 + docker-compose.yml | 1 + docs/cli-chat-sessions.md | 459 +++++++ package.json | 2 +- packages/client/src/api/hermes/chat.ts | 52 +- packages/client/src/api/hermes/group-chat.ts | 5 +- .../src/components/hermes/chat/ChatPanel.vue | 201 ++- .../components/hermes/chat/MessageList.vue | 20 +- packages/client/src/i18n/locales/en.ts | 2 + packages/client/src/i18n/locales/zh.ts | 2 + packages/client/src/shared/providers.ts | 310 ----- packages/client/src/stores/hermes/chat.ts | 148 ++- .../client/src/views/hermes/HistoryView.vue | 6 +- .../server/src/controllers/hermes/profiles.ts | 32 +- .../server/src/controllers/hermes/sessions.ts | 413 ++---- .../server/src/db/hermes/session-store.ts | 8 +- packages/server/src/db/hermes/sessions-db.ts | 28 +- packages/server/src/index.ts | 16 +- .../services/hermes/agent-bridge/README.md | 85 ++ .../services/hermes/agent-bridge/client.ts | 330 +++++ .../hermes/agent-bridge/hermes_bridge.py | 1184 +++++++++++++++++ .../src/services/hermes/agent-bridge/index.ts | 2 + .../services/hermes/agent-bridge/manager.ts | 360 +++++ 
.../src/services/hermes/chat-run-socket.ts | 1147 +++++++++++----- .../hermes/group-chat/agent-clients.ts | 2 + .../src/services/hermes/group-chat/index.ts | 8 +- packages/server/src/services/shutdown.ts | 11 +- packages/server/src/shared/providers.ts | 90 +- scripts/build-server.mjs | 9 +- tests/shared/provider-presets.test.ts | 8 +- 31 files changed, 3906 insertions(+), 1040 deletions(-) create mode 100644 docs/cli-chat-sessions.md delete mode 100644 packages/client/src/shared/providers.ts create mode 100644 packages/server/src/services/hermes/agent-bridge/README.md create mode 100644 packages/server/src/services/hermes/agent-bridge/client.ts create mode 100644 packages/server/src/services/hermes/agent-bridge/hermes_bridge.py create mode 100644 packages/server/src/services/hermes/agent-bridge/index.ts create mode 100644 packages/server/src/services/hermes/agent-bridge/manager.ts diff --git a/.gitignore b/.gitignore index 5b9910f..f0d37f8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ package-lock.json node_modules dist dist-ssr +__pycache__/ +*.py[cod] server/dist packages/server/dist *.local @@ -37,4 +39,4 @@ hermes-dependencies.md .superpowers/ CLAUDE.md # Client source map artifacts -packages/client/src/**/*.js \ No newline at end of file +packages/client/src/**/*.js diff --git a/Dockerfile b/Dockerfile index f01b673..7d91a3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,7 @@ RUN npm run build && npm prune --omit=dev ENV NODE_ENV=production ENV HOME=/home/agent ENV HERMES_HOME=/home/agent/.hermes +ENV PATH=/opt/hermes/.venv/bin:$PATH EXPOSE 6060 diff --git a/docker-compose.yml b/docker-compose.yml index 6099418..577c8cd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,7 @@ services: - PORT=${PORT:-6060} - HERMES_HOME=/home/agent/.hermes - HERMES_BIN=/opt/hermes/.venv/bin/hermes + - PATH=/opt/hermes/.venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - AUTH_DISABLED=${AUTH_DISABLED:-false} - 
HERMES_ALLOW_ROOT_GATEWAY=1 restart: unless-stopped diff --git a/docs/cli-chat-sessions.md b/docs/cli-chat-sessions.md new file mode 100644 index 0000000..f715a89 --- /dev/null +++ b/docs/cli-chat-sessions.md @@ -0,0 +1,459 @@ +# CLI/Bridge Chat Sessions 实现文档 + +> 分支:`feat/cli-chat-sessions` + +## 概述 + +当前实现把原来的聊天通道统一到 Socket.IO namespace `/chat-run`。前端仍使用同一套 `ChatPanel + MessageList + ChatInput`,通过会话的 `source` 字段区分运行方式: + +| source | 运行路径 | 说明 | +|--------|----------|------| +| `api_server` | Web UI Server → Hermes Gateway `/v1/responses` | 默认聊天路径 | +| `cli` | Web UI Server → Python agent bridge → `AIAgent` | Bridge(beta),在 Web UI 服务端子进程里直接运行 Hermes Agent | + +Bridge 会话不是一个独立 UI 面板,而是普通会话的一种来源。用户通过“新建聊天”下拉菜单选择 `API` 或 `Bridge (beta)`。 + +Bridge 模式支持: + +- 流式文本输出 +- reasoning/thinking 增量 +- tool started/completed 事件 +- 工具审批请求与响应 +- abort 中断 +- per-session 队列 +- profile 隔离 +- 从 DB resume 会话 +- 与 API Server 路径共用上下文压缩逻辑 + +当前不再支持旧文档里的独立 `/cli-chat-run` namespace、`CliChatPanel.vue`、`cli-chat.ts` 和 CLI 命令控制层。前端不会再发送 `command` 或 `steer` socket 事件,也不会把 `/new`、`/reset`、`/undo`、`/retry`、`/branch`、`/compress` 等输入当作特殊命令处理。 + +--- + +## 整体架构 + +```text +ChatPanel.vue + ├─ MessageList.vue + └─ ChatInput.vue + │ + │ Socket.IO /chat-run + ▼ +ChatRunSocket (Node.js) + ├─ source=api_server → Hermes Gateway /v1/responses + └─ source=cli → AgentBridgeClient + │ TCP/Unix socket, newline JSON + ▼ + hermes_bridge.py + │ in-process import + ▼ + AIAgent (hermes-agent) +``` + +### 分流规则 + +`ChatRunSocket.resolveRunSource()` 决定本轮运行走哪个后端: + +1. `run` payload 中 `source === 'cli'` 时走 bridge。 +2. `source === 'api_server'` 时走 gateway。 +3. 未显式传 `source` 时,如果 DB 中已有 session 的 `source` 是 `cli`,继续走 bridge。 +4. 
其他情况默认走 `api_server`。 + +--- + +## 主要文件 + +### 前端 + +| 文件 | 说明 | +|------|------| +| `packages/client/src/components/hermes/chat/ChatPanel.vue` | 统一聊天面板;新建菜单包含 `API` 和 `Bridge (beta)`;渲染审批条 | +| `packages/client/src/components/hermes/chat/MessageList.vue` | 统一消息列表;展示文本、reasoning、tool 消息等 | +| `packages/client/src/components/hermes/chat/ChatInput.vue` | 统一输入框;发送、停止、附件上传入口 | +| `packages/client/src/api/hermes/chat.ts` | `/chat-run` Socket.IO 客户端;注册 session 事件处理器;发送 run/abort/approval | +| `packages/client/src/stores/hermes/chat.ts` | 会话状态、发送流程、resume、队列、审批、消息映射 | + +### 后端 + +| 文件 | 说明 | +|------|------| +| `packages/server/src/services/hermes/chat-run-socket.ts` | `/chat-run` Socket.IO 服务;同时处理 API Server 和 Bridge 运行 | +| `packages/server/src/services/hermes/agent-bridge/client.ts` | Node 端 bridge 客户端;通过 socket 请求 Python bridge | +| `packages/server/src/services/hermes/agent-bridge/manager.ts` | Python bridge 子进程生命周期管理 | +| `packages/server/src/services/hermes/agent-bridge/hermes_bridge.py` | Python bridge 服务;创建并复用 `AIAgent` 实例 | +| `packages/server/src/services/hermes/agent-bridge/index.ts` | bridge 模块导出 | +| `packages/server/src/index.ts` | 启动 `AgentBridgeManager` 和 `ChatRunSocket` | +| `packages/server/src/services/shutdown.ts` | 关闭时停止 chat socket 和 bridge 子进程 | +| `packages/server/src/controllers/hermes/sessions.ts` | 会话列表和详情读取,包含 `source` 信息 | +| `packages/server/src/controllers/hermes/profiles.ts` | profile 切换/管理时清理 bridge 内存会话 | + +### 已移除的旧文件 + +| 文件 | 状态 | +|------|------| +| `packages/client/src/api/hermes/cli-chat.ts` | 已删除 | +| `packages/client/src/components/hermes/chat/CliChatPanel.vue` | 已删除 | +| `packages/server/src/services/hermes/cli-chat-run-socket.ts` | 已删除 | + +--- + +## 前端流程 + +### 新建会话 + +`ChatPanel.vue` 中的新建按钮使用下拉菜单: + +- `API`:调用 `chatStore.newChat()`,创建默认 `api_server` 会话。 +- `Bridge (beta)`:调用 `chatStore.newCliSession()`,创建 `source: 'cli'` 会话。 + +Bridge 会话 ID 使用类似 `YYYYMMDD_HHMMSS_xxxxxx` 的格式,便于与 Hermes CLI 风格的 session ID 对齐。 + +### 发送消息 + 
+1. `ChatInput.vue` 触发 store 的发送逻辑。 +2. `chat.ts` 根据 active session 组装输入内容,附件会被转为 `ContentBlock[]`。 +3. 调用 `startRunViaSocket()`。 +4. 前端向 `/chat-run` emit: + +```ts +socket.emit('run', { + session_id, + input, + instructions, + model, + queue_id, + source, // api_server 或 cli +}) +``` + +5. 前端注册本 session 的事件 handler,通过 `session_id` 隔离多会话并发事件。 + +### Resume + +切换会话、页面恢复可见、或刷新后,前端通过: + +```ts +socket.emit('resume', { session_id }) +``` + +服务端返回: + +```ts +{ + session_id, + messages, + isWorking, + isAborting, + events, + inputTokens, + outputTokens, + queueLength, +} +``` + +如果服务端发现该 session 仍在运行,前端会重新注册 handler,并允许继续 abort。 + +### 审批 + +Bridge 工具需要人工确认时,服务端会发 `approval.requested`,前端 store 记录为 `activePendingApproval`,`ChatPanel.vue` 在输入框上方显示审批条。 + +前端响应审批: + +```ts +socket.emit('approval.respond', { + session_id, + approval_id, + choice, // once | session | always | deny +}) +``` + +--- + +## `/chat-run` Socket.IO 协议 + +### 客户端 → 服务端 + +| 事件 | 数据 | 说明 | +|------|------|------| +| `run` | `{ session_id, input, model?, instructions?, queue_id?, source? 
}` | 启动一轮运行;`source` 决定 API Server 或 Bridge | +| `resume` | `{ session_id }` | 加入 session room 并恢复状态 | +| `abort` | `{ session_id }` | 中断当前运行 | +| `cancel_queued_run` | `{ session_id, queue_id }` | 取消等待队列中的一条 run | +| `approval.respond` | `{ session_id, approval_id, choice }` | 响应 Bridge 工具审批 | + +当前没有 `command`、`steer` 或 slash-command 相关 Socket.IO 事件。 + +### 服务端 → 客户端 + +| 事件 | 说明 | +|------|------| +| `resumed` | 返回 DB 消息、运行状态、队列长度和最近事件 | +| `run.started` | 运行开始 | +| `run.queued` | 当前 session 已有运行,新请求进入队列 | +| `message.delta` | 文本增量 | +| `reasoning.delta` | reasoning 增量 | +| `thinking.delta` | thinking 增量 | +| `reasoning.available` | reasoning 内容可用 | +| `tool.started` | 工具调用开始 | +| `tool.completed` | 工具调用结束 | +| `approval.requested` | Bridge 工具请求人工审批 | +| `approval.resolved` | 审批完成或超时 | +| `compression.started` | 上下文压缩开始 | +| `compression.completed` | 上下文压缩结束 | +| `usage.updated` | token 用量更新 | +| `abort.started` | 中断开始 | +| `abort.completed` | 中断结束 | +| `run.completed` | 运行完成 | +| `run.failed` | 运行失败 | + +### 认证 + +`/chat-run` 使用 Socket.IO auth token: + +```ts +io(`${baseUrl}/chat-run`, { + auth: { token }, + query: { profile }, +}) +``` + +如果未设置 `AUTH_DISABLED=1`,服务端会与 Web UI token 比对。 + +--- + +## ChatRunSocket 后端行为 + +### API Server 路径 + +`source=api_server` 时: + +1. 写入用户消息到 Web UI 本地 session DB。 +2. 通过 `buildCompressedHistory()` 构建上下文。 +3. 请求当前 profile 的 Hermes Gateway: + +```text +POST /v1/responses +``` + +4. 读取 SSE frame,映射为统一的 `/chat-run` 事件。 +5. 完成后写入 assistant/tool 消息,更新 usage。 + +### Bridge 路径 + +`source=cli` 时: + +1. 写入用户消息到 Web UI 本地 session DB。 +2. 复用同一套 `buildCompressedHistory()` 构建压缩上下文。 +3. 调用: + +```ts +this.bridge.chat(session_id, input, history, instructions, profile) +``` + +4. 轮询 `AgentBridgeClient.streamOutput(run_id)`。 +5. 将 Python bridge 的 delta 和 events 映射成统一事件。 +6. 
将 assistant 文本、reasoning、tool 调用结果 flush 回 DB。 + +### 队列 + +同一个 `session_id` 同时只能有一个 active run。新的 `run` 到达时: + +- 如果当前 session 正在运行,则放入 `state.queue`。 +- 发送 `run.queued` 更新队列长度。 +- 当前 run 结束或 abort 完成后,自动执行下一条 queued run。 + +--- + +## Python Agent Bridge + +### 通信协议 + +Node 和 Python bridge 之间使用本地 socket 的单行 JSON 协议: + +```json +{ "action": "chat", "session_id": "xxx", "message": "hello" } +``` + +响应也是单行 JSON: + +```json +{ "ok": true, "run_id": "xxx", "session_id": "xxx", "status": "running" } +``` + +### Endpoint + +默认 endpoint 按平台选择: + +| 平台 | 默认 endpoint | +|------|---------------| +| Windows | `tcp://127.0.0.1:18765` | +| macOS/Linux | `ipc:///tmp/hermes-agent-bridge.sock` | + +Windows 使用 TCP 是因为部分 Python/Windows 环境没有 Unix domain socket 支持。 + +### 当前实际使用的 action + +| Action | 说明 | +|--------|------| +| `chat` | 启动一轮 `AIAgent.run_conversation()` | +| `get_output` | 通过 `cursor` 和 `event_cursor` 获取增量文本与事件 | +| `interrupt` | 调用 agent 中断当前运行 | +| `approval_respond` | 响应工具审批 | +| `destroy_all` | profile 切换/管理时销毁全部 bridge 内存 session | + +bridge 代码里还保留了一些调试/维护 action,例如 `ping`、`get_result`、`get_history`、`destroy`、`list`、`shutdown`、`steer`,但当前 `/chat-run` 前端路径不会暴露这些能力。 + +旧的 `command` action 已移除,bridge 不再处理 `/new`、`/undo`、`/retry`、`/branch`、`/compress` 等斜杠命令。 + +### 会话和 profile + +`AgentPool` 维护 `session_id -> AgentSession`: + +- 每个 session 持有独立 `AIAgent` 实例。 +- session 按 profile 创建,profile 改变时会重建对应 agent。 +- `HERMES_HOME` 会在创建 agent 时临时切到 profile home。 +- `SessionDB` 按 profile 的 `state.db` 路径缓存。 +- 空闲 session 会被 bridge GC,默认 30 分钟无运行后销毁内存态。 + +### 工具和审批事件 + +bridge 从 `AIAgent` 回调中收集事件: + +- `stream.delta` +- `reasoning.delta` +- `thinking.delta` +- `tool.started` +- `tool.completed` +- `tool.progress` +- `approval.requested` +- `approval.resolved` +- `turn.boundary` +- `status` + +`ChatRunSocket` 会把这些事件转换为前端统一事件,并负责 DB 落盘。 + +审批默认等待 60 秒,超时自动 `deny`。 + +--- + +## AgentBridgeClient + +`AgentBridgeClient` 是 Node 端本地 socket 客户端。 + +行为: + +- 支持 `ipc://` 和 `tcp://` 
endpoint。 +- 每次请求新建 socket,发送一行 JSON,读取一行 JSON。 +- 请求通过内部 lock 串行化。 +- 默认请求响应超时为 `120000ms`。 +- `streamOutput()` 每 100ms 轮询一次 `get_output`。 + +示例: + +```ts +const started = await bridge.chat(sessionId, input, history, instructions, profile) + +for await (const chunk of bridge.streamOutput(started.run_id)) { + // chunk.delta + // chunk.events + // chunk.done +} +``` + +注意:目前 socket connect 阶段没有独立 connect timeout,主要依赖系统连接错误和请求响应 timeout。 + +--- + +## AgentBridgeManager + +`AgentBridgeManager` 负责启动和停止 Python bridge。 + +启动流程: + +1. 定位 `hermes_bridge.py`。 +2. 发现 `hermes-agent` 根目录。 +3. 选择 Python 解释器。 +4. 以子进程启动: + +```text +python hermes_bridge.py --endpoint <endpoint> --agent-root <agent-root> --hermes-home <hermes-home> +``` + +5. 监听 stdout,等待: + +```json +{ "event": "ready", "endpoint": "..." } +``` + +6. 默认 ready 超时为 `120000ms`。 + +Python 选择优先级: + +1. `HERMES_AGENT_BRIDGE_PYTHON` +2. `agentRoot/venv` 或 `agentRoot/.venv` +3. installed `hermes` 命令 shebang +4. `uv run --project <agentRoot> python` +5. 系统 `python3` / `python` + +关闭时先发 `SIGTERM`,1.5 秒后仍未退出则 `SIGKILL`。 + +--- + +## 启动与关闭 + +### 启动 + +`bootstrap()` 中会先尝试启动 bridge: + +```ts +agentBridgeManager = await startAgentBridgeManager() +``` + +bridge 启动失败不会阻止 Web UI 启动,但 Bridge(beta) 会话后续运行会失败。 + +随后创建统一的 chat socket: + +```ts +chatRunServer = new ChatRunSocket(groupChatServer.getIO(), getGatewayManagerInstance()) +chatRunServer.init() +``` + +### 关闭 + +服务关闭时会清理: + +- `/chat-run` Socket.IO 状态 +- Python agent bridge 子进程 +- 其他 WebSocket/Socket.IO 服务 + +--- + +## 环境变量 + +| 变量 | 说明 | +|------|------| +| `HERMES_AGENT_BRIDGE_ENDPOINT` | Bridge endpoint;Windows 默认 `tcp://127.0.0.1:18765`,macOS/Linux 默认 `ipc:///tmp/hermes-agent-bridge.sock` | +| `HERMES_AGENT_BRIDGE_TIMEOUT_MS` | Node 等待 bridge 请求响应的超时,默认 `120000` ms | +| `HERMES_AGENT_BRIDGE_STARTUP_TIMEOUT_MS` | Node 等待 Python bridge ready 的超时,默认 `120000` ms | +| `HERMES_AGENT_BRIDGE_PYTHON` | 指定 Python 解释器路径 | +| `HERMES_AGENT_ROOT` | hermes-agent 安装目录 | +| `HERMES_AGENT_BRIDGE_UV` | 指定 uv 可执行文件路径 | +| 
`HERMES_AGENT_BRIDGE_PLATFORM` | bridge 传给 Hermes Agent 的平台标识,默认 `cli` | +| `HERMES_BRIDGE_PROVIDER` | 覆盖 bridge 使用的 provider | +| `HERMES_BRIDGE_MAX_TURNS` | 覆盖 bridge 最大轮数 | +| `UV` | uv 可执行文件路径 fallback | + +Windows 首次启动慢时可以临时放大: + +```powershell +$env:HERMES_AGENT_BRIDGE_STARTUP_TIMEOUT_MS = "300000" +$env:HERMES_AGENT_BRIDGE_TIMEOUT_MS = "300000" +``` + +--- + +## 当前限制 + +- Bridge(beta) 仍依赖 Python bridge 成功启动;启动失败时 Web UI 可用,但 bridge 会话不可用。 +- bridge socket connect 阶段还没有单独 connect timeout。 +- 旧 CLI 独立面板和独立 `/cli-chat-run` namespace 已移除。 +- 旧 bridge 斜杠命令和 `command/steer` socket 控制层已移除;现在输入框内容一律按普通用户消息发送。 diff --git a/package.json b/package.json index fef437b..a29a85b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hermes-web-ui", - "version": "0.5.17", + "version": "0.5.20", "description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model web UI with multi-platform integration", "repository": { "type": "git", diff --git a/packages/client/src/api/hermes/chat.ts b/packages/client/src/api/hermes/chat.ts index a9ef556..12bfcb8 100644 --- a/packages/client/src/api/hermes/chat.ts +++ b/packages/client/src/api/hermes/chat.ts @@ -17,6 +17,7 @@ export interface StartRunRequest { session_id?: string model?: string queue_id?: string + source?: 'api_server' | 'cli' } export interface StartRunResponse { @@ -77,6 +78,8 @@ const sessionEventHandlers = new Map void onUsageUpdated: (event: RunEvent) => void onRunQueued?: (event: RunEvent) => void + onApprovalRequested?: (event: RunEvent) => void + onApprovalResolved?: (event: RunEvent) => void }>() /** @@ -288,6 +291,26 @@ function globalUsageUpdatedHandler(event: RunEvent): void { } } +function globalApprovalRequestedHandler(event: RunEvent): void { + const sid = event.session_id + if (!sid) return + + const handlers = sessionEventHandlers.get(sid) + if (handlers?.onApprovalRequested) { + handlers.onApprovalRequested(event) + } +} + +function globalApprovalResolvedHandler(event: RunEvent): 
void { + const sid = event.session_id + if (!sid) return + + const handlers = sessionEventHandlers.get(sid) + if (handlers?.onApprovalResolved) { + handlers.onApprovalResolved(event) + } +} + /** * Register event handlers for a session * @param sessionId - Session ID @@ -312,6 +335,8 @@ export function registerSessionHandlers( onAbortCompleted: (event: RunEvent) => void onUsageUpdated: (event: RunEvent) => void onRunQueued?: (event: RunEvent) => void + onApprovalRequested?: (event: RunEvent) => void + onApprovalResolved?: (event: RunEvent) => void } ): () => void { sessionEventHandlers.set(sessionId, handlers) @@ -330,6 +355,19 @@ export function unregisterSessionHandlers(sessionId: string): void { sessionEventHandlers.delete(sessionId) } +export function respondToolApproval( + sessionId: string, + approvalId: string, + choice: 'once' | 'session' | 'always' | 'deny', +): void { + const socket = connectChatRun() + socket.emit('approval.respond', { + session_id: sessionId, + approval_id: approvalId, + choice, + }) +} + export function getChatRunSocket(): Socket | null { return chatRunSocket } @@ -365,7 +403,9 @@ export function connectChatRun(): Socket { reconnection: true, reconnectionAttempts: Infinity, reconnectionDelay: 1000, - reconnectionDelayMax: 10000, + reconnectionDelayMax: 30000, + randomizationFactor: 0.5, + timeout: 30000, }) // Register global listeners only once per socket connection @@ -385,6 +425,8 @@ export function connectChatRun(): Socket { chatRunSocket.on('run.failed', globalRunFailedHandler) chatRunSocket.on('run.completed', globalRunCompletedHandler) chatRunSocket.on('run.queued', globalRunQueuedHandler) + chatRunSocket.on('approval.requested', globalApprovalRequestedHandler) + chatRunSocket.on('approval.resolved', globalApprovalResolvedHandler) // Compression events chatRunSocket.on('compression.started', globalCompressionStartedHandler) @@ -527,6 +569,14 @@ export function startRunViaSocket( if (closed) return onEvent(evt) }, + 
onApprovalRequested: (evt: RunEvent) => { + if (closed) return + onEvent(evt) + }, + onApprovalResolved: (evt: RunEvent) => { + if (closed) return + onEvent(evt) + }, } // Register handlers in the global session map diff --git a/packages/client/src/api/hermes/group-chat.ts b/packages/client/src/api/hermes/group-chat.ts index 7f8377c..4ed1c77 100644 --- a/packages/client/src/api/hermes/group-chat.ts +++ b/packages/client/src/api/hermes/group-chat.ts @@ -66,11 +66,13 @@ export function connectGroupChat(opts?: { userId?: string; userName?: string; de name: opts?.userName || localStorage.getItem('gc_user_name') || undefined, description: opts?.description || localStorage.getItem('gc_user_description') || undefined, }, - transports: ['websocket'], + transports: ['websocket', 'polling'], reconnection: true, reconnectionAttempts: Infinity, reconnectionDelay: 1000, reconnectionDelayMax: 30000, + randomizationFactor: 0.5, + timeout: 30000, }) return socket @@ -185,4 +187,3 @@ export async function forceCompress(roomId: string): Promise<{ success: boolean; method: 'POST', }) } - diff --git a/packages/client/src/components/hermes/chat/ChatPanel.vue b/packages/client/src/components/hermes/chat/ChatPanel.vue index 783c3ef..4972345 100644 --- a/packages/client/src/components/hermes/chat/ChatPanel.vue +++ b/packages/client/src/components/hermes/chat/ChatPanel.vue @@ -124,11 +124,16 @@ const groupedSessions = computed(() => { return keys.map((key) => ({ source: key, - label: key ? getSourceLabel(key) : t("chat.other"), + label: key ? getChatSourceLabel(key) : t("chat.other"), sessions: sortSessionsWithActiveFirst(map.get(key)!), })); }); +function getChatSourceLabel(source?: string): string { + if (source === "cli") return "Bridge (beta)"; + return getSourceLabel(source); +} + function toggleGroup(source: string) { const isExpanded = !collapsedGroups.value.has(source); if (isExpanded) { @@ -204,10 +209,40 @@ const activeSessionSource = computed(() => currentMode.value === "chat" ? 
chatStore.activeSession?.source || "" : "", ); +const activeApproval = computed(() => chatStore.activePendingApproval); + function handleNewChat() { chatStore.newChat(); } +function handleNewCliChat() { + const session = chatStore.newCliSession() + chatStore.switchSession(session.id) +} + +const newChatOptions = computed(() => [ + { + label: "API", + key: "api_server", + }, + { + label: "Bridge (beta)", + key: "cli", + }, +]); + +function handleNewChatSelect(key: string | number) { + if (key === "cli") { + handleNewCliChat(); + return; + } + handleNewChat(); +} + +function handleApproval(choice: "once" | "session" | "always" | "deny") { + chatStore.respondApproval(choice); +} + async function copySessionId(id?: string) { const sessionId = id || chatStore.activeSessionId; if (sessionId) { @@ -556,21 +591,27 @@ async function handleWorkspaceConfirm() { - - - + + + + +
@@ -723,7 +764,7 @@ async function handleWorkspaceConfirm() { {{ headerTitle }} {{ - getSourceLabel(activeSessionSource) + getChatSourceLabel(activeSessionSource) }} {{ t("chat.copySessionId") }} - - - - + + + + + +
- chatStore.messages.filter((m) => m.role !== "tool"), -); - const currentToolCalls = computed(() => { const msgs = chatStore.messages; // Find the last user message index @@ -45,6 +41,22 @@ const currentToolCalls = computed(() => { return [...tools].reverse(); }); +const displayMessages = computed(() => + chatStore.messages.filter((m) => { + if (m.role === "tool") return false; + if ( + m.role === "assistant" && + m.isStreaming && + !m.content?.trim() && + !!m.reasoning?.trim() && + currentToolCalls.value.length === 0 + ) { + return false; + } + return true; + }), +); + const queuedMessages = computed(() => { const sid = chatStore.activeSessionId; if (!sid) return []; diff --git a/packages/client/src/i18n/locales/en.ts b/packages/client/src/i18n/locales/en.ts index e895170..ffecbf8 100644 --- a/packages/client/src/i18n/locales/en.ts +++ b/packages/client/src/i18n/locales/en.ts @@ -131,6 +131,7 @@ export default { contextEditSuccess: 'Context length updated', contextEditFailed: 'Update failed', emptyState: 'Start a conversation with Hermes Agent', + cliEmptyState: 'Start a CLI chat session', inputPlaceholder: 'Type a message... (Enter to send, Shift+Enter for new line)', attachFiles: 'Attach files', autoPlaySpeech: 'Auto-play voice', @@ -159,6 +160,7 @@ export default { searchEnterHint: 'Enter to open · Esc to close', searchFailed: 'Failed to search sessions', newChat: 'New Chat', + newCliChat: 'New CLI', deleteSession: 'Delete this session?', sessionDeleted: 'Session deleted', toggleBatchMode: 'Batch selection', diff --git a/packages/client/src/i18n/locales/zh.ts b/packages/client/src/i18n/locales/zh.ts index 227624d..0f91d69 100644 --- a/packages/client/src/i18n/locales/zh.ts +++ b/packages/client/src/i18n/locales/zh.ts @@ -131,6 +131,7 @@ export default { contextEditSuccess: '上下文长度已更新', contextEditFailed: '更新失败', emptyState: '开始与 Hermes Agent 对话', + cliEmptyState: '开始 CLI 对话', inputPlaceholder: '输入消息... 
(Enter 发送,Shift+Enter 换行)', attachFiles: '添加附件', autoPlaySpeech: '自动播放语音', @@ -159,6 +160,7 @@ export default { searchEnterHint: 'Enter 打开 · Esc 关闭', searchFailed: '搜索会话失败', newChat: '新建对话', + newCliChat: '新建 CLI', deleteSession: '确定删除此会话?', sessionDeleted: '会话已删除', toggleBatchMode: '批量选择', diff --git a/packages/client/src/shared/providers.ts b/packages/client/src/shared/providers.ts deleted file mode 100644 index ef2bc25..0000000 --- a/packages/client/src/shared/providers.ts +++ /dev/null @@ -1,310 +0,0 @@ -/** - * Provider registry — single source of truth for both frontend and backend. - * Synced from hermes-agent hermes_cli/models.py _PROVIDER_MODELS. - */ - -export interface ProviderPreset { - label: string - value: string - base_url: string - models: string[] -} - -export const PROVIDER_PRESETS: ProviderPreset[] = [ - { - label: 'Anthropic', - value: 'anthropic', - base_url: 'https://api.anthropic.com', - models: [ - 'claude-opus-4-7', - 'claude-opus-4-6', - 'claude-sonnet-4-6', - 'claude-opus-4-5-20251101', - 'claude-sonnet-4-5-20250929', - 'claude-opus-4-20250514', - 'claude-sonnet-4-20250514', - 'claude-haiku-4-5-20251001', - ], - }, - { - label: 'Google AI Studio', - value: 'gemini', - base_url: 'https://generativelanguage.googleapis.com/v1beta/openai', - models: [ - 'gemini-3.1-pro-preview', - 'gemini-3-flash-preview', - 'gemini-3.1-flash-lite-preview', - 'gemini-2.5-pro', - 'gemini-2.5-flash', - 'gemini-2.5-flash-lite', - 'gemma-4-31b-it', - 'gemma-4-26b-it', - ], - }, - { - label: 'DeepSeek', - value: 'deepseek', - base_url: 'https://api.deepseek.com', - models: ['deepseek-chat', 'deepseek-reasoner'], - }, - { - label: 'Z.AI / GLM', - value: 'zai', - base_url: 'https://api.z.ai/api/paas/v4', - models: ['glm-5.1', 'glm-5', 'glm-5v-turbo', 'glm-5-turbo', 'glm-4.7', 'glm-4.5', 'glm-4.5-flash'], - }, - { - label: 'Kimi for Coding', - value: 'kimi-coding', - base_url: 'https://api.kimi.com/coding/v1', - models: [ - 'kimi-for-coding', - 'kimi-k2.5', - 
'kimi-k2-thinking', - 'kimi-k2-thinking-turbo', - 'kimi-k2-turbo-preview', - 'kimi-k2-0905-preview', - ], - }, - { - label: 'Kimi for Coding (CN)', - value: 'kimi-coding-cn', - base_url: 'https://api.kimi.com/coding/v1', - models: [ - 'kimi-k2.5', - 'kimi-k2-thinking', - 'kimi-k2-turbo-preview', - 'kimi-k2-0905-preview', - ], - }, - { - label: 'Moonshot', - value: 'moonshot', - base_url: 'https://api.moonshot.cn/v1', - models: [ - 'kimi-k2.5', - 'kimi-k2-thinking', - 'kimi-k2-turbo-preview', - 'kimi-k2-0905-preview', - ], - }, - { - label: 'xAI', - value: 'xai', - base_url: 'https://api.x.ai/v1', - models: ['grok-4.20-reasoning', 'grok-4-1-fast-reasoning'], - }, - { - label: 'MiniMax', - value: 'minimax', - base_url: 'https://api.minimax.io/anthropic/v1', - models: ['MiniMax-M2.7', 'MiniMax-M2.7-highspeed', 'MiniMax-M2.5', 'MiniMax-M2.5-highspeed', 'MiniMax-M2.1', 'MiniMax-M2.1-highspeed', 'MiniMax-M2', 'MiniMax-M2-highspeed'], - }, - { - label: 'MiniMax (China)', - value: 'minimax-cn', - base_url: 'https://api.minimaxi.com/v1', - models: ['MiniMax-M2.7', 'MiniMax-M2.7-highspeed', 'MiniMax-M2.5', 'MiniMax-M2.5-highspeed', 'MiniMax-M2.1', 'MiniMax-M2.1-highspeed', 'MiniMax-M2', 'MiniMax-M2-highspeed'], - }, - { - label: 'Alibaba Cloud', - value: 'alibaba', - base_url: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1', - models: [ - 'qwen3.5-plus', - 'qwen3-coder-plus', - 'qwen3-coder-next', - 'glm-5', - 'glm-4.7', - 'kimi-k2.5', - 'MiniMax-M2.5', - ], - }, - { - label: 'Alibaba Cloud (Coding Plan)', - value: 'alibaba-coding-plan', - // NOTE: This is the international (intl) DashScope endpoint, matching upstream - // hermes-agent (auth.py:255). Mainland China DashScope accounts (sk-sp-* keys - // issued by dashscope.aliyun.com) must override via ALIBABA_CODING_PLAN_BASE_URL= - // https://coding.dashscope.aliyuncs.com/v1 (no -intl), since the -intl endpoint - // returns HTTP 401 for those keys. 
- base_url: 'https://coding-intl.dashscope.aliyuncs.com/v1', - models: [ - 'qwen3.5-plus', - 'qwen3-max-2026-01-23', - 'qwen3-coder-next', - 'qwen3-coder-plus', - 'glm-5', - 'glm-4.7', - 'kimi-k2.5', - 'MiniMax-M2.5', - ], - }, - { - label: 'Hugging Face', - value: 'huggingface', - base_url: 'https://router.huggingface.co/v1', - models: [ - 'Qwen/Qwen3.5-397B-A17B', - 'Qwen/Qwen3.5-35B-A3B', - 'deepseek-ai/DeepSeek-V3.2', - 'moonshotai/Kimi-K2.5', - 'MiniMaxAI/MiniMax-M2.5', - 'zai-org/GLM-5', - 'XiaomiMiMo/MiMo-V2-Flash', - 'moonshotai/Kimi-K2-Thinking', - ], - }, - { - label: 'Xiaomi MiMo', - value: 'xiaomi', - base_url: 'https://api.xiaomimimo.com/v1', - models: ['mimo-v2-pro', 'mimo-v2-omni', 'mimo-v2-flash'], - }, - { - label: 'Kilo Code', - value: 'kilocode', - base_url: 'https://api.kilo.ai/api/gateway', - models: [ - 'anthropic/claude-opus-4.6', - 'anthropic/claude-sonnet-4.6', - 'openai/gpt-5.4', - 'google/gemini-3-pro-preview', - 'google/gemini-3-flash-preview', - ], - }, - { - label: 'Vercel AI Gateway', - value: 'ai-gateway', - base_url: 'https://ai-gateway.vercel.sh/v1', - models: [ - 'anthropic/claude-opus-4.6', - 'anthropic/claude-sonnet-4.6', - 'anthropic/claude-sonnet-4.5', - 'anthropic/claude-haiku-4.5', - 'openai/gpt-5', - 'openai/gpt-4.1', - 'openai/gpt-4.1-mini', - 'google/gemini-3-pro-preview', - 'google/gemini-3-flash', - 'google/gemini-2.5-pro', - 'google/gemini-2.5-flash', - 'deepseek/deepseek-v3.2', - ], - }, - { - label: 'CLIProxyAPI', - value: 'cliproxyapi', - base_url: 'http://127.0.0.1:8317/v1', - models: [ - 'gpt-5.5', - 'gpt-5-codex', - 'claude-sonnet-4-6', - 'claude-sonnet-4-5-20250929', - 'gemini-3.1-pro-preview', - 'gemini-2.5-pro', - ], - }, - { - label: 'OpenCode Zen', - value: 'opencode-zen', - base_url: 'https://opencode.ai/zen/v1', - models: [ - 'gpt-5.4-pro', - 'gpt-5.4', - 'gpt-5.3-codex', - 'gpt-5.3-codex-spark', - 'gpt-5.2', - 'gpt-5.2-codex', - 'gpt-5.1', - 'gpt-5.1-codex', - 'gpt-5.1-codex-max', - 'gpt-5.1-codex-mini', 
- 'gpt-5', - 'gpt-5-codex', - 'gpt-5-nano', - 'claude-opus-4-6', - 'claude-opus-4-5', - 'claude-opus-4-1', - 'claude-sonnet-4-6', - 'claude-sonnet-4-5', - 'claude-sonnet-4', - 'claude-haiku-4-5', - 'claude-3-5-haiku', - 'gemini-3.1-pro', - 'gemini-3-pro', - 'gemini-3-flash', - 'minimax-m2.7', - 'minimax-m2.5', - 'minimax-m2.5-free', - 'minimax-m2.1', - 'glm-5', - 'glm-4.7', - 'glm-4.6', - 'kimi-k2.5', - 'kimi-k2-thinking', - 'kimi-k2', - 'qwen3-coder', - 'big-pickle', - ], - }, - { - label: 'OpenCode Go', - value: 'opencode-go', - base_url: 'https://opencode.ai/zen/go/v1', - models: ['glm-5.1', 'glm-5', 'kimi-k2.5', 'mimo-v2-pro', 'mimo-v2-omni', 'minimax-m2.7', 'minimax-m2.5'], - }, - { - label: 'OpenAI Codex', - value: 'openai-codex', - base_url: 'https://chatgpt.com/backend-api/codex', - models: ['gpt-5.5', 'gpt-5.4-mini', 'gpt-5.4', 'gpt-5.3-codex', 'gpt-5.2-codex', 'gpt-5.1-codex-max', 'gpt-5.1-codex-mini'], - }, - { - label: 'Arcee AI', - value: 'arcee', - base_url: 'https://api.arcee.ai/v1', - models: ['trinity-large-thinking', 'trinity-large-preview', 'trinity-mini'], - }, - { - label: 'OpenRouter', - value: 'openrouter', - base_url: 'https://openrouter.ai/api/v1', - models: [], - }, - { - label: 'GitHub Copilot', - value: 'copilot', - base_url: 'https://api.githubcopilot.com', - models: [ - 'gpt-5.4', - 'gpt-5.4-mini', - 'gpt-5-mini', - 'gpt-5.3-codex', - 'gpt-5.2-codex', - 'gpt-4.1', - 'gpt-4o', - 'gpt-4o-mini', - 'claude-sonnet-4.6', - 'claude-sonnet-4', - 'claude-sonnet-4.5', - 'claude-haiku-4.5', - 'gemini-3.1-pro-preview', - 'gemini-3-pro-preview', - 'gemini-3-flash-preview', - 'gemini-2.5-pro', - 'grok-code-fast-1', - ], - }, -] - -/** Build a Record for backend lookup */ -export function buildProviderModelMap(): Record { - const map: Record = {} - for (const p of PROVIDER_PRESETS) { - if (p.models.length > 0) { - map[p.value] = p.models - } - } - return map -} diff --git a/packages/client/src/stores/hermes/chat.ts 
b/packages/client/src/stores/hermes/chat.ts index 70de49b..e08cabc 100644 --- a/packages/client/src/stores/hermes/chat.ts +++ b/packages/client/src/stores/hermes/chat.ts @@ -1,4 +1,4 @@ -import { startRunViaSocket, resumeSession, registerSessionHandlers, unregisterSessionHandlers, getChatRunSocket, type RunEvent, type ContentBlock as ContentBlockImport } from '@/api/hermes/chat' +import { startRunViaSocket, resumeSession, registerSessionHandlers, unregisterSessionHandlers, getChatRunSocket, respondToolApproval, type RunEvent, type ContentBlock as ContentBlockImport } from '@/api/hermes/chat' import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions' import { getApiKey } from '@/api/client' import { defineStore } from 'pinia' @@ -43,6 +43,16 @@ export interface Message { queued?: boolean } +export interface PendingApproval { + sessionId: string + approvalId: string + command: string + description: string + choices: Array<'once' | 'session' | 'always' | 'deny'> + allowPermanent: boolean + requestedAt: number +} + export interface Session { id: string title: string @@ -320,6 +330,11 @@ export const useChatStore = defineStore('chat', () => { const queueLengths = ref>(new Map()) /** sessionId → queued user messages not yet visible in the transcript */ const queuedUserMessages = ref>(new Map()) + const pendingApprovals = ref>(new Map()) + const activePendingApproval = computed(() => { + const sid = activeSessionId.value + return sid ? 
pendingApprovals.value.get(sid) || null : null + }) // 自动播放语音开关 const autoPlaySpeechEnabled = ref(false) @@ -432,6 +447,30 @@ export const useChatStore = defineStore('chat', () => { return session } + function newCliSession(): Session { + const now = new Date() + const ts = [ + now.getFullYear(), + String(now.getMonth() + 1).padStart(2, '0'), + String(now.getDate()).padStart(2, '0'), + '_', + String(now.getHours()).padStart(2, '0'), + String(now.getMinutes()).padStart(2, '0'), + String(now.getSeconds()).padStart(2, '0'), + ].join('') + const hex = Math.random().toString(16).slice(2, 8) + const session: Session = { + id: `${ts}_${hex}`, + title: '', + source: 'cli', + messages: [], + createdAt: Date.now(), + updatedAt: Date.now(), + } + sessions.value.unshift(session) + return session + } + async function switchSession(sessionId: string, focusId?: string | null) { clearThinkingObservationFor(sessionId) activeSessionId.value = sessionId @@ -503,6 +542,49 @@ export const useChatStore = defineStore('chat', () => { setAbortState({ aborting: true, synced: null }) } else if (e.event === 'abort.completed') { setAbortState({ aborting: false, synced: e.synced ?? false }) + } else if (e.event === 'approval.requested') { + setPendingApproval({ ...e, session_id: sessionId } as RunEvent) + } else if (e.event === 'approval.resolved') { + clearPendingApproval({ ...e, session_id: sessionId } as RunEvent) + } else if (e.event === 'tool.started') { + const msgs = getSessionMsgs(sessionId) + const toolCallId = e.tool_call_id as string | undefined + const existingTool = toolCallId + ? msgs.find(m => m.role === 'tool' && m.toolCallId === toolCallId) + : null + if (existingTool) { + updateMessage(sessionId, existingTool.id, { + toolName: e.tool || e.name, + toolArgs: typeof e.arguments === 'string' ? 
e.arguments : existingTool.toolArgs, + toolPreview: e.preview || existingTool.toolPreview, + toolStatus: existingTool.toolStatus || 'running', + }) + } else { + addMessage(sessionId, { + id: uid(), + role: 'tool', + content: '', + timestamp: Date.now(), + toolName: e.tool || e.name, + toolCallId, + toolPreview: e.preview, + toolArgs: typeof e.arguments === 'string' ? e.arguments : undefined, + toolStatus: 'running', + }) + } + } else if (e.event === 'tool.completed') { + const msgs = getSessionMsgs(sessionId) + const toolCallId = e.tool_call_id as string | undefined + const toolMsgs = toolCallId + ? msgs.filter(m => m.role === 'tool' && m.toolCallId === toolCallId) + : msgs.filter(m => m.role === 'tool' && m.toolStatus === 'running') + if (toolMsgs.length > 0) { + updateMessage(sessionId, toolMsgs[toolMsgs.length - 1].id, { + toolStatus: e.error === true ? 'error' : 'done', + toolDuration: e.duration, + toolResult: typeof e.output === 'string' ? e.output : undefined, + }) + } } } } @@ -603,6 +685,45 @@ export const useChatStore = defineStore('chat', () => { }) } + function setPendingApproval(evt: RunEvent) { + const sid = evt.session_id + const approvalId = (evt as any).approval_id as string | undefined + if (!sid || !approvalId) return + const rawChoices = Array.isArray((evt as any).choices) ? (evt as any).choices : ['once', 'session', 'deny'] + const choices = rawChoices + .filter((choice: unknown): choice is PendingApproval['choices'][number] => + choice === 'once' || choice === 'session' || choice === 'always' || choice === 'deny') + pendingApprovals.value.set(sid, { + sessionId: sid, + approvalId, + command: String((evt as any).command || ''), + description: String((evt as any).description || ''), + choices: choices.length ? 
choices : ['once', 'session', 'deny'], + allowPermanent: Boolean((evt as any).allow_permanent), + requestedAt: Date.now(), + }) + pendingApprovals.value = new Map(pendingApprovals.value) + } + + function clearPendingApproval(evt: RunEvent) { + const sid = evt.session_id + if (!sid) return + const current = pendingApprovals.value.get(sid) + if (!current) return + const approvalId = (evt as any).approval_id + if (approvalId && current.approvalId !== approvalId) return + pendingApprovals.value.delete(sid) + pendingApprovals.value = new Map(pendingApprovals.value) + } + + function respondApproval(choice: PendingApproval['choices'][number]) { + const pending = activePendingApproval.value + if (!pending) return + respondToolApproval(pending.sessionId, pending.approvalId, choice) + pendingApprovals.value.delete(pending.sessionId) + pendingApprovals.value = new Map(pendingApprovals.value) + } + function showNextQueuedUserMessage(sessionId: string) { const queue = queuedUserMessages.value.get(sessionId) if (!queue?.length) return @@ -715,6 +836,7 @@ export const useChatStore = defineStore('chat', () => { session_id: sid, model: sessionModel || undefined, queue_id: userMsg.id, + source: (activeSession.value?.source === 'cli' ? 
'cli' : 'api_server') as 'cli' | 'api_server', } if (shouldQueue) { @@ -967,6 +1089,16 @@ export const useChatStore = defineStore('chat', () => { break } + case 'approval.requested': { + setPendingApproval(evt) + break + } + + case 'approval.resolved': { + clearPendingApproval(evt) + break + } + case 'run.completed': { const msgs = getSessionMsgs(sid) const lastMsg = activeAssistantMessageId @@ -1394,6 +1526,16 @@ export const useChatStore = defineStore('chat', () => { break } + case 'approval.requested': { + setPendingApproval(evt) + break + } + + case 'approval.resolved': { + clearPendingApproval(evt) + break + } + case 'run.completed': { const hasQueue = (evt as any).queue_remaining > 0 if (hasQueue) { @@ -1689,12 +1831,15 @@ export const useChatStore = defineStore('chat', () => { isAborting, queueLengths, queuedUserMessages, + pendingApprovals, + activePendingApproval, removeQueuedMessage, isLoadingSessions, sessionsLoaded, isLoadingMessages, newChat, + newCliSession, switchSession, switchSessionModel, addOrUpdateSession, @@ -1702,6 +1847,7 @@ export const useChatStore = defineStore('chat', () => { deleteSession, sendMessage, stopStreaming, + respondApproval, loadSessions, refreshActiveSession, getThinkingObservation, diff --git a/packages/client/src/views/hermes/HistoryView.vue b/packages/client/src/views/hermes/HistoryView.vue index 0273670..d6b0cd3 100644 --- a/packages/client/src/views/hermes/HistoryView.vue +++ b/packages/client/src/views/hermes/HistoryView.vue @@ -240,12 +240,12 @@ watch(hermesSessionsLoaded, (loaded) => { if (loaded && hermesSessions.value.length > 0) { // Only auto-load if no session is currently active if (!historySessionId.value || !hermesSessions.value.find(s => s.id === historySessionId.value)) { - // Find first CLI session + // Find first CLI session. 
const firstCliSession = hermesSessions.value.find(s => s.source === 'cli') if (firstCliSession) { // Ensure the CLI group is expanded - if (collapsedGroups.value.has('cli')) { - collapsedGroups.value = new Set([...collapsedGroups.value].filter(s => s !== 'cli')) + if (collapsedGroups.value.has(firstCliSession.source)) { + collapsedGroups.value = new Set([...collapsedGroups.value].filter(s => s !== firstCliSession.source)) } // Load session details handleSessionClick(firstCliSession.id) diff --git a/packages/server/src/controllers/hermes/profiles.ts b/packages/server/src/controllers/hermes/profiles.ts index 97981bd..a7cc21c 100644 --- a/packages/server/src/controllers/hermes/profiles.ts +++ b/packages/server/src/controllers/hermes/profiles.ts @@ -7,6 +7,26 @@ import { SessionDeleter } from '../../services/hermes/session-deleter' import { getGatewayManagerInstance } from '../../services/gateway-bootstrap' import { logger } from '../../services/logger' import { smartCloneCleanup } from '../../services/hermes/profile-credentials' +import { detectHermesHome } from '../../services/hermes/hermes-path' + +function profileExistsForManualSwitch(name: string): boolean { + const base = detectHermesHome() + if (!name || name === 'default') return true + return existsSync(join(base, 'profiles', name, 'config.yaml')) || existsSync(join(base, 'profiles', name)) +} + +async function useProfileWithFallback(name: string): Promise { + try { + return await hermesCli.useProfile(name) + } catch (err: any) { + if (!profileExistsForManualSwitch(name)) throw err + + const base = detectHermesHome() + writeFileSync(join(base, 'active_profile'), `${name}\n`, 'utf-8') + logger.warn(err, '[switchProfile] hermes profile use failed; wrote active_profile directly for existing profile "%s"', name) + return `Switched to profile ${name}` + } +} export async function list(ctx: any) { try { @@ -159,7 +179,7 @@ export async function switchProfile(ctx: any) { return } try { - const output = await 
hermesCli.useProfile(name) + const output = await useProfileWithFallback(name) // Verify the active_profile file immediately (Hermes CLI writes synchronously) // Quick verification with 2 retries to handle edge cases (filesystem delays, concurrency) @@ -185,6 +205,16 @@ export async function switchProfile(ctx: any) { const mgr = getGatewayManagerInstance() if (mgr) { mgr.setActiveProfile(name) } + // Destroy all bridge sessions so they get recreated with the new profile config + try { + const { AgentBridgeClient } = await import('../../services/hermes/agent-bridge') + const bridge = new AgentBridgeClient() + await bridge.destroyAll() + logger.info('[switchProfile] destroyed all bridge sessions for profile "%s"', name) + } catch (err: any) { + logger.warn(err, '[switchProfile] failed to destroy bridge sessions') + } + try { const detail = await hermesCli.getProfile(name) logger.debug('Profile detail.path = %s', detail.path) diff --git a/packages/server/src/controllers/hermes/sessions.ts b/packages/server/src/controllers/hermes/sessions.ts index 0597fee..ce288e6 100644 --- a/packages/server/src/controllers/hermes/sessions.ts +++ b/packages/server/src/controllers/hermes/sessions.ts @@ -1,19 +1,16 @@ import * as hermesCli from '../../services/hermes/hermes-cli' -import { listConversationSummaries, getConversationDetail } from '../../services/hermes/conversations' -import { listConversationSummariesFromDb, getConversationDetailFromDb } from '../../db/hermes/conversations-db' -import { listSessionSummaries, searchSessionSummaries, getUsageStatsFromDb, getSessionDetailFromDb } from '../../db/hermes/sessions-db' +import { listSessionSummaries, getUsageStatsFromDb, getSessionDetailFromDb } from '../../db/hermes/sessions-db' import { listSessions as localListSessions, searchSessions as localSearchSessions, getSessionDetail as localGetSessionDetail, deleteSession as localDeleteSession, renameSession as localRenameSession, - useLocalSessionStore, } from 
'../../db/hermes/session-store' import { ExportCompressor } from '../../lib/context-compressor/export-compressor' import { getGatewayManagerInstance } from '../../services/gateway-bootstrap' -import { deleteUsage, getUsage, getUsageBatch, getLocalUsageStats } from '../../db/hermes/usage-store' -import type { LocalUsageStats, UsageStatsModelRow, UsageStatsDailyRow } from '../../db/hermes/usage-store' +import { deleteUsage, getUsage, getUsageBatch } from '../../db/hermes/usage-store' +import type { UsageStatsModelRow, UsageStatsDailyRow } from '../../db/hermes/usage-store' import { getModelContextLength } from '../../services/hermes/model-context' import { getActiveProfileName } from '../../services/hermes/hermes-profile' import { getGroupChatServer } from '../../routes/hermes/group-chat' @@ -36,130 +33,75 @@ function filterPendingDeletedConversationSummaries(items: ConversationSummary[]) export async function listConversations(ctx: any) { const source = (ctx.query.source as string) || undefined - const humanOnly = (ctx.query.humanOnly as string) !== 'false' && ctx.query.humanOnly !== '0' const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined - if (useLocalSessionStore()) { - const profile = getActiveProfileName() - const sessions = localListSessions(profile, source, limit && limit > 0 ? 
limit : 200) - const summaries: ConversationSummary[] = sessions.map(s => ({ - id: s.id, - source: s.source, - model: s.model, - title: s.title, - started_at: s.started_at, - ended_at: s.ended_at, - last_active: s.last_active, - message_count: s.message_count, - tool_call_count: s.tool_call_count, - input_tokens: s.input_tokens, - output_tokens: s.output_tokens, - cache_read_tokens: s.cache_read_tokens, - cache_write_tokens: s.cache_write_tokens, - reasoning_tokens: s.reasoning_tokens, - billing_provider: s.billing_provider, - estimated_cost_usd: s.estimated_cost_usd, - actual_cost_usd: s.actual_cost_usd, - cost_status: s.cost_status, - preview: s.preview, - workspace: s.workspace || null, - is_active: s.ended_at == null && (Date.now() / 1000 - s.last_active) <= 300, - thread_session_count: 1, - })) - ctx.body = { sessions: filterPendingDeletedConversationSummaries(summaries) } - return - } - - try { - const sessions = await listConversationSummariesFromDb({ source, humanOnly, limit }) - ctx.body = { sessions: filterPendingDeletedConversationSummaries(sessions) } - return - } catch (err) { - logger.warn(err, 'Hermes Conversation DB: summary query failed, falling back to CLI export') - } - - const sessions = await listConversationSummaries({ source, humanOnly, limit }) - ctx.body = { sessions: filterPendingDeletedConversationSummaries(sessions) } + const profile = getActiveProfileName() + const sessions = localListSessions(profile, source, limit && limit > 0 ? 
limit : 200) + const summaries: ConversationSummary[] = sessions.map(s => ({ + id: s.id, + source: s.source, + model: s.model, + title: s.title, + started_at: s.started_at, + ended_at: s.ended_at, + last_active: s.last_active, + message_count: s.message_count, + tool_call_count: s.tool_call_count, + input_tokens: s.input_tokens, + output_tokens: s.output_tokens, + cache_read_tokens: s.cache_read_tokens, + cache_write_tokens: s.cache_write_tokens, + reasoning_tokens: s.reasoning_tokens, + billing_provider: s.billing_provider, + estimated_cost_usd: s.estimated_cost_usd, + actual_cost_usd: s.actual_cost_usd, + cost_status: s.cost_status, + preview: s.preview, + workspace: s.workspace || null, + is_active: s.ended_at == null && (Date.now() / 1000 - s.last_active) <= 300, + thread_session_count: 1, + })) + ctx.body = { sessions: filterPendingDeletedConversationSummaries(summaries) } } export async function getConversationMessages(ctx: any) { - const source = (ctx.query.source as string) || undefined const humanOnly = (ctx.query.humanOnly as string) !== 'false' && ctx.query.humanOnly !== '0' - if (useLocalSessionStore()) { - const detail = localGetSessionDetail(ctx.params.id) - if (!detail) { - ctx.status = 404 - ctx.body = { error: 'Conversation not found' } - return - } - const messages = detail.messages - .filter(m => { - if (humanOnly && m.role !== 'user' && m.role !== 'assistant') return false - if (!m.content) return false - return true - }) - .map(m => ({ - id: m.id, - session_id: m.session_id, - role: m.role as 'user' | 'assistant', - content: m.content, - timestamp: m.timestamp, - })) - ctx.body = { - session_id: ctx.params.id, - messages, - visible_count: messages.length, - thread_session_count: 1, - } - return - } - - try { - const detail = await getConversationDetailFromDb(ctx.params.id, { source, humanOnly }) - if (!detail) { - ctx.status = 404 - ctx.body = { error: 'Conversation not found' } - return - } - ctx.body = detail - return - } catch (err) { - 
logger.warn(err, 'Hermes Conversation DB: detail query failed, falling back to CLI export') - } - - const detail = await getConversationDetail(ctx.params.id, { source, humanOnly }) + const detail = localGetSessionDetail(ctx.params.id) if (!detail) { ctx.status = 404 ctx.body = { error: 'Conversation not found' } return } - ctx.body = detail + const messages = detail.messages + .filter(m => { + if (humanOnly && m.role !== 'user' && m.role !== 'assistant') return false + if (!m.content) return false + return true + }) + .map(m => ({ + id: m.id, + session_id: m.session_id, + role: m.role as 'user' | 'assistant', + content: m.content, + timestamp: m.timestamp, + })) + ctx.body = { + session_id: ctx.params.id, + messages, + visible_count: messages.length, + thread_session_count: 1, + } } export async function list(ctx: any) { - if (useLocalSessionStore()) { - const source = (ctx.query.source as string) || undefined - const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined - const profile = getActiveProfileName() - const sessions = localListSessions(profile, source, limit && limit > 0 ? limit : 2000) - ctx.body = { sessions: filterPendingDeletedSessions(sessions) } - return - } - const source = (ctx.query.source as string) || undefined const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined + const profile = getActiveProfileName() + const effectiveLimit = limit && limit > 0 ? limit : 2000 - try { - const sessions = await listSessionSummaries(source, limit && limit > 0 ? 
limit : 2000) - ctx.body = { sessions: filterPendingDeletedSessions(sessions) } - return - } catch (err) { - logger.warn(err, 'Hermes Session DB: summary query failed, falling back to CLI') - } - - const sessions = await hermesCli.listSessions(source, limit) - ctx.body = { sessions: filterPendingDeletedSessions(sessions) } + const allSessions = localListSessions(profile, source, effectiveLimit) + ctx.body = { sessions: filterPendingDeletedSessions(allSessions.filter(s => s.source === 'api_server' || s.source === 'cli')) } } /** @@ -169,58 +111,23 @@ export async function list(ctx: any) { export async function listHermesSessions(ctx: any) { const source = (ctx.query.source as string) || undefined const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined + const profile = getActiveProfileName() + const effectiveLimit = limit && limit > 0 ? limit : 2000 - try { - const sessions = await listSessionSummaries(source, limit && limit > 0 ? limit : 2000) - ctx.body = { sessions: filterPendingDeletedSessions(sessions.filter(s => s.source !== 'api_server')) } - return - } catch (err) { - logger.warn(err, 'Hermes Session DB: summary query failed, falling back to CLI') - } - - const sessions = await hermesCli.listSessions(source, limit) - ctx.body = { sessions: filterPendingDeletedSessions(sessions.filter(s => s.source !== 'api_server')) } + const allSessions = localListSessions(profile, source, effectiveLimit) + ctx.body = { sessions: filterPendingDeletedSessions(allSessions.filter(s => s.source !== 'api_server')) } } export async function search(ctx: any) { - if (useLocalSessionStore()) { - const q = typeof ctx.query.q === 'string' ? ctx.query.q : '' - const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined - const profile = getActiveProfileName() - const results = localSearchSessions(profile, q, limit && limit > 0 ? 
limit : 20) - ctx.body = { results: filterPendingDeletedSessions(results) } - return - } - const q = typeof ctx.query.q === 'string' ? ctx.query.q : '' - const source = typeof ctx.query.source === 'string' && ctx.query.source.trim() - ? ctx.query.source.trim() - : undefined const limit = ctx.query.limit ? parseInt(ctx.query.limit as string, 10) : undefined - - try { - const results = await searchSessionSummaries(q, source, limit && limit > 0 ? limit : 20) - ctx.body = { results: filterPendingDeletedSessions(results) } - } catch (err) { - logger.error(err, 'Hermes Session DB: search failed') - ctx.status = 500 - ctx.body = { error: 'Failed to search sessions' } - } + const profile = getActiveProfileName() + const results = localSearchSessions(profile, q, limit && limit > 0 ? limit : 20) + ctx.body = { results: filterPendingDeletedSessions(results) } } export async function get(ctx: any) { - if (useLocalSessionStore()) { - const session = localGetSessionDetail(ctx.params.id) - if (!session) { - ctx.status = 404 - ctx.body = { error: 'Session not found' } - return - } - ctx.body = { session } - return - } - - const session = await hermesCli.getSession(ctx.params.id) + const session = localGetSessionDetail(ctx.params.id) if (!session) { ctx.status = 404 ctx.body = { error: 'Session not found' } @@ -262,21 +169,8 @@ export async function getHermesSession(ctx: any) { } export async function remove(ctx: any) { - if (useLocalSessionStore()) { - const sessionId = ctx.params.id - const ok = localDeleteSession(sessionId) - if (!ok) { - ctx.status = 500 - ctx.body = { error: 'Failed to delete session' } - return - } - deleteUsage(sessionId) - ctx.body = { ok: true } - return - } - const sessionId = ctx.params.id - const ok = await hermesCli.deleteSession(sessionId) + const ok = localDeleteSession(sessionId) if (!ok) { ctx.status = 500 ctx.body = { error: 'Failed to delete session' } @@ -307,27 +201,14 @@ export async function batchRemove(ctx: any) { errors: [] as Array<{ id: 
string; error: string }> } - if (useLocalSessionStore()) { - for (const id of validIds) { - const ok = localDeleteSession(id) - if (ok) { - deleteUsage(id) - results.deleted++ - } else { - results.failed++ - results.errors.push({ id, error: 'Failed to delete session' }) - } - } - } else { - for (const id of validIds) { - const ok = await hermesCli.deleteSession(id) - if (ok) { - deleteUsage(id) - results.deleted++ - } else { - results.failed++ - results.errors.push({ id, error: 'Failed to delete session' }) - } + for (const id of validIds) { + const ok = localDeleteSession(id) + if (ok) { + deleteUsage(id) + results.deleted++ + } else { + results.failed++ + results.errors.push({ id, error: 'Failed to delete session' }) } } @@ -354,30 +235,13 @@ export async function usageSingle(ctx: any) { } export async function rename(ctx: any) { - if (useLocalSessionStore()) { - const { title } = ctx.request.body as { title?: string } - if (!title || typeof title !== 'string') { - ctx.status = 400 - ctx.body = { error: 'title is required' } - return - } - const ok = localRenameSession(ctx.params.id, title.trim()) - if (!ok) { - ctx.status = 500 - ctx.body = { error: 'Failed to rename session' } - return - } - ctx.body = { ok: true } - return - } - const { title } = ctx.request.body as { title?: string } if (!title || typeof title !== 'string') { ctx.status = 400 ctx.body = { error: 'title is required' } return } - const ok = await hermesCli.renameSession(ctx.params.id, title.trim()) + const ok = localRenameSession(ctx.params.id, title.trim()) if (!ok) { ctx.status = 500 ctx.body = { error: 'Failed to rename session' } @@ -393,20 +257,14 @@ export async function setWorkspace(ctx: any) { ctx.body = { error: 'workspace must be a string or null' } return } - if (useLocalSessionStore()) { - const { updateSession, getSession, createSession } = await import('../../db/hermes/session-store') - const { getActiveProfileName } = await import('../../services/hermes/hermes-profile') - const 
id = ctx.params.id - // Create session if it doesn't exist yet (user may set workspace before sending first message) - if (!getSession(id)) { - createSession({ id, profile: getActiveProfileName(), title: '' }) - } - updateSession(id, { workspace: workspace || null } as any) - ctx.body = { ok: true } - return + const { updateSession, getSession, createSession } = await import('../../db/hermes/session-store') + const { getActiveProfileName } = await import('../../services/hermes/hermes-profile') + const id = ctx.params.id + if (!getSession(id)) { + createSession({ id, profile: getActiveProfileName(), title: '' }) } - ctx.status = 501 - ctx.body = { error: 'Workspace setting only supported in local session store mode' } + updateSession(id, { workspace: workspace || null } as any) + ctx.body = { ok: true } } export async function contextLength(ctx: any) { @@ -418,11 +276,6 @@ export async function usageStats(ctx: any) { const rawDays = parseInt(String(ctx.query?.days ?? '30'), 10) const days = Number.isFinite(rawDays) && rawDays > 0 ? Math.min(rawDays, 365) : 30 - // Local Web UI chat usage is kept in the dashboard DB and must be merged - // with Hermes' native state.db analytics for the same period. 
- const currentProfile = getActiveProfileName() - const local = getLocalUsageStats(currentProfile, days) - let hermes = { input_tokens: 0, output_tokens: 0, @@ -442,29 +295,6 @@ export async function usageStats(ctx: any) { logger.warn(err, 'usageStats: failed to load Hermes usage analytics from state.db') } - const totalInput = local.input_tokens + hermes.input_tokens - const totalOutput = local.output_tokens + hermes.output_tokens - const totalCacheRead = local.cache_read_tokens + hermes.cache_read_tokens - const totalCacheWrite = local.cache_write_tokens + hermes.cache_write_tokens - const totalReasoning = local.reasoning_tokens + hermes.reasoning_tokens - const totalSessions = local.sessions + hermes.sessions - - const modelMap = new Map() - for (const m of [...local.by_model, ...hermes.by_model]) { - const model = (m.model || '').trim() || 'unknown' - const existing = modelMap.get(model) - if (existing) { - existing.input_tokens += m.input_tokens - existing.output_tokens += m.output_tokens - existing.cache_read_tokens += m.cache_read_tokens - existing.cache_write_tokens += m.cache_write_tokens - existing.reasoning_tokens += m.reasoning_tokens - existing.sessions += m.sessions - } else { - modelMap.set(model, { ...m, model }) - } - } - const dayMap = new Map() const now = new Date() for (let i = days - 1; i >= 0; i--) { @@ -473,7 +303,7 @@ export async function usageStats(ctx: any) { const key = d.toISOString().slice(0, 10) dayMap.set(key, { date: key, input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0, sessions: 0, errors: 0, cost: 0 }) } - for (const d of [...local.by_day, ...hermes.by_day]) { + for (const d of hermes.by_day) { const existing = dayMap.get(d.date) if (existing) { existing.input_tokens += d.input_tokens; existing.output_tokens += d.output_tokens @@ -483,16 +313,16 @@ export async function usageStats(ctx: any) { } ctx.body = { - total_input_tokens: totalInput, - total_output_tokens: totalOutput, - 
total_cache_read_tokens: totalCacheRead, - total_cache_write_tokens: totalCacheWrite, - total_reasoning_tokens: totalReasoning, - total_sessions: totalSessions, + total_input_tokens: hermes.input_tokens, + total_output_tokens: hermes.output_tokens, + total_cache_read_tokens: hermes.cache_read_tokens, + total_cache_write_tokens: hermes.cache_write_tokens, + total_reasoning_tokens: hermes.reasoning_tokens, + total_sessions: hermes.sessions, total_cost: hermes.cost, total_api_calls: hermes.total_api_calls, period_days: days, - model_usage: [...modelMap.values()].sort((a, b) => (b.input_tokens + b.output_tokens) - (a.input_tokens + a.output_tokens)), + model_usage: hermes.by_model.sort((a, b) => (b.input_tokens + b.output_tokens) - (a.input_tokens + a.output_tokens)), daily_usage: [...dayMap.values()], } } @@ -545,20 +375,7 @@ export async function listWorkspaceFolders(ctx: any) { const exportCompressor = new ExportCompressor() export async function exportSession(ctx: any) { - let session: any = null - - if (useLocalSessionStore()) { - session = localGetSessionDetail(ctx.params.id) - } else { - try { - session = await getSessionDetailFromDb(ctx.params.id) - } catch (err) { - logger.warn(err, 'Hermes Session DB: export detail query failed, falling back to CLI') - } - if (!session) { - session = await hermesCli.getSession(ctx.params.id) - } - } + const session = localGetSessionDetail(ctx.params.id) if (!session) { ctx.status = 404 @@ -630,38 +447,32 @@ export async function getConversationMessagesPaginated(ctx: any) { const offset = ctx.query.offset ? parseInt(ctx.query.offset as string, 10) : 0 const limit = ctx.query.limit ? 
parseInt(ctx.query.limit as string, 10) : 50 - if (useLocalSessionStore()) { - const { getSessionDetailPaginated } = await import('../../db/hermes/session-store') - const result = getSessionDetailPaginated(ctx.params.id, offset, limit) + const { getSessionDetailPaginated } = await import('../../db/hermes/session-store') + const result = getSessionDetailPaginated(ctx.params.id, offset, limit) - if (!result) { - ctx.status = 404 - ctx.body = { error: 'Conversation not found' } - return - } - - ctx.body = { - session: { - id: result.session.id, - source: result.session.source, - model: result.session.model, - title: result.session.title, - started_at: result.session.started_at, - ended_at: result.session.ended_at, - last_active: result.session.last_active, - message_count: result.session.message_count, - input_tokens: result.session.input_tokens, - output_tokens: result.session.output_tokens, - }, - messages: result.messages, - total: result.total, - offset: result.offset, - limit: result.limit, - hasMore: result.hasMore, - } + if (!result) { + ctx.status = 404 + ctx.body = { error: 'Conversation not found' } return } - ctx.status = 404 - ctx.body = { error: 'Conversation not found' } + ctx.body = { + session: { + id: result.session.id, + source: result.session.source, + model: result.session.model, + title: result.session.title, + started_at: result.session.started_at, + ended_at: result.session.ended_at, + last_active: result.session.last_active, + message_count: result.session.message_count, + input_tokens: result.session.input_tokens, + output_tokens: result.session.output_tokens, + }, + messages: result.messages, + total: result.total, + offset: result.offset, + limit: result.limit, + hasMore: result.hasMore, + } } diff --git a/packages/server/src/db/hermes/session-store.ts b/packages/server/src/db/hermes/session-store.ts index 7669f50..36406ef 100644 --- a/packages/server/src/db/hermes/session-store.ts +++ b/packages/server/src/db/hermes/session-store.ts @@ 
-129,14 +129,16 @@ function mapMessageRow(row: Record): HermesMessageRow { export function createSession(data: { id: string profile?: string + source?: string model?: string title?: string workspace?: string }): HermesSessionRow { const now = Math.floor(Date.now() / 1000) + const source = data.source || 'api_server' if (!isSqliteAvailable()) { return { - id: data.id, profile: data.profile || 'default', source: 'api_server', + id: data.id, profile: data.profile || 'default', source, user_id: null, model: data.model || '', title: data.title || null, started_at: now, ended_at: null, end_reason: null, message_count: 0, tool_call_count: 0, @@ -148,8 +150,8 @@ export function createSession(data: { const db = getDb()! db.prepare( `INSERT INTO ${SESSIONS_TABLE} (id, profile, source, model, title, started_at, last_active, workspace) - VALUES (?, ?, 'api_server', ?, ?, ?, ?, ?)`, - ).run(data.id, data.profile || 'default', data.model || '', data.title || null, now, now, data.workspace || null) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ).run(data.id, data.profile || 'default', source, data.model || '', data.title || null, now, now, data.workspace || null) return getSession(data.id)! } diff --git a/packages/server/src/db/hermes/sessions-db.ts b/packages/server/src/db/hermes/sessions-db.ts index d68c126..8ceb0ea 100644 --- a/packages/server/src/db/hermes/sessions-db.ts +++ b/packages/server/src/db/hermes/sessions-db.ts @@ -565,6 +565,10 @@ function aggregateSessionDetail( } } +function chainOrderSql(ids: string[]): string { + return ids.map((_, index) => `WHEN ? THEN ${index}`).join(' ') +} + async function openSessionDb() { if (!SQLITE_AVAILABLE) { throw new Error(`node:sqlite requires Node >= 22.5, current: ${process.versions.node}`) @@ -598,7 +602,7 @@ export async function getSessionMessagesFromDb(sessionId: string): Promise<{ const messageRows = db.prepare(` SELECT * FROM messages WHERE session_id = ? 
- ORDER BY timestamp, id + ORDER BY id `).all(sessionId) as Record[] return { @@ -622,11 +626,12 @@ export async function getSessionDetailFromDb(sessionId: string): Promise session.id) const placeholders = ids.map(() => '?').join(', ') + const orderSql = chainOrderSql(ids) const messageRows = db.prepare(` SELECT * FROM messages WHERE session_id IN (${placeholders}) - ORDER BY timestamp, id - `).all(...ids) as Record[] + ORDER BY CASE session_id ${orderSql} ELSE ${ids.length} END, id + `).all(...ids, ...ids) as Record[] const messages = messageRows.map(mapMessageRow) return aggregateSessionDetail(chain, messages, sessionId) } finally { @@ -648,11 +653,12 @@ export async function getSessionDetailFromDbWithProfile(sessionId: string, profi const ids = chain.map(session => session.id) const placeholders = ids.map(() => '?').join(', ') + const orderSql = chainOrderSql(ids) const messageRows = db.prepare(` SELECT * FROM messages WHERE session_id IN (${placeholders}) - ORDER BY timestamp, id - `).all(...ids) as Record[] + ORDER BY CASE session_id ${orderSql} ELSE ${ids.length} END, id + `).all(...ids, ...ids) as Record[] const messages = messageRows.map(mapMessageRow) return aggregateSessionDetail(chain, messages, sessionId) } finally { @@ -672,7 +678,7 @@ export async function getExactSessionDetailFromDbWithProfile(sessionId: string, const messageRows = db.prepare(` SELECT * FROM messages WHERE session_id = ? - ORDER BY timestamp, id + ORDER BY id `).all(sessionId) as Record[] const messages = messageRows.map(mapMessageRow) return aggregateSessionDetail([requested], messages, sessionId) @@ -818,10 +824,6 @@ export async function getUsageStatsFromDb( const apiCallsExpr = tableHasColumn(db, 'sessions', 'api_call_count') ? 'COALESCE(SUM(api_call_count), 0)' : '0' - const sourceFilter = tableHasColumn(db, 'sessions', 'source') - ? 
" AND COALESCE(source, '') != 'api_server'" - : '' - const totals = db.prepare(` SELECT COALESCE(SUM(input_tokens), 0) AS input_tokens, @@ -833,7 +835,7 @@ export async function getUsageStatsFromDb( COUNT(*) AS sessions, ${apiCallsExpr} AS total_api_calls FROM sessions - WHERE started_at > ?${sourceFilter} + WHERE started_at > ? `).get(since) as Record | undefined if (!totals) return empty @@ -848,7 +850,7 @@ export async function getUsageStatsFromDb( COALESCE(SUM(reasoning_tokens), 0) AS reasoning_tokens, COUNT(*) AS sessions FROM sessions - WHERE started_at > ?${sourceFilter} AND model IS NOT NULL + WHERE started_at > ? AND model IS NOT NULL GROUP BY model ORDER BY COALESCE(SUM(input_tokens), 0) + COALESCE(SUM(output_tokens), 0) DESC `).all(since).map(row => ({ @@ -871,7 +873,7 @@ export async function getUsageStatsFromDb( COUNT(*) AS sessions, COALESCE(SUM(COALESCE(actual_cost_usd, estimated_cost_usd, 0)), 0) AS cost FROM sessions - WHERE started_at > ?${sourceFilter} + WHERE started_at > ? 
GROUP BY date ORDER BY date ASC `).all(since).map(row => ({ diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 011030f..7ec94a8 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -20,6 +20,7 @@ import { setGroupChatServer } from './routes/hermes/group-chat' import { setChatRunServer } from './routes/hermes/chat-run' import { GroupChatServer } from './services/hermes/group-chat' import { ChatRunSocket } from './services/hermes/chat-run-socket' +import { startAgentBridgeManager } from './services/hermes/agent-bridge' import { logger } from './services/logger' // Injected by esbuild at build time; fallback to reading package.json in dev mode @@ -46,6 +47,7 @@ process.on('unhandledRejection', (reason) => { let server: any = null let servers: any[] = [] let chatRunServer: any = null +let agentBridgeManager: any = null interface ListenResult { primary: any @@ -94,6 +96,13 @@ export async function bootstrap() { await initGatewayManager() console.log('[bootstrap] gateway manager initialized') + try { + agentBridgeManager = await startAgentBridgeManager() + console.log('[bootstrap] agent bridge started') + } catch (err) { + logger.warn(err, '[bootstrap] agent bridge failed to start') + console.warn('[bootstrap] agent bridge failed to start:', err instanceof Error ? 
err.message : err) + } await new Promise(resolve => setTimeout(resolve, 1000)) // Initialize all web-ui SQLite tables const { initAllStores } = await import('./db/hermes/init') @@ -102,11 +111,6 @@ export async function bootstrap() { await new Promise(resolve => setTimeout(resolve, 1000)) console.log('[bootstrap] all stores initialized') - // Sync Hermes sessions from all profiles (only if local DB is empty) - const { syncAllHermesSessionsOnStartup } = await import('./services/hermes/session-sync') - await syncAllHermesSessionsOnStartup() - console.log('[bootstrap] Hermes session sync completed') - app.use(cors({ origin: config.corsOrigins })) app.use(bodyParser()) console.log('[bootstrap] cors + bodyParser registered') @@ -187,7 +191,7 @@ export async function bootstrap() { }) }) - bindShutdown(servers, groupChatServer, chatRunServer) + bindShutdown(servers, groupChatServer, chatRunServer, agentBridgeManager) startVersionCheck() } diff --git a/packages/server/src/services/hermes/agent-bridge/README.md b/packages/server/src/services/hermes/agent-bridge/README.md new file mode 100644 index 0000000..a87e5de --- /dev/null +++ b/packages/server/src/services/hermes/agent-bridge/README.md @@ -0,0 +1,85 @@ +# Agent Bridge + +Optional backend-side bridge for talking to `~/.hermes/hermes-agent` by +instantiating `run_agent.AIAgent` directly in a Python process. + +This is intentionally separate from the current Web UI chat path. + +## Python Service + +```bash +python packages/server/src/services/hermes/agent-bridge/hermes_bridge.py +``` + +Default endpoint: + +```text +ipc:///tmp/hermes-agent-bridge.sock +``` + +On Windows, the default endpoint is TCP because Python may not support Unix +domain sockets there: + +```text +tcp://127.0.0.1:18765 +``` + +Override with: + +```bash +HERMES_AGENT_BRIDGE_ENDPOINT=tcp://127.0.0.1:8765 python packages/server/src/services/hermes/agent-bridge/hermes_bridge.py +``` + +The service discovers Hermes Agent in this order: + +1. 
`--agent-root` +2. `HERMES_AGENT_ROOT` +3. the installed `hermes` command path +4. the current working directory, then the directories above the bridge script +5. common locations such as `~/.hermes/hermes-agent`, `~/hermes-agent`, and `/opt/hermes-agent` + +Hermes home is resolved from `--hermes-home`, `HERMES_HOME`, then `~/.hermes`. + +Default agent root: + +```text +~/.hermes/hermes-agent +``` + +You can pass both paths explicitly: + +```bash +python packages/server/src/services/hermes/agent-bridge/hermes_bridge.py \ + --agent-root ~/.hermes/hermes-agent \ + --hermes-home ~/.hermes +``` + +The socket transport uses Python and Node standard libraries. No ZMQ dependency +is required. + +## Backend Usage + +```ts +import { AgentBridgeClient } from './services/hermes/agent-bridge' + +const bridge = new AgentBridgeClient() +const run = await bridge.chat(sessionId, message) + +for await (const chunk of bridge.streamOutput(run.run_id)) { + if (chunk.delta) { + // forward chunk.delta to Socket.IO/SSE/etc. + } +} +``` + +A minimal chat call only sends `session_id` and `message`; `conversation_history`, +`instructions`, and `profile` are optional extras. Provider, model, +keys, tools, reasoning, and session DB are resolved by hermes-agent from the +normal Hermes config and environment. + +The bridge instantiates `AIAgent` with `platform="cli"` by default so behavior +matches CLI chat. 
import { setTimeout as delay } from 'timers/promises'
import { createConnection, type Socket } from 'net'
import { URL } from 'url'

/** Default bridge endpoint: loopback TCP on Windows, a Unix domain socket elsewhere. */
export const DEFAULT_AGENT_BRIDGE_ENDPOINT = process.platform === 'win32'
  ? 'tcp://127.0.0.1:18765'
  : 'ipc:///tmp/hermes-agent-bridge.sock'
/** Default per-request timeout in milliseconds. */
export const DEFAULT_AGENT_BRIDGE_TIMEOUT_MS = 120000

/** Byte value of "\n", which terminates one JSON response on the wire. */
const NEWLINE_BYTE = 0x0a

/**
 * Reads a positive, finite number from the environment variable `name`.
 * Returns `undefined` when the variable is unset, empty, non-numeric or <= 0.
 */
function envPositiveInt(name: string): number | undefined {
  const raw = process.env[name]
  if (!raw) return undefined
  const value = Number(raw)
  return Number.isFinite(value) && value > 0 ? value : undefined
}

export type AgentBridgeStatus = 'running' | 'complete' | 'interrupted' | 'error'

export interface AgentBridgeOptions {
  endpoint?: string
  timeoutMs?: number
}

export interface AgentBridgeRequestOptions {
  timeoutMs?: number
}

export type AgentBridgeMessage =
  | string
  | Array<Record<string, unknown>>

export interface AgentBridgeResponse {
  ok: true
  [key: string]: unknown
}

export interface AgentBridgeChatStarted extends AgentBridgeResponse {
  run_id: string
  session_id: string
  status: AgentBridgeStatus
}

export interface AgentBridgeOutput extends AgentBridgeResponse {
  run_id: string
  session_id: string
  status: AgentBridgeStatus
  delta: string
  cursor: number
  output: string
  done: boolean
  result?: unknown
  error?: string | null
  events: Array<Record<string, unknown>>
  event_cursor: number
}

export interface AgentBridgeRunResult extends AgentBridgeResponse {
  run_id: string
  session_id: string
  status: AgentBridgeStatus
  output: string
  deltas: string[]
  events: unknown[]
  result?: unknown
  error?: string | null
}

export interface AgentBridgeCommandResult extends AgentBridgeResponse {
  session_id: string
  command: string
  handled: boolean
  message?: string
  new_session_id?: string
  history?: unknown[]
  retry?: boolean
  retry_input?: AgentBridgeMessage
  title?: string
}

/** Error raised for a failed or unparseable bridge response; `response` keeps the raw payload. */
export class AgentBridgeError extends Error {
  response?: unknown

  constructor(message?: string, response?: unknown) {
    super(message)
    this.name = 'AgentBridgeError'
    this.response = response
  }
}

/**
 * Client for the newline-delimited JSON protocol spoken by the agent bridge.
 *
 * Every request opens a fresh socket, writes one JSON line and reads one JSON
 * line back. Requests issued through the same client instance are serialized:
 * each one starts only after the previous one has settled.
 */
export class AgentBridgeClient {
  readonly endpoint: string
  readonly timeoutMs: number
  /** Tail of the request chain; used to run requests one at a time. */
  private lock: Promise<unknown> = Promise.resolve()

  /**
   * @param options.endpoint  `ipc://<path>` or `tcp://<host>:<port>`; falls back to
   *   `HERMES_AGENT_BRIDGE_ENDPOINT`, then {@link DEFAULT_AGENT_BRIDGE_ENDPOINT}.
   * @param options.timeoutMs Per-request timeout; falls back to
   *   `HERMES_AGENT_BRIDGE_TIMEOUT_MS`, then {@link DEFAULT_AGENT_BRIDGE_TIMEOUT_MS}.
   */
  constructor(options: AgentBridgeOptions = {}) {
    this.endpoint = options.endpoint || process.env.HERMES_AGENT_BRIDGE_ENDPOINT || DEFAULT_AGENT_BRIDGE_ENDPOINT
    this.timeoutMs = options.timeoutMs ?? envPositiveInt('HERMES_AGENT_BRIDGE_TIMEOUT_MS') ?? DEFAULT_AGENT_BRIDGE_TIMEOUT_MS
  }

  /** No-op kept for API symmetry; sockets are opened per request. */
  async connect(): Promise<this> {
    return this
  }

  /** No-op kept for API symmetry; there is no persistent connection to close. */
  async close(): Promise<void> {
    return undefined
  }

  /**
   * Opens a socket to the configured endpoint.
   * Rejects for unsupported schemes, a TCP endpoint without a usable port,
   * or any connection error.
   */
  private connectSocket(): Promise<Socket> {
    return new Promise<Socket>((resolveConnect, rejectConnect) => {
      const endpoint = this.endpoint
      let socket: Socket
      if (endpoint.startsWith('ipc://')) {
        socket = createConnection(endpoint.slice('ipc://'.length))
      } else if (endpoint.startsWith('tcp://')) {
        const url = new URL(endpoint)
        const port = Number(url.port)
        // An absent port parses to 0, which would otherwise be dialed silently.
        if (!Number.isInteger(port) || port <= 0) {
          rejectConnect(new Error(`agent bridge endpoint is missing a valid port: ${endpoint}`))
          return
        }
        socket = createConnection({
          host: url.hostname || '127.0.0.1',
          port,
        })
      } else {
        rejectConnect(new Error(`unsupported agent bridge endpoint: ${endpoint}`))
        return
      }

      function cleanup(): void {
        socket.off('connect', onConnect)
        socket.off('error', onError)
      }
      function onConnect(): void {
        cleanup()
        resolveConnect(socket)
      }
      function onError(err: Error): void {
        cleanup()
        socket.destroy()
        rejectConnect(err)
      }
      socket.once('connect', onConnect)
      socket.once('error', onError)
    })
  }

  /**
   * Reads one response line from `socket`.
   *
   * Bytes are accumulated as Buffers and decoded once, so a multi-byte UTF-8
   * character split across two chunks is decoded correctly. The 0x0A delimiter
   * can never occur inside a multi-byte sequence, so scanning raw bytes is safe.
   *
   * @param timeoutMs Rejects and destroys the socket after this many ms; `0` disables the timer.
   */
  private readResponse(socket: Socket, timeoutMs: number): Promise<string> {
    return new Promise<string>((resolveRead, rejectRead) => {
      const chunks: Buffer[] = []
      let timer: ReturnType<typeof setTimeout> | null = null

      const buffered = (): string => Buffer.concat(chunks).toString('utf8')

      function cleanup(): void {
        if (timer) clearTimeout(timer)
        socket.off('data', onData)
        socket.off('error', onError)
        socket.off('end', onEnd)
        socket.off('close', onClose)
      }
      function finish(line: string): void {
        cleanup()
        socket.end()
        resolveRead(line)
      }
      function onData(chunk: Buffer): void {
        const newlineAt = chunk.indexOf(NEWLINE_BYTE)
        if (newlineAt < 0) {
          chunks.push(chunk)
          return
        }
        chunks.push(chunk.subarray(0, newlineAt))
        finish(buffered())
      }
      function onError(err: Error): void {
        cleanup()
        socket.destroy()
        rejectRead(err)
      }
      function onEnd(): void {
        // Peer finished without a trailing newline: accept what was sent.
        const line = buffered().trim()
        if (line) finish(line)
      }
      function onClose(): void {
        // Always settle on close so a caller can never be left hanging.
        const line = buffered().trim()
        cleanup()
        if (line) {
          resolveRead(line)
        } else {
          rejectRead(new Error('Agent bridge socket closed without a response'))
        }
      }

      if (timeoutMs > 0) {
        timer = setTimeout(() => {
          cleanup()
          socket.destroy()
          rejectRead(new Error(`Agent bridge request timed out after ${timeoutMs}ms`))
        }, timeoutMs)
      }

      socket.on('data', onData)
      socket.once('error', onError)
      socket.once('end', onEnd)
      socket.once('close', onClose)
    })
  }

  /**
   * Sends one JSON request and resolves with the parsed response.
   *
   * @throws AgentBridgeError when the response is not valid JSON or has a falsy `ok`.
   * @throws Error on connection failure or timeout.
   */
  async request<T extends AgentBridgeResponse = AgentBridgeResponse>(
    payload: Record<string, unknown>,
    options: AgentBridgeRequestOptions = {},
  ): Promise<T> {
    const run = async (): Promise<T> => {
      const timeoutMs = options.timeoutMs || this.timeoutMs
      const socket = await this.connectSocket()
      socket.write(`${JSON.stringify(payload)}\n`)
      const raw = await this.readResponse(socket, timeoutMs)

      let parsed: unknown
      try {
        parsed = JSON.parse(raw)
      } catch {
        throw new AgentBridgeError(`Agent bridge returned malformed JSON: ${raw.slice(0, 200)}`, raw)
      }
      const response = parsed as { ok?: boolean; error?: string } | null
      if (!response || !response.ok) {
        throw new AgentBridgeError(response?.error || 'Agent bridge request failed', parsed)
      }
      return parsed as T
    }

    // Chain onto the previous request regardless of how it settled.
    const next = this.lock.then(run, run)
    this.lock = next.catch(() => undefined)
    return next
  }

  ping(): Promise<AgentBridgeResponse> {
    return this.request({ action: 'ping' })
  }

  /** Starts a chat run; optional fields are only sent when provided. */
  chat(
    sessionId: string,
    message: AgentBridgeMessage,
    conversationHistory?: unknown[],
    instructions?: string,
    profile?: string,
  ): Promise<AgentBridgeChatStarted> {
    return this.request<AgentBridgeChatStarted>({
      action: 'chat',
      session_id: sessionId,
      message,
      ...(conversationHistory ? { conversation_history: conversationHistory } : {}),
      ...(instructions ? { instructions } : {}),
      ...(profile ? { profile } : {}),
    })
  }

  command(sessionId: string, command: string): Promise<AgentBridgeCommandResult> {
    return this.request<AgentBridgeCommandResult>({
      action: 'command',
      session_id: sessionId,
      command,
    })
  }

  getOutput(
    runId: string,
    cursor = 0,
    eventCursor = 0,
    options: AgentBridgeRequestOptions = {},
  ): Promise<AgentBridgeOutput> {
    return this.request<AgentBridgeOutput>({
      action: 'get_output',
      run_id: runId,
      cursor,
      event_cursor: eventCursor,
    }, options)
  }

  /**
   * Polls `get_output` every `intervalMs` (default 100) and yields each chunk that
   * carries a delta, events, or the final `done` state. Ends after the `done` chunk.
   */
  async *streamOutput(
    runId: string,
    options: AgentBridgeRequestOptions & { intervalMs?: number } = {},
  ): AsyncGenerator<AgentBridgeOutput> {
    const intervalMs = options.intervalMs || 100
    let cursor = 0
    let eventCursor = 0
    for (;;) {
      const chunk = await this.getOutput(runId, cursor, eventCursor, options)
      cursor = chunk.cursor
      eventCursor = chunk.event_cursor
      if (chunk.delta || chunk.done || (chunk.events && chunk.events.length > 0)) yield chunk
      if (chunk.done) return
      await delay(intervalMs)
    }
  }

  /** Starts a chat, forwards every text delta to `onDelta`, and resolves with the last chunk. */
  async chatStream(
    sessionId: string,
    message: AgentBridgeMessage,
    onDelta: (delta: string, chunk: AgentBridgeOutput) => void | Promise<void>,
    options: AgentBridgeRequestOptions & { intervalMs?: number } = {},
  ): Promise<AgentBridgeOutput> {
    const started = await this.chat(sessionId, message)
    let last: AgentBridgeOutput | null = null
    for await (const chunk of this.streamOutput(started.run_id, options)) {
      last = chunk
      if (chunk.delta) await onDelta(chunk.delta, chunk)
    }
    if (!last) throw new Error(`Agent bridge run ${started.run_id} produced no output state`)
    return last
  }

  getResult(runId: string, options: AgentBridgeRequestOptions = {}): Promise<AgentBridgeRunResult> {
    return this.request<AgentBridgeRunResult>({ action: 'get_result', run_id: runId }, options)
  }

  interrupt(sessionId: string, message?: string): Promise<AgentBridgeResponse> {
    return this.request({ action: 'interrupt', session_id: sessionId, message })
  }

  steer(sessionId: string, text: string): Promise<AgentBridgeResponse> {
    return this.request({ action: 'steer', session_id: sessionId, text })
  }

  approvalRespond(approvalId: string, choice: string): Promise<AgentBridgeResponse> {
    return this.request({ action: 'approval_respond', approval_id: approvalId, choice })
  }

  compressionRespond(
    requestId: string,
    payload: { messages?: unknown[]; system_message?: string; error?: string },
  ): Promise<AgentBridgeResponse> {
    return this.request({
      action: 'compression_respond',
      request_id: requestId,
      ...payload,
    }, { timeoutMs: this.timeoutMs })
  }

  destroyAll(): Promise<AgentBridgeResponse> {
    return this.request({ action: 'destroy_all' })
  }

  getHistory(sessionId: string): Promise<AgentBridgeResponse> {
    return this.request({ action: 'get_history', session_id: sessionId })
  }

  destroy(sessionId: string): Promise<AgentBridgeResponse> {
    return this.request({ action: 'destroy', session_id: sessionId })
  }

  list(): Promise<AgentBridgeResponse> {
    return this.request({ action: 'list' })
  }

  shutdown(): Promise<AgentBridgeResponse> {
    return this.request({ action: 'shutdown' })
  }
}

export default AgentBridgeClient
+""" + +from __future__ import annotations + +import argparse +import copy +import json +import os +import queue +import shutil +import socket +import sys +import threading +import time +import traceback +import uuid +from dataclasses import dataclass, field +from pathlib import Path +from urllib.parse import urlparse +from typing import Any + + +DEFAULT_ENDPOINT = "tcp://127.0.0.1:18765" if os.name == "nt" else "ipc:///tmp/hermes-agent-bridge.sock" +DEFAULT_AGENT_ROOT = "~/.hermes/hermes-agent" +DEFAULT_HERMES_HOME = "~/.hermes" + + +def _bridge_platform() -> str: + return os.environ.get("HERMES_AGENT_BRIDGE_PLATFORM", "cli").strip() or "cli" + + +def _candidate_agent_roots(raw: str | None = None) -> list[Path]: + candidates: list[Path] = [] + if raw: + candidates.append(Path(raw).expanduser()) + + env_root = os.environ.get("HERMES_AGENT_ROOT") + if env_root: + candidates.append(Path(env_root).expanduser()) + + hermes_bin = shutil.which(os.environ.get("HERMES_BIN", "hermes")) + if hermes_bin: + bin_path = Path(hermes_bin).resolve() + candidates.extend([ + bin_path.parent.parent, + bin_path.parent.parent.parent, + bin_path.parent.parent / "hermes-agent", + ]) + + script_path = Path(__file__).resolve() + candidates.extend([ + Path.cwd(), + Path.cwd() / ".hermes" / "hermes-agent", + Path.cwd() / "hermes-agent", + script_path.parent, + script_path.parent.parent, + script_path.parent.parent.parent, + script_path.parent.parent.parent / ".hermes" / "hermes-agent", + ]) + for parent in script_path.parents: + candidates.extend([ + parent / ".hermes" / "hermes-agent", + parent / "hermes-agent", + ]) + + candidates.extend([ + Path.home() / ".hermes" / "hermes-agent", + Path.home() / "hermes-agent", + Path("/opt/hermes/hermes-agent"), + Path("/opt/hermes-agent"), + Path("/usr/local/hermes-agent"), + ]) + candidates.append(Path(DEFAULT_AGENT_ROOT).expanduser()) + + unique: list[Path] = [] + seen: set[str] = set() + for candidate in candidates: + try: + resolved = 
candidate.resolve() + except OSError: + resolved = candidate + key = str(resolved) + if key not in seen: + seen.add(key) + unique.append(resolved) + return unique + + +def _discover_agent_root(raw: str | None = None) -> Path: + for candidate in _candidate_agent_roots(raw): + if (candidate / "run_agent.py").exists(): + return candidate + attempted = ", ".join(str(path) for path in _candidate_agent_roots(raw)) + raise RuntimeError( + "hermes-agent run_agent.py not found. Pass --agent-root or set " + f"HERMES_AGENT_ROOT. Tried: {attempted}" + ) + + +def _discover_hermes_home(raw: str | None = None) -> Path: + if raw: + return Path(raw).expanduser().resolve() + env_home = os.environ.get("HERMES_HOME") + if env_home: + return Path(env_home).expanduser().resolve() + return Path(DEFAULT_HERMES_HOME).expanduser().resolve() + + +def _jsonable(value: Any) -> Any: + try: + json.dumps(value) + return value + except TypeError: + if isinstance(value, dict): + return {str(k): _jsonable(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_jsonable(v) for v in value] + return str(value) + + +def _agent_root() -> Path: + return _discover_agent_root(os.environ.get("HERMES_AGENT_ROOT")) + + +def _hermes_home() -> Path: + return _discover_hermes_home(os.environ.get("HERMES_HOME")) + + +def _base_hermes_home() -> Path: + return _discover_hermes_home(os.environ.get("HERMES_AGENT_BRIDGE_BASE_HOME") or DEFAULT_HERMES_HOME) + + +def _profile_home(profile: str | None) -> Path: + base = _base_hermes_home() + if not profile or profile == "default": + return base + profile_home = base / "profiles" / profile + return profile_home if profile_home.exists() else base + + +def _set_path_env(agent_root: str | None = None, hermes_home: str | None = None) -> None: + os.environ["HERMES_AGENT_ROOT"] = str(_discover_agent_root(agent_root)) + resolved_home = str(_discover_hermes_home(hermes_home)) + os.environ["HERMES_HOME"] = resolved_home + 
def _load_cfg(profile: str | None = None) -> dict[str, Any]:
    """Load the Hermes configuration as a dict.

    Tries ``hermes_cli.config.load_config`` first.  If that import or call
    fails, falls back to parsing ``config.yaml`` under the current Hermes home.
    Any failure, a missing file, or a config whose top level is not a mapping
    yields ``{}`` so callers can always use ``.get``.

    ``profile`` is accepted for call-site compatibility but is not used here.

    Raises whatever ``_ensure_agent_imports`` raises when hermes-agent cannot
    be located.
    """
    _ensure_agent_imports()
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
        return cfg if isinstance(cfg, dict) else {}
    except Exception:
        # Best effort: fall through to reading the YAML file directly.
        pass

    try:
        import yaml

        path = _hermes_home() / "config.yaml"
        if not path.exists():
            return {}
        cfg = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        return {}
    # A YAML document may legally be a list or scalar; only a mapping is usable.
    return cfg if isinstance(cfg, dict) else {}
+ """ + if not profile or profile == "default": + return os.environ.get("HERMES_HOME") + profile_home = _profile_home(profile) + if not (profile_home / "config.yaml").exists(): + return os.environ.get("HERMES_HOME") + original = os.environ.get("HERMES_HOME") + os.environ["HERMES_HOME"] = str(profile_home) + return original + + +def _restore_profile_env(original: str | None) -> None: + """Restore HERMES_HOME after profile-scoped agent creation.""" + if original is not None: + os.environ["HERMES_HOME"] = original + else: + os.environ.pop("HERMES_HOME", None) + + +def _resolve_model(cfg: dict[str, Any]) -> str: + env_model = ( + os.environ.get("HERMES_MODEL", "") + or os.environ.get("HERMES_INFERENCE_MODEL", "") + ).strip() + if env_model: + return env_model + model_cfg = cfg.get("model", "") + if isinstance(model_cfg, dict): + return str(model_cfg.get("default") or "").strip() + if isinstance(model_cfg, str): + return model_cfg.strip() + return "" + + +def _resolve_runtime(model: str, provider: str | None = None) -> dict[str, Any]: + _ensure_agent_imports() + from hermes_cli.runtime_provider import resolve_runtime_provider + + requested = provider or os.environ.get("HERMES_BRIDGE_PROVIDER", "").strip() or None + return resolve_runtime_provider(requested=requested, target_model=model or None) + + +def _load_enabled_toolsets() -> list[str] | None: + _ensure_agent_imports() + raw = os.environ.get("HERMES_BRIDGE_TOOLSETS", "").strip() + if raw: + values = [part.strip() for part in raw.split(",") if part.strip()] + if any(value in {"all", "*"} for value in values): + return None + return values or None + + try: + from hermes_cli.config import load_config + from hermes_cli.tools_config import _get_platform_tools + from toolsets import resolve_toolset + + cfg = load_config() + platform = _bridge_platform() + enabled = sorted(_get_platform_tools(cfg, platform, include_default_mcp_servers=True)) + if platform != "cli": + resolved_tools: set[str] = set() + for toolset_name in 
enabled: + try: + resolved_tools.update(resolve_toolset(toolset_name)) + except Exception: + pass + if not resolved_tools: + enabled = sorted(_get_platform_tools(cfg, "cli", include_default_mcp_servers=True)) + return enabled or None + except Exception: + return None + + +def _load_reasoning_config() -> dict[str, Any] | None: + _ensure_agent_imports() + try: + from hermes_constants import parse_reasoning_effort + + effort = str((_load_cfg().get("agent") or {}).get("reasoning_effort", "") or "").strip() + return parse_reasoning_effort(effort) + except Exception: + return None + + +def _load_service_tier() -> str | None: + raw = str((_load_cfg().get("agent") or {}).get("service_tier", "") or "").strip().lower() + if raw in {"fast", "priority", "on"}: + return "priority" + return None + + +def _cfg_max_turns(cfg: dict[str, Any], default: int = 90) -> int: + try: + env_max = int(os.environ.get("HERMES_BRIDGE_MAX_TURNS", "") or 0) + if env_max > 0: + return env_max + except ValueError: + pass + agent_cfg = cfg.get("agent") or {} + try: + return int(agent_cfg.get("max_turns") or cfg.get("max_turns") or default) + except (TypeError, ValueError): + return default + + +class SessionDbHolder: + def __init__(self) -> None: + self._lock = threading.Lock() + self._db_by_path: dict[str, Any] = {} + self._error: str | None = None + + def get(self, db_path: Path | None = None): + with self._lock: + key = str((db_path or (_base_hermes_home() / "state.db")).resolve()) + if key in self._db_by_path: + return self._db_by_path[key] + _ensure_agent_imports() + try: + from hermes_state import SessionDB + + db = SessionDB(db_path=Path(key)) + self._db_by_path[key] = db + self._error = None + return db + except Exception as exc: + self._error = str(exc) + return None + + @property + def error(self) -> str | None: + return self._error + + def get_for_profile(self, profile: str | None) -> Any: + """Get a SessionDB for the given profile using an explicit DB path.""" + return 
self.get(_profile_home(profile) / "state.db") + + +@dataclass +class RunRecord: + run_id: str + session_id: str + status: str = "running" + started_at: float = field(default_factory=time.time) + ended_at: float | None = None + result: dict[str, Any] | None = None + error: str | None = None + deltas: list[str] = field(default_factory=list) + events: list[dict[str, Any]] = field(default_factory=list) + + +@dataclass +class AgentSession: + session_id: str + agent: Any + history: list[dict[str, Any]] = field(default_factory=list) + config: dict[str, Any] = field(default_factory=dict) + running: bool = False + current_run_id: str | None = None + lock: threading.RLock = field(default_factory=threading.RLock) + created_at: float = field(default_factory=time.time) + last_used_at: float = field(default_factory=time.time) + + +class AgentPool: + def __init__(self) -> None: + self._sessions: dict[str, AgentSession] = {} + self._runs: dict[str, RunRecord] = {} + self._lock = threading.RLock() + self._db = SessionDbHolder() + self._approval_requests: dict[str, queue.Queue[str]] = {} + self._compression_requests: dict[str, queue.Queue[dict[str, Any]]] = {} + + def get_or_create( + self, + session_id: str, + profile: str | None = None, + ) -> AgentSession: + with self._lock: + existing = self._sessions.get(session_id) + if existing is not None: + # If profile changed, destroy old session and recreate + if profile and existing.config.get("profile") != profile: + if not existing.running: + self._destroy_session(session_id) + else: + existing.last_used_at = time.time() + return existing + else: + existing.last_used_at = time.time() + return existing + + _ensure_agent_imports() + from run_agent import AIAgent + + original_home = _apply_profile_env(profile) + try: + cfg = _load_cfg() + resolved_model = _resolve_model(cfg) + runtime = _resolve_runtime(resolved_model) + agent_cfg = cfg.get("agent") or {} + prompt = str(agent_cfg.get("system_prompt", "") or "").strip() or None + + agent 
= AIAgent( + model=resolved_model, + max_iterations=_cfg_max_turns(cfg, 90), + provider=runtime.get("provider"), + base_url=runtime.get("base_url"), + api_key=runtime.get("api_key"), + api_mode=runtime.get("api_mode"), + acp_command=runtime.get("command"), + acp_args=runtime.get("args"), + credential_pool=runtime.get("credential_pool"), + quiet_mode=True, + verbose_logging=False, + reasoning_config=_load_reasoning_config(), + service_tier=_load_service_tier(), + enabled_toolsets=_load_enabled_toolsets(), + platform=_bridge_platform(), + session_id=session_id, + session_db=self._db.get_for_profile(profile), + ephemeral_system_prompt=prompt, + status_callback=self._status_callback(session_id), + thinking_callback=self._text_event_callback(session_id, "thinking.delta"), + reasoning_callback=self._text_event_callback(session_id, "reasoning.delta"), + tool_progress_callback=self._tool_progress_callback(session_id), + tool_start_callback=self._tool_start_callback(session_id), + tool_complete_callback=self._tool_complete_callback(session_id), + ) + agent.compression_enabled = False + self._install_compression_hook(agent, session_id) + + session = AgentSession( + session_id=session_id, + agent=agent, + history=[], + config={ + "requested_session_id": session_id, + "profile": profile or "default", + "model": resolved_model, + "provider": runtime.get("provider"), + "base_url": runtime.get("base_url"), + "api_mode": runtime.get("api_mode"), + "platform": _bridge_platform(), + "resumed": False, + "resumed_message_count": 0, + "db_error": self._db.error, + }, + ) + self._sessions[session_id] = session + return session + finally: + _restore_profile_env(original_home) + + def _install_compression_hook(self, agent: Any, session_id: str) -> None: + original = getattr(agent, "_compress_context", None) + if not callable(original): + return + + def wrapped_compress_context(messages, system_message, **kwargs): + before_count = len(messages) if isinstance(messages, list) else 0 + 
request_id = uuid.uuid4().hex + response_queue: queue.Queue[dict[str, Any]] = queue.Queue(maxsize=1) + with self._lock: + self._compression_requests[request_id] = response_queue + self._append_event(session_id, { + "event": "bridge.compression.requested", + "request_id": request_id, + "message_count": before_count, + "approx_tokens": kwargs.get("approx_tokens"), + "focus_topic": kwargs.get("focus_topic"), + "messages": _jsonable(messages), + }) + try: + response = response_queue.get(timeout=180) + if response.get("error"): + raise RuntimeError(str(response.get("error"))) + compressed_messages = response.get("messages") + if not isinstance(compressed_messages, list): + raise RuntimeError("bridge compression response missing messages") + next_system_message = response.get("system_message", system_message) + self._append_event(session_id, { + "event": "bridge.compression.completed", + "request_id": request_id, + "message_count": before_count, + "result_messages": len(compressed_messages), + "approx_tokens": kwargs.get("approx_tokens"), + "compressed": True, + }) + return compressed_messages, next_system_message + except queue.Empty: + self._append_event(session_id, { + "event": "bridge.compression.failed", + "request_id": request_id, + "message_count": before_count, + "approx_tokens": kwargs.get("approx_tokens"), + "error": "bridge compression timed out", + }) + raise RuntimeError("bridge compression timed out") + except Exception as exc: + self._append_event(session_id, { + "event": "bridge.compression.failed", + "request_id": request_id, + "message_count": before_count, + "approx_tokens": kwargs.get("approx_tokens"), + "error": str(exc), + }) + raise + finally: + with self._lock: + self._compression_requests.pop(request_id, None) + + agent._compress_context = wrapped_compress_context + + def respond_compression( + self, + request_id: str, + messages: list[dict[str, Any]] | None = None, + system_message: str | None = None, + error: str | None = None, + ) -> dict[str, 
Any]: + with self._lock: + response_queue = self._compression_requests.get(request_id) + if response_queue is None: + raise RuntimeError(f"compression request {request_id} not found") + response_queue.put({ + "messages": messages, + "system_message": system_message, + "error": error, + }) + return {"request_id": request_id, "handled": True} + + def _destroy_session(self, session_id: str) -> None: + session = self._sessions.pop(session_id, None) + if session is None: + return + with self._lock: + for rid in list(self._runs): + if self._runs[rid].session_id == session_id: + del self._runs[rid] + + def _append_event(self, session_id: str, event: dict[str, Any]) -> None: + with self._lock: + session = self._sessions.get(session_id) + run_id = session.current_run_id if session else None + if run_id and run_id in self._runs: + self._runs[run_id].events.append(_jsonable(event)) + + def _status_callback(self, session_id: str): + def callback(kind, text=None): + self._append_event(session_id, {"event": "status", "kind": str(kind), "text": None if text is None else str(text)}) + + return callback + + def _text_event_callback(self, session_id: str, event_name: str): + def callback(text): + self._append_event(session_id, {"event": event_name, "text": str(text)}) + + return callback + + def _tool_start_callback(self, session_id: str): + def callback(tool_call_id, function_name, function_args): + self._append_event(session_id, { + "event": "tool.started", + "tool_call_id": str(tool_call_id) if tool_call_id else "", + "tool_name": str(function_name) if function_name else "", + "args": _jsonable(function_args) if function_args else {}, + }) + + return callback + + def _tool_complete_callback(self, session_id: str): + def callback(tool_call_id, function_name, function_args, function_result=None): + result_text = "" if function_result is None else str(function_result) + print( + "[hermes_bridge] tool_complete_callback " + f"session={session_id} tool={function_name} " + 
f"tool_call_id={tool_call_id} result_none={function_result is None} " + f"result_len={len(result_text)}", + file=sys.stderr, + flush=True, + ) + self._append_event(session_id, { + "event": "tool.completed", + "tool_call_id": str(tool_call_id) if tool_call_id else "", + "tool_name": str(function_name) if function_name else "", + "args": _jsonable(function_args) if function_args else {}, + "result": _jsonable(function_result) if function_result is not None else None, + "result_preview": str(function_result)[:500] if function_result else None, + }) + + return callback + + def _tool_progress_callback(self, session_id: str): + def callback(event_type, function_name=None, preview=None, function_args=None, **kwargs): + if event_type in (None, "tool.started", "tool.completed"): + print( + "[hermes_bridge] tool_progress_callback " + f"session={session_id} event={event_type} tool={function_name} " + f"kwargs_keys={sorted(kwargs.keys())} " + f"preview_len={len(str(preview)) if preview is not None else 0}", + file=sys.stderr, + flush=True, + ) + if event_type == "reasoning.available": + self._append_event(session_id, { + "event": "reasoning.available", + "text": str(preview) if preview else "", + }) + return + + if event_type == "_thinking": + text = function_name + if text: + self._append_event(session_id, { + "event": "reasoning.delta", + "text": str(text), + }) + return + + if event_type in (None, "tool.started"): + # AIAgent also calls tool_start_callback with the real tool_call_id. + # Use that event as canonical so resume/replay can match results. + return + + if event_type == "tool.completed": + # AIAgent sends the full function_result to tool_complete_callback. 
+ return + + return callback + + def _step_callback(self, session_id: str): + def callback(step_info=None): + self._append_event(session_id, { + "event": "step", + "step_info": _jsonable(step_info) if step_info else None, + }) + + return callback + + def _stream_delta_callback(self, session_id: str): + def callback(delta=None): + if delta is None: + # Turn boundary signal (tools about to execute) + self._append_event(session_id, { + "event": "turn.boundary", + }) + return + if delta: + self._append_event(session_id, { + "event": "stream.delta", + "delta": str(delta), + }) + + return callback + + def _approval_callback(self, session_id: str): + def callback(command: str, description: str, *, allow_permanent: bool = True) -> str: + approval_id = uuid.uuid4().hex + response_queue: queue.Queue[str] = queue.Queue(maxsize=1) + with self._lock: + self._approval_requests[approval_id] = response_queue + choices = ["once", "session", "always", "deny"] if allow_permanent else ["once", "session", "deny"] + self._append_event(session_id, { + "event": "approval.requested", + "approval_id": approval_id, + "command": str(command or ""), + "description": str(description or ""), + "choices": choices, + "allow_permanent": bool(allow_permanent), + "timeout_ms": 60_000, + }) + try: + choice = response_queue.get(timeout=60) + except queue.Empty: + choice = "deny" + finally: + with self._lock: + self._approval_requests.pop(approval_id, None) + self._append_event(session_id, { + "event": "approval.resolved", + "approval_id": approval_id, + "choice": choice, + }) + return choice + + return callback + + def _prepersist_user_message( + self, + session: AgentSession, + message: Any, + conversation_history: list[dict[str, Any]] | None, + profile: str | None, + ) -> bool: + user_content = str(message) if not isinstance(message, dict) else str(message.get("content", message)) + if not user_content.strip(): + return False + + db = self._db.get_for_profile(profile) + if db is None: + return False 
+ + try: + if hasattr(db, "create_session"): + db.create_session( + session_id=session.session_id, + source=_bridge_platform(), + model=session.config.get("model"), + ) + + if hasattr(db, "get_messages"): + messages = db.get_messages(session.session_id) + if messages: + last = messages[-1] + if last.get("role") == "user" and last.get("content") == user_content: + return False + + db.append_message( + session_id=session.session_id, + role="user", + content=user_content, + ) + + # AIAgent will build messages as conversation_history + current user. + # Since the current user was pre-persisted above, advance its flush + # cursor so the normal end-of-turn flush only writes assistant/tool + # messages for this turn. + history_len = len(conversation_history) if conversation_history else 0 + try: + session.agent._last_flushed_db_idx = max( + int(getattr(session.agent, "_last_flushed_db_idx", 0) or 0), + history_len + 1, + ) + except Exception: + pass + return True + except Exception: + return False + + def start_chat( + self, + session_id: str, + message: Any, + instructions: str | None = None, + conversation_history: list[dict[str, Any]] | None = None, + profile: str | None = None, + ) -> RunRecord: + session = self.get_or_create(session_id, profile=profile) + with session.lock: + if session.running: + raise RuntimeError(f"session {session_id} is already running") + run_id = uuid.uuid4().hex + record = RunRecord(run_id=run_id, session_id=session_id) + with self._lock: + self._runs[run_id] = record + session.running = True + session.current_run_id = run_id + session.last_used_at = time.time() + + thread = threading.Thread( + target=self._run_chat, + args=(session, record, message, instructions, conversation_history, profile), + daemon=True, + name=f"hermes-bridge-run-{run_id[:8]}", + ) + thread.start() + return record + + def _run_chat(self, session: AgentSession, record: RunRecord, message: Any, instructions: str | None = None, conversation_history: list[dict[str, Any]] | 
None = None, profile: str | None = None) -> None: + def stream_callback(delta: str) -> None: + with self._lock: + record.deltas.append(str(delta)) + + try: + previous_approval_callback = None + previous_exec_ask = os.environ.get("HERMES_EXEC_ASK") + approval_session_token = None + try: + from tools.terminal_tool import _get_approval_callback, set_approval_callback + from tools.approval import set_current_session_key + + previous_approval_callback = _get_approval_callback() + set_approval_callback(self._approval_callback(session.session_id)) + approval_session_token = set_current_session_key(session.session_id) + os.environ["HERMES_EXEC_ASK"] = "1" + except Exception: + previous_approval_callback = None + self._prepersist_user_message(session, message, conversation_history, profile) + kwargs: dict[str, Any] = dict( + task_id=session.session_id, + stream_callback=stream_callback, + ) + if instructions: + kwargs["system_message"] = instructions + if conversation_history is not None: + kwargs["conversation_history"] = conversation_history + result = session.agent.run_conversation( + message, + **kwargs, + ) + result = _jsonable(result if isinstance(result, dict) else {"value": result}) + with session.lock: + if isinstance(result.get("messages"), list): + session.history = result["messages"] + record.status = "interrupted" if result.get("interrupted") else "complete" + record.result = result + record.ended_at = time.time() + session.running = False + session.current_run_id = None + session.last_used_at = time.time() + except Exception as exc: + with session.lock: + record.status = "error" + record.error = str(exc) + record.result = {"error": str(exc), "traceback": traceback.format_exc()} + record.ended_at = time.time() + session.running = False + session.current_run_id = None + session.last_used_at = time.time() + finally: + try: + from tools.terminal_tool import set_approval_callback + + set_approval_callback(previous_approval_callback) + except Exception: + pass + if 
approval_session_token is not None: + try: + from tools.approval import reset_current_session_key + + reset_current_session_key(approval_session_token) + except Exception: + pass + if previous_exec_ask is None: + os.environ.pop("HERMES_EXEC_ASK", None) + else: + os.environ["HERMES_EXEC_ASK"] = previous_exec_ask + + def interrupt(self, session_id: str, message: str | None = None) -> dict[str, Any]: + with self._lock: + session = self._sessions.get(session_id) + if session is None: + raise KeyError(f"unknown session: {session_id}") + if not hasattr(session.agent, "interrupt"): + raise RuntimeError("agent does not support interrupt") + session.agent.interrupt(message) + return {"status": "interrupted", "session_id": session_id} + + def steer(self, session_id: str, text: str) -> dict[str, Any]: + with self._lock: + session = self._sessions.get(session_id) + if session is None: + raise KeyError(f"unknown session: {session_id}") + if not hasattr(session.agent, "steer"): + raise RuntimeError("agent does not support steer") + accepted = bool(session.agent.steer(text)) + return {"status": "queued" if accepted else "rejected", "accepted": accepted, "text": text} + + def respond_approval(self, approval_id: str, choice: str) -> dict[str, Any]: + cleaned = str(choice or "deny").strip().lower() + if cleaned not in {"once", "session", "always", "deny"}: + cleaned = "deny" + with self._lock: + response_queue = self._approval_requests.get(approval_id) + if response_queue is None: + return {"approval_id": approval_id, "resolved": False, "choice": cleaned} + try: + response_queue.put_nowait(cleaned) + except queue.Full: + pass + return {"approval_id": approval_id, "resolved": True, "choice": cleaned} + + def get_history(self, session_id: str) -> dict[str, Any]: + with self._lock: + session = self._sessions.get(session_id) + if session is None: + raise KeyError(f"unknown session: {session_id}") + with session.lock: + return {"session_id": session_id, "history": 
copy.deepcopy(session.history)} + + def get_result(self, run_id: str) -> dict[str, Any]: + with self._lock: + record = self._runs.get(run_id) + if record is None: + raise KeyError(f"unknown run: {run_id}") + return { + "run_id": record.run_id, + "session_id": record.session_id, + "status": record.status, + "started_at": record.started_at, + "ended_at": record.ended_at, + "output": "".join(record.deltas), + "deltas": list(record.deltas), + "events": list(record.events), + "result": record.result, + "error": record.error, + } + + def get_output(self, run_id: str, cursor: int = 0, event_cursor: int = 0) -> dict[str, Any]: + with self._lock: + record = self._runs.get(run_id) + if record is None: + raise KeyError(f"unknown run: {run_id}") + cursor = max(0, int(cursor or 0)) + deltas = list(record.deltas) + next_cursor = len(deltas) + event_cursor = max(0, int(event_cursor or 0)) + events = list(record.events) + new_events = _jsonable(events[event_cursor:]) + next_event_cursor = len(events) + return { + "run_id": record.run_id, + "session_id": record.session_id, + "status": record.status, + "delta": "".join(deltas[cursor:]), + "cursor": next_cursor, + "output": "".join(deltas), + "done": record.status != "running", + "result": record.result if record.status != "running" else None, + "error": record.error, + "events": new_events, + "event_cursor": next_event_cursor, + } + + def destroy(self, session_id: str) -> dict[str, Any]: + with self._lock: + session = self._sessions.pop(session_id, None) + if session is None: + return {"session_id": session_id, "destroyed": False} + if session.running and hasattr(session.agent, "interrupt"): + try: + session.agent.interrupt("Session destroyed") + except Exception: + pass + return {"session_id": session_id, "destroyed": True} + + def destroy_all(self) -> dict[str, Any]: + with self._lock: + ids = list(self._sessions.keys()) + destroyed = [] + for sid in ids: + result = self.destroy(sid) + destroyed.append(result) + return 
{"destroyed": len(destroyed)} + + def status(self, session_id: str) -> dict[str, Any]: + with self._lock: + session = self._sessions.get(session_id) + if session is None: + return { + "session_id": session_id, + "exists": False, + "running": False, + "message_count": 0, + } + with session.lock: + return { + "session_id": session_id, + "exists": True, + "running": session.running, + "current_run_id": session.current_run_id, + "created_at": session.created_at, + "last_used_at": session.last_used_at, + "message_count": len(session.history), + "config": session.config, + } + + def list_sessions(self) -> dict[str, Any]: + with self._lock: + sessions = list(self._sessions.values()) + return { + "sessions": [ + { + "session_id": s.session_id, + "running": s.running, + "current_run_id": s.current_run_id, + "created_at": s.created_at, + "last_used_at": s.last_used_at, + "message_count": len(s.history), + "config": s.config, + } + for s in sessions + ] + } + + +class BridgeServer: + IDLE_TIMEOUT_SECONDS = 30 * 60 # 30 minutes + GC_INTERVAL_SECONDS = 60 # check every minute + + def __init__(self, endpoint: str) -> None: + self.endpoint = endpoint + self.pool = AgentPool() + self._stop = threading.Event() + self._last_gc = time.time() + + def handle(self, req: dict[str, Any]) -> dict[str, Any]: + action = str(req.get("action") or "").strip() + if not action: + raise ValueError("action is required") + + if action == "ping": + return {"pong": True, "time": time.time(), "agent_root": str(_agent_root())} + + if action == "chat": + session_id = str(req.get("session_id") or "").strip() or uuid.uuid4().hex + message = req.get("message", req.get("input", "")) + instructions = req.get("instructions") or req.get("system_message") + conversation_history = req.get("conversation_history") + profile = req.get("profile") + record = self.pool.start_chat(session_id, message, instructions, conversation_history, profile) + if req.get("wait"): + timeout = float(req.get("timeout", 0) or 0) + 
deadline = time.time() + timeout if timeout > 0 else None + while record.status == "running": + if deadline is not None and time.time() >= deadline: + break + time.sleep(0.05) + return self.pool.get_result(record.run_id) + return {"run_id": record.run_id, "session_id": session_id, "status": record.status} + + if action == "get_result": + return self.pool.get_result(str(req.get("run_id") or "")) + + if action == "get_output": + return self.pool.get_output( + str(req.get("run_id") or ""), + int(req.get("cursor") or 0), + int(req.get("event_cursor") or 0), + ) + + if action == "interrupt": + return self.pool.interrupt(str(req.get("session_id") or ""), req.get("message")) + + if action == "steer": + text = str(req.get("text") or req.get("message") or "").strip() + if not text: + raise ValueError("text is required") + return self.pool.steer(str(req.get("session_id") or ""), text) + + if action == "approval_respond": + approval_id = str(req.get("approval_id") or "").strip() + if not approval_id: + raise ValueError("approval_id is required") + return self.pool.respond_approval(approval_id, str(req.get("choice") or "deny")) + + if action == "compression_respond": + request_id = str(req.get("request_id") or "").strip() + if not request_id: + raise ValueError("request_id is required") + messages = req.get("messages") + if messages is not None and not isinstance(messages, list): + raise ValueError("messages must be a list") + return self.pool.respond_compression( + request_id, + messages=messages, + system_message=req.get("system_message"), + error=req.get("error"), + ) + + if action == "get_history": + return self.pool.get_history(str(req.get("session_id") or "")) + + if action == "destroy": + return self.pool.destroy(str(req.get("session_id") or "")) + + if action == "destroy_all": + return self.pool.destroy_all() + + if action == "list": + return self.pool.list_sessions() + + if action == "shutdown": + self._stop.set() + return {"status": "shutting_down"} + + raise 
ValueError(f"unknown action: {action}") + + def _make_server_socket(self) -> socket.socket: + if self.endpoint.startswith("ipc://"): + if not hasattr(socket, "AF_UNIX"): + raise RuntimeError("ipc:// endpoints require Unix domain socket support; use tcp://host:port on this platform") + sock_path = Path(self.endpoint.removeprefix("ipc://")) + sock_path.parent.mkdir(parents=True, exist_ok=True) + try: + sock_path.unlink(missing_ok=True) + except OSError: + pass + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + server.bind(str(sock_path)) + return server + + parsed = urlparse(self.endpoint) + if parsed.scheme != "tcp": + raise RuntimeError(f"unsupported endpoint scheme: {self.endpoint}") + host = parsed.hostname or "127.0.0.1" + port = int(parsed.port or 0) + if port <= 0: + raise RuntimeError(f"tcp endpoint requires a port: {self.endpoint}") + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind((host, port)) + return server + + def _read_request(self, conn: socket.socket) -> dict[str, Any]: + chunks: list[bytes] = [] + while True: + chunk = conn.recv(65536) + if not chunk: + break + chunks.append(chunk) + if b"\n" in chunk: + break + if not chunks: + raise RuntimeError("empty request") + line = b"".join(chunks).split(b"\n", 1)[0].strip() + if not line: + raise RuntimeError("empty request") + return json.loads(line.decode("utf-8")) + + def _write_response(self, conn: socket.socket, resp: dict[str, Any]) -> None: + payload = json.dumps(resp, ensure_ascii=False, default=str) + "\n" + conn.sendall(payload.encode("utf-8")) + + def _gc_idle_sessions(self) -> None: + """Destroy sessions idle longer than IDLE_TIMEOUT_SECONDS.""" + now = time.time() + if now - self._last_gc < self.GC_INTERVAL_SECONDS: + return + self._last_gc = now + with self.pool._lock: + idle_ids = [ + sid for sid, s in self.pool._sessions.items() + if not s.running and now - s.last_used_at > 
self.IDLE_TIMEOUT_SECONDS + ] + for sid in idle_ids: + self.pool.destroy(sid) + + def serve_forever(self) -> None: + server = self._make_server_socket() + server.listen(16) + server.settimeout(0.2) + print(json.dumps({"event": "ready", "endpoint": self.endpoint}), flush=True) + + while not self._stop.is_set(): + conn: socket.socket | None = None + try: + try: + conn, _addr = server.accept() + except socket.timeout: + self._gc_idle_sessions() + continue + try: + req = self._read_request(conn) + data = self.handle(req) + resp = {"ok": True, **_jsonable(data)} + except Exception as exc: + resp = { + "ok": False, + "error": str(exc), + "error_type": exc.__class__.__name__, + } + self._write_response(conn, resp) + except KeyboardInterrupt: + break + except Exception as exc: + print(f"[hermes-bridge] server loop error: {exc}", file=sys.stderr, flush=True) + finally: + if conn is not None: + try: + conn.close() + except OSError: + pass + + server.close() + if self.endpoint.startswith("ipc://"): + try: + Path(self.endpoint.removeprefix("ipc://")).unlink(missing_ok=True) + except OSError: + pass + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Hermes AIAgent in-process bridge") + parser.add_argument("--endpoint", default=os.environ.get("HERMES_AGENT_BRIDGE_ENDPOINT", DEFAULT_ENDPOINT)) + parser.add_argument("--agent-root", default=os.environ.get("HERMES_AGENT_ROOT", DEFAULT_AGENT_ROOT)) + parser.add_argument("--hermes-home", default=os.environ.get("HERMES_HOME", DEFAULT_HERMES_HOME)) + args = parser.parse_args(argv) + + _set_path_env(args.agent_root, args.hermes_home) + _ensure_agent_imports() + BridgeServer(args.endpoint).serve_forever() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/packages/server/src/services/hermes/agent-bridge/index.ts b/packages/server/src/services/hermes/agent-bridge/index.ts new file mode 100644 index 0000000..2a1bde2 --- /dev/null +++ 
import { execFileSync, spawn, type ChildProcess } from 'child_process'
import { existsSync, readFileSync } from 'fs'
import { basename, dirname, isAbsolute, join, resolve } from 'path'
import { logger } from '../../logger'
import { detectHermesHome, getHermesBin } from '../hermes-path'
import { DEFAULT_AGENT_BRIDGE_ENDPOINT } from './client'

/** How long to wait for the bridge's `ready` line when no override is configured. */
const DEFAULT_AGENT_BRIDGE_STARTUP_TIMEOUT_MS = 120000

/** Grace period between SIGTERM and SIGKILL when stopping the bridge. */
const AGENT_BRIDGE_STOP_GRACE_MS = 1500

/** Optional overrides for how the Python bridge process is located and started. */
export interface AgentBridgeManagerOptions {
  /** Endpoint passed to the bridge via `--endpoint`. */
  endpoint?: string
  /** Python executable to use instead of auto-detection. */
  python?: string
  /** Directory expected to contain `run_agent.py`. */
  agentRoot?: string
  /** Value exported to the bridge as `HERMES_HOME`. */
  hermesHome?: string
  /** Milliseconds to wait for the bridge's `ready` line. */
  startupTimeoutMs?: number
}

/** Resolved executable plus leading arguments used to launch the bridge script. */
interface BridgeCommand {
  command: string
  argsPrefix: string[]
  agentRoot?: string
  hermesHome: string
}

/**
 * Reads a positive, finite number from an environment variable.
 *
 * Note: despite the name, fractional values are accepted as-is.
 *
 * @returns the parsed number, or `undefined` when unset, empty, non-numeric or <= 0.
 */
function envPositiveInt(name: string): number | undefined {
  const raw = process.env[name]
  if (!raw) return undefined
  const value = Number(raw)
  return Number.isFinite(value) && value > 0 ? value : undefined
}

/** Lists virtualenv Python paths (`venv` then `.venv`) under `agentRoot` for the current platform. */
function pathCandidates(agentRoot?: string): string[] {
  if (!agentRoot) return []
  return process.platform === 'win32'
    ? [
        join(agentRoot, 'venv', 'Scripts', 'python.exe'),
        join(agentRoot, 'venv', 'Scripts', 'python3.exe'),
        join(agentRoot, '.venv', 'Scripts', 'python.exe'),
        join(agentRoot, '.venv', 'Scripts', 'python3.exe'),
      ]
    : [
        join(agentRoot, 'venv', 'bin', 'python3'),
        join(agentRoot, 'venv', 'bin', 'python'),
        join(agentRoot, '.venv', 'bin', 'python3'),
        join(agentRoot, '.venv', 'bin', 'python'),
      ]
}

/**
 * Lists `uv` candidates in priority order: explicit environment overrides,
 * virtualenv-local binaries under `agentRoot`, then a bare `uv` PATH lookup.
 */
function uvCandidates(agentRoot?: string): string[] {
  return [
    process.env.HERMES_AGENT_BRIDGE_UV,
    process.env.UV,
    ...(process.platform === 'win32'
      ? [
          agentRoot ? join(agentRoot, 'venv', 'Scripts', 'uv.exe') : '',
          agentRoot ? join(agentRoot, 'venv', 'Scripts', 'uv.cmd') : '',
          agentRoot ? join(agentRoot, '.venv', 'Scripts', 'uv.exe') : '',
          agentRoot ? join(agentRoot, '.venv', 'Scripts', 'uv.cmd') : '',
        ]
      : [
          agentRoot ? join(agentRoot, 'venv', 'bin', 'uv') : '',
          agentRoot ? join(agentRoot, '.venv', 'bin', 'uv') : '',
        ]),
    'uv',
  ].filter((value): value is string => !!value && value.trim().length > 0)
}

/**
 * Resolves a command to a path.
 *
 * Path-like input (absolute, or containing a separator) is checked with
 * `existsSync`; bare names are looked up with `where.exe` / `which`.
 *
 * @returns the resolved path, or `undefined` when nothing was found.
 */
function resolveExecutable(command: string): string | undefined {
  const trimmed = command.trim()
  if (!trimmed) return undefined
  if (isAbsolute(trimmed) || trimmed.includes('/') || trimmed.includes('\\')) {
    return existsSync(trimmed) ? resolve(trimmed) : undefined
  }
  try {
    const lookup = process.platform === 'win32'
      ? execFileSync('where.exe', [trimmed], { encoding: 'utf-8', windowsHide: true })
      : execFileSync('which', [trimmed], { encoding: 'utf-8' })
    // `where.exe` may print several matches; the first non-empty line wins.
    return lookup.split(/\r?\n/).map(line => line.trim()).find(Boolean)
  } catch {
    return undefined
  }
}

/**
 * Extracts the interpreter named on the shebang line of `file`.
 *
 * `#!/usr/bin/env [-S] [VAR=value ...] python3` is unwrapped: the interpreter
 * is the first token after `env` that is neither an option nor an assignment,
 * resolved through PATH. Without this, `env` itself would be reported as the
 * interpreter.
 *
 * @returns the interpreter path, or `undefined` when the file is unreadable,
 *          has no shebang, or the `env` target cannot be resolved.
 */
function shebangInterpreter(file: string): string | undefined {
  let firstLine: string | undefined
  try {
    firstLine = readFileSync(file, 'utf-8').split(/\r?\n/, 1)[0]
  } catch {
    return undefined
  }
  const tokens = firstLine?.match(/^#!\s*(.+)$/)?.[1]?.trim().split(/\s+/) ?? []
  const interpreter = tokens[0]
  if (!interpreter) return undefined
  if (basename(interpreter) !== 'env') return interpreter

  const target = tokens.slice(1).find(token => !token.startsWith('-') && !token.includes('='))
  return target ? resolveExecutable(target) : undefined
}

/**
 * Guesses the agent root (a directory containing `run_agent.py`) from the
 * location of the `hermes` executable, falling back to the location of the
 * interpreter named in its shebang.
 */
function agentRootFromHermesBin(): string | undefined {
  const hermesBin = resolveExecutable(getHermesBin())
  if (!hermesBin) return undefined

  const hasRunAgent = (candidate: string) => existsSync(join(candidate, 'run_agent.py'))

  const binDir = dirname(hermesBin)
  const root = [
    resolve(binDir, '..'),
    resolve(binDir, '..', '..'),
    resolve(binDir, '..', 'hermes-agent'),
    resolve(binDir, '..', '..', 'hermes-agent'),
  ].find(hasRunAgent)
  if (root) return root

  const python = shebangInterpreter(hermesBin)
  if (!python) return undefined
  const pyDir = dirname(python)
  return [
    resolve(pyDir, '..', '..'),
    resolve(pyDir, '..', '..', 'hermes-agent'),
  ].find(hasRunAgent)
}

/** Returns the interpreter from the `hermes` executable's shebang, if it exists on disk. */
function hermesBinPython(): string | undefined {
  const hermesBin = resolveExecutable(getHermesBin())
  if (!hermesBin) return undefined
  const python = shebangInterpreter(hermesBin)
  return python && existsSync(python) ? python : undefined
}

/**
 * Returns the first usable candidate: bare names are resolved through PATH,
 * path-like entries are accepted when they exist on disk.
 */
function firstExistingExecutable(candidates: string[]): string | undefined {
  for (const candidate of candidates) {
    if (!isAbsolute(candidate) && !candidate.includes('/') && !candidate.includes('\\')) {
      const resolved = resolveExecutable(candidate)
      if (resolved) return resolved
      continue
    }
    try {
      if (existsSync(candidate)) return candidate
    } catch {}
  }
  return undefined
}

/**
 * Picks the first directory containing `run_agent.py` among: the explicit
 * option, `HERMES_AGENT_ROOT`, `<hermesHome>/hermes-agent`, the location
 * derived from the `hermes` executable, the cwd, and `<cwd>/hermes-agent`.
 */
function resolveAgentRoot(explicit?: string, hermesHome = detectHermesHome()): string | undefined {
  const candidates = [
    explicit,
    process.env.HERMES_AGENT_ROOT,
    join(hermesHome, 'hermes-agent'),
    agentRootFromHermesBin(),
    process.cwd(),
    join(process.cwd(), 'hermes-agent'),
  ].filter((value): value is string => !!value && value.trim().length > 0)
  return candidates.find(candidate => existsSync(join(candidate, 'run_agent.py')))
}

/**
 * Chooses how to launch Python, in priority order: explicit option or
 * `HERMES_AGENT_BRIDGE_PYTHON`, the agent's virtualenv, the `hermes` shebang
 * interpreter, `uv run python`, then a system Python.
 */
function bridgeCommand(options: AgentBridgeManagerOptions): BridgeCommand {
  const hermesHome = options.hermesHome || detectHermesHome()
  const agentRoot = resolveAgentRoot(options.agentRoot, hermesHome)
  const explicitPython = options.python || process.env.HERMES_AGENT_BRIDGE_PYTHON
  if (explicitPython) {
    return { command: explicitPython, argsPrefix: [], agentRoot, hermesHome }
  }

  const venvPython = firstExistingExecutable(pathCandidates(agentRoot))
  if (venvPython) {
    return { command: venvPython, argsPrefix: [], agentRoot, hermesHome }
  }

  // hermesBinPython() already verifies that the interpreter exists.
  const shebangPython = hermesBinPython()
  if (shebangPython) {
    return { command: shebangPython, argsPrefix: [], agentRoot, hermesHome }
  }

  const uv = firstExistingExecutable(uvCandidates(agentRoot))
  if (uv) {
    const prefix = ['run']
    if (agentRoot) prefix.push('--project', agentRoot)
    prefix.push('python')
    return { command: uv, argsPrefix: prefix, agentRoot, hermesHome }
  }

  const fallback = firstExistingExecutable([
    process.env.PYTHON || '',
    ...(process.platform === 'win32' ? ['py', 'python', 'python3'] : ['python3', 'python']),
  ]) || (process.platform === 'win32' ? 'python' : 'python3')
  return { command: fallback, argsPrefix: [], agentRoot, hermesHome }
}

/**
 * Locates `hermes_bridge.py` next to the built server or in the source tree.
 *
 * @throws Error listing every path tried when the script is not found.
 */
function bridgeScriptPath(): string {
  const candidates = [
    // Built server: dist/server/index.js -> dist/server/agent-bridge/hermes_bridge.py
    resolve(__dirname, 'agent-bridge', 'hermes_bridge.py'),
    // ts-node/dev source tree.
    resolve(__dirname, 'services/hermes/agent-bridge/hermes_bridge.py'),
    resolve(process.cwd(), 'packages/server/src/services/hermes/agent-bridge/hermes_bridge.py'),
  ]
  const found = candidates.find(candidate => existsSync(candidate))
  if (!found) {
    throw new Error(`agent bridge Python script not found. Tried: ${candidates.join(', ')}`)
  }
  return found
}

/** True while the process has neither exited normally nor been terminated by a signal. */
function isAlive(child: ChildProcess): boolean {
  return child.exitCode === null && child.signalCode === null
}

/**
 * Owns the lifecycle of the Python agent bridge subprocess: spawns it, waits
 * for its JSON `{"event":"ready"}` line on stdout, and stops it on request.
 */
export class AgentBridgeManager {
  readonly endpoint: string
  private readonly options: AgentBridgeManagerOptions
  private child: ChildProcess | null = null
  private starting: Promise<void> | null = null
  private ready = false

  constructor(options: AgentBridgeManagerOptions = {}) {
    this.options = options
    this.endpoint = options.endpoint || process.env.HERMES_AGENT_BRIDGE_ENDPOINT || DEFAULT_AGENT_BRIDGE_ENDPOINT
  }

  /** True once the bridge has reported ready and no kill signal has been sent to it. */
  get running(): boolean {
    return !!this.child && !this.child.killed && this.ready
  }

  /** Starts the bridge if needed; concurrent callers share the same in-flight start. */
  async start(): Promise<void> {
    if (this.running) return
    if (this.starting) return this.starting
    this.starting = this.startProcess()
    try {
      await this.starting
    } finally {
      this.starting = null
    }
  }

  /**
   * Spawns the bridge and resolves when its `ready` line is seen.
   *
   * @throws Error when the process fails to spawn, exits before becoming
   *         ready, or stays silent past the startup timeout (in which case
   *         the process is also terminated).
   */
  private async startProcess(): Promise<void> {
    const script = bridgeScriptPath()
    const command = bridgeCommand(this.options)
    const args = [...command.argsPrefix, script, '--endpoint', this.endpoint]
    const agentRoot = command.agentRoot
    const hermesHome = command.hermesHome
    if (agentRoot) args.push('--agent-root', agentRoot)
    if (hermesHome) args.push('--hermes-home', hermesHome)

    const env = {
      ...process.env,
      HERMES_AGENT_BRIDGE_ENDPOINT: this.endpoint,
      HERMES_HOME: hermesHome,
      ...(agentRoot ? { HERMES_AGENT_ROOT: agentRoot } : {}),
    }

    logger.info('[agent-bridge] starting: %s %s', command.command, args.join(' '))
    const child = spawn(command.command, args, {
      env,
      cwd: process.cwd(),
      stdio: ['ignore', 'pipe', 'pipe'],
      windowsHide: true,
    })
    this.child = child
    this.ready = false

    child.once('exit', (code, signal) => {
      logger.warn('[agent-bridge] exited code=%s signal=%s', code, signal)
      this.ready = false
      if (this.child === child) this.child = null
    })

    // Keep one listener for the whole lifetime of the process: an 'error'
    // event with no listener would be thrown as an unhandled error.
    child.on('error', err => {
      logger.warn('[agent-bridge] process error: %s', err.message)
      // No pid means the process was never spawned, so 'exit' may never fire.
      if (child.pid === undefined && this.child === child) {
        this.ready = false
        this.child = null
      }
    })

    child.stderr?.on('data', chunk => {
      const text = String(chunk).trim()
      if (text) logger.warn('[agent-bridge] %s', text)
    })

    await new Promise<void>((resolveReady, rejectReady) => {
      let buffered = ''
      let readyResolved = false
      const startupTimeoutMs = this.options.startupTimeoutMs
        ?? envPositiveInt('HERMES_AGENT_BRIDGE_STARTUP_TIMEOUT_MS')
        ?? DEFAULT_AGENT_BRIDGE_STARTUP_TIMEOUT_MS

      const detachStartupListeners = () => {
        clearTimeout(timeout)
        child.off('exit', onExitBeforeReady)
        child.off('error', onError)
      }

      const failStartup = (err: Error) => {
        detachStartupListeners()
        child.stdout?.off('data', onStdout)
        rejectReady(err)
      }

      const onError = (err: Error) => failStartup(err)

      const onExitBeforeReady = (code: number | null, signal: NodeJS.Signals | null) => {
        failStartup(new Error(`agent bridge exited before ready code=${code} signal=${signal}`))
      }

      const onStdout = (chunk: Buffer) => {
        buffered += chunk.toString('utf8')
        for (;;) {
          const newline = buffered.indexOf('\n')
          if (newline < 0) break
          const line = buffered.slice(0, newline).trim()
          buffered = buffered.slice(newline + 1)
          if (!line) continue
          logger.info('[agent-bridge] %s', line)
          if (readyResolved) continue
          try {
            const parsed = JSON.parse(line)
            if (parsed?.event === 'ready') {
              this.ready = true
              readyResolved = true
              detachStartupListeners()
              resolveReady()
              // Keep looping so lines already buffered are logged now rather
              // than on the next chunk.
            }
          } catch {}
        }
      }

      const timeout = setTimeout(() => {
        failStartup(new Error(`agent bridge did not become ready within ${startupTimeoutMs}ms`))
        // Do not leave a half-started bridge behind: a retry would otherwise
        // spawn a second process on the same endpoint.
        if (isAlive(child)) child.kill()
      }, startupTimeoutMs)

      child.once('error', onError)
      child.once('exit', onExitBeforeReady)
      child.stdout?.on('data', onStdout)
    })

    logger.info('[agent-bridge] ready at %s', this.endpoint)
  }

  /**
   * Stops the bridge: sends SIGTERM, then SIGKILL if it has not exited within
   * the grace period. Resolves once the process exits or the grace period ends.
   */
  async stop(): Promise<void> {
    const child = this.child
    if (!child) return
    this.ready = false
    this.child = null
    if (!isAlive(child)) return

    await new Promise<void>(resolveStop => {
      const forceKill = setTimeout(() => {
        // `child.killed` only records that a signal was sent, so it is already
        // true after SIGTERM; check the actual exit state before escalating.
        if (isAlive(child)) child.kill('SIGKILL')
        resolveStop()
      }, AGENT_BRIDGE_STOP_GRACE_MS)
      child.once('exit', () => {
        clearTimeout(forceKill)
        resolveStop()
      })
      if (!child.killed) {
        child.kill('SIGTERM')
      }
    })
  }
}

let singleton: AgentBridgeManager | null = null

/** Returns the process-wide manager, creating it with default options on first use. */
export function getAgentBridgeManager(): AgentBridgeManager {
  if (!singleton) singleton = new AgentBridgeManager()
  return singleton
}

/** Starts the process-wide manager and returns it once the bridge is ready. */
export async function startAgentBridgeManager(): Promise<AgentBridgeManager> {
  const manager = getAgentBridgeManager()
  await manager.start()
  return manager
}
ChatRunSource } interface SessionState { @@ -179,6 +183,17 @@ interface SessionState { isAborting?: boolean queue: QueuedRun[] responseRun?: ResponseRunState + source?: ChatRunSource + bridgePendingAssistantContent?: string + bridgePendingReasoningContent?: string + bridgeOutput?: string + bridgeToolCounter?: number + bridgePendingTools?: Array<{ + id: string + name: string + arguments: string + startedAt: number + }> } interface ResponseRunState { @@ -188,11 +203,14 @@ interface ResponseRunState { toolCalls: Map } +type ChatRunSource = 'api_server' | 'cli' + // --- ChatRunSocket --- export class ChatRunSocket { private nsp: ReturnType private gatewayManager: any + private bridge = new AgentBridgeClient() /** sessionId → session state (messages, working status, events, run tracking) */ private sessionMap = new Map() @@ -224,7 +242,8 @@ export class ChatRunSocket { // --- Connection handler --- private onConnection(socket: Socket) { - const profile = (socket.handshake.query?.profile as string) || 'default' + const socketProfile = (socket.handshake.query?.profile as string) || 'default' + const currentProfile = () => getActiveProfileName() || socketProfile || 'default' socket.on('run', async (data: { input: string | ContentBlock[] @@ -232,6 +251,7 @@ export class ChatRunSocket { model?: string instructions?: string queue_id?: string + source?: string }) => { if (data.session_id) { const state = this.getOrCreateSession(data.session_id) @@ -241,7 +261,8 @@ export class ChatRunSocket { input: data.input, model: data.model, instructions: data.instructions, - profile, + profile: currentProfile(), + source: this.resolveRunSource(data.source, data.session_id), }) this.nsp.to(`session:${data.session_id}`).emit('run.queued', { event: 'run.queued', @@ -252,7 +273,7 @@ export class ChatRunSocket { return } } - await this.handleRun(socket, data, profile) + await this.handleRun(socket, data, currentProfile()) }) socket.on('cancel_queued_run', (data: { session_id?: string; 
queue_id?: string }) => { @@ -284,6 +305,27 @@ export class ChatRunSocket { void this.handleAbort(socket, data.session_id) } }) + + socket.on('approval.respond', async (data: { session_id?: string; approval_id?: string; choice?: string }) => { + if (!data.session_id || !data.approval_id) return + try { + const result = await this.bridge.approvalRespond(data.approval_id, data.choice || 'deny') + this.emitToSession(socket, data.session_id, 'approval.resolved', { + event: 'approval.resolved', + approval_id: data.approval_id, + choice: data.choice || 'deny', + resolved: Boolean(result.resolved), + }) + } catch (err) { + this.emitToSession(socket, data.session_id, 'approval.resolved', { + event: 'approval.resolved', + approval_id: data.approval_id, + choice: data.choice || 'deny', + resolved: false, + error: err instanceof Error ? err.message : String(err), + }) + } + }) } private handleMessage(messages: SessionMessage[], sid: string): any[] { let _messages = [] @@ -487,11 +529,30 @@ export class ChatRunSocket { private async handleRun( socket: Socket, - data: { input: string | ContentBlock[]; session_id?: string; model?: string; instructions?: string }, + data: { input: string | ContentBlock[]; session_id?: string; model?: string; instructions?: string; source?: string }, profile: string, skipUserMessage = false, ) { const { input, session_id, model, instructions } = data + const source = this.resolveRunSource(data.source, session_id) + + // Build full instructions with system prompt + workspace context (shared by both paths) + let fullInstructions = instructions + ? 
`${getSystemPrompt()}\n${instructions}` + : getSystemPrompt() + if (session_id) { + const sessionRow = getSession(session_id) + if (sessionRow?.workspace) { + const workspaceCtx = `[Current working directory: ${sessionRow.workspace}]` + fullInstructions = `\n${workspaceCtx}\n${fullInstructions}` + } + } + + if (source === 'cli') { + await this.handleBridgeRun(socket, { ...data, instructions: fullInstructions }, profile, skipUserMessage) + return + } + const upstream = this.gatewayManager.getUpstream(profile).replace(/\/$/, '') const apiKey = this.gatewayManager.getApiKey(profile) || undefined @@ -512,6 +573,7 @@ export class ChatRunSocket { } state.isWorking = true state.profile = profile + state.source = 'api_server' if (!skipUserMessage) { // Convert ContentBlock[] to string for storage @@ -529,7 +591,7 @@ export class ChatRunSocket { if (!getSession(session_id)) { const previewText = extractTextForPreview(input) const preview = previewText.replace(/[\r\n]/g, ' ').substring(0, 100) - createSession({ id: session_id, profile, model, title: preview }) + createSession({ id: session_id, profile, source: 'api_server', model, title: preview }) } // Write user message to local DB immediately @@ -554,7 +616,7 @@ export class ChatRunSocket { if (!getSession(session_id)) { const previewText = extractTextForPreview(input) const preview = previewText.replace(/[\r\n]/g, ' ').substring(0, 100) - createSession({ id: session_id, profile, model, title: preview }) + createSession({ id: session_id, profile, source: 'api_server', model, title: preview }) } addMessage({ session_id, @@ -580,316 +642,12 @@ export class ChatRunSocket { // Build upstream request body const body: Record = { input } if (model) body.model = model - if (instructions) { - body.instructions = `${getSystemPrompt()}\n${instructions}` - } else { - body.instructions = getSystemPrompt() - } - // Inject workspace context if set for this session - if (session_id) { - const sessionRow = getSession(session_id) - if 
(sessionRow?.workspace) { - const workspaceCtx = `[Current working directory: ${sessionRow.workspace}]` - body.instructions = body.instructions - ? `\n${workspaceCtx}\n${body.instructions}` - : `\n${workspaceCtx}` - } - } + body.instructions = fullInstructions // Build conversation_history from DB if session_id is provided if (session_id) { - try { - const detail = useLocalSessionStore() - ? getSessionDetail(session_id) - : await getSessionDetailFromDb(session_id) - if (detail?.messages?.length) { - // Filter valid messages - const validMessages = detail.messages.filter(m => - (m.role === 'user' || m.role === 'assistant' || m.role === 'tool') && m.content !== undefined - ) - - // Exclude the last user message (just added in handleRun) - const lastUserMsgIndex = [...validMessages].reverse().findIndex(m => m.role === 'user') - let history: Array<{ - role: string - content: string - tool_calls?: any[] - tool_call_id?: string - name?: string - reasoning_content?: string | null - }> = (lastUserMsgIndex >= 0 - ? 
validMessages.slice(0, validMessages.length - lastUserMsgIndex - 1) - : validMessages - ).map((m, idx, arr) => { - const msg: any = { role: m.role, content: m.content || '' } - if (m.reasoning_content) msg.reasoning_content = m.reasoning_content - if (m.tool_calls?.length) { - // Filter out tool_calls with empty/invalid id and remove internal fields - const cleanedToolCalls = m.tool_calls - .filter((tc: any) => tc.id && tc.id.length > 0) - .map((tc: any) => ({ - id: tc.id, - type: tc.type, - function: tc.function - })) - if (cleanedToolCalls.length > 0) { - msg.tool_calls = cleanedToolCalls - } - } - - // For tool messages, ensure tool_call_id exists - if (m.role === 'tool') { - let callId = m.tool_call_id - if (!callId || callId.length === 0) { - // Try to reconstruct tool_call_id from previous assistant message - const prevMsg = arr[idx - 1] - if (prevMsg?.role === 'assistant' && prevMsg.tool_calls?.length) { - const tc = prevMsg.tool_calls.find((t: any) => t.function?.name === m.tool_name) - if (tc?.id) { - callId = tc.id - } - } - } - // Skip tool message if no valid tool_call_id - if (!callId || callId.length === 0) { - return null - } - msg.tool_call_id = callId - } - - if (m.tool_name) msg.name = m.tool_name - return msg - }) - .filter(m => m !== null) - // Context compression with snapshot awareness - const contextLength = getModelContextLength(profile) - const triggerTokens = Math.floor(contextLength / 2) - const cState = this.getOrCreateSession(session_id) - - // Calculate inputTokens + outputTokens from DB (unified method) - const assembledTokens = await this.calcAndUpdateUsage(session_id, cState, emit) - const totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens - // Step 1: Check existing snapshot — if present, assemble summary + new messages - const snapshot = session_id ? 
getCompressionSnapshot(session_id) : null - if (snapshot) { - const newMessages = history.slice(snapshot.lastMessageIndex + 1) - logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)', - session_id, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens) - // triggerTokens - if (totalTokens <= triggerTokens && newMessages.length <= 150) { - // Under threshold — use assembled context directly, no LLM call needed - history = [ - { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary }, - ...newMessages, - ] - } else { - this.pushState(session_id, 'compression.started', { - event: 'compression.started', - message_count: newMessages.length, - token_count: totalTokens, - }) - emit('compression.started', { - event: 'compression.started', - message_count: newMessages.length, - token_count: totalTokens, - }) - - try { - const result = await compressor.compress( - history, upstream, apiKey, session_id, - ) - const afterTokens = await this.calcAndUpdateUsage(session_id, cState, emit) - this.replaceState(session_id, 'compression.completed', { - event: 'compression.completed', - compressed: result.meta.compressed, - llmCompressed: result.meta.llmCompressed, - totalMessages: result.meta.totalMessages, - resultMessages: result.messages.length, - beforeTokens: totalTokens, - afterTokens: afterTokens.inputTokens + afterTokens.outputTokens, - summaryTokens: result.meta.summaryTokenEstimate, - verbatimCount: result.meta.verbatimCount, - compressedStartIndex: result.meta.compressedStartIndex, - }) - logger.info('[context-compress] AFTER session=%s: %d messages, ~%d tokens (was %d)', session_id, result.messages.length, afterTokens.inputTokens + afterTokens.outputTokens, totalTokens) - - emit('compression.completed', { - event: 'compression.completed', - compressed: result.meta.compressed, - llmCompressed: result.meta.llmCompressed, - totalMessages: result.meta.totalMessages, - resultMessages: 
result.messages.length, - beforeTokens: totalTokens, - afterTokens: afterTokens.inputTokens + afterTokens.outputTokens, - summaryTokens: result.meta.summaryTokenEstimate, - verbatimCount: result.meta.verbatimCount, - compressedStartIndex: result.meta.compressedStartIndex, - }) - - history = result.messages.map(m => { - const msg: any = { - role: m.role, - content: m.content, - tool_call_id: m.tool_call_id, - name: m.name, - } - if (m.reasoning_content) msg.reasoning_content = m.reasoning_content - // Filter tool_calls if present, remove internal fields - if (m.tool_calls?.length) { - const cleanedToolCalls = m.tool_calls - .filter((tc: any) => tc.id && tc.id.length > 0) - .map((tc: any) => ({ - id: tc.id, - type: tc.type, - function: tc.function - })) - if (cleanedToolCalls.length > 0) { - msg.tool_calls = cleanedToolCalls - } - } - return msg - }) - // Update usage from DB (snapshot now updated by compressor) - await this.calcAndUpdateUsage(session_id, cState, emit) - } catch (err: any) { - this.replaceState(session_id, 'compression.completed', { - event: 'compression.completed', - compressed: false, - totalMessages: newMessages.length, - resultMessages: newMessages.length, - beforeTokens: totalTokens, - afterTokens: totalTokens, - summaryTokens: 0, - verbatimCount: newMessages.length, - compressedStartIndex: -1, - error: err.message, - }) - logger.warn(err, '[chat-run-socket] compression failed for session %s, using assembled context', session_id) - emit('compression.completed', { - event: 'compression.completed', - compressed: false, - totalMessages: newMessages.length, - resultMessages: newMessages.length, - beforeTokens: totalTokens, - afterTokens: totalTokens, - summaryTokens: 0, - verbatimCount: newMessages.length, - compressedStartIndex: -1, - error: err.message, - }) - } - } - } else if (history.length > 4) { - // No snapshot — check if raw history exceeds threshold - - if (totalTokens <= triggerTokens && history.length <= 150) { - // Under threshold — use 
raw history as-is - logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', session_id, history.length, totalTokens) - } else { - // Over threshold — full LLM compression - logger.info('[context-compress] BEFORE session=%s: %d messages, ~%d tokens (threshold %d)', session_id, history.length, totalTokens, triggerTokens) - - this.pushState(session_id, 'compression.started', { - event: 'compression.started', - message_count: history.length, - token_count: totalTokens, - }) - emit('compression.started', { - event: 'compression.started', - message_count: history.length, - token_count: totalTokens, - }) - - try { - const result = await compressor.compress( - history, upstream, apiKey, session_id, - ) - const cState = this.getOrCreateSession(session_id) - const afterTokens = await this.calcAndUpdateUsage(session_id, cState, emit) - this.replaceState(session_id, 'compression.completed', { - event: 'compression.completed', - compressed: result.meta.compressed, - llmCompressed: result.meta.llmCompressed, - totalMessages: result.meta.totalMessages, - resultMessages: result.messages.length, - beforeTokens: totalTokens, - afterTokens: afterTokens.inputTokens + afterTokens.outputTokens, - summaryTokens: result.meta.summaryTokenEstimate, - verbatimCount: result.meta.verbatimCount, - compressedStartIndex: result.meta.compressedStartIndex, - }) - logger.info('[context-compress] AFTER session=%s: %d messages, ~%d tokens (was %d)', session_id, result.messages.length, afterTokens.inputTokens + afterTokens.outputTokens, totalTokens) - - emit('compression.completed', { - event: 'compression.completed', - compressed: result.meta.compressed, - llmCompressed: result.meta.llmCompressed, - totalMessages: result.meta.totalMessages, - resultMessages: result.messages.length, - beforeTokens: totalTokens, - afterTokens: afterTokens.inputTokens + afterTokens.outputTokens, - summaryTokens: result.meta.summaryTokenEstimate, - verbatimCount: 
result.meta.verbatimCount, - compressedStartIndex: result.meta.compressedStartIndex, - }) - - history = result.messages.map(m => { - const msg: any = { - role: m.role, - content: m.content, - tool_call_id: m.tool_call_id, - name: m.name, - } - if (m.reasoning_content) msg.reasoning_content = m.reasoning_content - // Filter tool_calls if present, remove internal fields - if (m.tool_calls?.length) { - const cleanedToolCalls = m.tool_calls - .filter((tc: any) => tc.id && tc.id.length > 0) - .map((tc: any) => ({ - id: tc.id, - type: tc.type, - function: tc.function - })) - if (cleanedToolCalls.length > 0) { - msg.tool_calls = cleanedToolCalls - } - } - return msg - }) - await this.calcAndUpdateUsage(session_id, cState, emit) - } catch (err: any) { - this.replaceState(session_id, 'compression.completed', { - event: 'compression.completed', - compressed: false, - totalMessages: history.length, - resultMessages: history.length, - beforeTokens: totalTokens, - afterTokens: totalTokens, - summaryTokens: 0, - verbatimCount: history.length, - compressedStartIndex: -1, - error: err.message, - }) - logger.warn(err, '[chat-run-socket] compression failed for session %s, using raw history', session_id) - emit('compression.completed', { - event: 'compression.completed', - compressed: false, - totalMessages: history.length, - resultMessages: history.length, - beforeTokens: totalTokens, - afterTokens: totalTokens, - summaryTokens: 0, - verbatimCount: history.length, - compressedStartIndex: -1, - error: err.message, - }) - } - } - } - - body.conversation_history = history - } - } catch (err) { - logger.warn(err, '[chat-run-socket] failed to load conversation history for session %s', session_id) + const compressed = await this.buildCompressedHistory(session_id, profile, upstream, apiKey, emit) + if (compressed.length > 0) { + body.conversation_history = compressed } } @@ -1032,6 +790,747 @@ export class ChatRunSocket { } } + /** + * Load conversation history from DB with full message 
structure (user/assistant/tool), + * then apply context compression (snapshot-aware + LLM) identically for both + * api_server and CLI bridge runs. + */ + private async buildCompressedHistory( + sessionId: string, + profile: string, + upstream: string, + apiKey: string | undefined, + emit: (event: string, payload: any) => void, + ): Promise { + try { + const detail = useLocalSessionStore() + ? getSessionDetail(sessionId) + : await getSessionDetailFromDb(sessionId) + if (!detail?.messages?.length) return [] + + const validMessages = detail.messages.filter(m => + (m.role === 'user' || m.role === 'assistant' || m.role === 'tool') && m.content !== undefined, + ) + + // Exclude the last user message (just added by the caller) + const lastUserMsgIndex = [...validMessages].reverse().findIndex(m => m.role === 'user') + let history: ChatMessage[] = (lastUserMsgIndex >= 0 + ? validMessages.slice(0, validMessages.length - lastUserMsgIndex - 1) + : validMessages + ).map((m, idx, arr) => { + const msg: any = { role: m.role, content: m.content || '' } + if (m.reasoning_content) msg.reasoning_content = m.reasoning_content + if (m.tool_calls?.length) { + const cleanedToolCalls = m.tool_calls + .filter((tc: any) => tc.id && tc.id.length > 0) + .map((tc: any) => ({ id: tc.id, type: tc.type, function: tc.function })) + if (cleanedToolCalls.length > 0) msg.tool_calls = cleanedToolCalls + } + if (m.role === 'tool') { + let callId = m.tool_call_id + if (!callId || callId.length === 0) { + const prevMsg = arr[idx - 1] + if (prevMsg?.role === 'assistant' && prevMsg.tool_calls?.length) { + const tc = prevMsg.tool_calls.find((t: any) => t.function?.name === m.tool_name) + if (tc?.id) callId = tc.id + } + } + if (!callId || callId.length === 0) return null + msg.tool_call_id = callId + } + if (m.tool_name) msg.name = m.tool_name + return msg + }).filter((m): m is ChatMessage => m !== null) + + if (history.length === 0) return [] + + // Context compression with snapshot awareness + const 
contextLength = getModelContextLength(profile) + const triggerTokens = Math.floor(contextLength / 2) + const cState = this.getOrCreateSession(sessionId) + const assembledTokens = await this.calcAndUpdateUsage(sessionId, cState, emit) + const totalTokens = assembledTokens.inputTokens + assembledTokens.outputTokens + + const snapshot = getCompressionSnapshot(sessionId) + + if (snapshot) { + const newMessages = history.slice(snapshot.lastMessageIndex + 1) + logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)', + sessionId, snapshot.lastMessageIndex, newMessages.length, totalTokens, triggerTokens) + if (totalTokens <= triggerTokens && newMessages.length <= 150) { + history = [ + { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary }, + ...newMessages, + ] as ChatMessage[] + } else { + history = await this.compressHistory(history, newMessages, sessionId, upstream, apiKey, cState, totalTokens, emit) + } + } else if (history.length > 4) { + if (totalTokens <= triggerTokens && history.length <= 150) { + logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, totalTokens) + } else { + history = await this.compressHistory(history, null, sessionId, upstream, apiKey, cState, totalTokens, emit) + } + } + + return history + } catch (err) { + logger.warn(err, '[chat-run-socket] failed to build compressed history for session %s', sessionId) + return [] + } + } + + private async compressHistory( + history: ChatMessage[], + newMessagesOnly: ChatMessage[] | null, + sessionId: string, + upstream: string, + apiKey: string | undefined, + cState: SessionState, + totalTokens: number, + emit: (event: string, payload: any) => void, + ): Promise { + const msgCount = newMessagesOnly ? 
newMessagesOnly.length : history.length + this.pushState(sessionId, 'compression.started', { + event: 'compression.started', message_count: msgCount, token_count: totalTokens, + }) + emit('compression.started', { + event: 'compression.started', message_count: msgCount, token_count: totalTokens, + }) + + try { + const result = await compressor.compress(history, upstream, apiKey, sessionId) + const afterTokens = await this.calcAndUpdateUsage(sessionId, cState, emit) + const compressedMeta = { + event: 'compression.completed' as const, + compressed: result.meta.compressed, + llmCompressed: result.meta.llmCompressed, + totalMessages: result.meta.totalMessages, + resultMessages: result.messages.length, + beforeTokens: totalTokens, + afterTokens: afterTokens.inputTokens + afterTokens.outputTokens, + summaryTokens: result.meta.summaryTokenEstimate, + verbatimCount: result.meta.verbatimCount, + compressedStartIndex: result.meta.compressedStartIndex, + } + this.replaceState(sessionId, 'compression.completed', compressedMeta) + logger.info('[context-compress] AFTER session=%s: %d messages, ~%d tokens (was %d)', + sessionId, result.messages.length, afterTokens.inputTokens + afterTokens.outputTokens, totalTokens) + emit('compression.completed', compressedMeta) + + const compressed = result.messages.map(m => { + const msg: any = { role: m.role, content: m.content, tool_call_id: m.tool_call_id, name: m.name } + if (m.reasoning_content) msg.reasoning_content = m.reasoning_content + if (m.tool_calls?.length) { + const cleanedToolCalls = m.tool_calls + .filter((tc: any) => tc.id && tc.id.length > 0) + .map((tc: any) => ({ id: tc.id, type: tc.type, function: tc.function })) + if (cleanedToolCalls.length > 0) msg.tool_calls = cleanedToolCalls + } + return msg + }) + await this.calcAndUpdateUsage(sessionId, cState, emit) + return compressed + } catch (err: any) { + const failedMeta = { + event: 'compression.completed' as const, + compressed: false, + totalMessages: msgCount, + 
resultMessages: msgCount, + beforeTokens: totalTokens, + afterTokens: totalTokens, + summaryTokens: 0, + verbatimCount: msgCount, + compressedStartIndex: -1, + error: err.message, + } + this.replaceState(sessionId, 'compression.completed', failedMeta) + logger.warn(err, '[chat-run-socket] compression failed for session %s, using assembled context', sessionId) + emit('compression.completed', failedMeta) + return history + } + } + + private async forceCompressBridgeHistory( + sessionId: string, + profile: string, + messages: ChatMessage[], + ): Promise { + const history = messages + .filter(m => m && (m.role === 'user' || m.role === 'assistant' || m.role === 'tool' || m.role === 'system')) + .map(m => { + const msg: any = { role: m.role, content: m.content || '' } + if (m.reasoning_content) msg.reasoning_content = m.reasoning_content + if (m.tool_calls?.length) { + const cleanedToolCalls = m.tool_calls + .filter((tc: any) => tc.id && tc.id.length > 0) + .map((tc: any) => ({ id: tc.id, type: tc.type, function: tc.function })) + if (cleanedToolCalls.length > 0) msg.tool_calls = cleanedToolCalls + } + if (m.tool_call_id) msg.tool_call_id = m.tool_call_id + if (m.name) msg.name = m.name + return msg as ChatMessage + }) + + if (history.length === 0) return [] + + const upstream = this.gatewayManager.getUpstream(profile).replace(/\/$/, '') + const apiKey = this.gatewayManager.getApiKey(profile) || undefined + const totalTokens = countTokens(JSON.stringify(history)) + logger.info('[context-compress] bridge forced compression session=%s: %d messages, ~%d tokens', + sessionId, history.length, totalTokens) + + const result = await compressor.compress(history, upstream, apiKey, undefined, profile) + logger.info('[context-compress] bridge forced compression done session=%s: %d -> %d messages', + sessionId, history.length, result.messages.length) + + return result.messages.map(m => { + const msg: any = { role: m.role, content: m.content } + if (m.reasoning_content) 
msg.reasoning_content = m.reasoning_content + if (m.tool_calls?.length) { + const cleanedToolCalls = m.tool_calls + .filter((tc: any) => tc.id && tc.id.length > 0) + .map((tc: any) => ({ id: tc.id, type: tc.type, function: tc.function })) + if (cleanedToolCalls.length > 0) msg.tool_calls = cleanedToolCalls + } + if (m.tool_call_id) msg.tool_call_id = m.tool_call_id + if (m.name) msg.name = m.name + return msg + }) + } + + private resolveRunSource(source?: string, sessionId?: string): ChatRunSource { + const normalized = String(source || '').trim() + if (normalized === 'cli') return 'cli' + if (normalized === 'api_server') return 'api_server' + if (sessionId) { + const existing = getSession(sessionId) + if (existing?.source === 'cli') return 'cli' + } + return 'api_server' + } + + private async handleBridgeRun( + socket: Socket, + data: { input: string | ContentBlock[]; session_id?: string; model?: string; instructions?: string; source?: string }, + profile: string, + _skipUserMessage = false, + ) { + const { input, session_id, model, instructions } = data + if (!session_id) { + socket.emit('run.failed', { event: 'run.failed', error: 'session_id is required for cli source' }) + return + } + + const runMarker = `cli_run_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}` + const now = Math.floor(Date.now() / 1000) + let state = this.sessionMap.get(session_id) + if (!state) { + state = getSession(session_id) + ? 
await this.loadSessionStateFromDb(session_id) + : { messages: [], isWorking: false, events: [], queue: [] } + this.sessionMap.set(session_id, state) + } + + state.isWorking = true + state.isAborting = false + state.profile = profile + state.source = 'cli' + state.runId = undefined + state.abortController = undefined + state.bridgeOutput = '' + state.bridgePendingAssistantContent = '' + state.bridgePendingReasoningContent = '' + state.bridgeToolCounter = 0 + state.bridgePendingTools = [] + state.responseRun = undefined + + const inputStr = contentBlocksToString(input) + state.messages.push({ + id: state.messages.length + 1, + session_id, + runMarker, + role: 'user', + content: inputStr, + timestamp: now, + }) + + if (!getSession(session_id)) { + const previewText = extractTextForPreview(input) + const preview = previewText.replace(/[\r\n]/g, ' ').substring(0, 100) + createSession({ id: session_id, profile, source: 'cli', model, title: preview }) + } + addMessage({ + session_id, + role: 'user', + content: inputStr, + timestamp: now, + }) + + socket.join(`session:${session_id}`) + const emit = (event: string, payload: any) => this.emitToSession(socket, session_id, event, payload) + + const history = await this.buildCompressedHistory( + session_id, profile, + this.gatewayManager.getUpstream(profile).replace(/\/$/, ''), + this.gatewayManager.getApiKey(profile) || undefined, + emit, + ) + + try { + logger.info('[chat-run-socket] starting CLI bridge run for session %s', session_id) + const started = await this.bridge.chat(session_id, input as AgentBridgeMessage, history, instructions, profile) + state.runId = started.run_id + this.pushState(session_id, 'run.started', { + event: 'run.started', + run_id: started.run_id, + queue_length: state.queue.length || 0, + }) + emit('run.started', { + event: 'run.started', + run_id: started.run_id, + queue_length: state.queue.length || 0, + }) + + for await (const chunk of this.bridge.streamOutput(started.run_id)) { + await 
this.applyBridgeChunk(socket, state, session_id, runMarker, chunk, emit, profile) + if (chunk.done) break + } + // Update usage after normal completion (applyBridgeChunk already did cleanup) + const usage = await this.calcAndUpdateUsage(session_id, state, emit) + updateUsage(session_id, { + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + profile: state.profile, + }) + } catch (err: any) { + if (!state.isWorking) return + const queueLen = state.queue?.length ?? 0 + state.isWorking = false + state.isAborting = false + state.profile = undefined + state.runId = undefined + state.events = [] + this.flushBridgePendingToDb(state, session_id) + updateSessionStats(session_id) + const message = err instanceof Error ? err.message : String(err) + emit('run.failed', { event: 'run.failed', error: message, queue_remaining: queueLen }) + const errUsage = await this.calcAndUpdateUsage(session_id, state, emit) + updateUsage(session_id, { + inputTokens: errUsage.inputTokens, + outputTokens: errUsage.outputTokens, + profile: state.profile, + }) + if (queueLen > 0) this.dequeueNextQueuedRun(socket, session_id) + } + } + + private applyBridgeChunk( + socket: Socket, + state: SessionState, + sessionId: string, + runMarker: string, + chunk: AgentBridgeOutput, + emit: (event: string, payload: any) => void, + profile: string, + ): Promise { + return this.applyBridgeChunkAsync(socket, state, sessionId, runMarker, chunk, emit, profile) + } + + private async applyBridgeChunkAsync( + socket: Socket, + state: SessionState, + sessionId: string, + runMarker: string, + chunk: AgentBridgeOutput, + emit: (event: string, payload: any) => void, + profile: string, + ): Promise { + state.runId = chunk.run_id + + for (const ev of chunk.events || []) { + const evType = ev.event as string | undefined + if (evType === 'tool.started') { + this.flushBridgePendingToDb(state, sessionId, runMarker) + const toolName = (ev.tool_name as string) || '' + const args = ev.args as Record | undefined 
+ const tool = this.recordBridgeToolStarted(state, sessionId, runMarker, toolName, args, ev.tool_call_id) + const payload = { + event: 'tool.started', + run_id: chunk.run_id, + tool_call_id: tool.id, + tool: toolName, + name: toolName, + arguments: tool.arguments, + preview: ev.preview || summarizeToolArguments(tool.arguments), + } + this.pushState(sessionId, 'tool.started', payload) + emit('tool.started', payload) + } else if (evType === 'tool.completed') { + const toolName = (ev.tool_name as string) || '' + const completed = this.recordBridgeToolCompleted(state, sessionId, runMarker, toolName, ev) + const payload = { + event: 'tool.completed', + run_id: chunk.run_id, + tool_call_id: completed.id, + tool: toolName, + name: toolName, + output: completed.output, + duration: completed.duration ?? ev.duration, + error: ev.is_error || undefined, + } + this.pushState(sessionId, 'tool.completed', payload) + emit('tool.completed', payload) + } else if (evType === 'turn.boundary') { + this.flushBridgePendingToDb(state, sessionId, runMarker) + } else if (evType === 'reasoning.delta' || evType === 'thinking.delta') { + const text = String(ev.text || '') + if (text) { + state.bridgePendingReasoningContent = (state.bridgePendingReasoningContent || '') + text + const message = this.ensureOpenBridgeAssistantMessage(state, sessionId, runMarker) + message.reasoning = (message.reasoning || '') + text + message.reasoning_content = (message.reasoning_content || '') + text + } + emit(evType, { + event: evType, + run_id: chunk.run_id, + text, + }) + } else if (evType === 'reasoning.available') { + emit('reasoning.available', { + event: 'reasoning.available', + run_id: chunk.run_id, + }) + } else if (evType === 'approval.requested') { + const payload = { + event: 'approval.requested', + run_id: chunk.run_id, + approval_id: ev.approval_id, + command: ev.command, + description: ev.description, + choices: ev.choices, + allow_permanent: ev.allow_permanent, + timeout_ms: ev.timeout_ms, + } + 
this.replaceState(sessionId, 'approval.requested', payload) + emit('approval.requested', payload) + } else if (evType === 'approval.resolved') { + const payload = { + event: 'approval.resolved', + run_id: chunk.run_id, + approval_id: ev.approval_id, + choice: ev.choice, + } + this.replaceState(sessionId, 'approval.resolved', payload) + emit('approval.resolved', payload) + } else if (evType === 'bridge.compression.requested') { + const payload = { + event: 'compression.started', + run_id: chunk.run_id, + request_id: ev.request_id, + message_count: ev.message_count, + token_count: ev.approx_tokens, + source: 'bridge', + } + this.replaceState(sessionId, 'compression.started', payload) + emit('compression.started', payload) + if (ev.request_id && Array.isArray(ev.messages)) { + try { + const compressed = await this.forceCompressBridgeHistory( + sessionId, + profile, + ev.messages as ChatMessage[], + ) + await this.bridge.compressionRespond(String(ev.request_id), { messages: compressed }) + } catch (err: any) { + await this.bridge.compressionRespond(String(ev.request_id), { + error: err?.message || String(err), + }).catch(() => undefined) + } + } + } else if (evType === 'bridge.compression.completed') { + const payload = { + event: 'compression.completed', + run_id: chunk.run_id, + request_id: ev.request_id, + compressed: ev.compressed !== false, + totalMessages: ev.message_count, + resultMessages: ev.result_messages, + beforeTokens: ev.approx_tokens, + source: 'bridge', + } + this.replaceState(sessionId, 'compression.completed', payload) + emit('compression.completed', payload) + } else if (evType === 'bridge.compression.failed') { + const payload = { + event: 'compression.completed', + run_id: chunk.run_id, + request_id: ev.request_id, + compressed: false, + totalMessages: ev.message_count, + resultMessages: ev.message_count, + beforeTokens: ev.approx_tokens, + error: ev.error, + source: 'bridge', + } + this.replaceState(sessionId, 'compression.completed', payload) + 
emit('compression.completed', payload) + } else if (evType === 'status') { + emit('agent.event', { + event: 'agent.event', + run_id: chunk.run_id, + ...ev, + }) + } + } + + if (chunk.delta) { + state.bridgeOutput = (state.bridgeOutput || '') + chunk.delta + state.bridgePendingAssistantContent = (state.bridgePendingAssistantContent || '') + chunk.delta + const last = [...state.messages].reverse().find(m => m.runMarker === runMarker) + if (last?.role === 'assistant' && last.finish_reason == null) { + last.content += chunk.delta + this.syncBridgeReasoningToMessage(last, state.bridgePendingReasoningContent) + } else { + state.messages.push({ + id: state.messages.length + 1, + session_id: sessionId, + runMarker, + role: 'assistant', + content: chunk.delta, + reasoning: state.bridgePendingReasoningContent || null, + reasoning_content: state.bridgePendingReasoningContent || null, + timestamp: Math.floor(Date.now() / 1000), + }) + } + emit('message.delta', { + event: 'message.delta', + run_id: chunk.run_id, + delta: chunk.delta, + output: state.bridgeOutput, + }) + } + + if (!chunk.done) return + if (!state.isWorking) return + + this.flushBridgePendingToDb(state, sessionId, runMarker) + updateSessionStats(sessionId) + state.isWorking = false + state.isAborting = false + state.profile = undefined + state.runId = undefined + state.events = [] + const eventName = chunk.status === 'error' ? 
'run.failed' : 'run.completed' + const payload = { + event: eventName, + run_id: chunk.run_id, + output: chunk.output || state.bridgeOutput || '', + result: chunk.result, + error: chunk.error, + queue_remaining: state.queue.length, + } + emit(eventName, payload) + if (state.queue.length > 0) { + this.dequeueNextQueuedRun(socket, sessionId) + } + } + + /** Persists the buffered assistant text (state.bridgePendingAssistantContent) together with the buffered reasoning as one assistant row via addMessage, clears both buffers, and marks the run's still-open in-memory assistant message as finished with 'stop'. When runMarker is omitted the in-memory message list is not touched. */ private flushBridgePendingToDb(state: SessionState, sessionId: string, runMarker?: string) { + const content = state.bridgePendingAssistantContent || '' + const reasoning = state.bridgePendingReasoningContent || '' + if (!content.trim()) return /* empty or whitespace-only text: nothing is written and both buffers are left as they are */ + if (runMarker) { + const last = this.findOpenBridgeAssistantMessage(state, runMarker) + if (last) this.syncBridgeReasoningToMessage(last, reasoning) + } + addMessage({ + session_id: sessionId, + role: 'assistant', + content, + reasoning: reasoning || null, + reasoning_content: reasoning || null, + timestamp: Math.floor(Date.now() / 1000), + }) + state.bridgePendingAssistantContent = '' + state.bridgePendingReasoningContent = '' + if (runMarker) { /* look the open message up again and close it */ + const last = this.findOpenBridgeAssistantMessage(state, runMarker) + if (last && last.finish_reason == null) last.finish_reason = 'stop' + } + } + + /** Returns the most recent entry of state.messages that carries this runMarker, has role 'assistant' and has no finish_reason yet, or undefined when there is none. The search runs on a reversed copy, so state.messages keeps its order. */ private findOpenBridgeAssistantMessage(state: SessionState, runMarker: string): SessionMessage | undefined { + return [...state.messages] + .reverse() + .find(m => m.runMarker === runMarker && m.role === 'assistant' && m.finish_reason == null) + } + + /** Returns the open assistant message for runMarker; when none exists, appends a new assistant message with empty content (id = current message count + 1) to state.messages and returns it. */ private ensureOpenBridgeAssistantMessage( + state: SessionState, + sessionId: string, + runMarker: string, + ): SessionMessage { + const existing = this.findOpenBridgeAssistantMessage(state, runMarker) + if (existing) return existing + const message: SessionMessage = { + id: state.messages.length + 1, + session_id: sessionId, + runMarker, + role: 'assistant', + content: '', + timestamp: Math.floor(Date.now() / 1000), + } + state.messages.push(message) + return message + } + + /** Copies reasoning onto both message.reasoning and message.reasoning_content; a missing or empty reasoning string leaves the message unchanged. */ private syncBridgeReasoningToMessage(message: SessionMessage, 
reasoning?: string) { + if (!reasoning) return + message.reasoning = reasoning + message.reasoning_content = reasoning + } + + /** Records the start of a bridge tool call. Resolves a tool-call id, queues the call in state.bridgePendingTools with its start time, attaches the call to the run's open assistant message (or appends a new assistant message), persists an assistant row with finish_reason 'tool_calls' via addMessage, and clears the buffered reasoning. Returns the id, the tool name and the JSON-encoded arguments ('{}' when args is missing). */ private recordBridgeToolStarted( + state: SessionState, + sessionId: string, + runMarker: string, + toolName: string, + args: Record | undefined, + rawToolCallId: unknown, + ): { id: string; name: string; arguments: string } { + const id = this.bridgeToolCallId(state, rawToolCallId, toolName) + const argsString = args ? JSON.stringify(args) : '{}' + const reasoning = state.bridgePendingReasoningContent || '' + const toolCall = { + id, + type: 'function', + function: { + name: toolName, + arguments: argsString, + }, + } + const timestamp = Math.floor(Date.now() / 1000) + + state.bridgePendingTools = state.bridgePendingTools || [] + state.bridgePendingTools.push({ + id, + name: toolName, + arguments: argsString, + startedAt: Date.now(), + }) + + /* reuse the open assistant message only when it has no text and no tool calls yet; otherwise append a separate message */ const openMessage = this.findOpenBridgeAssistantMessage(state, runMarker) + if (openMessage && !openMessage.content && !openMessage.tool_calls?.length) { + openMessage.tool_calls = [toolCall] + openMessage.finish_reason = 'tool_calls' + openMessage.reasoning = reasoning || openMessage.reasoning || null + openMessage.reasoning_content = reasoning || openMessage.reasoning_content || null + openMessage.timestamp = timestamp + } else { + state.messages.push({ + id: state.messages.length + 1, + session_id: sessionId, + runMarker, + role: 'assistant', + content: '', + tool_calls: [toolCall], + finish_reason: 'tool_calls', + reasoning: reasoning || null, + reasoning_content: reasoning || null, + timestamp, + }) + } + addMessage({ + session_id: sessionId, + role: 'assistant', + content: '', + tool_calls: [toolCall], + finish_reason: 'tool_calls', + reasoning: reasoning || null, + reasoning_content: reasoning || null, + timestamp, + }) + state.bridgePendingReasoningContent = '' + + return { id, name: toolName, arguments: argsString } + } + + /** Records the result of a bridge tool call. Removes the matching entry from state.bridgePendingTools, appends a 'tool' message to state.messages, persists the same row via addMessage, and returns the tool-call id, the output text and, when a start time was recorded, the elapsed duration. */ private recordBridgeToolCompleted( + state: SessionState, + 
sessionId: string, + runMarker: string, + toolName: string, + ev: Record, + ): { id: string; output: string; duration?: number } { + state.bridgePendingTools = state.bridgePendingTools || [] + const rawId = ev.tool_call_id + /* match order: explicit tool_call_id, then first pending entry with the same tool name, then the last pending entry */ let idx = rawId + ? state.bridgePendingTools.findIndex(tool => tool.id === String(rawId)) + : -1 + if (idx < 0 && toolName) { + idx = state.bridgePendingTools.findIndex(tool => tool.name === toolName) + } + if (idx < 0) { + idx = state.bridgePendingTools.length - 1 + } + const pending = idx >= 0 ? state.bridgePendingTools.splice(idx, 1)[0] : undefined + const id = pending?.id || this.bridgeToolCallId(state, rawId, toolName) + const output = this.bridgeToolOutput(ev) + const timestamp = Math.floor(Date.now() / 1000) + logger.info( + '[chat-run-socket][bridge] recording CLI tool result session=%s tool=%s tool_call_id=%s raw_tool_call_id=%s output_len=%d has_result=%s has_output=%s has_result_preview=%s has_preview=%s event_keys=%s', + sessionId, + toolName, + id, + String(rawId || ''), + output.length, + String(ev.result != null), + String(ev.output != null), + String(ev.result_preview != null), + String(ev.preview != null), + Object.keys(ev).join(','), + ) + + state.messages.push({ + id: state.messages.length + 1, + session_id: sessionId, + runMarker, + role: 'tool', + content: output, + tool_call_id: id, + tool_name: toolName || pending?.name || null, + timestamp, + }) + addMessage({ + session_id: sessionId, + role: 'tool', + content: output, + tool_call_id: id, + tool_name: toolName || pending?.name || null, + timestamp, + }) + + /* elapsed time in seconds, rounded to two decimals; undefined when no pending entry with a start time was found */ const duration = pending?.startedAt + ? 
Math.round((Date.now() - pending.startedAt) / 10) / 100 + : undefined + + return { id, output, duration } + } + + /** Returns the trimmed string form of rawToolCallId when it is non-empty. Otherwise increments state.bridgeToolCounter and synthesizes an id of the form cli_NAME_COUNTER, where NAME is toolName (or 'tool' when empty) with every character outside a-z, A-Z, 0-9, '_' and '-' replaced by '_'. */ private bridgeToolCallId(state: SessionState, rawToolCallId: unknown, toolName: string): string { + const raw = String(rawToolCallId || '').trim() + if (raw) return raw + state.bridgeToolCounter = (state.bridgeToolCounter || 0) + 1 + const safeName = (toolName || 'tool').replace(/[^a-zA-Z0-9_-]/g, '_') + return `cli_${safeName}_${state.bridgeToolCounter}` + } + + /** Extracts the tool output from a bridge event: the first non-nullish of ev.result, ev.output, ev.result_preview and ev.preview (empty string when all are missing). Strings are returned as-is; any other value is JSON-encoded. */ private bridgeToolOutput(ev: Record): string { + const value = ev.result ?? ev.output ?? ev.result_preview ?? ev.preview ?? '' + return typeof value === 'string' ? value : JSON.stringify(value ?? '') + } + private applyResponseStreamEvent( state: SessionState, sessionId: string, @@ -1272,9 +1771,19 @@ export class ChatRunSocket { logger.info({ sessionId, runId }, '[chat-run-socket][abort] started') // Flush in-memory assistant text to DB before aborting the stream. - this.flushResponseRunToDb(state, sessionId) + if (state.source === 'cli') { + this.flushBridgePendingToDb(state, sessionId) + } else { + this.flushResponseRunToDb(state, sessionId) + } - if (state.abortController) { + if (state.source === 'cli') { + try { + await this.bridge.interrupt(sessionId, 'Aborted by user') + } catch (err) { + logger.warn(err, '[chat-run-socket][abort] failed to interrupt CLI bridge for session %s', sessionId) + } + } else if (state.abortController) { state.abortController.abort() } @@ -1323,6 +1832,7 @@ export class ChatRunSocket { session_id: sessionId, model: next.model, instructions: next.instructions, + source: next.source, }, next.profile || fallbackProfile, true) return true } @@ -1333,6 +1843,10 @@ export class ChatRunSocket { const profile = state.profile updateSessionStats(sessionId) + const emit = (event: string, payload: any) => { + this.nsp.to(`session:${sessionId}`).emit(event, { ...payload, session_id: sessionId }) + } + await this.calcAndUpdateUsage(sessionId, state, emit) state.isWorking = 
false state.isAborting = false @@ -1367,6 +1881,7 @@ export class ChatRunSocket { session_id: sessionId, model: next.model, instructions: next.instructions, + source: next.source, }, next.profile || profile || 'default', true) return } diff --git a/packages/server/src/services/hermes/group-chat/agent-clients.ts b/packages/server/src/services/hermes/group-chat/agent-clients.ts index 4e659d5..d8956ab 100644 --- a/packages/server/src/services/hermes/group-chat/agent-clients.ts +++ b/packages/server/src/services/hermes/group-chat/agent-clients.ts @@ -101,6 +101,8 @@ class AgentClient { reconnectionAttempts: Infinity, reconnectionDelay: 1000, reconnectionDelayMax: 30000, + randomizationFactor: 0.5, + timeout: 30000, }) this.bindEvents() diff --git a/packages/server/src/services/hermes/group-chat/index.ts b/packages/server/src/services/hermes/group-chat/index.ts index 0aa9960..9407da7 100644 --- a/packages/server/src/services/hermes/group-chat/index.ts +++ b/packages/server/src/services/hermes/group-chat/index.ts @@ -424,7 +424,13 @@ export class GroupChatServer { const servers = Array.isArray(httpServers) ? 
httpServers : [httpServers] this.io = new Server(servers[0], { - cors: { origin: '*' } + cors: { origin: '*' }, + pingInterval: 25_000, + pingTimeout: 90_000, + connectionStateRecovery: { + maxDisconnectionDuration: 2 * 60_000, + skipMiddlewares: true, + }, }) servers.slice(1).forEach((httpServer) => this.io.attach(httpServer)) this.nsp = this.io.of('/group-chat') diff --git a/packages/server/src/services/shutdown.ts b/packages/server/src/services/shutdown.ts index b0b0488..f28dff3 100644 --- a/packages/server/src/services/shutdown.ts +++ b/packages/server/src/services/shutdown.ts @@ -27,7 +27,7 @@ function shouldStopGatewaysOnShutdown(signal: string): boolean { return shouldStop } -export function bindShutdown(server: any, groupChatServer?: any, chatRunServer?: any): void { +export function bindShutdown(server: any, groupChatServer?: any, chatRunServer?: any, agentBridgeManager?: any): void { let isShuttingDown = false const shutdown = async (signal: string) => { @@ -58,6 +58,15 @@ export function bindShutdown(server: any, groupChatServer?: any, chatRunServer?: logger.info('Skipping gateway shutdown for %s', signal) } + if (agentBridgeManager) { + try { + await agentBridgeManager.stop() + logger.info('Agent bridge stopped') + } catch (err) { + logger.warn(err, 'Failed to stop agent bridge (non-fatal)') + } + } + // Close ChatRunSocket first to abort all active runs and close EventSource connections if (chatRunServer) { chatRunServer.close() diff --git a/packages/server/src/shared/providers.ts b/packages/server/src/shared/providers.ts index 75ea881..4938834 100644 --- a/packages/server/src/shared/providers.ts +++ b/packages/server/src/shared/providers.ts @@ -62,13 +62,9 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ base_url: 'https://generativelanguage.googleapis.com/v1beta/openai', models: [ 'gemini-3.1-pro-preview', + 'gemini-3-pro-preview', 'gemini-3-flash-preview', 'gemini-3.1-flash-lite-preview', - 'gemini-2.5-pro', - 'gemini-2.5-flash', - 
'gemini-2.5-flash-lite', - 'gemma-4-31b-it', - 'gemma-4-26b-it', ], }, { @@ -76,7 +72,7 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ value: 'deepseek', builtin: true, base_url: 'https://api.deepseek.com', - models: ['deepseek-v4-flash', 'deepseek-v4-pro'], + models: ['deepseek-v4-pro', 'deepseek-v4-flash', 'deepseek-chat', 'deepseek-reasoner'], }, { label: 'Z.AI / GLM', @@ -98,7 +94,6 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 'https://api.kimi.com/coding/v1', models: [ - 'kimi-for-coding', 'kimi-k2.6', 'kimi-k2.5', 'kimi-k2-thinking', @@ -124,7 +119,17 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ value: 'xai', builtin: true, base_url: 'https://api.x.ai/v1', - models: ['grok-4.20-reasoning', 'grok-4-1-fast-reasoning'], + models: [ + 'grok-4.20-0309-reasoning', + 'grok-4.20-0309-non-reasoning', + 'grok-4.20-multi-agent-0309', + 'grok-4-1-fast', + 'grok-4-1-fast-non-reasoning', + 'grok-4-fast', + 'grok-4-fast-non-reasoning', + 'grok-4', + 'grok-code-fast-1', + ], }, { label: 'MiniMax', @@ -146,12 +151,13 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1', models: [ + 'qwen3.6-plus', + 'kimi-k2.5', 'qwen3.5-plus', 'qwen3-coder-plus', 'qwen3-coder-next', 'glm-5', 'glm-4.7', - 'kimi-k2.5', 'MiniMax-M2.5', ], }, @@ -166,13 +172,13 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ // returns HTTP 401 for those keys. 
base_url: 'https://coding-intl.dashscope.aliyuncs.com/v1', models: [ + 'qwen3.6-plus', 'qwen3.5-plus', - 'qwen3-max-2026-01-23', - 'qwen3-coder-next', 'qwen3-coder-plus', + 'qwen3-coder-next', + 'kimi-k2.5', 'glm-5', 'glm-4.7', - 'kimi-k2.5', 'MiniMax-M2.5', ], }, @@ -182,14 +188,15 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 'https://router.huggingface.co/v1', models: [ + 'moonshotai/Kimi-K2.5', 'Qwen/Qwen3.5-397B-A17B', 'Qwen/Qwen3.5-35B-A3B', 'deepseek-ai/DeepSeek-V3.2', - 'moonshotai/Kimi-K2.5', 'MiniMaxAI/MiniMax-M2.5', 'zai-org/GLM-5', 'XiaomiMiMo/MiMo-V2-Flash', 'moonshotai/Kimi-K2-Thinking', + 'moonshotai/Kimi-K2.6', ], }, { @@ -214,14 +221,11 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 'https://api.xiaomimimo.com/v1', models: [ - 'mimo-v2-omni', - 'mimo-v2-pro', - 'mimo-v2-tts', - 'mimo-v2.5', 'mimo-v2.5-pro', - 'mimo-v2.5-tts', - 'mimo-v2.5-tts-voiceclone', - 'mimo-v2.5-tts-voicedesign', + 'mimo-v2.5', + 'mimo-v2-pro', + 'mimo-v2-omni', + 'mimo-v2-flash', ], }, { @@ -243,18 +247,21 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 'https://ai-gateway.vercel.sh/v1', models: [ - 'anthropic/claude-opus-4.6', + 'moonshotai/kimi-k2.6', + 'alibaba/qwen3.6-plus', + 'zai/glm-5.1', + 'minimax/minimax-m2.7', 'anthropic/claude-sonnet-4.6', - 'anthropic/claude-sonnet-4.5', + 'anthropic/claude-opus-4.7', + 'anthropic/claude-opus-4.6', 'anthropic/claude-haiku-4.5', - 'openai/gpt-5', - 'openai/gpt-4.1', - 'openai/gpt-4.1-mini', - 'google/gemini-3-pro-preview', + 'openai/gpt-5.4', + 'openai/gpt-5.4-mini', + 'openai/gpt-5.3-codex', + 'google/gemini-3.1-pro-preview', 'google/gemini-3-flash', - 'google/gemini-2.5-pro', - 'google/gemini-2.5-flash', - 'deepseek/deepseek-v3.2', + 'google/gemini-3.1-flash-lite-preview', + 'xai/grok-4.20-reasoning', ], }, { @@ -277,10 +284,10 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ builtin: true, base_url: 
'https://opencode.ai/zen/v1', models: [ + 'kimi-k2.5', 'gpt-5.4-pro', 'gpt-5.4', 'gpt-5.3-codex', - 'gpt-5.3-codex-spark', 'gpt-5.2', 'gpt-5.2-codex', 'gpt-5.1', @@ -308,7 +315,6 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ 'glm-5', 'glm-4.7', 'glm-4.6', - 'kimi-k2.5', 'kimi-k2-thinking', 'kimi-k2', 'qwen3-coder', @@ -320,7 +326,20 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ value: 'opencode-go', builtin: true, base_url: 'https://opencode.ai/zen/go/v1', - models: ['glm-5.1', 'glm-5', 'kimi-k2.5', 'mimo-v2-pro', 'mimo-v2-omni', 'minimax-m2.7', 'minimax-m2.5'], + models: [ + 'kimi-k2.6', + 'kimi-k2.5', + 'glm-5.1', + 'glm-5', + 'mimo-v2.5-pro', + 'mimo-v2.5', + 'mimo-v2-pro', + 'mimo-v2-omni', + 'minimax-m2.7', + 'minimax-m2.5', + 'qwen3.6-plus', + 'qwen3.5-plus', + ], }, { label: 'LongCat', @@ -352,12 +371,13 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ 'moonshotai/kimi-k2.6', 'xiaomi/mimo-v2.5-pro', 'xiaomi/mimo-v2.5', + 'tencent/hy3-preview', 'anthropic/claude-opus-4.7', 'anthropic/claude-opus-4.6', 'anthropic/claude-sonnet-4.6', 'anthropic/claude-sonnet-4.5', 'anthropic/claude-haiku-4.5', - 'openai/gpt-5.4', + 'openai/gpt-5.5', 'openai/gpt-5.4-mini', 'openai/gpt-5.3-codex', 'google/gemini-3-pro-preview', @@ -374,10 +394,12 @@ export const PROVIDER_PRESETS: ProviderPreset[] = [ 'z-ai/glm-5v-turbo', 'z-ai/glm-5-turbo', 'x-ai/grok-4.20-beta', + 'x-ai/grok-4.3', 'nvidia/nemotron-3-super-120b-a12b', 'arcee-ai/trinity-large-thinking', - 'openai/gpt-5.4-pro', + 'openai/gpt-5.5-pro', 'openai/gpt-5.4-nano', + 'deepseek/deepseek-v4-pro', ], }, { diff --git a/scripts/build-server.mjs b/scripts/build-server.mjs index c223d9d..c2a712c 100644 --- a/scripts/build-server.mjs +++ b/scripts/build-server.mjs @@ -1,7 +1,7 @@ import * as esbuild from 'esbuild' import { resolve, dirname } from 'path' import { fileURLToPath } from 'url' -import { readFileSync } from 'fs' +import { cpSync, mkdirSync, readFileSync } from 'fs' const rootDir = 
resolve(dirname(fileURLToPath(import.meta.url)), '..') const pkg = JSON.parse(readFileSync(resolve(rootDir, 'package.json'), 'utf-8')) @@ -23,3 +23,10 @@ await esbuild.build({ treeShaking: true, logLevel: 'info', }) + +const bridgeOutDir = resolve(rootDir, 'dist/server/agent-bridge') +mkdirSync(bridgeOutDir, { recursive: true }) +cpSync( + resolve(rootDir, 'packages/server/src/services/hermes/agent-bridge/hermes_bridge.py'), + resolve(bridgeOutDir, 'hermes_bridge.py'), +) diff --git a/tests/shared/provider-presets.test.ts b/tests/shared/provider-presets.test.ts index 5b3a290..34d2b08 100644 --- a/tests/shared/provider-presets.test.ts +++ b/tests/shared/provider-presets.test.ts @@ -4,10 +4,6 @@ import { PROVIDER_PRESETS as SERVER_PROVIDER_PRESETS, buildProviderModelMap as buildServerProviderModelMap, } from '../../packages/server/src/shared/providers' -import { - PROVIDER_PRESETS as CLIENT_PROVIDER_PRESETS, - buildProviderModelMap as buildClientProviderModelMap, -} from '../../packages/client/src/shared/providers' const OPENAI_CODEX_PROVIDER = 'openai-codex' const GPT_5_5_MODEL = 'gpt-5.5' @@ -19,13 +15,11 @@ function modelsForProvider(providerPresets: Array<{ value: string; models: strin } describe('provider presets', () => { - it('lists GPT-5.5 for OpenAI Codex on both client and server', () => { - expect(modelsForProvider(CLIENT_PROVIDER_PRESETS, OPENAI_CODEX_PROVIDER)).toContain(GPT_5_5_MODEL) + it('lists GPT-5.5 for OpenAI Codex', () => { expect(modelsForProvider(SERVER_PROVIDER_PRESETS, OPENAI_CODEX_PROVIDER)).toContain(GPT_5_5_MODEL) }) it('exposes GPT-5.5 through provider model maps', () => { - expect(buildClientProviderModelMap()[OPENAI_CODEX_PROVIDER]).toContain(GPT_5_5_MODEL) expect(buildServerProviderModelMap()[OPENAI_CODEX_PROVIDER]).toContain(GPT_5_5_MODEL) }) })