From 97fbfac2d6c3cee4b8c0776970ad6ba79d8d381a Mon Sep 17 00:00:00 2001
From: ww <wuchxu521@163.com>
Date: Sat, 25 Apr 2026 16:21:07 +0800
Subject: [PATCH] =?UTF-8?q?fix(chat):=20=E6=A3=80=E6=B5=8B=E5=B9=B6?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=A2=AB=E9=9D=99=E9=BB=98=E5=90=9E=E6=8E=89?=
 =?UTF-8?q?=E7=9A=84=20run=20=E9=94=99=E8=AF=AF=20(#206)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

当上游 hermes-agent 的 agent 层捕获了 LLM 错误（如无效 API key、
模型不被 provider 支持）后，gateway 仍会发送 run.completed 事件，
但 output 为空、usage 为零。前端因此误认为成功，UI 完全无任何
错误提示。

本次修复在 chat store 的 SSE 处理中加入两个 per-run 标志：
- runProducedAssistantText：仅 reasoning/thinking/message.delta 置位
- runHadToolActivity：仅 tool.started/tool.completed 置位

run.completed 时：
1. 兜底渲染：若本次 run 没有 assistant 文本但 evt.output 非空，
   将 output 作为 assistant 消息落盘（兼容某些只在 completed
   事件给出最终文本的 provider）。
2. 静默错误检测：若以下三个条件同时成立（无 assistant 文本、无 tool
   活动、output 为空），追加一条 system 消息提示用户检查
   hermes-agent 日志。usage 是否为 0 不参与判定，避免某些 provider
   或本地模型不返回 usage 时的误报。

api/hermes/chat.ts 给 RunEvent 增加 output 字段类型。

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 packages/client/src/api/hermes/chat.ts    |  3 ++
 packages/client/src/stores/hermes/chat.ts | 51 +++++++++++++++++++++++
 2 files changed, 54 insertions(+)
diff --git a/packages/client/src/api/hermes/chat.ts b/packages/client/src/api/hermes/chat.ts
index e44f4f6..bbd399c 100644
--- a/packages/client/src/api/hermes/chat.ts
+++ b/packages/client/src/api/hermes/chat.ts
@@ -30,6 +30,9 @@ export interface RunEvent {
   preview?: string
   timestamp?: number
   error?: string
+  /** Final response text on `run.completed`. May be empty/null if the agent
+   * silently swallowed an upstream error — see chat store for fallback. */
+  output?: string | null
   usage?: {
     input_tokens: number
     output_tokens: number
diff --git a/packages/client/src/stores/hermes/chat.ts b/packages/client/src/stores/hermes/chat.ts
index c529ecb..d3a4413 100644
--- a/packages/client/src/stores/hermes/chat.ts
+++ b/packages/client/src/stores/hermes/chat.ts
@@ -834,6 +834,15 @@ export const useChatStore = defineStore('chat', () => {
       // the partial reply. 800ms keeps quota pressure low while guaranteeing
       // at most ~1s of unsaved delta on reload.
       let persistTimer: ReturnType<typeof setTimeout> | null = null
+      // Per-run flags used to detect silently-swallowed errors at run.completed.
+      // hermes-agent occasionally emits run.completed with empty output and no
+      // usage when the agent layer caught an upstream error (e.g. invalid API
+      // key). We need to distinguish: (a) run with assistant text produced,
+      // (b) run with only tool activity, (c) run with truly nothing visible.
+      // Reset per send() call — closures captured by SSE callbacks are scoped
+      // to this run, so there is no cross-run contamination.
+      let runProducedAssistantText = false
+      let runHadToolActivity = false
       const schedulePersist = () => {
         if (sid !== activeSessionId.value || persistTimer) return
         persistTimer = setTimeout(() => {
@@ -855,6 +864,7 @@ export const useChatStore = defineStore('chat', () => {
             case 'thinking.delta': {
               const text = evt.text || evt.delta || ''
               if (!text) break
+              runProducedAssistantText = true
               const msgs = getSessionMsgs(sid)
               const last = msgs[msgs.length - 1]
               if (last?.role === 'assistant' && last.isStreaming) {
@@ -894,6 +904,7 @@ export const useChatStore = defineStore('chat', () => {
             }
 
             case 'message.delta': {
+              if (evt.delta) runProducedAssistantText = true
               const msgs = getSessionMsgs(sid)
               const last = msgs[msgs.length - 1]
               if (last?.role === 'assistant' && last.isStreaming) {
@@ -920,6 +931,7 @@ export const useChatStore = defineStore('chat', () => {
             }
 
             case 'tool.started': {
+              runHadToolActivity = true
               const msgs = getSessionMsgs(sid)
               const last = msgs[msgs.length - 1]
               if (last?.isStreaming) {
@@ -939,6 +951,7 @@ export const useChatStore = defineStore('chat', () => {
             }
 
             case 'tool.completed': {
+              runHadToolActivity = true
               const msgs = getSessionMsgs(sid)
               const toolMsgs = msgs.filter(
                 m => m.role === 'tool' && m.toolStatus === 'running',
@@ -964,6 +977,44 @@ export const useChatStore = defineStore('chat', () => {
                   target.outputTokens = evt.usage.output_tokens
                 }
               }
+              // Belt-and-suspenders: some providers may deliver the final
+              // assistant text only via run.completed.output (no message.delta
+              // stream). If we never produced assistant text but the gateway
+              // reports a non-empty output, fall back to rendering it as a
+              // single assistant message so the user actually sees the reply.
+              const finalOutput =
+                typeof evt.output === 'string' ? evt.output : ''
+              const finalOutputTrimmed = finalOutput.trim()
+              if (!runProducedAssistantText && finalOutputTrimmed !== '') {
+                addMessage(sid, {
+                  id: uid(),
+                  role: 'assistant',
+                  content: finalOutput,
+                  timestamp: Date.now(),
+                })
+                runProducedAssistantText = true
+              }
+              // Workaround for upstream hermes-agent bug: when the agent
+              // layer silently swallows an error (e.g. invalid API key,
+              // unsupported model), the gateway still emits run.completed
+              // with an empty output. Without surfacing it here the chat UI
+              // looks frozen / "succeeded with no reply". Detect by the
+              // combination of: no assistant text AND no tool activity AND
+              // empty final output. Usage being zero is a *supporting*
+              // signal but not required, since some providers/local models
+              // legitimately omit usage.
+              const swallowedError =
+                !runProducedAssistantText &&
+                !runHadToolActivity &&
+                finalOutputTrimmed === ''
+              if (swallowedError) {
+                addMessage(sid, {
+                  id: uid(),
+                  role: 'system',
+                  content: 'Error: Agent returned no output. The model call may have failed (e.g. invalid API key, model not supported by provider, or context exceeded). Check the hermes-agent logs for details.',
+                  timestamp: Date.now(),
+                })
+              }
               cleanup()
               updateSessionTitle(sid)
               // the in-flight marker. If the browser is reloading right now