From 97fbfac2d6c3cee4b8c0776970ad6ba79d8d381a Mon Sep 17 00:00:00 2001 From: ww Date: Sat, 25 Apr 2026 16:21:07 +0800 Subject: [PATCH] =?UTF-8?q?fix(chat):=20=E6=A3=80=E6=B5=8B=E5=B9=B6?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=A2=AB=E9=9D=99=E9=BB=98=E5=90=9E=E6=8E=89?= =?UTF-8?q?=E7=9A=84=20run=20=E9=94=99=E8=AF=AF=20(#206)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当上游 hermes-agent 的 agent 层捕获了 LLM 错误(如无效 API key、 模型不被 provider 支持)后,gateway 仍会发送 run.completed 事件, 但 output 为空、usage 为零。前端因此误认为成功,UI 完全无任何 错误提示。 本次修复在 chat store 的 SSE 处理中加入两个 per-run 标志: - runProducedAssistantText:仅 reasoning/thinking/message.delta 置位 - runHadToolActivity:仅 tool.started/tool.completed 置位 run.completed 时: 1. 兜底渲染:若本次 run 没有 assistant 文本但 evt.output 非空, 将 output 作为 assistant 消息落盘(兼容某些只在 completed 事件给出最终文本的 provider)。 2. 静默错误检测:若三者均不成立(无 assistant 文本 + 无 tool 活动 + output 空),追加一条 system 消息提示用户检查 hermes-agent 日志。usage=0 不再作为强条件,避免某些 provider 或本地模型不返回 usage 时的误报。 api/hermes/chat.ts 给 RunEvent 增加 output 字段类型。 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/client/src/api/hermes/chat.ts | 3 ++ packages/client/src/stores/hermes/chat.ts | 51 +++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/packages/client/src/api/hermes/chat.ts b/packages/client/src/api/hermes/chat.ts index e44f4f6..bbd399c 100644 --- a/packages/client/src/api/hermes/chat.ts +++ b/packages/client/src/api/hermes/chat.ts @@ -30,6 +30,9 @@ export interface RunEvent { preview?: string timestamp?: number error?: string + /** Final response text on `run.completed`. May be empty/null if the agent + * silently swallowed an upstream error — see chat store for fallback. 
*/ + output?: string | null usage?: { input_tokens: number output_tokens: number diff --git a/packages/client/src/stores/hermes/chat.ts b/packages/client/src/stores/hermes/chat.ts index c529ecb..d3a4413 100644 --- a/packages/client/src/stores/hermes/chat.ts +++ b/packages/client/src/stores/hermes/chat.ts @@ -834,6 +834,15 @@ export const useChatStore = defineStore('chat', () => { // the partial reply. 800ms keeps quota pressure low while guaranteeing // at most ~1s of unsaved delta on reload. let persistTimer: ReturnType<typeof setTimeout> | null = null + // Per-run flags used to detect silently-swallowed errors at run.completed. + // hermes-agent occasionally emits run.completed with empty output and no + // usage when the agent layer caught an upstream error (e.g. invalid API + // key). We need to distinguish: (a) run with assistant text produced, + // (b) run with only tool activity, (c) run with truly nothing visible. + // Reset per send() call — closures captured by SSE callbacks are scoped + // to this run, so there is no cross-run contamination. 
+ let runProducedAssistantText = false + let runHadToolActivity = false const schedulePersist = () => { if (sid !== activeSessionId.value || persistTimer) return persistTimer = setTimeout(() => { @@ -855,6 +864,7 @@ export const useChatStore = defineStore('chat', () => { case 'thinking.delta': { const text = evt.text || evt.delta || '' if (!text) break + runProducedAssistantText = true const msgs = getSessionMsgs(sid) const last = msgs[msgs.length - 1] if (last?.role === 'assistant' && last.isStreaming) { @@ -894,6 +904,7 @@ export const useChatStore = defineStore('chat', () => { } case 'message.delta': { + if (evt.delta) runProducedAssistantText = true const msgs = getSessionMsgs(sid) const last = msgs[msgs.length - 1] if (last?.role === 'assistant' && last.isStreaming) { @@ -920,6 +931,7 @@ export const useChatStore = defineStore('chat', () => { } case 'tool.started': { + runHadToolActivity = true const msgs = getSessionMsgs(sid) const last = msgs[msgs.length - 1] if (last?.isStreaming) { @@ -939,6 +951,7 @@ export const useChatStore = defineStore('chat', () => { } case 'tool.completed': { + runHadToolActivity = true const msgs = getSessionMsgs(sid) const toolMsgs = msgs.filter( m => m.role === 'tool' && m.toolStatus === 'running', @@ -964,6 +977,44 @@ export const useChatStore = defineStore('chat', () => { target.outputTokens = evt.usage.output_tokens } } + // Belt-and-suspenders: some providers may deliver the final + // assistant text only via run.completed.output (no message.delta + // stream). If we never produced assistant text but the gateway + // reports a non-empty output, fall back to rendering it as a + // single assistant message so the user actually sees the reply. + const finalOutput = + typeof evt.output === 'string' ? 
evt.output : '' + const finalOutputTrimmed = finalOutput.trim() + if (!runProducedAssistantText && finalOutputTrimmed !== '') { + addMessage(sid, { + id: uid(), + role: 'assistant', + content: finalOutput, + timestamp: Date.now(), + }) + runProducedAssistantText = true + } + // Workaround for upstream hermes-agent bug: when the agent + // layer silently swallows an error (e.g. invalid API key, + // unsupported model), the gateway still emits run.completed + // with an empty output. Without surfacing it here the chat UI + // looks frozen / "succeeded with no reply". Detect by the + // combination of: no assistant text AND no tool activity AND + // empty final output. Usage being zero is a *supporting* + // signal but not required, since some providers/local models + // legitimately omit usage. + const swallowedError = + !runProducedAssistantText && + !runHadToolActivity && + finalOutputTrimmed === '' + if (swallowedError) { + addMessage(sid, { + id: uid(), + role: 'system', + content: 'Error: Agent returned no output. The model call may have failed (e.g. invalid API key, model not supported by provider, or context exceeded). Check the hermes-agent logs for details.', + timestamp: Date.now(), + }) + } cleanup() updateSessionTitle(sid) // the in-flight marker. If the browser is reloading right now