fix(chat): 检测并提示被静默吞掉的 run 错误 (#206)
当上游 hermes-agent 的 agent 层捕获了 LLM 错误(如无效 API key、模型不被 provider 支持)后,gateway 仍会发送 run.completed 事件,但 output 为空、usage 为零。前端因此误认为成功,UI 完全无任何错误提示。

本次修复在 chat store 的 SSE 处理中加入两个 per-run 标志:
- runProducedAssistantText:仅 reasoning/thinking/message.delta 置位
- runHadToolActivity:仅 tool.started/tool.completed 置位

run.completed 时:
1. 兜底渲染:若本次 run 没有 assistant 文本但 evt.output 非空,将 output 作为 assistant 消息落盘(兼容某些只在 completed 事件给出最终文本的 provider)。
2. 静默错误检测:若以下三个条件同时满足(无 assistant 文本 + 无 tool 活动 + output 为空),追加一条 system 消息提示用户检查 hermes-agent 日志。usage=0 不再作为强条件,避免某些 provider 或本地模型不返回 usage 时的误报。

api/hermes/chat.ts 给 RunEvent 增加 output 字段类型。

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -30,6 +30,9 @@ export interface RunEvent {
|
||||
preview?: string
|
||||
timestamp?: number
|
||||
error?: string
|
||||
/** Final response text on `run.completed`. May be empty/null if the agent
|
||||
* silently swallowed an upstream error — see chat store for fallback. */
|
||||
output?: string | null
|
||||
usage?: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
|
||||
@@ -834,6 +834,15 @@ export const useChatStore = defineStore('chat', () => {
|
||||
// the partial reply. 800ms keeps quota pressure low while guaranteeing
|
||||
// at most ~1s of unsaved delta on reload.
|
||||
let persistTimer: ReturnType<typeof setTimeout> | null = null
|
||||
// Per-run flags used to detect silently-swallowed errors at run.completed.
|
||||
// hermes-agent occasionally emits run.completed with empty output and no
|
||||
// usage when the agent layer caught an upstream error (e.g. invalid API
|
||||
// key). We need to distinguish: (a) run with assistant text produced,
|
||||
// (b) run with only tool activity, (c) run with truly nothing visible.
|
||||
// Reset per send() call — closures captured by SSE callbacks are scoped
|
||||
// to this run, so there is no cross-run contamination.
|
||||
let runProducedAssistantText = false
|
||||
let runHadToolActivity = false
|
||||
const schedulePersist = () => {
|
||||
if (sid !== activeSessionId.value || persistTimer) return
|
||||
persistTimer = setTimeout(() => {
|
||||
@@ -855,6 +864,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'thinking.delta': {
|
||||
const text = evt.text || evt.delta || ''
|
||||
if (!text) break
|
||||
runProducedAssistantText = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const last = msgs[msgs.length - 1]
|
||||
if (last?.role === 'assistant' && last.isStreaming) {
|
||||
@@ -894,6 +904,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
|
||||
case 'message.delta': {
|
||||
if (evt.delta) runProducedAssistantText = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const last = msgs[msgs.length - 1]
|
||||
if (last?.role === 'assistant' && last.isStreaming) {
|
||||
@@ -920,6 +931,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
|
||||
case 'tool.started': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const last = msgs[msgs.length - 1]
|
||||
if (last?.isStreaming) {
|
||||
@@ -939,6 +951,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
|
||||
case 'tool.completed': {
|
||||
runHadToolActivity = true
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const toolMsgs = msgs.filter(
|
||||
m => m.role === 'tool' && m.toolStatus === 'running',
|
||||
@@ -964,6 +977,44 @@ export const useChatStore = defineStore('chat', () => {
|
||||
target.outputTokens = evt.usage.output_tokens
|
||||
}
|
||||
}
|
||||
// Belt-and-suspenders: some providers may deliver the final
|
||||
// assistant text only via run.completed.output (no message.delta
|
||||
// stream). If we never produced assistant text but the gateway
|
||||
// reports a non-empty output, fall back to rendering it as a
|
||||
// single assistant message so the user actually sees the reply.
|
||||
const finalOutput =
|
||||
typeof evt.output === 'string' ? evt.output : ''
|
||||
const finalOutputTrimmed = finalOutput.trim()
|
||||
if (!runProducedAssistantText && finalOutputTrimmed !== '') {
|
||||
addMessage(sid, {
|
||||
id: uid(),
|
||||
role: 'assistant',
|
||||
content: finalOutput,
|
||||
timestamp: Date.now(),
|
||||
})
|
||||
runProducedAssistantText = true
|
||||
}
|
||||
// Workaround for upstream hermes-agent bug: when the agent
|
||||
// layer silently swallows an error (e.g. invalid API key,
|
||||
// unsupported model), the gateway still emits run.completed
|
||||
// with an empty output. Without surfacing it here the chat UI
|
||||
// looks frozen / "succeeded with no reply". Detect by the
|
||||
// combination of: no assistant text AND no tool activity AND
|
||||
// empty final output. Usage being zero is a *supporting*
|
||||
// signal but not required, since some providers/local models
|
||||
// legitimately omit usage.
|
||||
const swallowedError =
|
||||
!runProducedAssistantText &&
|
||||
!runHadToolActivity &&
|
||||
finalOutputTrimmed === ''
|
||||
if (swallowedError) {
|
||||
addMessage(sid, {
|
||||
id: uid(),
|
||||
role: 'system',
|
||||
content: 'Error: Agent returned no output. The model call may have failed (e.g. invalid API key, model not supported by provider, or context exceeded). Check the hermes-agent logs for details.',
|
||||
timestamp: Date.now(),
|
||||
})
|
||||
}
|
||||
cleanup()
|
||||
updateSessionTitle(sid)
|
||||
// the in-flight marker. If the browser is reloading right now
|
||||
|
||||
Reference in New Issue
Block a user