fix: harden Hermes stream recovery around tool boundaries (#189)
This commit is contained in:
@@ -72,7 +72,9 @@ export function streamRunEvents(
|
||||
onDone()
|
||||
}
|
||||
} catch {
|
||||
onEvent({ event: 'message', delta: e.data })
|
||||
// Some SSE adapters may deliver raw text frames. Treat them as
|
||||
// assistant deltas so they render instead of being silently ignored.
|
||||
onEvent({ event: 'message.delta', delta: e.data })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { startRun, streamRunEvents, type ChatMessage, type RunEvent } from '@/api/hermes/chat'
|
||||
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
|
||||
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionDetail, type SessionSummary } from '@/api/hermes/sessions'
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref, computed } from 'vue'
|
||||
import { useAppStore } from './app'
|
||||
@@ -162,6 +162,42 @@ function mapHermesSession(s: SessionSummary): Session {
|
||||
}
|
||||
}
|
||||
|
||||
function assistantTranscriptLength(msgs: Message[]): number {
|
||||
return msgs.reduce((total, m) => total + (m.role === 'assistant' ? (m.content?.length ?? 0) : 0), 0)
|
||||
}
|
||||
|
||||
function userTurnCount(msgs: Message[]): number {
|
||||
return msgs.filter(m => m.role === 'user').length
|
||||
}
|
||||
|
||||
function serverMessagesAreAheadOrEqual(local: Message[], server: Message[]): boolean {
|
||||
const localUsers = userTurnCount(local)
|
||||
const serverUsers = userTurnCount(server)
|
||||
return serverUsers > localUsers
|
||||
|| (serverUsers === localUsers && assistantTranscriptLength(server) >= assistantTranscriptLength(local))
|
||||
}
|
||||
|
||||
function serverHasCaughtUpToLocalTurn(local: Message[], server: Message[]): boolean {
|
||||
return userTurnCount(server) >= userTurnCount(local)
|
||||
}
|
||||
|
||||
function applySessionMetaFromDetail(target: Session, detail: SessionDetail) {
|
||||
if (detail.title) target.title = detail.title
|
||||
target.endedAt = detail.ended_at != null ? Math.round(detail.ended_at * 1000) : null
|
||||
target.lastActiveAt = detail.last_active != null ? Math.round(detail.last_active * 1000) : target.lastActiveAt
|
||||
target.updatedAt = Math.round((detail.last_active || detail.ended_at || target.updatedAt / 1000) * 1000)
|
||||
}
|
||||
|
||||
function applyServerMessagesIfAhead(target: Session, detail: SessionDetail): boolean {
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
applySessionMetaFromDetail(target, detail)
|
||||
if (serverMessagesAreAheadOrEqual(target.messages, mapped)) {
|
||||
target.messages = mapped
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Cache keys for stale-while-revalidate loading of sessions / messages.
|
||||
// All keys include the active profile name to isolate cache between profiles.
|
||||
// Rendering from cache on boot avoids the multi-round-trip wait the user sees
|
||||
@@ -356,8 +392,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
saveJsonWithLegacy(inFlightKey(sid), { runId, startedAt: Date.now() } as InFlightRun, legacyInFlightKey(sid))
|
||||
}
|
||||
|
||||
function clearInFlight(sid: string) {
|
||||
function clearInFlight(sid: string, runId?: string): boolean {
|
||||
if (runId && readInFlight(sid)?.runId !== runId) return false
|
||||
removeItemWithLegacy(inFlightKey(sid), legacyInFlightKey(sid))
|
||||
return true
|
||||
}
|
||||
|
||||
function readInFlight(sid: string): InFlightRun | null {
|
||||
@@ -381,8 +419,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
|
||||
// Poll fetchSession while an in-flight run is recovering. Exits when the
|
||||
// server's message signature is stable for POLL_STABLE_EXITS ticks (run
|
||||
// presumed done), TTL elapses, or the user explicitly starts streaming.
|
||||
// server reports a terminal session and its message signature is stable for
|
||||
// POLL_STABLE_EXITS ticks, TTL elapses, or the user explicitly starts streaming.
|
||||
function startPolling(sid: string) {
|
||||
if (pollTimers.has(sid)) return
|
||||
resumingRuns.value = new Set([...resumingRuns.value, sid])
|
||||
@@ -403,34 +441,21 @@ export const useChatStore = defineStore('chat', () => {
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (!target) return
|
||||
// Use the same "content-aware" comparison as switchSession: server
|
||||
// is ahead iff it knows about at least as many user turns and its
|
||||
// last assistant text is at least as long as ours.
|
||||
const local = target.messages
|
||||
const localLastAssistant = [...local].reverse().find(m => m.role === 'assistant')
|
||||
const serverLastAssistant = [...mapped].reverse().find(m => m.role === 'assistant')
|
||||
const localAssistantLen = localLastAssistant?.content?.length ?? 0
|
||||
const serverAssistantLen = serverLastAssistant?.content?.length ?? 0
|
||||
const localUsers = local.filter(m => m.role === 'user').length
|
||||
const serverUsers = mapped.filter(m => m.role === 'user').length
|
||||
const serverIsCaughtUp = serverUsers >= localUsers
|
||||
// Same rationale as switchSession: strictly more user turns means
|
||||
// server is ahead (new turn complete). Equal user turns + longer
|
||||
// assistant means server caught up on the current turn.
|
||||
const serverIsAhead =
|
||||
serverUsers > localUsers
|
||||
|| (serverUsers === localUsers && serverAssistantLen >= localAssistantLen)
|
||||
const serverIsCaughtUp = serverHasCaughtUpToLocalTurn(target.messages, mapped)
|
||||
const serverIsAhead = serverMessagesAreAheadOrEqual(target.messages, mapped)
|
||||
const serverIsTerminal = detail.ended_at != null
|
||||
|
||||
applySessionMetaFromDetail(target, detail)
|
||||
if (serverIsAhead) {
|
||||
target.messages = mapped
|
||||
if (detail.title && !target.title) target.title = detail.title
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
}
|
||||
// Stability detection ONLY matters when the server has at least as
|
||||
// many user turns as we do. Otherwise the server is still catching
|
||||
// up (e.g. the new turn we just sent hasn't been flushed server-side
|
||||
// yet) and a "stable" signature is a false positive — the stability
|
||||
// is the server NOT having our latest turn, not the run being done.
|
||||
if (!serverIsCaughtUp) {
|
||||
|
||||
// Stability detection ONLY matters when the server has caught up to
|
||||
// our latest user turn AND the session is terminal. During long tool
|
||||
// calls the persisted transcript may be stable while the run is still
|
||||
// active; treating that as completion is the truncation failure mode.
|
||||
if (!serverIsCaughtUp || !serverIsTerminal) {
|
||||
pollSignatures.delete(sid)
|
||||
} else {
|
||||
const last = mapped[mapped.length - 1]
|
||||
@@ -439,14 +464,13 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (prev && prev.sig === sig) {
|
||||
prev.stableTicks += 1
|
||||
if (prev.stableTicks >= POLL_STABLE_EXITS) {
|
||||
// Run is done on the server. Force-apply server view even if
|
||||
// our "don't retreat" guard above skipped it — the server is
|
||||
// now the authoritative source of truth.
|
||||
target.messages = mapped
|
||||
if (detail.title) target.title = detail.title
|
||||
// Server confirms this run is terminal. Keep any longer local
|
||||
// stream text if the final session export lags behind.
|
||||
if (serverIsAhead) target.messages = mapped
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
if (clearInFlight(sid, inFlight.runId)) {
|
||||
stopPolling(sid)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pollSignatures.set(sid, { sig, stableTicks: 0 })
|
||||
@@ -521,9 +545,9 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
}
|
||||
|
||||
// Re-pull active session from server and overwrite local messages. Used on
|
||||
// SSE drop and on tab-visible events — mobile browsers kill EventSource
|
||||
// while backgrounded, but the backend run usually completes anyway.
|
||||
// Re-pull active session from server without retreating from longer local
|
||||
// streamed text. Used on SSE drop and tab-visible events — mobile browsers
|
||||
// can kill EventSource while the backend run continues.
|
||||
async function refreshActiveSession(): Promise<boolean> {
|
||||
const sid = activeSessionId.value
|
||||
if (!sid) return false
|
||||
@@ -532,17 +556,28 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (!detail) return false
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (!target) return false
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
target.messages = mapped
|
||||
if (detail.title) target.title = detail.title
|
||||
persistActiveMessages()
|
||||
return true
|
||||
const applied = applyServerMessagesIfAhead(target, detail)
|
||||
if (applied && sid === activeSessionId.value) persistActiveMessages()
|
||||
return applied
|
||||
} catch (err) {
|
||||
console.error('Failed to refresh active session:', err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
async function reconcileSessionAfterCompletion(sid: string): Promise<void> {
|
||||
try {
|
||||
const detail = await fetchSession(sid)
|
||||
if (!detail) return
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (!target) return
|
||||
const applied = applyServerMessagesIfAhead(target, detail)
|
||||
if (applied && sid === activeSessionId.value) persistActiveMessages()
|
||||
} catch (err) {
|
||||
console.error('Failed to reconcile completed session:', err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function createSession(): Session {
|
||||
const session: Session = {
|
||||
@@ -588,34 +623,14 @@ export const useChatStore = defineStore('chat', () => {
|
||||
const detail = await fetchSession(sessionId)
|
||||
if (detail && detail.messages) {
|
||||
const mapped = mapHermesMessages(detail.messages)
|
||||
// Pick whichever view has more information. Simple length comparison
|
||||
// is wrong because mapHermesMessages folds tool_call-only assistant
|
||||
// msgs and matches them with tool-result msgs — so post-fold `mapped`
|
||||
// can be SHORTER than the raw SSE-built local array even when the
|
||||
// server is strictly ahead. Instead, compare the last assistant
|
||||
// message content: if the server's is at least as long, the server
|
||||
// is up-to-date (and has the final complete text); otherwise keep
|
||||
// local (in-flight window where server hasn't flushed the new turn).
|
||||
const local = activeSession.value.messages
|
||||
const localLastAssistant = [...local].reverse().find(m => m.role === 'assistant')
|
||||
const serverLastAssistant = [...mapped].reverse().find(m => m.role === 'assistant')
|
||||
const localAssistantLen = localLastAssistant?.content?.length ?? 0
|
||||
const serverAssistantLen = serverLastAssistant?.content?.length ?? 0
|
||||
const localUsers = local.filter(m => m.role === 'user').length
|
||||
const serverUsers = mapped.filter(m => m.role === 'user').length
|
||||
// Trust server when:
|
||||
// - it has STRICTLY MORE user turns than we do (new turn landed),
|
||||
// OR
|
||||
// - same user-turn count AND server's last assistant is at least
|
||||
// as long as ours (same turn, server caught up or further)
|
||||
// Otherwise keep local (protects against the server-not-yet-flushed
|
||||
// race during in-flight runs). Length comparison alone is wrong
|
||||
// across different turns because each turn's last assistant is
|
||||
// unrelated to the previous turn's.
|
||||
const serverIsAhead =
|
||||
serverUsers > localUsers
|
||||
|| (serverUsers === localUsers && serverAssistantLen >= localAssistantLen)
|
||||
if (serverIsAhead) {
|
||||
// Pick whichever view has more information. Simple array length
|
||||
// comparison is wrong because mapHermesMessages folds tool-call-only
|
||||
// assistant msgs into tool-result msgs. Also, tool boundaries can
|
||||
// split one assistant turn into pre-tool and post-tool assistant
|
||||
// segments, so comparing only the last assistant segment can retreat
|
||||
// a fuller local transcript to stale pre-tool server text. Compare
|
||||
// user-turn count plus total assistant transcript length instead.
|
||||
if (serverMessagesAreAheadOrEqual(activeSession.value.messages, mapped)) {
|
||||
activeSession.value.messages = mapped
|
||||
}
|
||||
// Update title: use Hermes title, or fallback to first user message
|
||||
@@ -823,7 +838,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||
case 'run.started':
|
||||
break
|
||||
|
||||
case 'message.delta': {
|
||||
case 'message.delta':
|
||||
case 'message': {
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const last = msgs[msgs.length - 1]
|
||||
if (last?.role === 'assistant' && last.isStreaming) {
|
||||
@@ -888,14 +904,14 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
cleanup()
|
||||
updateSessionTitle(sid)
|
||||
// the in-flight marker. If the browser is reloading right now
|
||||
// and kills us between the two localStorage writes, we want
|
||||
// the next page load to still see in-flight === true (so
|
||||
// polling kicks in and recovers) rather than the other way
|
||||
// around (cleared in-flight + stale streaming cache = UI stuck).
|
||||
// Persist the terminal local view before clearing the in-flight
|
||||
// marker. If final SSE deltas were missed, reconcile once from
|
||||
// the authoritative session export without retreating from
|
||||
// longer local text.
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
void reconcileSessionAfterCompletion(sid).finally(() => {
|
||||
if (readInFlight(sid)?.runId === runId) startPolling(sid)
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
@@ -923,8 +939,9 @@ export const useChatStore = defineStore('chat', () => {
|
||||
})
|
||||
cleanup()
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
if (clearInFlight(sid, runId)) {
|
||||
stopPolling(sid)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -953,13 +970,9 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (last?.isStreaming) {
|
||||
updateMessage(sid, last.id, { isStreaming: false })
|
||||
}
|
||||
// Any tool messages still marked 'running' will be replaced by the
|
||||
// server's view after refresh; clear their spinner state now.
|
||||
msgs.forEach((m, i) => {
|
||||
if (m.role === 'tool' && m.toolStatus === 'running') {
|
||||
msgs[i] = { ...m, toolStatus: 'done' }
|
||||
}
|
||||
})
|
||||
// Keep running tool state until refresh/polling sees the server's
|
||||
// terminal transcript. A dropped SSE connection is not proof the
|
||||
// tool completed.
|
||||
cleanup()
|
||||
if (sid === activeSessionId.value) {
|
||||
void refreshActiveSession()
|
||||
|
||||
@@ -96,28 +96,43 @@ function buildProxyHeaders(ctx: Context, upstream: string): Record<string, strin
|
||||
const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
|
||||
|
||||
/**
|
||||
* Parse SSE text chunks and extract run.completed events.
|
||||
* Returns the run_id if a run.completed was found.
|
||||
* Parse one complete SSE event block and record usage for run.completed.
|
||||
* The public stream is forwarded elsewhere; parser failures are accounting-only
|
||||
* and must never abort the client stream.
|
||||
*/
|
||||
function extractRunCompletedFromChunk(chunk: string): string | null {
|
||||
// SSE format: each line is "data: {...}\n\n"
|
||||
const lines = chunk.split('\n')
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue
|
||||
try {
|
||||
const data = JSON.parse(line.slice(6))
|
||||
if (data.event === 'run.completed' && data.usage && data.run_id) {
|
||||
const sessionId = getSessionForRun(data.run_id)
|
||||
if (sessionId) {
|
||||
updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
|
||||
return data.run_id
|
||||
}
|
||||
}
|
||||
} catch { /* not JSON, skip */ }
|
||||
function extractRunCompletedFromBlock(block: string): string | null {
|
||||
const dataLines: string[] = []
|
||||
for (const rawLine of block.split(/\r?\n/)) {
|
||||
if (!rawLine.startsWith('data:')) continue
|
||||
let data = rawLine.slice(5)
|
||||
if (data.startsWith(' ')) data = data.slice(1)
|
||||
dataLines.push(data)
|
||||
}
|
||||
if (dataLines.length === 0) return null
|
||||
|
||||
try {
|
||||
const data = JSON.parse(dataLines.join('\n'))
|
||||
if (data.event === 'run.completed' && data.usage && data.run_id) {
|
||||
const sessionId = getSessionForRun(data.run_id)
|
||||
if (sessionId) {
|
||||
updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
|
||||
return data.run_id
|
||||
}
|
||||
}
|
||||
} catch { /* not JSON or usage accounting failed; skip */ }
|
||||
return null
|
||||
}
|
||||
|
||||
function takeSSEBlock(buffer: string): { block: string; rest: string } | null {
|
||||
const lf = buffer.indexOf('\n\n')
|
||||
const crlf = buffer.indexOf('\r\n\r\n')
|
||||
if (lf === -1 && crlf === -1) return null
|
||||
if (crlf !== -1 && (lf === -1 || crlf < lf)) {
|
||||
return { block: buffer.slice(0, crlf), rest: buffer.slice(crlf + 4) }
|
||||
}
|
||||
return { block: buffer.slice(0, lf), rest: buffer.slice(lf + 2) }
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream an SSE response while intercepting run.completed events.
|
||||
*/
|
||||
@@ -142,18 +157,18 @@ async function streamSSE(ctx: Context, res: Response): Promise<void> {
|
||||
// Also decode for interception
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
// Process complete SSE lines (delimited by double newline)
|
||||
let newlineIdx: number
|
||||
while ((newlineIdx = buffer.indexOf('\n\n')) !== -1) {
|
||||
const eventBlock = buffer.slice(0, newlineIdx)
|
||||
buffer = buffer.slice(newlineIdx + 2)
|
||||
extractRunCompletedFromChunk(eventBlock)
|
||||
// Process complete SSE event blocks (LF or CRLF blank-line delimiters).
|
||||
let next: { block: string; rest: string } | null
|
||||
while ((next = takeSSEBlock(buffer)) !== null) {
|
||||
buffer = next.rest
|
||||
extractRunCompletedFromBlock(next.block)
|
||||
}
|
||||
}
|
||||
|
||||
buffer += decoder.decode()
|
||||
// Process remaining buffer
|
||||
if (buffer.trim()) {
|
||||
extractRunCompletedFromChunk(buffer)
|
||||
extractRunCompletedFromBlock(buffer)
|
||||
}
|
||||
} finally {
|
||||
ctx.res.end()
|
||||
@@ -232,6 +247,9 @@ export async function proxy(ctx: Context) {
|
||||
// Intercept SSE streams for /v1/runs/{id}/events
|
||||
const sseMatch = upstreamPath.match(SSE_EVENTS_PATH)
|
||||
if (sseMatch) {
|
||||
ctx.set('Content-Type', 'text/event-stream')
|
||||
ctx.set('Cache-Control', 'no-cache, no-transform')
|
||||
ctx.set('X-Accel-Buffering', 'no')
|
||||
await streamSSE(ctx, res)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
// @vitest-environment jsdom
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
import { streamRunEvents, type RunEvent } from '@/api/hermes/chat'
|
||||
|
||||
class MockEventSource {
|
||||
static instances: MockEventSource[] = []
|
||||
|
||||
url: string
|
||||
onmessage: ((event: { data: string }) => void) | null = null
|
||||
onerror: (() => void) | null = null
|
||||
close = vi.fn()
|
||||
|
||||
constructor(url: string) {
|
||||
this.url = url
|
||||
MockEventSource.instances.push(this)
|
||||
}
|
||||
|
||||
emit(data: string) {
|
||||
this.onmessage?.({ data })
|
||||
}
|
||||
|
||||
fail() {
|
||||
this.onerror?.()
|
||||
}
|
||||
}
|
||||
|
||||
describe('streamRunEvents', () => {
|
||||
beforeEach(() => {
|
||||
window.localStorage.clear()
|
||||
MockEventSource.instances = []
|
||||
vi.stubGlobal('EventSource', MockEventSource)
|
||||
})
|
||||
|
||||
it('maps non-JSON EventSource data to message.delta so raw text is rendered', () => {
|
||||
const events: RunEvent[] = []
|
||||
|
||||
streamRunEvents('run-raw', event => events.push(event), vi.fn(), vi.fn())
|
||||
MockEventSource.instances[0].emit('原因:raw fallback')
|
||||
|
||||
expect(events).toEqual([{ event: 'message.delta', delta: '原因:raw fallback' }])
|
||||
})
|
||||
|
||||
it('parses colon-containing JSON deltas and closes on completion', () => {
|
||||
const events: RunEvent[] = []
|
||||
const onDone = vi.fn()
|
||||
|
||||
streamRunEvents('run-json', event => events.push(event), onDone, vi.fn())
|
||||
const source = MockEventSource.instances[0]
|
||||
source.emit(JSON.stringify({ event: 'message.delta', delta: '让我直接读文件:A: B' }))
|
||||
source.emit(JSON.stringify({ event: 'run.completed' }))
|
||||
|
||||
expect(events).toEqual([
|
||||
{ event: 'message.delta', delta: '让我直接读文件:A: B' },
|
||||
{ event: 'run.completed' },
|
||||
])
|
||||
expect(source.close).toHaveBeenCalledTimes(1)
|
||||
expect(onDone).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
})
|
||||
@@ -41,13 +41,36 @@ function makeSummary(id: string, title = 'Session') {
|
||||
}
|
||||
}
|
||||
|
||||
function makeDetail(id: string, messages: Array<Record<string, any>>) {
|
||||
function makeDetail(id: string, messages: Array<Record<string, any>>, overrides: Record<string, any> = {}) {
|
||||
return {
|
||||
...makeSummary(id),
|
||||
...overrides,
|
||||
messages,
|
||||
}
|
||||
}
|
||||
|
||||
function makeHermesMessage(
|
||||
id: number,
|
||||
role: 'user' | 'assistant' | 'system' | 'tool',
|
||||
content: string,
|
||||
overrides: Record<string, any> = {},
|
||||
) {
|
||||
return {
|
||||
id,
|
||||
session_id: overrides.session_id || 'sess-1',
|
||||
role,
|
||||
content,
|
||||
tool_call_id: null,
|
||||
tool_calls: null,
|
||||
tool_name: null,
|
||||
timestamp: 1710000000 + id,
|
||||
token_count: null,
|
||||
finish_reason: null,
|
||||
reasoning: null,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
async function flushPromises() {
|
||||
await Promise.resolve()
|
||||
await Promise.resolve()
|
||||
@@ -294,4 +317,186 @@ describe('Chat Store', () => {
|
||||
expect(store.isRunActive).toBe(false)
|
||||
expect(window.localStorage.getItem(inFlightKey('sess-1'))).toBeNull()
|
||||
})
|
||||
|
||||
it('keeps colon deltas before and after a tool boundary', async () => {
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
_runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
) => {
|
||||
onEvent({ event: 'message.delta', delta: '让我直接读文件:' })
|
||||
onEvent({ event: 'tool.started', tool: 'read_file', preview: 'notes.md' })
|
||||
onEvent({ event: 'tool.completed' })
|
||||
onEvent({ event: 'message.delta', delta: '读取后结论: final' })
|
||||
onEvent({ event: 'run.completed' })
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('check file')
|
||||
await flushPromises()
|
||||
|
||||
const assistantText = store.messages
|
||||
.filter(m => m.role === 'assistant')
|
||||
.map(m => m.content)
|
||||
.join('')
|
||||
expect(assistantText).toBe('让我直接读文件:读取后结论: final')
|
||||
expect(store.messages.some(m => m.role === 'tool' && m.toolName === 'read_file' && m.toolStatus === 'done')).toBe(true)
|
||||
})
|
||||
|
||||
it('renders raw SSE fallback message events as assistant deltas', async () => {
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
_runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
) => {
|
||||
onEvent({ event: 'message', delta: '原因:raw fallback' })
|
||||
onEvent({ event: 'run.completed' })
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('raw stream')
|
||||
await flushPromises()
|
||||
|
||||
expect(store.messages.some(m => m.role === 'assistant' && m.content === '原因:raw fallback')).toBe(true)
|
||||
})
|
||||
|
||||
it('does not stop polling when server messages are stable but the session is still active', async () => {
|
||||
vi.useFakeTimers()
|
||||
|
||||
let fetchSessionCalls = 0
|
||||
mockSessionsApi.fetchSession.mockImplementation(async () => {
|
||||
fetchSessionCalls += 1
|
||||
if (fetchSessionCalls === 1) return null
|
||||
return makeDetail('sess-1', [
|
||||
makeHermesMessage(1, 'user', 'tool gap prompt'),
|
||||
makeHermesMessage(2, 'assistant', '让我直接读文件:'),
|
||||
], { ended_at: null })
|
||||
})
|
||||
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
_runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
_onDone: () => void,
|
||||
onError: (err: Error) => void,
|
||||
) => {
|
||||
onEvent({ event: 'message.delta', delta: '让我直接读文件:' })
|
||||
setTimeout(() => onError(new Error('SSE connection error')), 0)
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('tool gap prompt')
|
||||
const sid = store.activeSessionId!
|
||||
|
||||
await vi.advanceTimersByTimeAsync(0)
|
||||
await flushPromises()
|
||||
await vi.advanceTimersByTimeAsync(9000)
|
||||
await flushPromises()
|
||||
|
||||
expect(window.localStorage.getItem(inFlightKey(sid))).toBeTruthy()
|
||||
expect(store.isRunActive).toBe(true)
|
||||
})
|
||||
|
||||
it('reconciles the final session after run.completed to recover missed deltas', async () => {
|
||||
let fetchSessionCalls = 0
|
||||
mockSessionsApi.fetchSession.mockImplementation(async () => {
|
||||
fetchSessionCalls += 1
|
||||
if (fetchSessionCalls === 1) return null
|
||||
return makeDetail('sess-1', [
|
||||
makeHermesMessage(1, 'user', 'finish prompt'),
|
||||
makeHermesMessage(2, 'assistant', '让我直接读文件:读取后结论:完整回答'),
|
||||
], { ended_at: 1710000010 })
|
||||
})
|
||||
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
_runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
) => {
|
||||
onEvent({ event: 'message.delta', delta: '让我直接读文件:' })
|
||||
onEvent({ event: 'run.completed' })
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('finish prompt')
|
||||
await flushPromises()
|
||||
|
||||
expect(store.messages.some(m => m.role === 'assistant' && m.content === '让我直接读文件:读取后结论:完整回答')).toBe(true)
|
||||
})
|
||||
|
||||
it('does not replace longer local tool-boundary text with a stale shorter final fetch', async () => {
|
||||
let fetchSessionCalls = 0
|
||||
const stalePrefix = '让我直接读文件:较长的工具前说明'
|
||||
mockSessionsApi.fetchSession.mockImplementation(async () => {
|
||||
fetchSessionCalls += 1
|
||||
if (fetchSessionCalls === 1) return null
|
||||
return makeDetail('sess-1', [
|
||||
makeHermesMessage(1, 'user', 'stale prompt'),
|
||||
makeHermesMessage(2, 'assistant', stalePrefix),
|
||||
], { ended_at: 1710000010 })
|
||||
})
|
||||
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
_runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
) => {
|
||||
onEvent({ event: 'message.delta', delta: stalePrefix })
|
||||
onEvent({ event: 'tool.started', tool: 'read_file', preview: 'notes.md' })
|
||||
onEvent({ event: 'tool.completed' })
|
||||
onEvent({ event: 'message.delta', delta: 'OK' })
|
||||
onEvent({ event: 'run.completed' })
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('stale prompt')
|
||||
await flushPromises()
|
||||
|
||||
const assistantText = store.messages
|
||||
.filter(m => m.role === 'assistant')
|
||||
.map(m => m.content)
|
||||
.join('')
|
||||
expect(assistantText).toBe(`${stalePrefix}OK`)
|
||||
expect(store.messages.some(m => m.role === 'tool' && m.toolStatus === 'done')).toBe(true)
|
||||
})
|
||||
|
||||
it('does not let delayed completion reconciliation clear a newer in-flight run', async () => {
|
||||
let resolveReconcile: ((detail: any) => void) | null = null
|
||||
const reconcilePromise = new Promise<any>(resolve => { resolveReconcile = resolve })
|
||||
mockSessionsApi.fetchSession.mockImplementation(() => reconcilePromise)
|
||||
mockChatApi.startRun
|
||||
.mockResolvedValueOnce({ run_id: 'run-1', status: 'queued' })
|
||||
.mockResolvedValueOnce({ run_id: 'run-2', status: 'queued' })
|
||||
let firstRunEvent: ((event: any) => void) | null = null
|
||||
mockChatApi.streamRunEvents.mockImplementation((
|
||||
runId: string,
|
||||
onEvent: (event: any) => void,
|
||||
) => {
|
||||
if (runId === 'run-1') firstRunEvent = onEvent
|
||||
return { abort: vi.fn() }
|
||||
})
|
||||
|
||||
const store = useChatStore()
|
||||
await flushPromises()
|
||||
await store.sendMessage('first')
|
||||
firstRunEvent!({ event: 'run.completed' })
|
||||
await flushPromises()
|
||||
const sid = store.activeSessionId!
|
||||
await store.sendMessage('second')
|
||||
|
||||
expect(JSON.parse(window.localStorage.getItem(inFlightKey(sid)) || '{}').runId).toBe('run-2')
|
||||
|
||||
resolveReconcile!(makeDetail(sid, [
|
||||
makeHermesMessage(1, 'user', 'first', { session_id: sid }),
|
||||
makeHermesMessage(2, 'assistant', 'first done', { session_id: sid }),
|
||||
], { ended_at: 1710000010 }))
|
||||
await flushPromises()
|
||||
|
||||
expect(JSON.parse(window.localStorage.getItem(inFlightKey(sid)) || '{}').runId).toBe('run-2')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -414,4 +414,89 @@ describe('SSE stream interception — run.completed', () => {
|
||||
|
||||
expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50)
|
||||
})
|
||||
|
||||
it('forwards colon-containing SSE deltas around tool events unchanged and disables buffering', async () => {
|
||||
const runId = 'run-colon-tool'
|
||||
const sseData = [
|
||||
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: '让我直接读文件:A: B' })}\n\n`,
|
||||
`data: ${JSON.stringify({ event: 'tool.started', run_id: runId, tool: 'read_file', preview: 'file:a.md' })}\n\n`,
|
||||
`data: ${JSON.stringify({ event: 'tool.completed', run_id: runId })}\n\n`,
|
||||
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: '继续:done' })}\n\n`,
|
||||
]
|
||||
|
||||
mockFetch.mockResolvedValue({
|
||||
status: 200,
|
||||
headers: new Headers({ 'content-type': 'text/event-stream' }),
|
||||
body: createSSEBody(sseData),
|
||||
})
|
||||
|
||||
const ctx = createMockCtx({
|
||||
path: `/api/hermes/v1/runs/${runId}/events`,
|
||||
search: '',
|
||||
})
|
||||
|
||||
await proxy(ctx)
|
||||
|
||||
const forwarded = ctx.res.write.mock.calls
|
||||
.map(([chunk]: [Uint8Array]) => new TextDecoder().decode(chunk))
|
||||
.join('')
|
||||
expect(forwarded).toBe(sseData.join(''))
|
||||
expect(ctx.set).toHaveBeenCalledWith('Content-Type', 'text/event-stream')
|
||||
expect(ctx.set).toHaveBeenCalledWith('Cache-Control', 'no-cache, no-transform')
|
||||
expect(ctx.set).toHaveBeenCalledWith('X-Accel-Buffering', 'no')
|
||||
})
|
||||
|
||||
it('intercepts run.completed with CRLF delimiters and data without a space', async () => {
|
||||
const runId = 'run-crlf'
|
||||
setRunSession(runId, 'session-crlf')
|
||||
const completedJson = JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 321, output_tokens: 45, total_tokens: 366 } })
|
||||
const sseData = [`data:${completedJson}\r\n\r\n`]
|
||||
|
||||
mockFetch.mockResolvedValue({
|
||||
status: 200,
|
||||
headers: new Headers({ 'content-type': 'text/event-stream' }),
|
||||
body: createSSEBody(sseData),
|
||||
})
|
||||
|
||||
const ctx = createMockCtx({
|
||||
path: `/api/hermes/v1/runs/${runId}/events`,
|
||||
search: '',
|
||||
})
|
||||
|
||||
await proxy(ctx)
|
||||
|
||||
expect(mockUpdateUsage).toHaveBeenCalledWith('session-crlf', 321, 45)
|
||||
})
|
||||
|
||||
it('does not let usage accounting failures abort the SSE stream', async () => {
|
||||
const runId = 'run-usage-fails'
|
||||
setRunSession(runId, 'session-usage-fails')
|
||||
mockUpdateUsage.mockImplementationOnce(() => {
|
||||
throw new Error('usage db unavailable')
|
||||
})
|
||||
const sseData = [
|
||||
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'before:' })}\n\n`,
|
||||
`data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 1, output_tokens: 2, total_tokens: 3 } })}\n\n`,
|
||||
]
|
||||
|
||||
mockFetch.mockResolvedValue({
|
||||
status: 200,
|
||||
headers: new Headers({ 'content-type': 'text/event-stream' }),
|
||||
body: createSSEBody(sseData),
|
||||
})
|
||||
|
||||
const ctx = createMockCtx({
|
||||
path: `/api/hermes/v1/runs/${runId}/events`,
|
||||
search: '',
|
||||
})
|
||||
|
||||
await proxy(ctx)
|
||||
|
||||
const forwarded = ctx.res.write.mock.calls
|
||||
.map(([chunk]: [Uint8Array]) => new TextDecoder().decode(chunk))
|
||||
.join('')
|
||||
expect(ctx.status).toBe(200)
|
||||
expect(forwarded).toBe(sseData.join(''))
|
||||
expect(ctx.res.end).toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user