[codex] fix bridge tool marker flush persistence (#1002)

* fix: don't drop pending tool-call-marker prefix on tool.started/run.done

  The `filterBridgeToolCallMarkupDelta` filter holds back any text that ends in a partial prefix of `[Calling tool:` (i.e. `[`, `[C`, `[Ca`, ..., `[Calling tool`) so it can decide whether the buffered chars are the start of a tool-call markup block to be hidden, or just regular text to be released by the next delta.

  The bug: that "release on next delta" assumption breaks at TWO points:

  1. **On `tool.started`**: the next chunk for this assistant message is the tool call itself, NOT a follow-up text delta. Buffered chars sit there forever and nothing flushes them — they vanish silently from the user-visible stream.
  2. **On run completion**: the code did `state.bridgePendingToolCallMarkup = undefined` directly, dropping any pending chars without forwarding them.

  Both cases produce the user-visible symptom of "abrupt cuts in text right before/after tool calls (terminal, read_file, write_file...)" — 1 to 13 characters disappear at exactly the boundary where the model was emitting natural prose that happened to end with `[`.

  The fix introduces `flushPendingToolCallMarkup(state)` and calls it:

  - In the `tool.started` branch BEFORE recording the tool call, so the buffered chars are appended to the open assistant message and emitted as a normal `message.delta` to the client.
  - At run-done BEFORE clearing the buffer, same flush path.

  This is a pure recovery patch — no change to the marker detection logic itself. If the buffer turns out to actually be a real `[Calling tool: ...]` marker that just hasn't completed yet, that case is still caught by the existing `markerIdx >= 0` branch in the filter on the next delta. The only behavioral change is that the "orphan" cases (text that ends with `[` but never becomes a marker) are no longer dropped.

* fix bridge marker flush persistence

---------

Co-authored-by: Paulo Cavallari <paulocavallari@users.noreply.github.com>
2026-05-25 11:09:16 +08:00
parent 9e35d81f48
commit bbb8b1d536
4 changed files with 141 additions and 2 deletions
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'

-import { filterBridgeToolCallMarkupDelta } from '../../packages/server/src/services/hermes/run-chat/bridge-delta'
+import { filterBridgeToolCallMarkupDelta, flushPendingToolCallMarkup } from '../../packages/server/src/services/hermes/run-chat/bridge-delta'

 describe('run-chat bridge delta filtering', () => {
  it('keeps ordinary assistant text', () => {
@@ -37,4 +37,12 @@ describe('run-chat bridge delta filtering', () => {
    expect(filterBridgeToolCallMarkupDelta(state, 'Text [Call')).toBe('Text ')
    expect(filterBridgeToolCallMarkupDelta(state, 'ing tool: terminal with arguments: {}]\nDone')).toBe('Done')
  })
+
+  it('flushes an orphan partial marker suffix when no text chunk follows', () => {
+    const state = {}
+
+    expect(filterBridgeToolCallMarkupDelta(state, 'Text [Call')).toBe('Text ')
+    expect(flushPendingToolCallMarkup(state)).toBe('[Call')
+    expect(flushPendingToolCallMarkup(state)).toBe('')
+  })
 })
@@ -232,6 +232,73 @@ describe('bridge run final context usage', () => {
    }))
  })

+  it('persists pending tool marker text before a bridge run completes', async () => {
+    const emit = vi.fn()
+    const nsp = makeNamespace(emit)
+    const socket = makeSocket()
+    const state = makeState()
+    const persistedContent: string[] = []
+    flushBridgePendingToDbMock.mockImplementation((targetState: any) => {
+      persistedContent.push(targetState.bridgePendingAssistantContent || '')
+      targetState.bridgePendingAssistantContent = ''
+    })
+    ensureOpenBridgeAssistantMessageMock.mockImplementation((targetState: any, sessionId: string, runMarker: string) => {
+      let message = [...targetState.messages].reverse().find((m: any) => m.runMarker === runMarker && m.role === 'assistant' && m.finish_reason == null)
+      if (!message) {
+        message = {
+          id: targetState.messages.length + 1,
+          session_id: sessionId,
+          runMarker,
+          role: 'assistant',
+          content: '',
+          timestamp: Math.floor(Date.now() / 1000),
+        }
+        targetState.messages.push(message)
+      }
+      return message
+    })
+    const sessionMap = new Map([['session-1', state]])
+    const bridge = {
+      chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
+      contextEstimate: vi.fn().mockResolvedValue({
+        token_count: 12345,
+        message_count: 2,
+        tool_count: 4,
+        system_prompt_chars: 13,
+      }),
+      streamOutput: vi.fn(async function* () {
+        yield { run_id: 'run-1', done: false, status: 'running', delta: 'Text [Call', events: [] }
+        yield { run_id: 'run-1', done: true, status: 'completed', output: '', events: [] }
+      }),
+    } as any
+
+    const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
+    await handleBridgeRun(
+      nsp,
+      socket,
+      { input: 'hello', session_id: 'session-1' },
+      'default',
+      sessionMap,
+      bridge,
+      false,
+      vi.fn(),
+      vi.fn(),
+    )
+
+    expect(persistedContent).toContain('Text [Call')
+    expect(emit).toHaveBeenCalledWith('message.delta', expect.objectContaining({
+      delta: 'Text ',
+      output: 'Text ',
+    }))
+    expect(emit).toHaveBeenCalledWith('message.delta', expect.objectContaining({
+      delta: '[Call',
+      output: 'Text [Call',
+    }))
+    expect(emit).toHaveBeenCalledWith('run.completed', expect.objectContaining({
+      output: 'Text [Call',
+    }))
+  })
+
  it('refreshes full context tokens when a bridge run fails', async () => {
    const emit = vi.fn()
    const nsp = makeNamespace(emit)