[codex] integrate goal command workflow (#1025)

* feat: integrate goal command workflow
* fix: keep goal done visible
* fix: add goal done slash command
* fix: promote queued message on run start
2026-05-25 19:26:23 +08:00
parent 0eab6a1125
commit badb17cf8e
30 changed files with 1535 additions and 85 deletions
@@ -0,0 +1,88 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+const updateSessionStatsMock = vi.fn()
+const flushBridgePendingToDbMock = vi.fn()
+const flushResponseRunToDbMock = vi.fn()
+const replaceStateMock = vi.fn()
+const calcAndUpdateUsageMock = vi.fn()
+
+vi.mock('../../packages/server/src/db/hermes/session-store', () => ({
+  updateSessionStats: updateSessionStatsMock,
+}))
+
+vi.mock('../../packages/server/src/services/logger', () => ({
+  logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
+}))
+
+vi.mock('../../packages/server/src/services/hermes/run-chat/bridge-message', () => ({
+  flushBridgePendingToDb: flushBridgePendingToDbMock,
+}))
+
+vi.mock('../../packages/server/src/services/hermes/run-chat/response-stream', () => ({
+  flushResponseRunToDb: flushResponseRunToDbMock,
+}))
+
+vi.mock('../../packages/server/src/services/hermes/run-chat/compression', () => ({
+  replaceState: replaceStateMock,
+}))
+
+vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({
+  calcAndUpdateUsage: calcAndUpdateUsageMock,
+}))
+
+function makeHarness() {
+  const emit = vi.fn()
+  const nsp = {
+    adapter: { rooms: new Map([['session:session-1', new Set(['socket-1'])]]) },
+    to: vi.fn(() => ({ emit })),
+  }
+  const socket = {
+    connected: true,
+    emit: vi.fn(),
+  }
+  return { emit, nsp, socket }
+}
+
+describe('run chat abort goal handling', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 0, outputTokens: 0 })
+  })
+
+  it('pauses an active goal and clears hidden goal continuations when aborting a CLI run', async () => {
+    const { handleAbort } = await import('../../packages/server/src/services/hermes/run-chat/abort')
+    const { emit, nsp, socket } = makeHarness()
+    const state = {
+      messages: [],
+      isWorking: true,
+      isAborting: false,
+      events: [],
+      queue: [
+        { queue_id: 'goal-1', input: 'continue goal', profile: 'default', goalContinuation: true },
+        { queue_id: 'user-1', input: 'normal follow-up', profile: 'default', source: 'cli' },
+      ],
+      runId: 'run-1',
+      profile: 'default',
+      source: 'cli',
+    } as any
+    const sessionMap = new Map([['session-1', state]])
+    const bridge = {
+      interrupt: vi.fn().mockResolvedValue({ ok: true }),
+      goalPause: vi.fn().mockResolvedValue({ handled: true, status: 'paused', reason: 'user-interrupted' }),
+    }
+    const runQueuedItem = vi.fn()
+
+    await handleAbort(nsp as any, socket as any, 'session-1', sessionMap, bridge, runQueuedItem)
+
+    expect(bridge.interrupt).toHaveBeenCalledWith('session-1', 'Aborted by user', 'default')
+    expect(bridge.goalPause).toHaveBeenCalledWith('session-1', 'user-interrupted', 'default')
+    expect(runQueuedItem).toHaveBeenCalledWith(socket, 'session-1', expect.objectContaining({
+      queue_id: 'user-1',
+    }), 'default')
+    expect(state.queue).toEqual([])
+    expect(emit).toHaveBeenCalledWith('abort.completed', expect.objectContaining({
+      session_id: 'session-1',
+      synced: true,
+    }))
+  })
+})
@@ -188,6 +188,139 @@ describe('bridge run final context usage', () => {
    }))
  })

+  it('evaluates active goals after a successful bridge run and queues continuation prompts', async () => {
+    const emit = vi.fn()
+    const nsp = makeNamespace(emit)
+    const socket = makeSocket()
+    const state = makeState()
+    const sessionMap = new Map([['session-1', state]])
+    const dequeueNextQueuedRun = vi.fn()
+    addMessageMock.mockReturnValue(42)
+    const bridge = {
+      chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
+      contextEstimate: vi.fn().mockResolvedValue({
+        token_count: 12345,
+        message_count: 2,
+        tool_count: 4,
+        system_prompt_chars: 13,
+      }),
+      goalEvaluate: vi.fn().mockResolvedValue({
+        handled: true,
+        should_continue: true,
+        continuation_prompt: '[Continuing toward your standing goal]\nGoal: fix tests',
+        message: '↻ Continuing toward goal (1/20): tests still fail',
+        verdict: 'continue',
+      }),
+      streamOutput: vi.fn(async function* () {
+        yield {
+          run_id: 'run-1',
+          done: true,
+          status: 'completed',
+          output: 'not finished',
+          result: { final_response: 'not finished' },
+        }
+      }),
+    } as any
+
+    const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
+    await handleBridgeRun(
+      nsp,
+      socket,
+      {
+        input: 'hello',
+        session_id: 'session-1',
+        model_groups: [{ provider: 'openai', models: ['gpt-test'] }],
+      },
+      'default',
+      sessionMap,
+      bridge,
+      false,
+      vi.fn(),
+      dequeueNextQueuedRun,
+    )
+
+    expect(bridge.goalEvaluate).toHaveBeenCalledWith('session-1', 'not finished', 'default')
+    expect(addMessageMock).toHaveBeenCalledWith(expect.objectContaining({
+      session_id: 'session-1',
+      role: 'command',
+      content: '↻ Continuing toward goal (1/20): tests still fail',
+    }))
+    expect(emit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      command: 'goal',
+      action: 'continue',
+      message: '↻ Continuing toward goal (1/20): tests still fail',
+    }))
+    expect(state.queue).toEqual([expect.objectContaining({
+      input: '[Continuing toward your standing goal]\nGoal: fix tests',
+      displayInput: null,
+      storageMessage: '[Continuing toward your standing goal]\nGoal: fix tests',
+      model: 'gpt-test',
+      provider: 'openai',
+      model_groups: [{ provider: 'openai', models: ['gpt-test'] }],
+      goalContinuation: true,
+    })])
+    expect(dequeueNextQueuedRun).toHaveBeenCalledWith(socket, 'session-1')
+  })
+
+  it('skips hidden goal continuation runs without pausing when the judge is unavailable', async () => {
+    const emit = vi.fn()
+    const nsp = makeNamespace(emit)
+    const socket = makeSocket()
+    const state = makeState()
+    const sessionMap = new Map([['session-1', state]])
+    const dequeueNextQueuedRun = vi.fn()
+    addMessageMock.mockReturnValue(43)
+    const bridge = {
+      chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
+      command: vi.fn(),
+      contextEstimate: vi.fn().mockResolvedValue({
+        token_count: 12345,
+        message_count: 2,
+        tool_count: 4,
+        system_prompt_chars: 13,
+      }),
+      goalEvaluate: vi.fn().mockResolvedValue({
+        handled: true,
+        should_continue: true,
+        continuation_prompt: '[Continuing toward your standing goal]\nGoal: fix tests',
+        message: '↻ Continuing toward goal (1/20): no auxiliary client configured',
+        verdict: 'continue',
+        reason: 'no auxiliary client configured',
+      }),
+      streamOutput: vi.fn(async function* () {
+        yield {
+          run_id: 'run-1',
+          done: true,
+          status: 'completed',
+          output: 'done',
+          result: { final_response: 'done' },
+        }
+      }),
+    } as any
+
+    const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
+    await handleBridgeRun(
+      nsp,
+      socket,
+      { input: 'hello', session_id: 'session-1' },
+      'default',
+      sessionMap,
+      bridge,
+      false,
+      vi.fn(),
+      dequeueNextQueuedRun,
+    )
+
+    expect(bridge.command).not.toHaveBeenCalled()
+    expect(state.queue).toEqual([])
+    expect(dequeueNextQueuedRun).not.toHaveBeenCalled()
+    expect(emit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      command: 'goal',
+      action: 'judge_unavailable',
+      message: 'Goal judge is not configured; automatic goal continuation was skipped. The goal remains active, but Hermes cannot mark it done automatically.',
+    }))
+  })
+
  it('uses cached fixed context instead of bridge estimate when available', async () => {
    const emit = vi.fn()
    const nsp = makeNamespace(emit)
@@ -402,4 +535,56 @@ describe('bridge run final context usage', () => {
      contextTokens: 54321,
    }))
  })
+
+  it('emits bridge lifecycle status events so retries are visible', async () => {
+    const emit = vi.fn()
+    const nsp = makeNamespace(emit)
+    const socket = makeSocket()
+    const state = makeState()
+    const sessionMap = new Map([['session-1', state]])
+    const bridge = {
+      chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
+      contextEstimate: vi.fn().mockResolvedValue({
+        token_count: 12345,
+        message_count: 2,
+        tool_count: 4,
+        system_prompt_chars: 13,
+      }),
+      streamOutput: vi.fn(async function* () {
+        yield {
+          run_id: 'run-1',
+          done: false,
+          status: 'running',
+          events: [
+            { event: 'status', kind: 'lifecycle', text: 'Retrying in 3.0s (attempt 1/3)...' },
+          ],
+        }
+        yield { run_id: 'run-1', done: true, status: 'completed', output: 'done' }
+      }),
+    } as any
+
+    const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
+    await handleBridgeRun(
+      nsp,
+      socket,
+      { input: 'hello', session_id: 'session-1' },
+      'default',
+      sessionMap,
+      bridge,
+      false,
+      vi.fn(),
+      vi.fn(),
+    )
+
+    expect(replaceStateMock).toHaveBeenCalledWith(sessionMap, 'session-1', 'agent.event', expect.objectContaining({
+      event: 'agent.event',
+      kind: 'lifecycle',
+      text: 'Retrying in 3.0s (attempt 1/3)...',
+    }))
+    expect(emit).toHaveBeenCalledWith('agent.event', expect.objectContaining({
+      event: 'agent.event',
+      kind: 'lifecycle',
+      text: 'Retrying in 3.0s (attempt 1/3)...',
+    }))
+  })
 })
@@ -47,6 +47,19 @@ describe('run chat model config', () => {
    expect(readConfigYamlForProfileMock).not.toHaveBeenCalled()
  })

+  it('keeps an explicit model when no model group list is available', async () => {
+    const { resolveBridgeRunModelConfig } = await import('../../packages/server/src/services/hermes/run-chat/model-config')
+
+    const result = await resolveBridgeRunModelConfig({
+      profile: 'default',
+      requestedModel: 'gpt-5.5',
+      requestedProvider: 'custom',
+    })
+
+    expect(result).toEqual({ model: 'gpt-5.5', provider: 'custom' })
+    expect(readConfigYamlForProfileMock).not.toHaveBeenCalled()
+  })
+
  it('falls back to the profile default when the candidate model is unavailable', async () => {
    const { resolveBridgeRunModelConfig } = await import('../../packages/server/src/services/hermes/run-chat/model-config')

@@ -40,7 +40,10 @@ vi.mock('../../packages/server/src/services/hermes/run-chat/bridge-message', ()
  flushBridgePendingToDb: vi.fn(),
 }))

-function makeContext(state: any) {
+function makeContext(state: any, commandResult: Record<string, unknown> = {
+  handled: true,
+  message: '[IMPORTANT: expanded plan skill prompt]',
+}) {
  const namespaceEmit = vi.fn()
  const nsp = {
    to: vi.fn(() => ({ emit: namespaceEmit })),
@@ -55,9 +58,12 @@ function makeContext(state: any) {
  const sessionMap = new Map([['session-1', state]])
  const runQueuedItem = vi.fn()
  const bridge = {
-    command: vi.fn(async () => ({
-      handled: true,
-      message: '[IMPORTANT: expanded plan skill prompt]',
+    command: vi.fn(async () => commandResult),
+    status: vi.fn(async () => ({
+      exists: true,
+      running: false,
+      current_run_id: null,
+      message_count: 0,
    })),
  }
  return { bridge, namespaceEmit, nsp, runQueuedItem, sessionMap, socket }
@@ -105,4 +111,196 @@ describe('plan session command', () => {
    }))
    expect(namespaceEmit).not.toHaveBeenCalledWith('session.command', expect.anything())
  })
+
+  it('starts an idle goal command as a hidden kickoff run', async () => {
+    const state = { messages: [], isWorking: false, events: [], queue: [] }
+    const { bridge, namespaceEmit, runQueuedItem, sessionMap, socket, nsp } = makeContext(state, {
+      handled: true,
+      type: 'goal',
+      action: 'set',
+      message: 'Goal set.',
+      kickoff_prompt: 'fix the tests',
+      max_turns: 20,
+    })
+    const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
+    const command = parseSessionCommand('/goal fix the tests')!
+
+    await handleSessionCommand('session-1', command, {
+      nsp: nsp as any,
+      socket: socket as any,
+      sessionMap,
+      bridge: bridge as any,
+      profile: 'default',
+      queueId: 'goal-queue-id',
+      runQueuedItem,
+    })
+
+    expect(bridge.command).toHaveBeenCalledWith('session-1', 'goal fix the tests', 'default')
+    expect(namespaceEmit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      action: 'set',
+      message: 'Goal set.',
+      terminal: false,
+      started: true,
+    }))
+    expect(runQueuedItem).toHaveBeenCalledWith(socket, 'session-1', expect.objectContaining({
+      queue_id: 'goal-queue-id',
+      input: 'fix the tests',
+      displayInput: null,
+      storageMessage: 'fix the tests',
+      source: 'cli',
+    }), 'default')
+  })
+
+  it('clears queued goal continuations when pausing a goal', async () => {
+    const state = {
+      messages: [],
+      isWorking: true,
+      events: [],
+      queue: [
+        { queue_id: 'goal-1', input: 'continue', displayInput: null, storageMessage: 'continue', profile: 'default', goalContinuation: true },
+        { queue_id: 'user-1', input: 'user message', profile: 'default' },
+      ],
+    }
+    const { bridge, namespaceEmit, runQueuedItem, sessionMap, socket, nsp } = makeContext(state, {
+      handled: true,
+      type: 'goal',
+      action: 'pause',
+      message: 'Goal paused.',
+      clear_goal_continuations: true,
+    })
+    const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
+    const command = parseSessionCommand('/goal pause')!
+
+    await handleSessionCommand('session-1', command, {
+      nsp: nsp as any,
+      socket: socket as any,
+      sessionMap,
+      bridge: bridge as any,
+      profile: 'default',
+      runQueuedItem,
+    })
+
+    expect(runQueuedItem).not.toHaveBeenCalled()
+    expect(state.queue).toEqual([expect.objectContaining({ queue_id: 'user-1' })])
+    expect(namespaceEmit).toHaveBeenCalledWith('run.queued', expect.objectContaining({
+      queue_length: 1,
+      queued_messages: [expect.objectContaining({ id: 'user-1', content: 'user message' })],
+    }))
+  })
+
+  it('emits a goal-specific clear action for goal done', async () => {
+    const state = {
+      messages: [],
+      isWorking: false,
+      events: [],
+      queue: [
+        { queue_id: 'goal-1', input: 'continue', displayInput: null, storageMessage: 'continue', profile: 'default', goalContinuation: true },
+      ],
+    }
+    const { bridge, namespaceEmit, runQueuedItem, sessionMap, socket, nsp } = makeContext(state, {
+      handled: true,
+      type: 'goal',
+      action: 'clear',
+      message: 'Goal cleared.',
+      clear_goal_continuations: true,
+    })
+    const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
+    const command = parseSessionCommand('/goal done')!
+
+    await handleSessionCommand('session-1', command, {
+      nsp: nsp as any,
+      socket: socket as any,
+      sessionMap,
+      bridge: bridge as any,
+      profile: 'default',
+      runQueuedItem,
+    })
+
+    expect(bridge.command).toHaveBeenCalledWith('session-1', 'goal done', 'default')
+    expect(runQueuedItem).not.toHaveBeenCalled()
+    expect(state.queue).toEqual([])
+    expect(namespaceEmit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      command: 'goal',
+      action: 'goal_clear',
+      message: 'Goal cleared.',
+      terminal: true,
+      started: false,
+    }))
+  })
+
+  it('starts a resumed goal as a hidden continuation run', async () => {
+    const state = { messages: [], isWorking: false, events: [], queue: [] }
+    const { bridge, namespaceEmit, runQueuedItem, sessionMap, socket, nsp } = makeContext(state, {
+      handled: true,
+      type: 'goal',
+      action: 'resume',
+      message: 'Goal resumed.',
+      kickoff_prompt: '[Continuing toward your standing goal]\nGoal: fix the tests',
+      max_turns: 20,
+    })
+    const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
+    const command = parseSessionCommand('/goal resume')!
+
+    await handleSessionCommand('session-1', command, {
+      nsp: nsp as any,
+      socket: socket as any,
+      sessionMap,
+      bridge: bridge as any,
+      profile: 'default',
+      queueId: 'resume-queue-id',
+      runQueuedItem,
+    })
+
+    expect(bridge.command).toHaveBeenCalledWith('session-1', 'goal resume', 'default')
+    expect(namespaceEmit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      action: 'resume',
+      message: 'Goal resumed.',
+      terminal: false,
+      started: true,
+    }))
+    expect(runQueuedItem).toHaveBeenCalledWith(socket, 'session-1', expect.objectContaining({
+      queue_id: 'resume-queue-id',
+      input: '[Continuing toward your standing goal]\nGoal: fix the tests',
+      displayInput: null,
+      storageMessage: '[Continuing toward your standing goal]\nGoal: fix the tests',
+      source: 'cli',
+    }), 'default')
+  })
+
+  it('includes bridge run state in goal status output', async () => {
+    const state = { messages: [], isWorking: false, events: [], queue: [] }
+    const { bridge, namespaceEmit, runQueuedItem, sessionMap, socket, nsp } = makeContext(state, {
+      handled: true,
+      type: 'goal',
+      action: 'goal_status',
+      message: 'Goal (active, 0/20 turns): build docs',
+    })
+    bridge.status.mockResolvedValueOnce({
+      exists: true,
+      running: true,
+      current_run_id: 'run-123',
+      message_count: 4,
+    })
+    const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
+    const command = parseSessionCommand('/goal status')!
+
+    await handleSessionCommand('session-1', command, {
+      nsp: nsp as any,
+      socket: socket as any,
+      sessionMap,
+      bridge: bridge as any,
+      profile: 'default',
+      runQueuedItem,
+    })
+
+    expect(runQueuedItem).not.toHaveBeenCalled()
+    expect(namespaceEmit).toHaveBeenCalledWith('session.command', expect.objectContaining({
+      action: 'goal_status',
+      message: 'Goal (active, 0/20 turns): build docs\nCurrent turn: 1/20 running (completed turns: 0/20; count updates after the judge).\nRun: running (run-123)',
+      bridgeStatus: expect.objectContaining({
+        running: true,
+        currentRunId: 'run-123',
+      }),
+    }))
+  })
 })