[codex] integrate goal command workflow (#1025)

* feat: integrate goal command workflow

* fix: keep goal done visible

* fix: add goal done slash command

* fix: promote queued message on run start
This commit is contained in:
ekko
2026-05-25 19:26:23 +08:00
committed by GitHub
parent 0eab6a1125
commit badb17cf8e
30 changed files with 1535 additions and 85 deletions
+88
View File
@@ -0,0 +1,88 @@
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Shared mock functions. Each one is handed to a vi.mock factory below so the
// tests in this file can stub or inspect the corresponding module export.
const updateSessionStatsMock = vi.fn()
const flushBridgePendingToDbMock = vi.fn()
const flushResponseRunToDbMock = vi.fn()
const replaceStateMock = vi.fn()
const calcAndUpdateUsageMock = vi.fn()
// NOTE(review): the factories below close over the consts above. The module
// under test is loaded with a dynamic import inside the test body, so the
// factories presumably run only after these consts are initialised — confirm
// before adding a static import of any mocked module.

// Session store: only updateSessionStats is replaced.
vi.mock('../../packages/server/src/db/hermes/session-store', () => ({
updateSessionStats: updateSessionStatsMock,
}))
// Logger: every level is a throwaway spy, so log calls made during a test are not observed.
vi.mock('../../packages/server/src/services/logger', () => ({
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
}))
// Bridge message flushing is replaced by a spy.
vi.mock('../../packages/server/src/services/hermes/run-chat/bridge-message', () => ({
flushBridgePendingToDb: flushBridgePendingToDbMock,
}))
// Response-stream flushing is replaced by a spy.
vi.mock('../../packages/server/src/services/hermes/run-chat/response-stream', () => ({
flushResponseRunToDb: flushResponseRunToDbMock,
}))
// Compression module: replaceState is replaced by a spy.
vi.mock('../../packages/server/src/services/hermes/run-chat/compression', () => ({
replaceState: replaceStateMock,
}))
// Usage accounting: the suite's beforeEach gives this mock a resolved value.
vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({
calcAndUpdateUsage: calcAndUpdateUsageMock,
}))
/**
 * Builds the socket.io doubles used by the abort test.
 *
 * Returns:
 * - `emit`: the spy returned by every `nsp.to(...)` call, i.e. the room broadcast.
 * - `nsp`: a namespace stub whose adapter lists one room, `session:session-1`,
 *   containing the single member `socket-1`.
 * - `socket`: a stub reporting `connected: true` with its own `emit` spy.
 */
function makeHarness() {
  const emit = vi.fn()

  const roomMembers = new Set(['socket-1'])
  const rooms = new Map([['session:session-1', roomMembers]])
  const nsp = {
    adapter: { rooms },
    to: vi.fn(() => ({ emit })),
  }

  const socket = { connected: true, emit: vi.fn() }

  return { emit, nsp, socket }
}
describe('run chat abort goal handling', () => {
  // Wipe call history from earlier tests, then make usage accounting resolve
  // to a zero-token result.
  beforeEach(() => {
    vi.clearAllMocks()
    calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 0, outputTokens: 0 })
  })

  // Aborting a working CLI session must interrupt the run, pause the goal,
  // drop the hidden goal continuation from the queue and hand the remaining
  // user message to runQueuedItem.
  it('pauses an active goal and clears hidden goal continuations when aborting a CLI run', async () => {
    const { handleAbort } = await import('../../packages/server/src/services/hermes/run-chat/abort')
    const { emit, nsp, socket } = makeHarness()

    const hiddenContinuation = {
      queue_id: 'goal-1',
      input: 'continue goal',
      profile: 'default',
      goalContinuation: true,
    }
    const userFollowUp = {
      queue_id: 'user-1',
      input: 'normal follow-up',
      profile: 'default',
      source: 'cli',
    }
    const state = {
      messages: [],
      isWorking: true,
      isAborting: false,
      events: [],
      queue: [hiddenContinuation, userFollowUp],
      runId: 'run-1',
      profile: 'default',
      source: 'cli',
    } as any
    const sessionMap = new Map([['session-1', state]])

    const interrupt = vi.fn().mockResolvedValue({ ok: true })
    const goalPause = vi.fn().mockResolvedValue({
      handled: true,
      status: 'paused',
      reason: 'user-interrupted',
    })
    const bridge = { interrupt, goalPause }
    const runQueuedItem = vi.fn()

    await handleAbort(nsp as any, socket as any, 'session-1', sessionMap, bridge, runQueuedItem)

    expect(bridge.interrupt).toHaveBeenCalledWith('session-1', 'Aborted by user', 'default')
    expect(bridge.goalPause).toHaveBeenCalledWith('session-1', 'user-interrupted', 'default')
    expect(runQueuedItem).toHaveBeenCalledWith(
      socket,
      'session-1',
      expect.objectContaining({ queue_id: 'user-1' }),
      'default',
    )
    expect(state.queue).toEqual([])
    expect(emit).toHaveBeenCalledWith(
      'abort.completed',
      expect.objectContaining({ session_id: 'session-1', synced: true }),
    )
  })
})
@@ -188,6 +188,139 @@ describe('bridge run final context usage', () => {
}))
})
// A completed run whose goal verdict is "continue" must record and broadcast
// the judge message, enqueue the continuation prompt as a hidden queue item,
// and ask for the next queued run.
it('evaluates active goals after a successful bridge run and queues continuation prompts', async () => {
  const continuationPrompt = '[Continuing toward your standing goal]\nGoal: fix tests'
  const judgeMessage = '↻ Continuing toward goal (1/20): tests still fail'
  // Fresh literal per use so the expectation never aliases the request payload.
  const buildModelGroups = () => [{ provider: 'openai', models: ['gpt-test'] }]

  const emit = vi.fn()
  const nsp = makeNamespace(emit)
  const socket = makeSocket()
  const state = makeState()
  const sessionMap = new Map([['session-1', state]])
  const dequeueNextQueuedRun = vi.fn()
  addMessageMock.mockReturnValue(42)

  const bridge = {
    chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
    contextEstimate: vi.fn().mockResolvedValue({
      token_count: 12345,
      message_count: 2,
      tool_count: 4,
      system_prompt_chars: 13,
    }),
    goalEvaluate: vi.fn().mockResolvedValue({
      handled: true,
      should_continue: true,
      continuation_prompt: continuationPrompt,
      message: judgeMessage,
      verdict: 'continue',
    }),
    // Single terminal chunk: the run completes with output "not finished".
    streamOutput: vi.fn(async function* () {
      yield {
        run_id: 'run-1',
        done: true,
        status: 'completed',
        output: 'not finished',
        result: { final_response: 'not finished' },
      }
    }),
  } as any

  const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
  const request = {
    input: 'hello',
    session_id: 'session-1',
    model_groups: buildModelGroups(),
  }
  await handleBridgeRun(
    nsp,
    socket,
    request,
    'default',
    sessionMap,
    bridge,
    false,
    vi.fn(),
    dequeueNextQueuedRun,
  )

  expect(bridge.goalEvaluate).toHaveBeenCalledWith('session-1', 'not finished', 'default')
  expect(addMessageMock).toHaveBeenCalledWith(
    expect.objectContaining({
      session_id: 'session-1',
      role: 'command',
      content: judgeMessage,
    }),
  )
  expect(emit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      command: 'goal',
      action: 'continue',
      message: judgeMessage,
    }),
  )
  expect(state.queue).toEqual([
    expect.objectContaining({
      input: continuationPrompt,
      displayInput: null,
      storageMessage: continuationPrompt,
      model: 'gpt-test',
      provider: 'openai',
      model_groups: buildModelGroups(),
      goalContinuation: true,
    }),
  ])
  expect(dequeueNextQueuedRun).toHaveBeenCalledWith(socket, 'session-1')
})
// When the goal evaluation reports "no auxiliary client configured", no
// continuation is queued, no bridge command is issued, and the client gets a
// judge_unavailable notice instead.
it('skips hidden goal continuation runs without pausing when the judge is unavailable', async () => {
  const unavailableNotice =
    'Goal judge is not configured; automatic goal continuation was skipped. The goal remains active, but Hermes cannot mark it done automatically.'

  const emit = vi.fn()
  const nsp = makeNamespace(emit)
  const socket = makeSocket()
  const state = makeState()
  const sessionMap = new Map([['session-1', state]])
  const dequeueNextQueuedRun = vi.fn()
  addMessageMock.mockReturnValue(43)

  const bridge = {
    chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
    command: vi.fn(),
    contextEstimate: vi.fn().mockResolvedValue({
      token_count: 12345,
      message_count: 2,
      tool_count: 4,
      system_prompt_chars: 13,
    }),
    // The verdict still says "continue"; the reason field carries the missing-judge text.
    goalEvaluate: vi.fn().mockResolvedValue({
      handled: true,
      should_continue: true,
      continuation_prompt: '[Continuing toward your standing goal]\nGoal: fix tests',
      message: '↻ Continuing toward goal (1/20): no auxiliary client configured',
      verdict: 'continue',
      reason: 'no auxiliary client configured',
    }),
    streamOutput: vi.fn(async function* () {
      yield {
        run_id: 'run-1',
        done: true,
        status: 'completed',
        output: 'done',
        result: { final_response: 'done' },
      }
    }),
  } as any

  const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
  const request = { input: 'hello', session_id: 'session-1' }
  await handleBridgeRun(
    nsp,
    socket,
    request,
    'default',
    sessionMap,
    bridge,
    false,
    vi.fn(),
    dequeueNextQueuedRun,
  )

  expect(bridge.command).not.toHaveBeenCalled()
  expect(state.queue).toEqual([])
  expect(dequeueNextQueuedRun).not.toHaveBeenCalled()
  expect(emit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      command: 'goal',
      action: 'judge_unavailable',
      message: unavailableNotice,
    }),
  )
})
it('uses cached fixed context instead of bridge estimate when available', async () => {
const emit = vi.fn()
const nsp = makeNamespace(emit)
@@ -402,4 +535,56 @@ describe('bridge run final context usage', () => {
contextTokens: 54321,
}))
})
// A lifecycle status event streamed by the bridge must be stored via
// replaceState and broadcast to the client as an agent.event.
it('emits bridge lifecycle status events so retries are visible', async () => {
  const retryText = 'Retrying in 3.0s (attempt 1/3)...'

  const emit = vi.fn()
  const nsp = makeNamespace(emit)
  const socket = makeSocket()
  const state = makeState()
  const sessionMap = new Map([['session-1', state]])

  const bridge = {
    chat: vi.fn().mockResolvedValue({ run_id: 'run-1', status: 'started' }),
    contextEstimate: vi.fn().mockResolvedValue({
      token_count: 12345,
      message_count: 2,
      tool_count: 4,
      system_prompt_chars: 13,
    }),
    // Two chunks: an in-progress chunk carrying the retry status, then completion.
    streamOutput: vi.fn(async function* () {
      yield {
        run_id: 'run-1',
        done: false,
        status: 'running',
        events: [{ event: 'status', kind: 'lifecycle', text: retryText }],
      }
      yield { run_id: 'run-1', done: true, status: 'completed', output: 'done' }
    }),
  } as any

  const { handleBridgeRun } = await import('../../packages/server/src/services/hermes/run-chat/handle-bridge-run')
  const request = { input: 'hello', session_id: 'session-1' }
  await handleBridgeRun(
    nsp,
    socket,
    request,
    'default',
    sessionMap,
    bridge,
    false,
    vi.fn(),
    vi.fn(),
  )

  expect(replaceStateMock).toHaveBeenCalledWith(
    sessionMap,
    'session-1',
    'agent.event',
    expect.objectContaining({
      event: 'agent.event',
      kind: 'lifecycle',
      text: retryText,
    }),
  )
  expect(emit).toHaveBeenCalledWith(
    'agent.event',
    expect.objectContaining({
      event: 'agent.event',
      kind: 'lifecycle',
      text: retryText,
    }),
  )
})
})
@@ -47,6 +47,19 @@ describe('run chat model config', () => {
expect(readConfigYamlForProfileMock).not.toHaveBeenCalled()
})
// With an explicit model and provider and no model groups supplied, the
// resolver returns the request unchanged and never reads the profile config.
it('keeps an explicit model when no model group list is available', async () => {
  const { resolveBridgeRunModelConfig } = await import('../../packages/server/src/services/hermes/run-chat/model-config')

  const request = {
    profile: 'default',
    requestedModel: 'gpt-5.5',
    requestedProvider: 'custom',
  }
  const resolved = await resolveBridgeRunModelConfig(request)

  expect(resolved).toEqual({ model: 'gpt-5.5', provider: 'custom' })
  expect(readConfigYamlForProfileMock).not.toHaveBeenCalled()
})
it('falls back to the profile default when the candidate model is unavailable', async () => {
const { resolveBridgeRunModelConfig } = await import('../../packages/server/src/services/hermes/run-chat/model-config')
+202 -4
View File
@@ -40,7 +40,10 @@ vi.mock('../../packages/server/src/services/hermes/run-chat/bridge-message', ()
flushBridgePendingToDb: vi.fn(),
}))
function makeContext(state: any) {
function makeContext(state: any, commandResult: Record<string, unknown> = {
handled: true,
message: '[IMPORTANT: expanded plan skill prompt]',
}) {
const namespaceEmit = vi.fn()
const nsp = {
to: vi.fn(() => ({ emit: namespaceEmit })),
@@ -55,9 +58,12 @@ function makeContext(state: any) {
const sessionMap = new Map([['session-1', state]])
const runQueuedItem = vi.fn()
const bridge = {
command: vi.fn(async () => ({
handled: true,
message: '[IMPORTANT: expanded plan skill prompt]',
command: vi.fn(async () => commandResult),
status: vi.fn(async () => ({
exists: true,
running: false,
current_run_id: null,
message_count: 0,
})),
}
return { bridge, namespaceEmit, nsp, runQueuedItem, sessionMap, socket }
@@ -105,4 +111,196 @@ describe('plan session command', () => {
}))
expect(namespaceEmit).not.toHaveBeenCalledWith('session.command', expect.anything())
})
// `/goal <text>` on an idle session: the bridge reply carries a kickoff
// prompt, which must be run straight away as a hidden item (displayInput null).
it('starts an idle goal command as a hidden kickoff run', async () => {
  const kickoffPrompt = 'fix the tests'
  const idleState = { messages: [], isWorking: false, events: [], queue: [] }
  const ctx = makeContext(idleState, {
    handled: true,
    type: 'goal',
    action: 'set',
    message: 'Goal set.',
    kickoff_prompt: kickoffPrompt,
    max_turns: 20,
  })

  const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
  const command = parseSessionCommand('/goal fix the tests')!

  await handleSessionCommand('session-1', command, {
    nsp: ctx.nsp as any,
    socket: ctx.socket as any,
    sessionMap: ctx.sessionMap,
    bridge: ctx.bridge as any,
    profile: 'default',
    queueId: 'goal-queue-id',
    runQueuedItem: ctx.runQueuedItem,
  })

  expect(ctx.bridge.command).toHaveBeenCalledWith('session-1', 'goal fix the tests', 'default')
  expect(ctx.namespaceEmit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      action: 'set',
      message: 'Goal set.',
      terminal: false,
      started: true,
    }),
  )
  expect(ctx.runQueuedItem).toHaveBeenCalledWith(
    ctx.socket,
    'session-1',
    expect.objectContaining({
      queue_id: 'goal-queue-id',
      input: kickoffPrompt,
      displayInput: null,
      storageMessage: kickoffPrompt,
      source: 'cli',
    }),
    'default',
  )
})
// `/goal pause` with clear_goal_continuations set: the hidden continuation is
// removed from the queue, the user message stays, and the trimmed queue is
// re-broadcast through run.queued.
it('clears queued goal continuations when pausing a goal', async () => {
  const hiddenContinuation = {
    queue_id: 'goal-1',
    input: 'continue',
    displayInput: null,
    storageMessage: 'continue',
    profile: 'default',
    goalContinuation: true,
  }
  const userItem = { queue_id: 'user-1', input: 'user message', profile: 'default' }
  const state = {
    messages: [],
    isWorking: true,
    events: [],
    queue: [hiddenContinuation, userItem],
  }
  const ctx = makeContext(state, {
    handled: true,
    type: 'goal',
    action: 'pause',
    message: 'Goal paused.',
    clear_goal_continuations: true,
  })

  const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
  const command = parseSessionCommand('/goal pause')!

  await handleSessionCommand('session-1', command, {
    nsp: ctx.nsp as any,
    socket: ctx.socket as any,
    sessionMap: ctx.sessionMap,
    bridge: ctx.bridge as any,
    profile: 'default',
    runQueuedItem: ctx.runQueuedItem,
  })

  expect(ctx.runQueuedItem).not.toHaveBeenCalled()
  expect(state.queue).toEqual([expect.objectContaining({ queue_id: 'user-1' })])
  expect(ctx.namespaceEmit).toHaveBeenCalledWith(
    'run.queued',
    expect.objectContaining({
      queue_length: 1,
      queued_messages: [expect.objectContaining({ id: 'user-1', content: 'user message' })],
    }),
  )
})
// `/goal done`: the bridge answers with action "clear", but the event sent to
// the client must use the goal-specific action name "goal_clear". The queued
// continuation is dropped and nothing is started.
it('emits a goal-specific clear action for goal done', async () => {
  const hiddenContinuation = {
    queue_id: 'goal-1',
    input: 'continue',
    displayInput: null,
    storageMessage: 'continue',
    profile: 'default',
    goalContinuation: true,
  }
  const state = {
    messages: [],
    isWorking: false,
    events: [],
    queue: [hiddenContinuation],
  }
  const ctx = makeContext(state, {
    handled: true,
    type: 'goal',
    action: 'clear',
    message: 'Goal cleared.',
    clear_goal_continuations: true,
  })

  const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
  const command = parseSessionCommand('/goal done')!

  await handleSessionCommand('session-1', command, {
    nsp: ctx.nsp as any,
    socket: ctx.socket as any,
    sessionMap: ctx.sessionMap,
    bridge: ctx.bridge as any,
    profile: 'default',
    runQueuedItem: ctx.runQueuedItem,
  })

  expect(ctx.bridge.command).toHaveBeenCalledWith('session-1', 'goal done', 'default')
  expect(ctx.runQueuedItem).not.toHaveBeenCalled()
  expect(state.queue).toEqual([])
  expect(ctx.namespaceEmit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      command: 'goal',
      action: 'goal_clear',
      message: 'Goal cleared.',
      terminal: true,
      started: false,
    }),
  )
})
// `/goal resume` on an idle session: the kickoff prompt returned by the
// bridge is run immediately as a hidden item under the supplied queue id.
it('starts a resumed goal as a hidden continuation run', async () => {
  const resumePrompt = '[Continuing toward your standing goal]\nGoal: fix the tests'
  const idleState = { messages: [], isWorking: false, events: [], queue: [] }
  const ctx = makeContext(idleState, {
    handled: true,
    type: 'goal',
    action: 'resume',
    message: 'Goal resumed.',
    kickoff_prompt: resumePrompt,
    max_turns: 20,
  })

  const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
  const command = parseSessionCommand('/goal resume')!

  await handleSessionCommand('session-1', command, {
    nsp: ctx.nsp as any,
    socket: ctx.socket as any,
    sessionMap: ctx.sessionMap,
    bridge: ctx.bridge as any,
    profile: 'default',
    queueId: 'resume-queue-id',
    runQueuedItem: ctx.runQueuedItem,
  })

  expect(ctx.bridge.command).toHaveBeenCalledWith('session-1', 'goal resume', 'default')
  expect(ctx.namespaceEmit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      action: 'resume',
      message: 'Goal resumed.',
      terminal: false,
      started: true,
    }),
  )
  expect(ctx.runQueuedItem).toHaveBeenCalledWith(
    ctx.socket,
    'session-1',
    expect.objectContaining({
      queue_id: 'resume-queue-id',
      input: resumePrompt,
      displayInput: null,
      storageMessage: resumePrompt,
      source: 'cli',
    }),
    'default',
  )
})
// `/goal status` while the bridge reports a live run: the emitted message is
// the bridge's goal summary extended with current-turn and run lines, and the
// event carries the bridge status in camelCase.
it('includes bridge run state in goal status output', async () => {
  const expectedMessage =
    'Goal (active, 0/20 turns): build docs\nCurrent turn: 1/20 running (completed turns: 0/20; count updates after the judge).\nRun: running (run-123)'

  const idleState = { messages: [], isWorking: false, events: [], queue: [] }
  const ctx = makeContext(idleState, {
    handled: true,
    type: 'goal',
    action: 'goal_status',
    message: 'Goal (active, 0/20 turns): build docs',
  })
  // Override the default status reply for the next call only.
  ctx.bridge.status.mockResolvedValueOnce({
    exists: true,
    running: true,
    current_run_id: 'run-123',
    message_count: 4,
  })

  const { handleSessionCommand, parseSessionCommand } = await import('../../packages/server/src/services/hermes/run-chat/session-command')
  const command = parseSessionCommand('/goal status')!

  await handleSessionCommand('session-1', command, {
    nsp: ctx.nsp as any,
    socket: ctx.socket as any,
    sessionMap: ctx.sessionMap,
    bridge: ctx.bridge as any,
    profile: 'default',
    runQueuedItem: ctx.runQueuedItem,
  })

  expect(ctx.runQueuedItem).not.toHaveBeenCalled()
  expect(ctx.namespaceEmit).toHaveBeenCalledWith(
    'session.command',
    expect.objectContaining({
      action: 'goal_status',
      message: expectedMessage,
      bridgeStatus: expect.objectContaining({
        running: true,
        currentRunId: 'run-123',
      }),
    }),
  )
})
})