add bridge performance monitoring

This commit is contained in:
ekko
2026-05-23 09:05:03 +08:00
committed by ekko
parent 4223014e0c
commit c184519c5d
21 changed files with 1778 additions and 91 deletions
@@ -393,6 +393,80 @@ assert calls == []
pool._run_context.session_id = "session-a"
assert pool._approval_dispatcher("cmd", "desc", allow_permanent=False) == "once"
assert calls == [("cmd", "desc", False)]
`)
})
// Exercises bridge lifecycle behaviour through the embedded Python harness:
//   1. BridgeBroker.stop() sets the broker stop flag, stops the registered
//      worker, and empties the worker map together with the run / session /
//      approval / compression profile maps.
//   2. WorkerProcess.start(), with subprocess.Popen and os.getpid replaced by
//      fakes, hands the broker pid and the worker profile to the child through
//      environment variables.
//   3. The parent-process watchdog sets its stop event once the faked
//      _process_exists (which records the pid and returns False) reports the
//      watched pid as gone, having been asked about that pid exactly once.
// The Python lives inside a String.raw template: top-level statements stay at
// column 0 and block bodies keep their indentation, because that whitespace is
// part of the program that gets executed.
it('cleans broker workers and wires worker parent watchdog state', () => {
  runPython(String.raw`
${harness}
class FakeWorker:
    def __init__(self):
        self.running = True
        self.stopped = False
    def stop(self):
        self.running = False
        self.stopped = True
broker = bridge.BridgeBroker("ipc:///tmp/unused.sock")
worker = FakeWorker()
broker._workers["default"] = worker
broker._run_profile["run-a"] = "default"
broker._session_profile["session-a"] = "default"
broker._approval_profile["approval-a"] = "default"
broker._compression_profile["compression-a"] = "default"
broker.stop()
assert broker._stop.is_set()
assert worker.stopped
assert broker._workers == {}
assert broker._run_profile == {}
assert broker._session_profile == {}
assert broker._approval_profile == {}
assert broker._compression_profile == {}
created = {}
class FakeProcess:
    stdout = None
    stderr = None
    def poll(self):
        return None
def fake_popen(args, **kwargs):
    created["args"] = args
    created["env"] = kwargs["env"]
    return FakeProcess()
original_popen = bridge.subprocess.Popen
original_getpid = bridge.os.getpid
try:
    bridge.subprocess.Popen = fake_popen
    bridge.os.getpid = lambda: 4242
    proc_worker = bridge.WorkerProcess("default", "ipc:///tmp/worker.sock", "/agent", "/home")
    proc_worker._pipe_stderr = lambda: None
    proc_worker._wait_ready = lambda: None
    proc_worker.start()
finally:
    bridge.subprocess.Popen = original_popen
    bridge.os.getpid = original_getpid
assert created["env"]["HERMES_AGENT_BRIDGE_BROKER_PID"] == "4242"
assert created["env"]["HERMES_AGENT_BRIDGE_WORKER_PROFILE"] == "default"
stop_event = threading.Event()
seen_pids = []
original_process_exists = bridge._process_exists
try:
    bridge._process_exists = lambda pid: seen_pids.append(pid) or False
    bridge._start_parent_process_watchdog(12345, stop_event, "test", interval=0.01)
    assert wait_for(stop_event.is_set, timeout=2)
finally:
    bridge._process_exists = original_process_exists
assert seen_pids == [12345]
`)
})
})
@@ -0,0 +1,40 @@
import { afterEach, describe, expect, it, vi } from 'vitest'
// `vi.mock` calls are hoisted to the top of the module, ahead of ordinary
// declarations. Creating the spy through `vi.hoisted` guarantees it already
// exists whenever the mock factory below runs, regardless of how or when the
// mocked module is first imported.
const { getOpsRuntimeSnapshot } = vi.hoisted(() => ({
  getOpsRuntimeSnapshot: vi.fn(),
}))

// Replace the ops-monitor service with a controllable stand-in:
//  - createEmptyOpsRuntimeSnapshot returns a minimal snapshot with timestamp 0
//    that carries the optional error message it was given;
//  - getOpsRuntimeSnapshot is the spy above, configured individually per test.
vi.mock('../../packages/server/src/services/hermes/ops-monitor', () => ({
  createEmptyOpsRuntimeSnapshot: (error?: string) => ({ timestamp: 0, error }),
  getOpsRuntimeSnapshot,
}))
describe('performance monitor controller', () => {
  /**
   * Imports the controller on demand, runs its `runtime` handler against a
   * fresh empty context object, and returns that context so the caller can
   * inspect what the handler wrote to it.
   */
  const invokeRuntime = async (): Promise<any> => {
    const requestCtx: any = {}
    const controller = await import('../../packages/server/src/controllers/hermes/performance-monitor')
    await controller.runtime(requestCtx)
    return requestCtx
  }

  // Drop recorded calls between tests so one case cannot leak into the next.
  afterEach(() => {
    vi.clearAllMocks()
  })

  it('returns the runtime snapshot from the performance service', async () => {
    const expectedSnapshot = {
      timestamp: 1,
      bridge: { workers: [] },
      sessions: { active: 0 },
    }
    getOpsRuntimeSnapshot.mockResolvedValue(expectedSnapshot)

    const requestCtx = await invokeRuntime()

    // The handler must pass the service result through by reference.
    expect(requestCtx.body).toBe(expectedSnapshot)
  })

  it('returns a zero snapshot when metrics collection fails', async () => {
    getOpsRuntimeSnapshot.mockRejectedValue(new Error('boom'))

    const requestCtx = await invokeRuntime()

    // A collection failure leaves the status untouched and is reported
    // inside the body of an empty snapshot instead.
    expect(requestCtx.status).toBeUndefined()
    expect(requestCtx.body).toEqual({ timestamp: 0, error: 'boom' })
  })
})