feat: add token usage tracking, context display, and dynamic context length (#132)

* fix: specify TS_NODE_PROJECT for dev:server script

  ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server tsconfig which targets ES2024.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

  - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store
  - Display context usage bar in ChatInput showing used/total/remaining tokens
  - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache
  - Move sessions-db.ts to db/hermes/ for unified database layer
  - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
  - Fix proxy SSE path regex to match rewritten upstream path
  - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
  - Fetch per-session usage on session enter instead of batch
  - Add unit tests for usage-store, db index, and proxy SSE interception

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-22 16:14:50 +08:00
parent ce3bf5f3eb
commit 6f69c69802
26 changed files with 1203 additions and 144 deletions
@@ -9,10 +9,18 @@ vi.mock('../../packages/server/src/services/gateway-bootstrap', () => ({
  getGatewayManagerInstance: () => null,
 }))

+// Mock updateUsage (created via vi.hoisted so the hoisted vi.mock factory can reference it) to assert calls without a real DB
+const { mockUpdateUsage } = vi.hoisted(() => ({
+  mockUpdateUsage: vi.fn(),
+}))
+vi.mock('../../packages/server/src/db/hermes/usage-store', () => ({
+  updateUsage: mockUpdateUsage,
+}))
+
 const mockFetch = vi.fn()
 vi.stubGlobal('fetch', mockFetch)

-import { proxy } from '../../packages/server/src/routes/hermes/proxy-handler'
+import { proxy, setRunSession } from '../../packages/server/src/routes/hermes/proxy-handler'

 function createMockCtx(overrides: Record<string, any> = {}) {
  const ctx: any = {
@@ -42,6 +50,25 @@ function createMockCtx(overrides: Record<string, any> = {}) {
  return ctx
 }

+/**
+ * Helper: build a ReadableStream whose pull() enqueues one TextEncoder-encoded
+ * entry of `events` per call, in array order, and closes once all are delivered.
+ */
+function createSSEBody(events: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder()
+  let idx = 0
+  return new ReadableStream({
+    pull(controller) {
+      if (idx < events.length) {
+        controller.enqueue(encoder.encode(events[idx]))
+        idx++
+      } else {
+        controller.close()
+      }
+    },
+  })
+}
+
 describe('Proxy Handler', () => {
  beforeEach(() => {
    vi.clearAllMocks()
@@ -130,9 +157,6 @@ describe('Proxy Handler', () => {
  })

  it('returns 502 on connection failure', async () => {
-    // waitForGatewayReady loops calling fetch(healthUrl) until res.ok or timeout.
-    // Return ok:true for health checks so the loop exits immediately (gateway
-    // "ready"), then the retry fetch also fails with ECONNREFUSED → 502.
    mockFetch.mockImplementation((url: string) => {
      if (typeof url === 'string' && url.includes('/health')) {
        return Promise.resolve({ ok: true })
@@ -161,3 +185,233 @@ describe('Proxy Handler', () => {
    expect(ctx.status).toBe(404)
  })
 })
+
+describe('POST /v1/runs — session_id capture', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('captures run_id → session_id mapping from POST /v1/runs', async () => {
+    const runId = 'run-abc-123'
+    const sessionId = 'session-xyz'
+    const responseBody = JSON.stringify({ run_id: runId, status: 'queued' })
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      text: () => Promise.resolve(responseBody),
+      body: null,
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: {
+        body: { session_id: sessionId, input: 'hello', model: 'gpt-4' },
+      },
+    })
+
+    await proxy(ctx)
+
+    // Only response forwarding is asserted here; the run_id → session_id mapping itself is not checked — NOTE(review): add an assertion
+    expect(ctx.res.write).toHaveBeenCalledWith(responseBody)
+    expect(ctx.res.end).toHaveBeenCalled()
+  })
+
+  it('falls through to normal stream when POST body has no session_id', async () => {
+    const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      text: () => Promise.resolve(responseBody),
+      body: null,
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: { body: { input: 'hello' } }, // no session_id
+    })
+
+    await proxy(ctx)
+
+    // Without a session_id the response must still be ended for the client
+    expect(ctx.res.end).toHaveBeenCalled()
+  })
+
+  it('serializes parsed JSON body when rawBody is not available', async () => {
+    const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      body: {
+        getReader: () => {
+          const encoder = new TextEncoder()
+          let done = false
+          return {
+            read: () => {
+              if (done) return Promise.resolve({ done: true, value: undefined })
+              done = true
+              return Promise.resolve({ done: false, value: encoder.encode(responseBody) })
+            },
+          }
+        },
+      },
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: { body: { session_id: 's1', input: 'test' } },
+    })
+
+    await proxy(ctx)
+
+    // Verify the first fetch call received a JSON string body that parses back to the request fields
+    const [, options] = mockFetch.mock.calls[0]
+    expect(typeof options.body).toBe('string')
+    const parsed = JSON.parse(options.body)
+    expect(parsed.session_id).toBe('s1')
+    expect(parsed.input).toBe('test')
+  })
+})
+
+describe('SSE stream interception — run.completed', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('intercepts run.completed and calls updateUsage', async () => {
+    const runId = 'run-test-1'
+    const sessionId = 'session-test-1'
+
+    // Pre-populate the run → session mapping (not reset by beforeEach, so every test here uses a unique run id)
+    setRunSession(runId, sessionId)
+
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hello' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 13949, output_tokens: 45, total_tokens: 13994 } })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: `?token=test&profile=default`,
+    })
+
+    await proxy(ctx)
+
+    // Verify updateUsage received the mapped session id plus the event's input_tokens and output_tokens
+    expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 13949, 45)
+    // Verify the stream was written to and ended on the client response (written payload is not inspected)
+    expect(ctx.res.write).toHaveBeenCalled()
+    expect(ctx.res.end).toHaveBeenCalled()
+  })
+
+  it('does not call updateUsage when no mapping exists', async () => {
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: 'unknown-run', usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 } })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs/unknown-run/events',
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).not.toHaveBeenCalled()
+  })
+
+  it('does not call updateUsage for non-run.completed events', async () => {
+    const runId = 'run-no-complete'
+    setRunSession(runId, 'session-x')
+
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hi' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.failed', run_id: runId, error: 'timeout' })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).not.toHaveBeenCalled()
+  })
+
+  it('handles SSE with multiple events in a single chunk', async () => {
+    const runId = 'run-multi'
+    setRunSession(runId, 'session-multi')
+
+    // All three events are concatenated and delivered as one stream chunk
+    const singleChunk = [
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'A' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'B' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 500, output_tokens: 100, total_tokens: 600 } })}\n\n`,
+    ].join('')
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody([singleChunk]),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).toHaveBeenCalledWith('session-multi', 500, 100)
+  })
+
+  it('handles SSE split across multiple chunks', async () => {
+    const runId = 'run-split'
+    setRunSession(runId, 'session-split')
+
+    const completedJson = JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 200, output_tokens: 50, total_tokens: 250 } })
+    const sseEvent = `data: ${completedJson}\n\n`
+
+    // Split the event across two chunks; the cut at 30 chars lands mid-JSON, right after the "event" value
+    const chunk1 = sseEvent.slice(0, 30)
+    const chunk2 = sseEvent.slice(30)
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody([chunk1, chunk2]),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50)
+  })
+})