feat: add token usage tracking, context display, and dynamic context length (#132)
* fix: specify TS_NODE_PROJECT for dev:server script

  ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server tsconfig which targets ES2024.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

  - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store
  - Display context usage bar in ChatInput showing used/total/remaining tokens
  - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache
  - Move sessions-db.ts to db/hermes/ for unified database layer
  - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
  - Fix proxy SSE path regex to match rewritten upstream path
  - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
  - Fetch per-session usage on session enter instead of batch
  - Add unit tests for usage-store, db index, and proxy SSE interception

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,10 +9,18 @@ vi.mock('../../packages/server/src/services/gateway-bootstrap', () => ({
|
||||
getGatewayManagerInstance: () => null,
|
||||
}))
|
||||
|
||||
// Replace the usage-store module with a spy so tests can assert on
// updateUsage calls without touching a real database.
// The spy is created through vi.hoisted so that it already exists when the
// vi.mock factory below reads it.
const mockUpdateUsage = vi.hoisted(() => vi.fn())

vi.mock('../../packages/server/src/db/hermes/usage-store', () => {
  return { updateUsage: mockUpdateUsage }
})

// Every upstream request made through the global fetch goes to this stub;
// individual tests decide what it resolves to.
const mockFetch = vi.fn()
vi.stubGlobal('fetch', mockFetch)
|
||||
|
||||
import { proxy } from '../../packages/server/src/routes/hermes/proxy-handler'
|
||||
import { proxy, setRunSession } from '../../packages/server/src/routes/hermes/proxy-handler'
|
||||
|
||||
function createMockCtx(overrides: Record<string, any> = {}) {
|
||||
const ctx: any = {
|
||||
@@ -42,6 +50,25 @@ function createMockCtx(overrides: Record<string, any> = {}) {
|
||||
return ctx
|
||||
}
|
||||
|
||||
/**
 * Test helper: wrap a list of strings in a ReadableStream of bytes.
 *
 * Every string is UTF-8 encoded and handed out as its own chunk, one chunk
 * per pull, in array order. The stream is closed on the first pull that
 * finds nothing left to deliver.
 *
 * @param events - Text chunks to emit, in order.
 * @returns A stream yielding one Uint8Array per entry of `events`.
 */
function createSSEBody(events: string[]): ReadableStream<Uint8Array> {
  const utf8 = new TextEncoder()
  const remaining = events[Symbol.iterator]()

  return new ReadableStream({
    pull(controller) {
      const step = remaining.next()
      if (step.done) {
        // Nothing left to hand out: signal end-of-stream to the reader.
        controller.close()
        return
      }
      controller.enqueue(utf8.encode(step.value))
    },
  })
}
|
||||
|
||||
describe('Proxy Handler', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
@@ -130,9 +157,6 @@ describe('Proxy Handler', () => {
|
||||
})
|
||||
|
||||
it('returns 502 on connection failure', async () => {
|
||||
// waitForGatewayReady loops calling fetch(healthUrl) until res.ok or timeout.
|
||||
// Return ok:true for health checks so the loop exits immediately (gateway
|
||||
// "ready"), then the retry fetch also fails with ECONNREFUSED → 502.
|
||||
mockFetch.mockImplementation((url: string) => {
|
||||
if (typeof url === 'string' && url.includes('/health')) {
|
||||
return Promise.resolve({ ok: true })
|
||||
@@ -161,3 +185,233 @@ describe('Proxy Handler', () => {
|
||||
expect(ctx.status).toBe(404)
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Tests for proxying `POST /v1/runs`: the upstream reply must reach the
 * client, with or without a session_id in the request body, and a parsed
 * JSON request body must be sent upstream as a string.
 */
describe('POST /v1/runs — session_id capture', () => {
  /** Make the stubbed fetch resolve to a non-streaming JSON reply (body: null, text() yields `payload`). */
  const stubJsonUpstream = (payload: string): void => {
    mockFetch.mockResolvedValue({
      status: 200,
      headers: new Headers({ 'content-type': 'application/json' }),
      text: () => Promise.resolve(payload),
      body: null,
    })
  }

  /** Build a mock ctx for a POST to the runs endpoint carrying `body` as the parsed request body. */
  const postRunsCtx = (body: Record<string, unknown>) =>
    createMockCtx({
      path: '/api/hermes/v1/runs',
      req: { method: 'POST' },
      request: { body },
    })

  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('captures run_id → session_id mapping from POST /v1/runs', async () => {
    const upstreamReply = JSON.stringify({ run_id: 'run-abc-123', status: 'queued' })
    stubJsonUpstream(upstreamReply)

    const ctx = postRunsCtx({ session_id: 'session-xyz', input: 'hello', model: 'gpt-4' })
    await proxy(ctx)

    // NOTE(review): only the forwarding of the upstream reply is asserted
    // here; the run_id → session_id mapping itself is not inspected.
    expect(ctx.res.write).toHaveBeenCalledWith(upstreamReply)
    expect(ctx.res.end).toHaveBeenCalled()
  })

  it('falls through to normal stream when POST body has no session_id', async () => {
    stubJsonUpstream(JSON.stringify({ run_id: 'r1', status: 'queued' }))

    // Request body deliberately lacks session_id.
    const ctx = postRunsCtx({ input: 'hello' })
    await proxy(ctx)

    // The response must still be completed for the client.
    expect(ctx.res.end).toHaveBeenCalled()
  })

  it('serializes parsed JSON body when rawBody is not available', async () => {
    const upstreamReply = JSON.stringify({ run_id: 'r1', status: 'queued' })

    mockFetch.mockResolvedValue({
      status: 200,
      headers: new Headers({ 'content-type': 'application/json' }),
      body: {
        // Minimal reader stub: each reader yields the encoded reply once,
        // then reports done on every later read.
        getReader: () => {
          const queue: Uint8Array[] = [new TextEncoder().encode(upstreamReply)]
          return {
            read: () => {
              const value = queue.shift()
              return Promise.resolve(
                value === undefined ? { done: true, value: undefined } : { done: false, value },
              )
            },
          }
        },
      },
    })

    const ctx = postRunsCtx({ session_id: 's1', input: 'test' })
    await proxy(ctx)

    // The first upstream fetch must carry the request body as a JSON string.
    const [, fetchInit] = mockFetch.mock.calls[0]
    expect(typeof fetchInit.body).toBe('string')
    const sent = JSON.parse(fetchInit.body)
    expect(sent.session_id).toBe('s1')
    expect(sent.input).toBe('test')
  })
})
|
||||
|
||||
/**
 * Tests for the SSE events proxy: a `run.completed` event for a run with a
 * registered session must trigger updateUsage with that event's input and
 * output token counts; anything else must not.
 */
describe('SSE stream interception — run.completed', () => {
  /** Format one payload as a single SSE `data:` frame terminated by a blank line. */
  const sseFrame = (payload: Record<string, unknown>): string => `data: ${JSON.stringify(payload)}\n\n`

  /** Make the stubbed fetch resolve to an event-stream response that delivers `chunks` in order. */
  const stubSSEUpstream = (chunks: string[]): void => {
    mockFetch.mockResolvedValue({
      status: 200,
      headers: new Headers({ 'content-type': 'text/event-stream' }),
      body: createSSEBody(chunks),
    })
  }

  /** Build a mock ctx for the events endpoint of `runId`. */
  const eventsCtx = (runId: string, search = '') =>
    createMockCtx({
      path: `/api/hermes/v1/runs/${runId}/events`,
      search,
    })

  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('intercepts run.completed and calls updateUsage', async () => {
    const runId = 'run-test-1'
    const sessionId = 'session-test-1'

    // Register the run → session mapping before the stream is proxied.
    setRunSession(runId, sessionId)

    stubSSEUpstream([
      sseFrame({ event: 'run.started', run_id: runId }),
      sseFrame({ event: 'message.delta', run_id: runId, delta: 'Hello' }),
      sseFrame({
        event: 'run.completed',
        run_id: runId,
        usage: { input_tokens: 13949, output_tokens: 45, total_tokens: 13994 },
      }),
    ])

    const ctx = eventsCtx(runId, `?token=test&profile=default`)
    await proxy(ctx)

    // Usage from the run.completed event is recorded against the session.
    expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 13949, 45)
    // The stream was also written through to the client and then ended.
    expect(ctx.res.write).toHaveBeenCalled()
    expect(ctx.res.end).toHaveBeenCalled()
  })

  it('does not call updateUsage when no mapping exists', async () => {
    // No setRunSession call for this run id.
    stubSSEUpstream([
      sseFrame({
        event: 'run.completed',
        run_id: 'unknown-run',
        usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 },
      }),
    ])

    const ctx = eventsCtx('unknown-run')
    await proxy(ctx)

    expect(mockUpdateUsage).not.toHaveBeenCalled()
  })

  it('does not call updateUsage for non-run.completed events', async () => {
    const runId = 'run-no-complete'
    setRunSession(runId, 'session-x')

    // The stream ends with run.failed and never emits run.completed.
    stubSSEUpstream([
      sseFrame({ event: 'run.started', run_id: runId }),
      sseFrame({ event: 'message.delta', run_id: runId, delta: 'Hi' }),
      sseFrame({ event: 'run.failed', run_id: runId, error: 'timeout' }),
    ])

    const ctx = eventsCtx(runId)
    await proxy(ctx)

    expect(mockUpdateUsage).not.toHaveBeenCalled()
  })

  it('handles SSE with multiple events in a single chunk', async () => {
    const runId = 'run-multi'
    setRunSession(runId, 'session-multi')

    // Three frames concatenated and delivered as one network chunk.
    const combined =
      sseFrame({ event: 'message.delta', run_id: runId, delta: 'A' }) +
      sseFrame({ event: 'message.delta', run_id: runId, delta: 'B' }) +
      sseFrame({
        event: 'run.completed',
        run_id: runId,
        usage: { input_tokens: 500, output_tokens: 100, total_tokens: 600 },
      })
    stubSSEUpstream([combined])

    const ctx = eventsCtx(runId)
    await proxy(ctx)

    expect(mockUpdateUsage).toHaveBeenCalledWith('session-multi', 500, 100)
  })

  it('handles SSE split across multiple chunks', async () => {
    const runId = 'run-split'
    setRunSession(runId, 'session-split')

    const frame = sseFrame({
      event: 'run.completed',
      run_id: runId,
      usage: { input_tokens: 200, output_tokens: 50, total_tokens: 250 },
    })

    // Cut the single frame after 30 characters so it arrives in two chunks.
    const cutAt = 30
    stubSSEUpstream([frame.slice(0, cutAt), frame.slice(cutAt)])

    const ctx = eventsCtx(runId)
    await proxy(ctx)

    expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50)
  })
})
|
||||
|
||||
Reference in New Issue
Block a user