feat: add token usage tracking, context display, and dynamic context length (#132)

* fix: specify TS_NODE_PROJECT for dev:server script

ts-node/register resolves tsconfig from the entry file upward,
finding the root solution-style tsconfig.json (no compilerOptions).
This causes target to default to ES3, breaking MapIterator spread
syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server
tsconfig which targets ES2024.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

- Intercept SSE proxy to capture run.completed events and persist token
  usage (input_tokens, output_tokens) per session to SQLite/JSON store
- Display context usage bar in ChatInput showing used/total/remaining tokens
- Resolve the actual context length from Hermes models_dev_cache.json based
  on the active profile's default model (falling back to 200K), with a 5-minute in-memory cache
- Move sessions-db.ts to db/hermes/ for unified database layer
- Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
- Fix proxy SSE path regex to match rewritten upstream path
- Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
- Fetch per-session usage on session enter instead of batch
- Add unit tests for usage-store, db index, and proxy SSE interception

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-04-22 16:14:50 +08:00
committed by GitHub
parent ce3bf5f3eb
commit 6f69c69802
26 changed files with 1203 additions and 144 deletions
+116
View File
@@ -0,0 +1,116 @@
import { describe, it, expect, vi } from 'vitest'
// Report SQLite as unavailable (and hand back no database handle) to force the
// JSON fallback; every other export of the module keeps its real implementation.
vi.mock('../../packages/server/src/db/index', async (importOriginal) => {
  const realModule = (await importOriginal()) as any
  const sqliteDisabled = {
    isSqliteAvailable: () => false,
    getDb: () => null,
  }
  return { ...realModule, ...sqliteDisabled }
})
import {
jsonGet,
jsonSet,
jsonGetAll,
jsonDelete,
} from '../../packages/server/src/db/index'
// Smoke test for the JSON fallback helpers exported by the db index module.
// It only checks that the four helpers are exported as functions; it does not
// write or read any data, so the title says exactly that instead of claiming a
// round-trip.
describe('JSON fallback store', () => {
  it('exports jsonSet, jsonGet, jsonGetAll and jsonDelete as functions', () => {
    expect(typeof jsonSet).toBe('function')
    expect(typeof jsonGet).toBe('function')
    expect(typeof jsonGetAll).toBe('function')
    expect(typeof jsonDelete).toBe('function')
  })
})
// Exercise a local re-implementation of the ensureTable logic against a real in-memory SQLite database (requires Node 22.5+ for node:sqlite)
describe('SQLite ensureTable', () => {
it('creates table with correct columns and handles migration', () => {
// This test requires Node 22.5+ for node:sqlite
const nodeVersion = process.versions.node.split('.').map(Number)
const isAvailable = nodeVersion[0] > 22 || (nodeVersion[0] === 22 && nodeVersion[1] >= 5)
if (!isAvailable) {
console.log('Skipping SQLite test — Node < 22.5')
return
}
const { DatabaseSync } = require('node:sqlite')
const db = new DatabaseSync(':memory:')
// Local stand-in for the ensureTable logic, run against the `db` opened above.
// Creates the table if it is missing, then reconciles its columns with `schema`:
// columns named in `schema` but absent from the table are added (in schema key
// order), and columns present in the table but not in `schema` are dropped.
// `schema` maps a column name to its SQL column definition.
function ensureTable(tableName: string, schema: Record<string, string>): void {
  const wantedNames = Object.keys(schema)
  const columnSql = wantedNames.map((name) => `"${name}" ${schema[name]}`).join(', ')
  db.exec(`CREATE TABLE IF NOT EXISTS "${tableName}" (${columnSql})`)

  // Snapshot the live column list once; both passes below compare against it.
  const tableInfo = db.prepare(`PRAGMA table_info("${tableName}")`).all() as Array<{ name: string }>
  const liveNames = new Set(tableInfo.map((info) => info.name))
  const wantedSet = new Set(wantedNames)

  wantedNames
    .filter((name) => !liveNames.has(name))
    .forEach((name) => {
      db.exec(`ALTER TABLE "${tableName}" ADD COLUMN "${name}" ${schema[name]}`)
    })

  Array.from(liveNames)
    .filter((name) => !wantedSet.has(name))
    .forEach((name) => {
      db.exec(`ALTER TABLE "${tableName}" DROP COLUMN "${name}"`)
    })
}
// Initial schema
const schema: Record<string, string> = {
session_id: 'TEXT PRIMARY KEY',
input_tokens: 'INTEGER NOT NULL DEFAULT 0',
output_tokens: 'INTEGER NOT NULL DEFAULT 0',
updated_at: 'INTEGER NOT NULL',
}
ensureTable('session_usage', schema)
// Verify columns
const cols = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
const colNames = cols.map(c => c.name)
expect(colNames).toContain('session_id')
expect(colNames).toContain('input_tokens')
expect(colNames).toContain('output_tokens')
expect(colNames).toContain('updated_at')
// Add a column
schema['cost_usd'] = 'REAL DEFAULT 0'
ensureTable('session_usage', schema)
const cols2 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
const colNames2 = cols2.map(c => c.name)
expect(colNames2).toContain('cost_usd')
// Remove a column
delete schema['cost_usd']
ensureTable('session_usage', schema)
const cols3 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
const colNames3 = cols3.map(c => c.name)
expect(colNames3).not.toContain('cost_usd')
// Verify INSERT works
db.prepare(
`INSERT INTO session_usage (session_id, input_tokens, output_tokens, updated_at)
VALUES (?, ?, ?, ?)`,
).run('test-session', 100, 50, Date.now())
const row = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session') as any
expect(row.session_id).toBe('test-session')
expect(row.input_tokens).toBe(100)
expect(row.output_tokens).toBe(50)
// Verify DELETE works
db.prepare('DELETE FROM session_usage WHERE session_id = ?').run('test-session')
const deleted = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session')
expect(deleted).toBeUndefined()
db.close()
})
})
+258 -4
View File
@@ -9,10 +9,18 @@ vi.mock('../../packages/server/src/services/gateway-bootstrap', () => ({
getGatewayManagerInstance: () => null,
}))
// Stand-in for updateUsage: lets tests assert on calls without a real DB.
// The spy is built inside vi.hoisted so the vi.mock factory below can refer to it.
const { mockUpdateUsage } = vi.hoisted(() => {
  return { mockUpdateUsage: vi.fn() }
})

vi.mock('../../packages/server/src/db/hermes/usage-store', () => {
  return { updateUsage: mockUpdateUsage }
})
const mockFetch = vi.fn()
vi.stubGlobal('fetch', mockFetch)
import { proxy } from '../../packages/server/src/routes/hermes/proxy-handler'
import { proxy, setRunSession } from '../../packages/server/src/routes/hermes/proxy-handler'
function createMockCtx(overrides: Record<string, any> = {}) {
const ctx: any = {
@@ -42,6 +50,25 @@ function createMockCtx(overrides: Record<string, any> = {}) {
return ctx
}
/**
 * Test helper: builds a ReadableStream out of string chunks.
 * Every entry of `events` is UTF-8 encoded and enqueued as its own
 * Uint8Array chunk, in order, one per pull; once all entries have been
 * emitted the stream is closed.
 */
function createSSEBody(events: string[]): ReadableStream<Uint8Array> {
  const utf8 = new TextEncoder()
  const remaining = events[Symbol.iterator]()
  return new ReadableStream({
    pull(controller) {
      const step = remaining.next()
      if (step.done) {
        controller.close()
        return
      }
      controller.enqueue(utf8.encode(step.value))
    },
  })
}
describe('Proxy Handler', () => {
beforeEach(() => {
vi.clearAllMocks()
@@ -130,9 +157,6 @@ describe('Proxy Handler', () => {
})
it('returns 502 on connection failure', async () => {
// waitForGatewayReady loops calling fetch(healthUrl) until res.ok or timeout.
// Return ok:true for health checks so the loop exits immediately (gateway
// "ready"), then the retry fetch also fails with ECONNREFUSED → 502.
mockFetch.mockImplementation((url: string) => {
if (typeof url === 'string' && url.includes('/health')) {
return Promise.resolve({ ok: true })
@@ -161,3 +185,233 @@ describe('Proxy Handler', () => {
expect(ctx.status).toBe(404)
})
})
// Tests for proxying POST /api/hermes/v1/runs: the upstream JSON response must
// reach the client and the parsed request body must be sent upstream as a string.
describe('POST /v1/runs — session_id capture', () => {
// Clear recorded calls on every mock (mockFetch, mockUpdateUsage, ctx spies) between tests.
beforeEach(() => {
vi.clearAllMocks()
})
it('captures run_id → session_id mapping from POST /v1/runs', async () => {
const runId = 'run-abc-123'
const sessionId = 'session-xyz'
const responseBody = JSON.stringify({ run_id: runId, status: 'queued' })
// Upstream answers with a JSON body exposed through text(); no stream body is provided.
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
text: () => Promise.resolve(responseBody),
body: null,
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: {
body: { session_id: sessionId, input: 'hello', model: 'gpt-4' },
},
})
await proxy(ctx)
// Verify the response was forwarded to client
// NOTE(review): only the forwarded response is asserted here; the stored
// run_id → session_id mapping itself is not inspected by this test.
expect(ctx.res.write).toHaveBeenCalledWith(responseBody)
expect(ctx.res.end).toHaveBeenCalled()
})
it('falls through to normal stream when POST body has no session_id', async () => {
const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
text: () => Promise.resolve(responseBody),
body: null,
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: { body: { input: 'hello' } }, // no session_id
})
await proxy(ctx)
// Should still forward the response
expect(ctx.res.end).toHaveBeenCalled()
})
it('serializes parsed JSON body when rawBody is not available', async () => {
const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
// Upstream body is a minimal reader stub: the first read() yields the whole
// encoded response, every later read() reports done.
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
body: {
getReader: () => {
const encoder = new TextEncoder()
let done = false
return {
read: () => {
if (done) return Promise.resolve({ done: true, value: undefined })
done = true
return Promise.resolve({ done: false, value: encoder.encode(responseBody) })
},
}
},
},
})
// The request carries only a parsed object body (no rawBody is set on this ctx override).
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: { body: { session_id: 's1', input: 'test' } },
})
await proxy(ctx)
// Verify fetch was called with stringified body
// The second argument of the first fetch call holds the request options.
const [, options] = mockFetch.mock.calls[0]
expect(typeof options.body).toBe('string')
const parsed = JSON.parse(options.body)
expect(parsed.session_id).toBe('s1')
expect(parsed.input).toBe('test')
})
})
// Tests for the SSE events endpoint (/v1/runs/:runId/events): when a
// run.completed event arrives for a run registered via setRunSession,
// updateUsage must be called with that session's id and the token counts.
describe('SSE stream interception — run.completed', () => {
// Clear recorded calls on every mock between tests.
// NOTE(review): mappings registered with setRunSession are not reset here;
// each test uses its own run id.
beforeEach(() => {
vi.clearAllMocks()
})
it('intercepts run.completed and calls updateUsage', async () => {
const runId = 'run-test-1'
const sessionId = 'session-test-1'
// Pre-populate the run → session mapping
setRunSession(runId, sessionId)
// Three SSE frames, each delivered by createSSEBody as its own chunk.
const sseData = [
`data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hello' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 13949, output_tokens: 45, total_tokens: 13994 } })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: `?token=test&profile=default`,
})
await proxy(ctx)
// Verify updateUsage was called with correct values
// (session id, input_tokens, output_tokens — total_tokens is not passed).
expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 13949, 45)
// Verify SSE data was forwarded to client
expect(ctx.res.write).toHaveBeenCalled()
expect(ctx.res.end).toHaveBeenCalled()
})
it('does not call updateUsage when no mapping exists', async () => {
// 'unknown-run' is never registered with setRunSession in this file.
const sseData = [
`data: ${JSON.stringify({ event: 'run.completed', run_id: 'unknown-run', usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 } })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs/unknown-run/events',
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).not.toHaveBeenCalled()
})
it('does not call updateUsage for non-run.completed events', async () => {
const runId = 'run-no-complete'
setRunSession(runId, 'session-x')
// The stream ends with run.failed, so no run.completed frame is ever seen.
const sseData = [
`data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hi' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.failed', run_id: runId, error: 'timeout' })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).not.toHaveBeenCalled()
})
it('handles SSE with multiple events in a single chunk', async () => {
const runId = 'run-multi'
setRunSession(runId, 'session-multi')
// All events in one chunk
// (the frames are joined into a single string before being handed to createSSEBody)
const singleChunk = [
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'A' })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'B' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 500, output_tokens: 100, total_tokens: 600 } })}\n\n`,
].join('')
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody([singleChunk]),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).toHaveBeenCalledWith('session-multi', 500, 100)
})
it('handles SSE split across multiple chunks', async () => {
const runId = 'run-split'
setRunSession(runId, 'session-split')
const completedJson = JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 200, output_tokens: 50, total_tokens: 250 } })
const sseEvent = `data: ${completedJson}\n\n`
// Split the event across two chunks
// (the cut at character 30 falls partway through the JSON payload, so neither
// chunk is a complete frame on its own)
const chunk1 = sseEvent.slice(0, 30)
const chunk2 = sseEvent.slice(30)
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody([chunk1, chunk2]),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50)
})
})
+4 -4
View File
@@ -63,7 +63,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.listSessionSummaries(undefined, 50)
expect(databaseSyncMock).toHaveBeenCalledWith('/tmp/hermes-profile/state.db', { open: true, readOnly: true })
@@ -124,7 +124,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.listSessionSummaries('telegram', 2)
expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('AND s.source = ?'))
@@ -218,7 +218,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.searchSessionSummaries('docker', undefined, 10)
expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('messages_fts MATCH'))
@@ -265,7 +265,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.searchSessionSummaries('记忆断裂', undefined, 10)
expect(likeAllMock).toHaveBeenCalledWith('记忆断裂', '%记忆断裂%')
+9
View File
@@ -7,6 +7,9 @@ const searchMock = vi.fn(async (ctx: any) => { ctx.body = { results: [{ id: 'sea
const getMock = vi.fn(async (ctx: any) => { ctx.body = { session: { id: ctx.params.id } } })
const removeMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
const renameMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
const usageBatchMock = vi.fn(async (ctx: any) => { ctx.body = {} })
const usageSingleMock = vi.fn(async (ctx: any) => { ctx.body = { input_tokens: 0, output_tokens: 0 } })
const contextLengthMock = vi.fn(async (ctx: any) => { ctx.body = { context_length: 200000 } })
vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
listConversations: listConversationsMock,
@@ -16,6 +19,9 @@ vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
get: getMock,
remove: removeMock,
rename: renameMock,
usageBatch: usageBatchMock,
usageSingle: usageSingleMock,
contextLength: contextLengthMock,
}))
describe('session routes', () => {
@@ -40,7 +46,10 @@ describe('session routes', () => {
'/api/hermes/sessions',
'/api/hermes/search/sessions',
'/api/hermes/sessions/search',
'/api/hermes/sessions/usage',
'/api/hermes/sessions/context-length',
'/api/hermes/sessions/:id',
'/api/hermes/sessions/:id/usage',
'/api/hermes/sessions/:id/rename',
]))
})
+159
View File
@@ -0,0 +1,159 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
// Swap the db index module for spies so usage-store can be tested in isolation.
// The spies are built inside vi.hoisted so the vi.mock factory below can refer to them.
const {
  mockEnsureTable,
  mockJsonSet,
  mockJsonGet,
  mockJsonGetAll,
  mockJsonDelete,
} = vi.hoisted(() => {
  return {
    mockEnsureTable: vi.fn(),
    mockJsonSet: vi.fn(),
    mockJsonGet: vi.fn(),
    mockJsonGetAll: vi.fn(),
    mockJsonDelete: vi.fn(),
  }
})

vi.mock('../../packages/server/src/db/index', () => {
  // Reporting SQLite as unavailable forces the JSON fallback path.
  const sqliteUnavailable = () => false
  const noDatabase = () => null
  return {
    isSqliteAvailable: sqliteUnavailable,
    ensureTable: mockEnsureTable,
    getDb: noDatabase,
    jsonSet: mockJsonSet,
    jsonGet: mockJsonGet,
    jsonGetAll: mockJsonGetAll,
    jsonDelete: mockJsonDelete,
  }
})
import {
initUsageStore,
updateUsage,
getUsage,
getUsageBatch,
deleteUsage,
} from '../../packages/server/src/db/hermes/usage-store'
// Usage-store behavior when the mocked db index reports SQLite as unavailable:
// every operation is expected to go through the json* helpers, which are spies.
describe('Usage Store (JSON fallback)', () => {
  // Clear recorded calls on the spies so each test asserts only on its own calls.
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // The mocked isSqliteAvailable returns false, so no table setup may happen.
  // The title states that, rather than describing the opposite (SQLite-available) case.
  it('initUsageStore does not call ensureTable when SQLite is unavailable', () => {
    initUsageStore()
    expect(mockEnsureTable).not.toHaveBeenCalled()
  })

  it('updateUsage writes via jsonSet', () => {
    updateUsage('session-1', 100, 50)
    // The record is keyed by session id in the 'session_usage' table and carries
    // both token counts plus a numeric updated_at value.
    expect(mockJsonSet).toHaveBeenCalledWith(
      'session_usage',
      'session-1',
      expect.objectContaining({
        input_tokens: 100,
        output_tokens: 50,
        updated_at: expect.any(Number),
      }),
    )
  })

  it('getUsage reads via jsonGet', () => {
    mockJsonGet.mockReturnValue({ input_tokens: 200, output_tokens: 80 })
    const result = getUsage('session-1')
    expect(result).toEqual({ input_tokens: 200, output_tokens: 80 })
    expect(mockJsonGet).toHaveBeenCalledWith('session_usage', 'session-1')
  })

  it('getUsage returns undefined when jsonGet returns nothing', () => {
    mockJsonGet.mockReturnValue(undefined)
    const result = getUsage('nonexistent')
    expect(result).toBeUndefined()
  })

  it('getUsageBatch returns empty map for empty input', () => {
    const result = getUsageBatch([])
    expect(result).toEqual({})
    // An empty id list must not read from the store at all.
    expect(mockJsonGetAll).not.toHaveBeenCalled()
  })

  it('getUsageBatch returns matching records', () => {
    mockJsonGetAll.mockReturnValue({
      'session-1': { input_tokens: 100, output_tokens: 50 },
      'session-2': { input_tokens: 200, output_tokens: 80 },
      'session-3': { input_tokens: 300, output_tokens: 120 },
    })
    // 'session-2' is stored but not requested; 'session-missing' is requested but
    // not stored. Neither may appear in the result.
    const result = getUsageBatch(['session-1', 'session-3', 'session-missing'])
    expect(result).toEqual({
      'session-1': { input_tokens: 100, output_tokens: 50 },
      'session-3': { input_tokens: 300, output_tokens: 120 },
    })
  })

  it('deleteUsage calls jsonDelete', () => {
    deleteUsage('session-1')
    expect(mockJsonDelete).toHaveBeenCalledWith('session_usage', 'session-1')
  })
})
// Usage-store behavior when the db index reports SQLite as available. The
// database is a mock: getDb() returns an object whose prepare() hands back
// statement stubs chosen by inspecting the SQL text.
describe('Usage Store (SQLite path)', () => {
// Statement-level spies, recreated for every test in beforeEach.
let runMock: ReturnType<typeof vi.fn>
let getMock: ReturnType<typeof vi.fn>
let allMock: ReturnType<typeof vi.fn>
let deleteMock: ReturnType<typeof vi.fn>
beforeEach(() => {
// Reset the module registry so the dynamic import in each test re-evaluates
// usage-store against the doMock below.
vi.resetModules()
runMock = vi.fn()
getMock = vi.fn()
allMock = vi.fn()
deleteMock = vi.fn()
vi.doMock('../../packages/server/src/db/index', () => ({
isSqliteAvailable: () => true,
ensureTable: vi.fn(),
getDb: () => ({
// Route each prepared statement to a spy based on its SQL text. The checks
// run top to bottom, so a statement containing INSERT or UPDATE always gets
// runMock, even if it also matches a later condition.
prepare: vi.fn((sql: string) => {
if (sql.includes('INSERT') || sql.includes('UPDATE')) return { run: runMock }
if (sql.includes('SELECT') && sql.includes('WHERE session_id = ?')) return { get: getMock }
if (sql.includes('SELECT') && sql.includes('IN')) return { all: allMock }
if (sql.includes('DELETE')) return { run: deleteMock }
// Fallback for any other SQL: expose all three statement methods.
return { run: runMock, get: getMock, all: allMock }
}),
}),
jsonSet: vi.fn(),
jsonGet: vi.fn(),
jsonGetAll: vi.fn(),
jsonDelete: vi.fn(),
}))
})
it('updateUsage runs INSERT ... ON CONFLICT query', async () => {
const { updateUsage } = await import('../../packages/server/src/db/hermes/usage-store')
updateUsage('s1', 500, 200)
// Bound parameters: session id, input tokens, output tokens, then a numeric value
// (presumably the updated_at timestamp — only its type is asserted).
expect(runMock).toHaveBeenCalledWith('s1', 500, 200, expect.any(Number))
})
it('getUsage queries by session_id', async () => {
getMock.mockReturnValue({ input_tokens: 999, output_tokens: 111 })
const { getUsage } = await import('../../packages/server/src/db/hermes/usage-store')
const result = getUsage('s1')
expect(getMock).toHaveBeenCalledWith('s1')
expect(result).toEqual({ input_tokens: 999, output_tokens: 111 })
})
it('getUsageBatch queries with IN clause', async () => {
// Only 'a' and 'b' come back as rows; 'c' is requested but has no row.
allMock.mockReturnValue([
{ session_id: 'a', input_tokens: 1, output_tokens: 2 },
{ session_id: 'b', input_tokens: 3, output_tokens: 4 },
])
const { getUsageBatch } = await import('../../packages/server/src/db/hermes/usage-store')
const result = getUsageBatch(['a', 'b', 'c'])
// Each requested id is passed as its own bound parameter.
expect(allMock).toHaveBeenCalledWith('a', 'b', 'c')
// The result is keyed by session_id, and session_id is not repeated in the values.
expect(result).toEqual({
a: { input_tokens: 1, output_tokens: 2 },
b: { input_tokens: 3, output_tokens: 4 },
})
})
it('deleteUsage runs DELETE query', async () => {
const { deleteUsage } = await import('../../packages/server/src/db/hermes/usage-store')
deleteUsage('s1')
expect(deleteMock).toHaveBeenCalledWith('s1')
})
})