feat: add token usage tracking, context display, and dynamic context length (#132)

* fix: specify TS_NODE_PROJECT for dev:server script

ts-node/register resolves tsconfig from the entry file upward,
finding the root solution-style tsconfig.json (no compilerOptions).
This causes target to default to ES3, breaking MapIterator spread
syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server
tsconfig which targets ES2024.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

- Intercept SSE proxy to capture run.completed events and persist token
  usage (input_tokens, output_tokens) per session to SQLite/JSON store
- Display context usage bar in ChatInput showing used/total/remaining tokens
- Resolve actual context length from Hermes models_dev_cache.json based
  on the active profile's default model (fallback 200K), with 5min in-memory cache
- Move sessions-db.ts to db/hermes/ for unified database layer
- Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
- Fix proxy SSE path regex to match rewritten upstream path
- Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
- Fetch per-session usage on session enter instead of batch
- Add unit tests for usage-store, db index, and proxy SSE interception

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-04-22 16:14:50 +08:00
committed by GitHub
parent ce3bf5f3eb
commit 6f69c69802
26 changed files with 1203 additions and 144 deletions
@@ -1,9 +1,26 @@
import type { Context } from 'koa'
import { config } from '../../config'
import { getGatewayManagerInstance } from '../../services/gateway-bootstrap'
import { updateUsage } from '../../db/hermes/usage-store'
function getGatewayManager() { return getGatewayManagerInstance() }
// --- run_id → session_id mapping (in-memory, ephemeral) ---
const runSessionMap = new Map<string, string>()
export function setRunSession(runId: string, sessionId: string): void {
runSessionMap.set(runId, sessionId)
// Auto-cleanup after 30 minutes
setTimeout(() => runSessionMap.delete(runId), 30 * 60 * 1000)
}
function getSessionForRun(runId: string): string | undefined {
return runSessionMap.get(runId)
}
// --- Helpers ---
function isTransientGatewayError(err: any): boolean {
const msg = String(err?.message || '')
const causeCode = String(err?.cause?.code || '')
@@ -48,19 +65,7 @@ function resolveUpstream(ctx: Context): string {
return config.upstream.replace(/\/$/, '')
}
export async function proxy(ctx: Context) {
const profile = resolveProfile(ctx)
const upstream = resolveUpstream(ctx)
// Rewrite path for upstream gateway:
// /api/hermes/v1/* -> /v1/* (upstream uses /v1/ prefix)
// /api/hermes/* -> /api/* (upstream uses /api/ prefix)
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
const params = new URLSearchParams(ctx.search || '')
params.delete('token')
const search = params.toString()
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
// Build headers — forward most, strip browser/web-ui specific ones
function buildProxyHeaders(ctx: Context, upstream: string): Record<string, string> {
const headers: Record<string, string> = {}
for (const [key, value] of Object.entries(ctx.headers)) {
if (value == null) continue
@@ -75,33 +80,118 @@ export async function proxy(ctx: Context) {
}
}
// Inject Hermes gateway API key from profile's .env
const mgr = getGatewayManager()
if (mgr) {
const apiKey = mgr.getApiKey(profile)
const apiKey = mgr.getApiKey(resolveProfile(ctx))
if (apiKey) {
headers['authorization'] = `Bearer ${apiKey}`
}
}
return headers
}
// --- SSE stream interception ---
const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
/**
* Parse SSE text chunks and extract run.completed events.
* Returns the run_id if a run.completed was found.
*/
function extractRunCompletedFromChunk(chunk: string): string | null {
// SSE format: each line is "data: {...}\n\n"
const lines = chunk.split('\n')
for (const line of lines) {
if (!line.startsWith('data: ')) continue
try {
const data = JSON.parse(line.slice(6))
if (data.event === 'run.completed' && data.usage && data.run_id) {
const sessionId = getSessionForRun(data.run_id)
if (sessionId) {
updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
return data.run_id
}
}
} catch { /* not JSON, skip */ }
}
return null
}
/**
* Stream an SSE response while intercepting run.completed events.
*/
async function streamSSE(ctx: Context, res: Response): Promise<void> {
if (!res.body) {
ctx.res.end()
return
}
const reader = res.body.getReader()
const decoder = new TextDecoder()
let buffer = ''
try {
// Build request body from raw body
let body: string | undefined
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
body = (ctx as any).request.rawBody as string | undefined
while (true) {
const { done, value } = await reader.read()
if (done) break
// Forward raw bytes to client immediately
ctx.res.write(value)
// Also decode for interception
buffer += decoder.decode(value, { stream: true })
// Process complete SSE lines (delimited by double newline)
let newlineIdx: number
while ((newlineIdx = buffer.indexOf('\n\n')) !== -1) {
const eventBlock = buffer.slice(0, newlineIdx)
buffer = buffer.slice(newlineIdx + 2)
extractRunCompletedFromChunk(eventBlock)
}
}
const requestInit: RequestInit = {
method: ctx.req.method,
headers,
body,
// Process remaining buffer
if (buffer.trim()) {
extractRunCompletedFromChunk(buffer)
}
} finally {
ctx.res.end()
}
}
// --- Main proxy function ---
export async function proxy(ctx: Context) {
const profile = resolveProfile(ctx)
const upstream = resolveUpstream(ctx)
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
const params = new URLSearchParams(ctx.search || '')
params.delete('token')
const search = params.toString()
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
const headers = buildProxyHeaders(ctx, upstream)
try {
let body: string | undefined
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
// @koa/bodyparser parses JSON into ctx.request.body but doesn't store rawBody
// by default. Re-serialize the parsed body to get the string form.
const parsed = (ctx as any).request.body
if (typeof parsed === 'string') {
body = parsed
} else if (parsed && typeof parsed === 'object') {
body = JSON.stringify(parsed)
}
}
const requestInit: RequestInit = { method: ctx.req.method, headers, body }
let res: Response
try {
res = await fetch(url, requestInit)
} catch (err: any) {
// Gateway may be restarting; wait briefly and retry once.
if (isTransientGatewayError(err) && await waitForGatewayReady(upstream)) {
res = await fetch(url, requestInit)
} else {
@@ -116,10 +206,37 @@ export async function proxy(ctx: Context) {
ctx.set(key, value)
}
})
ctx.status = res.status
// Stream response body
// Intercept POST /v1/runs to capture run_id → session_id mapping
if (ctx.req.method === 'POST' && /\/v1\/runs$/.test(upstreamPath) && body) {
try {
const parsed = JSON.parse(body)
if (parsed.session_id) {
const resBody = await res.text()
ctx.res.write(resBody)
ctx.res.end()
try {
const result = JSON.parse(resBody)
if (result.run_id) {
setRunSession(result.run_id, parsed.session_id)
}
} catch { /* response not JSON, ignore */ }
return
}
} catch { /* body not JSON, fall through to normal stream */ }
// No session_id in body — fall through to normal response handling below
}
// Intercept SSE streams for /v1/runs/{id}/events
const sseMatch = upstreamPath.match(SSE_EVENTS_PATH)
if (sseMatch) {
await streamSSE(ctx, res)
return
}
// Default: pipe response body directly
if (res.body) {
const reader = res.body.getReader()
const pump = async () => {
@@ -8,6 +8,9 @@ sessionRoutes.get('/api/hermes/sessions/conversations/:id/messages', ctrl.getCon
sessionRoutes.get('/api/hermes/sessions', ctrl.list)
sessionRoutes.get('/api/hermes/search/sessions', ctrl.search)
sessionRoutes.get('/api/hermes/sessions/search', ctrl.search)
sessionRoutes.get('/api/hermes/sessions/usage', ctrl.usageBatch)
sessionRoutes.get('/api/hermes/sessions/context-length', ctrl.contextLength)
sessionRoutes.get('/api/hermes/sessions/:id', ctrl.get)
sessionRoutes.get('/api/hermes/sessions/:id/usage', ctrl.usageSingle)
sessionRoutes.delete('/api/hermes/sessions/:id', ctrl.remove)
sessionRoutes.post('/api/hermes/sessions/:id/rename', ctrl.rename)