feat: add token usage tracking, context display, and dynamic context length (#132)
* fix: specify TS_NODE_PROJECT for dev:server script ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server tsconfig which targets ES2024. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add token usage tracking, context display, and dynamic context length - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store - Display context usage bar in ChatInput showing used/total/remaining tokens - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache - Move sessions-db.ts to db/hermes/ for unified database layer - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable) - Fix proxy SSE path regex to match rewritten upstream path - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404 - Fetch per-session usage on session enter instead of batch - Add unit tests for usage-store, db index, and proxy SSE interception Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,26 @@
|
||||
import type { Context } from 'koa'
|
||||
import { config } from '../../config'
|
||||
import { getGatewayManagerInstance } from '../../services/gateway-bootstrap'
|
||||
import { updateUsage } from '../../db/hermes/usage-store'
|
||||
|
||||
function getGatewayManager() { return getGatewayManagerInstance() }
|
||||
|
||||
// --- run_id → session_id mapping (in-memory, ephemeral) ---
|
||||
|
||||
const runSessionMap = new Map<string, string>()
|
||||
|
||||
export function setRunSession(runId: string, sessionId: string): void {
|
||||
runSessionMap.set(runId, sessionId)
|
||||
// Auto-cleanup after 30 minutes
|
||||
setTimeout(() => runSessionMap.delete(runId), 30 * 60 * 1000)
|
||||
}
|
||||
|
||||
function getSessionForRun(runId: string): string | undefined {
|
||||
return runSessionMap.get(runId)
|
||||
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
function isTransientGatewayError(err: any): boolean {
|
||||
const msg = String(err?.message || '')
|
||||
const causeCode = String(err?.cause?.code || '')
|
||||
@@ -48,19 +65,7 @@ function resolveUpstream(ctx: Context): string {
|
||||
return config.upstream.replace(/\/$/, '')
|
||||
}
|
||||
|
||||
export async function proxy(ctx: Context) {
|
||||
const profile = resolveProfile(ctx)
|
||||
const upstream = resolveUpstream(ctx)
|
||||
// Rewrite path for upstream gateway:
|
||||
// /api/hermes/v1/* -> /v1/* (upstream uses /v1/ prefix)
|
||||
// /api/hermes/* -> /api/* (upstream uses /api/ prefix)
|
||||
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
|
||||
const params = new URLSearchParams(ctx.search || '')
|
||||
params.delete('token')
|
||||
const search = params.toString()
|
||||
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
|
||||
|
||||
// Build headers — forward most, strip browser/web-ui specific ones
|
||||
function buildProxyHeaders(ctx: Context, upstream: string): Record<string, string> {
|
||||
const headers: Record<string, string> = {}
|
||||
for (const [key, value] of Object.entries(ctx.headers)) {
|
||||
if (value == null) continue
|
||||
@@ -75,33 +80,118 @@ export async function proxy(ctx: Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Inject Hermes gateway API key from profile's .env
|
||||
const mgr = getGatewayManager()
|
||||
if (mgr) {
|
||||
const apiKey = mgr.getApiKey(profile)
|
||||
const apiKey = mgr.getApiKey(resolveProfile(ctx))
|
||||
if (apiKey) {
|
||||
headers['authorization'] = `Bearer ${apiKey}`
|
||||
}
|
||||
}
|
||||
|
||||
return headers
|
||||
}
|
||||
|
||||
// --- SSE stream interception ---
|
||||
|
||||
const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
|
||||
|
||||
/**
|
||||
* Parse SSE text chunks and extract run.completed events.
|
||||
* Returns the run_id if a run.completed was found.
|
||||
*/
|
||||
function extractRunCompletedFromChunk(chunk: string): string | null {
|
||||
// SSE format: each line is "data: {...}\n\n"
|
||||
const lines = chunk.split('\n')
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue
|
||||
try {
|
||||
const data = JSON.parse(line.slice(6))
|
||||
if (data.event === 'run.completed' && data.usage && data.run_id) {
|
||||
const sessionId = getSessionForRun(data.run_id)
|
||||
if (sessionId) {
|
||||
updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
|
||||
return data.run_id
|
||||
}
|
||||
}
|
||||
} catch { /* not JSON, skip */ }
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream an SSE response while intercepting run.completed events.
|
||||
*/
|
||||
async function streamSSE(ctx: Context, res: Response): Promise<void> {
|
||||
if (!res.body) {
|
||||
ctx.res.end()
|
||||
return
|
||||
}
|
||||
|
||||
const reader = res.body.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ''
|
||||
|
||||
try {
|
||||
// Build request body from raw body
|
||||
let body: string | undefined
|
||||
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
|
||||
body = (ctx as any).request.rawBody as string | undefined
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
|
||||
// Forward raw bytes to client immediately
|
||||
ctx.res.write(value)
|
||||
|
||||
// Also decode for interception
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
// Process complete SSE lines (delimited by double newline)
|
||||
let newlineIdx: number
|
||||
while ((newlineIdx = buffer.indexOf('\n\n')) !== -1) {
|
||||
const eventBlock = buffer.slice(0, newlineIdx)
|
||||
buffer = buffer.slice(newlineIdx + 2)
|
||||
extractRunCompletedFromChunk(eventBlock)
|
||||
}
|
||||
}
|
||||
|
||||
const requestInit: RequestInit = {
|
||||
method: ctx.req.method,
|
||||
headers,
|
||||
body,
|
||||
// Process remaining buffer
|
||||
if (buffer.trim()) {
|
||||
extractRunCompletedFromChunk(buffer)
|
||||
}
|
||||
} finally {
|
||||
ctx.res.end()
|
||||
}
|
||||
}
|
||||
|
||||
// --- Main proxy function ---
|
||||
|
||||
export async function proxy(ctx: Context) {
|
||||
const profile = resolveProfile(ctx)
|
||||
const upstream = resolveUpstream(ctx)
|
||||
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
|
||||
const params = new URLSearchParams(ctx.search || '')
|
||||
params.delete('token')
|
||||
const search = params.toString()
|
||||
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
|
||||
|
||||
const headers = buildProxyHeaders(ctx, upstream)
|
||||
|
||||
try {
|
||||
let body: string | undefined
|
||||
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
|
||||
// @koa/bodyparser parses JSON into ctx.request.body but doesn't store rawBody
|
||||
// by default. Re-serialize the parsed body to get the string form.
|
||||
const parsed = (ctx as any).request.body
|
||||
if (typeof parsed === 'string') {
|
||||
body = parsed
|
||||
} else if (parsed && typeof parsed === 'object') {
|
||||
body = JSON.stringify(parsed)
|
||||
}
|
||||
}
|
||||
|
||||
const requestInit: RequestInit = { method: ctx.req.method, headers, body }
|
||||
|
||||
let res: Response
|
||||
try {
|
||||
res = await fetch(url, requestInit)
|
||||
} catch (err: any) {
|
||||
// Gateway may be restarting; wait briefly and retry once.
|
||||
if (isTransientGatewayError(err) && await waitForGatewayReady(upstream)) {
|
||||
res = await fetch(url, requestInit)
|
||||
} else {
|
||||
@@ -116,10 +206,37 @@ export async function proxy(ctx: Context) {
|
||||
ctx.set(key, value)
|
||||
}
|
||||
})
|
||||
|
||||
ctx.status = res.status
|
||||
|
||||
// Stream response body
|
||||
// Intercept POST /v1/runs to capture run_id → session_id mapping
|
||||
if (ctx.req.method === 'POST' && /\/v1\/runs$/.test(upstreamPath) && body) {
|
||||
try {
|
||||
const parsed = JSON.parse(body)
|
||||
if (parsed.session_id) {
|
||||
const resBody = await res.text()
|
||||
ctx.res.write(resBody)
|
||||
ctx.res.end()
|
||||
|
||||
try {
|
||||
const result = JSON.parse(resBody)
|
||||
if (result.run_id) {
|
||||
setRunSession(result.run_id, parsed.session_id)
|
||||
}
|
||||
} catch { /* response not JSON, ignore */ }
|
||||
return
|
||||
}
|
||||
} catch { /* body not JSON, fall through to normal stream */ }
|
||||
// No session_id in body — fall through to normal response handling below
|
||||
}
|
||||
|
||||
// Intercept SSE streams for /v1/runs/{id}/events
|
||||
const sseMatch = upstreamPath.match(SSE_EVENTS_PATH)
|
||||
if (sseMatch) {
|
||||
await streamSSE(ctx, res)
|
||||
return
|
||||
}
|
||||
|
||||
// Default: pipe response body directly
|
||||
if (res.body) {
|
||||
const reader = res.body.getReader()
|
||||
const pump = async () => {
|
||||
|
||||
@@ -8,6 +8,9 @@ sessionRoutes.get('/api/hermes/sessions/conversations/:id/messages', ctrl.getCon
|
||||
sessionRoutes.get('/api/hermes/sessions', ctrl.list)
|
||||
sessionRoutes.get('/api/hermes/search/sessions', ctrl.search)
|
||||
sessionRoutes.get('/api/hermes/sessions/search', ctrl.search)
|
||||
sessionRoutes.get('/api/hermes/sessions/usage', ctrl.usageBatch)
|
||||
sessionRoutes.get('/api/hermes/sessions/context-length', ctrl.contextLength)
|
||||
sessionRoutes.get('/api/hermes/sessions/:id', ctrl.get)
|
||||
sessionRoutes.get('/api/hermes/sessions/:id/usage', ctrl.usageSingle)
|
||||
sessionRoutes.delete('/api/hermes/sessions/:id', ctrl.remove)
|
||||
sessionRoutes.post('/api/hermes/sessions/:id/rename', ctrl.rename)
|
||||
|
||||
Reference in New Issue
Block a user