feat: add token usage tracking, context display, and dynamic context length (#132)
* fix: specify TS_NODE_PROJECT for dev:server script

  ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set the TS_NODE_PROJECT env var to point to the server tsconfig, which targets ES2024.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

  - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store
  - Display context usage bar in ChatInput showing used/total/remaining tokens
  - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache
  - Move sessions-db.ts to db/hermes/ for unified database layer
  - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
  - Fix proxy SSE path regex to match rewritten upstream path
  - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
  - Fetch per-session usage on session enter instead of batch
  - Add unit tests for usage-store, db index, and proxy SSE interception

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
import { resolve, join } from 'path'
|
||||
import { homedir } from 'os'
|
||||
import { readFileSync, existsSync, statSync } from 'fs'
|
||||
|
||||
/** Context length reported when no model-specific value can be resolved. */
const DEFAULT_CONTEXT_LENGTH = 200_000

/** Absolute path of the `.hermes` directory inside the current user's home directory. */
const HERMES_BASE = resolve(homedir(), '.hermes')

/** Absolute path of the `models_dev_cache.json` file inside `HERMES_BASE`. */
const MODELS_DEV_CACHE = resolve(HERMES_BASE, 'models_dev_cache.json')
|
||||
|
||||
/**
 * Token limits attached to a model entry in the models cache file.
 * Every field is optional because the file is external data.
 */
interface ModelLimit {
  /** Not read in this file; presumably the maximum input tokens — TODO confirm. */
  input?: number
  /** Not read in this file; presumably the maximum output tokens — TODO confirm. */
  output?: number
  /** Context length; this is the value the context lookup returns. */
  context?: number
}
|
||||
|
||||
/** A single model record inside a provider's `models` map. */
interface ModelEntry {
  /** Token limits for the model, when the cache file provides them. */
  limit?: ModelLimit
  /** Model identifier; not read in this file (lookups use the map key instead). */
  id?: string
}
|
||||
|
||||
/** A provider record in the models cache file. */
interface ProviderEntry {
  /** Models offered by this provider, keyed by model name. */
  models?: Record<string, ModelEntry>
}
|
||||
|
||||
// --- In-memory cache: parsed models_dev_cache (1.7MB), invalidated by mtime ---

/** Maximum age of the in-memory copy before the file is re-read. */
const CACHE_TTL_MS = 300_000 // 5 minutes

/** Last successfully parsed cache file contents, or null if nothing has been loaded yet. */
let _cache: Record<string, ProviderEntry> | null = null

/** `Date.now()` timestamp of the last successful load. */
let _cacheLoadedAt = 0

/** `mtimeMs` of the cache file at the last successful load. */
let _cacheMtime = 0
|
||||
|
||||
/**
 * Load and parse the models cache file, memoizing the parsed result.
 *
 * The in-memory copy is reused while the file's mtime is unchanged and the
 * last successful load is younger than `CACHE_TTL_MS`; otherwise the file is
 * read and parsed again.
 *
 * The parsed JSON is only accepted when its top level is a plain object.
 * Anything else (`null`, an array, a scalar) is treated like a read/parse
 * failure so that it never replaces a previously loaded good copy.
 *
 * @returns The provider map keyed by provider name, the last good copy when
 *   the file cannot be read or parsed, or `null` when the file does not exist
 *   or nothing has ever been loaded successfully.
 */
function loadModelsDevCache(): Record<string, ProviderEntry> | null {
  if (!existsSync(MODELS_DEV_CACHE)) return null

  try {
    const stat = statSync(MODELS_DEV_CACHE)
    const now = Date.now()

    // Return cached if file hasn't changed and within TTL
    if (_cache && stat.mtimeMs === _cacheMtime && now - _cacheLoadedAt < CACHE_TTL_MS) {
      return _cache
    }

    const parsed: unknown = JSON.parse(readFileSync(MODELS_DEV_CACHE, 'utf-8'))

    // JSON.parse accepts any JSON value; callers iterate the result as a
    // provider map, so reject everything that is not a plain object.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return _cache // keep the last good copy
    }

    _cache = parsed as Record<string, ProviderEntry>
    _cacheMtime = stat.mtimeMs
    _cacheLoadedAt = now
    return _cache
  } catch {
    return _cache // return stale cache on error
  }
}
|
||||
|
||||
// --- Profile helpers ---
|
||||
|
||||
/**
 * Resolve the directory that holds a profile's files.
 *
 * @param profile Profile name; omitted, empty, or `'default'` selects the base directory.
 * @returns `HERMES_BASE/profiles/<profile>` when that path exists, otherwise `HERMES_BASE`.
 */
function getProfileDir(profile?: string): string {
  if (profile && profile !== 'default') {
    const candidate = join(HERMES_BASE, 'profiles', profile)
    if (existsSync(candidate)) return candidate
  }
  // Unnamed/default profile, or a named profile whose directory is missing.
  return HERMES_BASE
}
|
||||
|
||||
/**
 * Clean the raw text that follows `default:` into a bare scalar value.
 * Surrounding single or double quotes are removed; for unquoted values a
 * trailing ` # comment` is dropped.
 */
function unquoteYamlScalar(raw: string): string {
  const text = raw.trim()
  const quote = text.charAt(0)
  if (quote === '"' || quote === "'") {
    const closing = text.indexOf(quote, 1)
    if (closing > 0) return text.slice(1, closing).trim()
  }
  return text.replace(/\s+#.*$/, '').trim()
}

/**
 * Read the default model name from `<profileDir>/config.yaml`.
 *
 * This is a minimal line scanner, not a YAML parser. It looks for a top-level
 * `model:` mapping and returns the value of its direct `default:` child:
 *   - `default:` may appear anywhere among the direct children of `model:`;
 *   - deeper-nested `default:` keys are ignored;
 *   - blank lines and comment lines are skipped;
 *   - quotes and trailing comments are stripped from the value;
 *   - an empty `default:` yields `null` instead of picking up the next line.
 *
 * @param profileDir Directory expected to contain `config.yaml`.
 * @returns The model name, or `null` when the file is missing or unreadable,
 *   or has no non-empty `model.default` value.
 */
function getDefaultModel(profileDir: string): string | null {
  const configPath = join(profileDir, 'config.yaml')
  if (!existsSync(configPath)) return null

  let content: string
  try {
    content = readFileSync(configPath, 'utf-8')
  } catch {
    return null
  }

  // Drop a leading BOM so a `model:` key on the first line is still at column 0.
  const lines = content.replace(/^\uFEFF/, '').split(/\r?\n/)
  const headerIndex = lines.findIndex((line) => /^model:\s*(?:#.*)?$/.test(line))
  if (headerIndex === -1) return null

  // Indentation of the first child of `model:`; only lines at exactly this
  // depth are direct children.
  let childIndent = -1
  for (const line of lines.slice(headerIndex + 1)) {
    if (line.trim() === '' || /^\s*#/.test(line)) continue

    const indent = line.length - line.trimStart().length
    if (indent === 0) break // next top-level key: the `model:` mapping has ended
    if (childIndent === -1) childIndent = indent
    if (indent !== childIndent) continue

    const match = /^default:\s*(.*)$/.exec(line.trimStart())
    if (!match) continue

    const value = unquoteYamlScalar(match[1] ?? '')
    return value === '' ? null : value
  }
  return null
}
|
||||
|
||||
// --- Context lookup ---
|
||||
|
||||
/**
 * Find the context length recorded for `modelName` in the models cache.
 *
 * All providers are searched for an exact key match first. Only if none has
 * a usable value are they searched again case-insensitively. The cache file
 * is external, unvalidated JSON, so malformed provider or model entries are
 * skipped rather than dereferenced, and `limit.context` is only accepted when
 * it is a positive finite number.
 *
 * @param modelName Model name to look up, as written in the profile config.
 * @returns The context length, or `null` when the cache is unavailable or no
 *   matching entry carries a usable value.
 */
function lookupContextFromCache(modelName: string): number | null {
  const data = loadModelsDevCache()
  if (!data) return null

  // Extract a usable context length from a possibly malformed entry.
  const contextOf = (entry: { limit?: { context?: unknown } | null } | null | undefined): number | null => {
    const value = entry?.limit?.context
    return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : null
  }

  // Exact match first
  for (const prov of Object.values(data)) {
    const models = prov?.models ?? {}
    const context = contextOf(models[modelName])
    if (context !== null) return context
  }

  // Case-insensitive fallback
  const lower = modelName.toLowerCase()
  for (const prov of Object.values(data)) {
    const models = prov?.models ?? {}
    for (const [name, entry] of Object.entries(models)) {
      if (name.toLowerCase() !== lower) continue
      const context = contextOf(entry)
      if (context !== null) return context
    }
  }
  return null
}
|
||||
|
||||
/**
 * Resolve the context length for the default model of a profile.
 *
 * The profile's `config.yaml` is consulted for the default model name, which
 * is then looked up in the models cache. `DEFAULT_CONTEXT_LENGTH` is returned
 * when the profile has no default model or the lookup yields nothing.
 *
 * @param profile Optional profile name; omitted selects the base profile.
 * @returns The resolved context length, or the default fallback.
 */
export function getModelContextLength(profile?: string): number {
  const defaultModel = getDefaultModel(getProfileDir(profile))
  if (defaultModel) {
    const contextLength = lookupContextFromCache(defaultModel)
    if (contextLength) return contextLength
  }
  return DEFAULT_CONTEXT_LENGTH
}
|
||||
Reference in New Issue
Block a user