feat: add token usage tracking, context display, and dynamic context length (#132)

* fix: specify TS_NODE_PROJECT for dev:server script ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server tsconfig which targets ES2024. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add token usage tracking, context display, and dynamic context length - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store - Display context usage bar in ChatInput showing used/total/remaining tokens - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache - Move sessions-db.ts to db/hermes/ for unified database layer - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable) - Fix proxy SSE path regex to match rewritten upstream path - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404 - Fetch per-session usage on session enter instead of batch - Add unit tests for usage-store, db index, and proxy SSE interception Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-22 16:14:50 +08:00
parent ce3bf5f3eb
commit 6f69c69802
26 changed files with 1203 additions and 144 deletions
@@ -28,6 +28,11 @@ export interface RunEvent {
  preview?: string
  timestamp?: number
  error?: string
+  usage?: {
+    input_tokens: number
+    output_tokens: number
+    total_tokens: number
+  }
 }

 export async function startRun(body: StartRunRequest): Promise<StartRunResponse> {
@@ -94,3 +94,26 @@ export async function renameSession(id: string, title: string): Promise<boolean>
    return false
  }
 }
+
+/**
+ * Fetch stored token usage for several sessions in one request.
+ *
+ * @param ids Session ids to look up; an empty list returns `{}` without a network call.
+ * @returns Whatever `/api/hermes/sessions/usage` responds with, typed as usage keyed by session id.
+ */
+export async function fetchSessionUsage(ids: string[]): Promise<Record<string, { input_tokens: number; output_tokens: number }>> {
+  if (!ids.length) return {}
+  const query = new URLSearchParams({ ids: ids.join(',') })
+  return request(`/api/hermes/sessions/usage?${query}`)
+}
+
+/**
+ * Fetch stored token usage for one session.
+ *
+ * @param id Session id; it is percent-encoded before being placed in the URL path.
+ * @returns The usage counters, or `null` when the request throws for any reason.
+ */
+export async function fetchSessionUsageSingle(id: string): Promise<{ input_tokens: number; output_tokens: number } | null> {
+  try {
+    // Encode the id so characters such as `/`, `?` or `#` cannot change which route is requested.
+    return await request<{ input_tokens: number; output_tokens: number }>(
+      `/api/hermes/sessions/${encodeURIComponent(id)}/usage`,
+    )
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Ask the server for the context length to display.
+ *
+ * @param profile Optional profile name; sent as the `profile` query parameter when non-empty.
+ * @returns The `context_length` field of the response.
+ */
+export async function fetchContextLength(profile?: string): Promise<number> {
+  const search = profile ? `?${new URLSearchParams({ profile })}` : ''
+  const { context_length } = await request<{ context_length: number }>(`/api/hermes/sessions/context-length${search}`)
+  return context_length
+}
@@ -1,8 +1,11 @@
 <script setup lang="ts">
 import type { Attachment } from '@/stores/hermes/chat'
 import { useChatStore } from '@/stores/hermes/chat'
+import { useAppStore } from '@/stores/hermes/app'
+import { useProfilesStore } from '@/stores/hermes/profiles'
+import { fetchContextLength } from '@/api/hermes/sessions'
 import { NButton, NTooltip } from 'naive-ui'
-import { computed, ref } from 'vue'
+import { computed, ref, onMounted, watch } from 'vue'
 import { useI18n } from 'vue-i18n'

 const chatStore = useChatStore()
@@ -17,25 +20,41 @@ const isComposing = ref(false)

 const canSend = computed(() => inputText.value.trim() || attachments.value.length > 0)

-// --- Voice input (Web Speech API) ---
-// TODO: re-enable when needed — browser-native speech-to-text
-// const hasSpeechRecognition = ref(false)
-// let recognition: SpeechRecognition | null = null
-// let finalTranscript = ''
-// let prefixText = ''
-// onMounted(() => {
-//   const SR = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
-//   if (!SR) return
-//   recognition = new SR()
-//   recognition.continuous = false
-//   recognition.interimResults = true
-//   recognition.lang = 'en-US'
-//   hasSpeechRecognition.value = true
-//   recognition.onresult = (event: SpeechRecognitionEvent) => { ... }
-//   recognition.onend = () => { ... }
-//   recognition.onerror = (event: SpeechRecognitionErrorEvent) => { ... }
-// })
-// onUnmounted(() => { if (recognition && isRecording.value) recognition.stop() })
+// --- Context info ---
+
+// Context window assumed until the server answers, and whenever it cannot give a usable value.
+const FALLBACK_CONTEXT = 200000
+const contextLength = ref(FALLBACK_CONTEXT)
+
+// Id of the most recent load; lets a slow, outdated response be discarded.
+let contextLoadSeq = 0
+
+/**
+ * Refresh `contextLength` from the server for the active profile.
+ *
+ * Falls back to FALLBACK_CONTEXT when the request fails or the reported value is not a
+ * positive finite number (the value is used as a divisor by `usagePercent`).
+ */
+async function loadContextLength() {
+  const seq = ++contextLoadSeq
+  let next = FALLBACK_CONTEXT
+  try {
+    const profile = useProfilesStore().activeProfileName || undefined
+    const reported = await fetchContextLength(profile)
+    if (Number.isFinite(reported) && reported > 0) next = reported
+  } catch {
+    // Request failed: keep the fallback.
+  }
+  // The profile watcher and the model watcher can trigger overlapping loads;
+  // only the most recently started one is allowed to update the value.
+  if (seq === contextLoadSeq) contextLength.value = next
+}
+
+onMounted(loadContextLength)
+watch(() => useProfilesStore().activeProfileName, loadContextLength)
+watch(() => useAppStore().selectedModel, loadContextLength)
+
+// Input plus output tokens of the active session; a missing session or missing count contributes 0.
+const totalTokens = computed(() => {
+  const session = chatStore.activeSession
+  return (session?.inputTokens ?? 0) + (session?.outputTokens ?? 0)
+})
+
+// Tokens left in the context window. Clamped at 0 so that usage beyond the window
+// is not rendered as a raw negative number.
+const remainingTokens = computed(() => Math.max(contextLength.value - totalTokens.value, 0))
+
+// Share of the context window in use, always within 0–100 so it is safe as a CSS width.
+// A window that is not a positive number is treated as full once any tokens are used.
+const usagePercent = computed(() => {
+  const capacity = contextLength.value
+  if (!(capacity > 0)) return totalTokens.value > 0 ? 100 : 0
+  const percent = (totalTokens.value / capacity) * 100
+  return Math.min(Math.max(percent, 0), 100)
+})
+
+/**
+ * Format a token count compactly: plain digits below 1000, one decimal with a `k` suffix
+ * below one million, one decimal with an `M` suffix from there on.
+ *
+ * @param n Token count.
+ * @returns For example `999`, `1.5k`, `200.0k`, `1.0M`.
+ */
+function formatTokens(n: number): string {
+  if (n < 1000) return String(n)
+  const thousands = (n / 1000).toFixed(1)
+  // toFixed rounds, so counts just under one million would print as "1000.0k";
+  // those are promoted to the M form instead.
+  if (n < 1000000 && thousands !== '1000.0') return thousands + 'k'
+  return (n / 1000000).toFixed(1) + 'M'
+}

 // --- File attachment helpers ---

@@ -176,6 +195,33 @@ function isImage(type: string): boolean {

 <template>
  <div class="chat-input-area">
+    <!-- Top bar: attach + context info -->
+    <div class="input-top-bar">
+      <NTooltip trigger="hover">
+        <template #trigger>
+          <NButton quaternary size="tiny" @click="handleAttachClick" circle>
+            <template #icon>
+              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
+            </template>
+          </NButton>
+        </template>
+        {{ t('chat.attachFiles') }}
+      </NTooltip>
+      <span v-if="totalTokens > 0" class="context-info" :class="{ 'context-warning': usagePercent > 80 }">
+        {{ formatTokens(totalTokens) }} / {{ formatTokens(contextLength) }} · {{ t('chat.contextRemaining') }} {{ formatTokens(remainingTokens) }}
+      </span>
+      <div v-if="totalTokens > 0" class="context-bar">
+        <div
+          class="context-bar-fill"
+          :class="{
+            'context-bar-warn': usagePercent > 60 && usagePercent <= 80,
+            'context-bar-danger': usagePercent > 80,
+          }"
+          :style="{ width: `${usagePercent}%` }"
+        />
+      </div>
+    </div>
+
    <!-- Attachment previews -->
    <div v-if="attachments.length > 0" class="attachment-previews">
      <div
@@ -228,16 +274,6 @@ function isImage(type: string): boolean {
        @paste="handlePaste"
      ></textarea>
      <div class="input-actions">
-        <NTooltip trigger="hover">
-          <template #trigger>
-            <NButton quaternary size="small" @click="handleAttachClick" circle>
-              <template #icon>
-                <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
-              </template>
-            </NButton>
-          </template>
-          {{ t('chat.attachFiles') }}
-        </NTooltip>
        <NButton
          v-if="chatStore.isStreaming"
          size="small"
@@ -271,6 +307,45 @@ function isImage(type: string): boolean {
  flex-shrink: 0;
 }

+.input-top-bar {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  padding: 0 0 6px;
+}
+
+.context-info {
+  font-size: 11px;
+  color: $text-muted;
+
+  &.context-warning {
+    color: #e8a735;
+  }
+}
+
+.context-bar {
+  width: 60px;
+  height: 4px;
+  background: rgba(128, 128, 128, 0.2);
+  border-radius: 2px;
+  overflow: hidden;
+}
+
+.context-bar-fill {
+  height: 100%;
+  background: linear-gradient(90deg, rgba(128, 128, 128, 0.3), rgba(128, 128, 128, 0.6));
+  border-radius: 2px;
+  transition: width 0.3s ease;
+
+  &.context-bar-warn {
+    background: linear-gradient(90deg, #c98a1a, #e8a735);
+  }
+
+  &.context-bar-danger {
+    background: linear-gradient(90deg, #c43a2a, #e85d4a);
+  }
+}
+
 .attachment-previews {
  display: flex;
  flex-wrap: wrap;
@@ -169,57 +169,6 @@ const headerTitle = computed(() =>
  currentMode.value === 'live' ? t('chat.liveSessions') : activeSessionTitle.value,
 )

-const totalTokens = computed(() => {
-  const input = chatStore.activeSession?.inputTokens ?? 0
-  const output = chatStore.activeSession?.outputTokens ?? 0
-  return input + output
-})
-
-const MODEL_CONTEXT: Record<string, number> = {
-  'claude-opus-4': 200000,
-  'claude-sonnet-4': 200000,
-  'claude-haiku-4': 200000,
-  'claude-3.5-sonnet': 200000,
-  'claude-3.5-haiku': 200000,
-  'claude-3-opus': 200000,
-  'claude-3-sonnet': 200000,
-  'claude-3-haiku': 200000,
-  'gpt-4o': 128000,
-  'gpt-4o-mini': 128000,
-  'gpt-4-turbo': 128000,
-  'gpt-4': 8192,
-  'gpt-3.5-turbo': 16385,
-  'o1': 200000,
-  'o1-mini': 128000,
-  'o3': 200000,
-  'o3-mini': 200000,
-  'o4-mini': 200000,
-  'deepseek-chat': 65536,
-  'deepseek-reasoner': 65536,
-  'gemini-2.5-pro': 1000000,
-  'gemini-2.5-flash': 1000000,
-  'gemini-2.0-flash': 1000000,
-  'glm-4-plus': 128000,
-  'glm-4': 128000,
-  'qwen-max': 128000,
-  'qwen-plus': 128000,
-  'qwen-turbo': 128000,
-}
-
-const contextWindow = computed(() => {
-  const model = chatStore.activeSession?.model || ''
-  for (const [key, val] of Object.entries(MODEL_CONTEXT)) {
-    if (model.includes(key)) return val
-  }
-  return null
-})
-
-function formatTokens(n: number): string {
-  if (n >= 1000000) return (n / 1000000).toFixed(1) + 'M'
-  if (n >= 1000) return (n / 1000).toFixed(1) + 'k'
-  return String(n)
-}
-
 const activeSessionSource = computed(() =>
  currentMode.value === 'chat' ? (chatStore.activeSession?.source || '') : '',
 )
@@ -446,9 +395,6 @@ async function handleRenameConfirm() {

      <template v-if="currentMode === 'chat'">
        <MessageList />
-        <div v-if="contextWindow !== null" class="context-info">
-          <span>{{ formatTokens(totalTokens) }} / {{ formatTokens(contextWindow) }}</span>
-        </div>
        <ChatInput />
      </template>
      <ConversationMonitorPane v-else :human-only="sessionBrowserPrefsStore.humanOnly" />
@@ -799,20 +745,9 @@ async function handleRenameConfirm() {
  margin-right: 4px;
 }

-.context-info {
-  padding: 0 20px 4px;
-  font-size: 11px;
-  color: $text-muted;
-  flex-shrink: 0;
-}
-
@media (max-width: $breakpoint-mobile) {
  .chat-header {
    padding: 16px 12px 16px 52px;
  }
-
-  .context-info {
-    padding: 0 12px 4px;
-  }
 }
 </style>
@@ -61,6 +61,7 @@ export default {

  // Chat
  chat: {
+    contextRemaining: 'übrig',
    emptyState: 'Starten Sie eine Konversation mit Hermes Agent',
    inputPlaceholder: 'Nachricht eingeben... (Enter zum Senden, Shift+Enter fur neue Zeile)',
    attachFiles: 'Dateien anhangen',
@@ -73,6 +73,7 @@ export default {

  // Chat
  chat: {
+    contextRemaining: 'remaining',
    emptyState: 'Start a conversation with Hermes Agent',
    inputPlaceholder: 'Type a message... (Enter to send, Shift+Enter for new line)',
    attachFiles: 'Attach files',
@@ -61,6 +61,7 @@ export default {

  // Chat
  chat: {
+    contextRemaining: 'restante',
    emptyState: 'Inicia una conversacion con Hermes Agent',
    inputPlaceholder: 'Escribe un mensaje... (Enter para enviar, Shift+Enter para nueva linea)',
    attachFiles: 'Adjuntar archivos',
@@ -61,6 +61,7 @@ export default {

  // Chat
  chat: {
+    contextRemaining: 'restant',
    emptyState: 'Demarrer une conversation avec Hermes Agent',
    inputPlaceholder: 'Tapez un message... (Entree pour envoyer, Shift+Entree pour un saut de ligne)',
    attachFiles: 'Joindre des fichiers',
@@ -61,6 +61,7 @@ export default {

  // チャット
  chat: {
+    contextRemaining: '残り',
    emptyState: 'Hermes Agent と会話を開始しましょう',
    inputPlaceholder: 'メッセージを入力... (Enter で送信、Shift+Enter で改行)',
    attachFiles: 'ファイルを添付',
@@ -61,6 +61,7 @@ export default {

  // 채팅
  chat: {
+    contextRemaining: '남음',
    emptyState: 'Hermes Agent와 대화를 시작하세요',
    inputPlaceholder: '메시지를 입력하세요... (Enter로 전송, Shift+Enter로 줄바꿈)',
    attachFiles: '파일 첨부',
@@ -61,6 +61,7 @@ export default {

  // Chat
  chat: {
+    contextRemaining: 'restante',
    emptyState: 'Inicie uma conversa com o Hermes Agent',
    inputPlaceholder: 'Digite uma mensagem... (Enter para enviar, Shift+Enter para nova linha)',
    attachFiles: 'Anexar arquivos',
@@ -73,6 +73,7 @@ export default {

  // 对话
  chat: {
+    contextRemaining: '剩余',
    emptyState: '开始与 Hermes Agent 对话',
    inputPlaceholder: '输入消息... (Enter 发送，Shift+Enter 换行)',
    attachFiles: '添加附件',
@@ -1,5 +1,5 @@
 import { startRun, streamRunEvents, type ChatMessage, type RunEvent } from '@/api/hermes/chat'
-import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
+import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
 import { defineStore } from 'pinia'
 import { ref, computed } from 'vue'
 import { useAppStore } from './app'
@@ -155,8 +155,6 @@ function mapHermesSession(s: SessionSummary): Session {
    model: s.model,
    provider: (s as any).billing_provider || '',
    messageCount: s.message_count,
-    inputTokens: s.input_tokens,
-    outputTokens: s.output_tokens,
  }
 }

@@ -340,8 +338,6 @@ export const useChatStore = defineStore('chat', () => {
          || (serverUsers === localUsers && serverAssistantLen >= localAssistantLen)
        if (serverIsAhead) {
          target.messages = mapped
-          target.inputTokens = detail.input_tokens
-          target.outputTokens = detail.output_tokens
          if (detail.title && !target.title) target.title = detail.title
          if (sid === activeSessionId.value) persistActiveMessages()
        }
@@ -363,8 +359,6 @@ export const useChatStore = defineStore('chat', () => {
              // our "don't retreat" guard above skipped it — the server is
              // now the authoritative source of truth.
              target.messages = mapped
-              target.inputTokens = detail.input_tokens
-              target.outputTokens = detail.output_tokens
              if (detail.title) target.title = detail.title
              if (sid === activeSessionId.value) persistActiveMessages()
              clearInFlight(sid)
@@ -447,8 +441,6 @@ export const useChatStore = defineStore('chat', () => {
      if (!target) return false
      const mapped = mapHermesMessages(detail.messages || [])
      target.messages = mapped
-      target.inputTokens = detail.input_tokens
-      target.outputTokens = detail.output_tokens
      if (detail.title) target.title = detail.title
      persistActiveMessages()
      return true
@@ -531,8 +523,6 @@ export const useChatStore = defineStore('chat', () => {
        if (serverIsAhead) {
          activeSession.value.messages = mapped
        }
-        activeSession.value.inputTokens = detail.input_tokens
-        activeSession.value.outputTokens = detail.output_tokens
        // Update title: use Hermes title, or fallback to first user message
        if (detail.title) {
          activeSession.value.title = detail.title
@@ -557,6 +547,15 @@ export const useChatStore = defineStore('chat', () => {
    if (readInFlight(sessionId) && !streamStates.value.has(sessionId)) {
      startPolling(sessionId)
    }
+
+    // Fetch token usage for this session from web-ui DB.
+    // The session is looked up by id after the await instead of going through `activeSession`:
+    // while the request is in flight the active session may have changed or become empty,
+    // and the counters must land on the session they were fetched for.
+    try {
+      const usage = await fetchSessionUsageSingle(sessionId)
+      const target = usage ? sessions.value.find(s => s.id === sessionId) : undefined
+      if (usage && target) {
+        target.inputTokens = usage.input_tokens
+        target.outputTokens = usage.output_tokens
+      }
+    } catch { /* non-critical */ }
  }

  function newChat() {
@@ -785,9 +784,15 @@ export const useChatStore = defineStore('chat', () => {
              if (lastMsg?.isStreaming) {
                updateMessage(sid, lastMsg.id, { isStreaming: false })
              }
+              if (evt.usage) {
+                const target = sessions.value.find(s => s.id === sid)
+                if (target) {
+                  target.inputTokens = evt.usage.input_tokens
+                  target.outputTokens = evt.usage.output_tokens
+                }
+              }
              cleanup()
              updateSessionTitle(sid)
-              // IMPORTANT ordering: persist the final cache BEFORE clearing
              // the in-flight marker. If the browser is reloading right now
              // and kills us between the two localStorage writes, we want
              // the next page load to still see in-flight === true (so
@@ -1,6 +1,8 @@
 import * as hermesCli from '../../services/hermes/hermes-cli'
 import { getConversationDetail, listConversationSummaries } from '../../services/hermes/conversations'
-import { listSessionSummaries, searchSessionSummaries } from '../../services/hermes/sessions-db'
+import { listSessionSummaries, searchSessionSummaries } from '../../db/hermes/sessions-db'
+import { deleteUsage, getUsage, getUsageBatch } from '../../db/hermes/usage-store'
+import { getModelContextLength } from '../../services/hermes/model-context'
 import { logger } from '../../services/logger'

 function parseHumanOnly(value: unknown): boolean {
@@ -84,9 +86,29 @@ export async function remove(ctx: any) {
    ctx.body = { error: 'Failed to delete session' }
    return
  }
+  deleteUsage(ctx.params.id)
  ctx.body = { ok: true }
 }

+/**
+ * GET handler: respond with stored usage for the comma-separated session ids in `?ids=`.
+ * Responds with `{}` when no usable id is supplied.
+ */
+export async function usageBatch(ctx: any) {
+  // NOTE(review): query values are assumed to be a string, or an array of strings when the
+  // parameter is repeated (`?ids=a&ids=b`) — both forms are flattened before splitting.
+  const raw: unknown = ctx.query.ids
+  const joined = Array.isArray(raw) ? raw.join(',') : typeof raw === 'string' ? raw : ''
+  const idList = joined.split(',').map(id => id.trim()).filter(Boolean)
+  ctx.body = idList.length > 0 ? getUsageBatch(idList) : {}
+}
+
+/** GET handler: respond with the stored usage for `:id`, or zeroed counters when none is stored. */
+export async function usageSingle(ctx: any) {
+  const stored = getUsage(ctx.params.id)
+  ctx.body = stored ? stored : { input_tokens: 0, output_tokens: 0 }
+}
+
 export async function rename(ctx: any) {
  const { title } = ctx.request.body as { title?: string }
  if (!title || typeof title !== 'string') {
@@ -102,3 +124,8 @@ export async function rename(ctx: any) {
  }
  ctx.body = { ok: true }
 }
+
+/**
+ * GET handler: respond with `{ context_length }` as computed by `getModelContextLength`.
+ * The optional `?profile=` parameter is forwarded; when absent or empty, `undefined` is passed.
+ */
+export async function contextLength(ctx: any) {
+  // NOTE(review): a repeated query parameter is assumed to arrive as an array; its first
+  // entry is used rather than handing the array to a function that expects a string.
+  const raw: unknown = ctx.query.profile
+  const first = Array.isArray(raw) ? raw[0] : raw
+  const profile = typeof first === 'string' && first ? first : undefined
+  ctx.body = { context_length: getModelContextLength(profile) }
+}
@@ -1,4 +1,4 @@
-import { getActiveProfileDir } from './hermes-profile'
+import { getActiveProfileDir } from '../../services/hermes/hermes-profile'

 const SQLITE_AVAILABLE = (() => {
  const [major, minor] = process.versions.node.split('.').map(Number)
@@ -0,0 +1,75 @@
+import { isSqliteAvailable, ensureTable, getDb, jsonSet, jsonGet, jsonGetAll, jsonDelete } from '../index'
+
+// Table name in SQLite and namespace in the JSON fallback store.
+const TABLE = 'session_usage'
+
+// Column name → SQL column definition, handed to ensureTable.
+const SCHEMA = {
+  session_id: 'TEXT PRIMARY KEY',
+  input_tokens: 'INTEGER NOT NULL DEFAULT 0',
+  output_tokens: 'INTEGER NOT NULL DEFAULT 0',
+  updated_at: 'INTEGER NOT NULL',
+}
+
+/** Create or migrate the usage table when SQLite is available; otherwise do nothing. */
+export function initUsageStore(): void {
+  if (!isSqliteAvailable()) return
+  ensureTable(TABLE, SCHEMA)
+}
+
+/**
+ * Insert or overwrite the usage counters of a session and stamp the update time.
+ *
+ * Each count is first coerced to a non-negative integer (anything else becomes 0), so a
+ * malformed value is stored the same way by both backends instead of being rejected by
+ * SQLite while the JSON store accepts it.
+ *
+ * @param sessionId    Session the counters belong to.
+ * @param inputTokens  Input token count.
+ * @param outputTokens Output token count.
+ */
+export function updateUsage(sessionId: string, inputTokens: number, outputTokens: number): void {
+  const toCount = (value: number): number =>
+    Number.isFinite(value) && value > 0 ? Math.floor(value) : 0
+  const record = {
+    input_tokens: toCount(inputTokens),
+    output_tokens: toCount(outputTokens),
+    updated_at: Date.now(),
+  }
+
+  const db = isSqliteAvailable() ? getDb() : null
+  if (!db) {
+    jsonSet(TABLE, sessionId, record)
+    return
+  }
+  db.prepare(
+    `INSERT INTO ${TABLE} (session_id, input_tokens, output_tokens, updated_at)
+       VALUES (?, ?, ?, ?)
+       ON CONFLICT(session_id) DO UPDATE SET
+         input_tokens = excluded.input_tokens,
+         output_tokens = excluded.output_tokens,
+         updated_at = excluded.updated_at`,
+  ).run(sessionId, record.input_tokens, record.output_tokens, record.updated_at)
+}
+
+/**
+ * Look up the stored counters of one session.
+ *
+ * @returns The counters, or `undefined` when nothing is stored for `sessionId`.
+ *          In the JSON backend a missing counter field is reported as 0.
+ */
+export function getUsage(sessionId: string): { input_tokens: number; output_tokens: number } | undefined {
+  if (!isSqliteAvailable()) {
+    const stored = jsonGet(TABLE, sessionId)
+    return stored
+      ? { input_tokens: stored.input_tokens ?? 0, output_tokens: stored.output_tokens ?? 0 }
+      : undefined
+  }
+  const statement = getDb()!.prepare(
+    `SELECT input_tokens, output_tokens FROM ${TABLE} WHERE session_id = ?`,
+  )
+  return statement.get(sessionId) as { input_tokens: number; output_tokens: number } | undefined
+}
+
+/**
+ * Look up the stored counters of several sessions at once.
+ *
+ * Duplicate ids are collapsed, and the SQLite query is issued in chunks so that an
+ * arbitrarily long id list cannot exceed SQLite's limit on bound parameters per statement.
+ *
+ * @param sessionIds Ids to look up.
+ * @returns Counters keyed by session id; ids with nothing stored are absent from the result.
+ */
+export function getUsageBatch(
+  sessionIds: string[],
+): Record<string, { input_tokens: number; output_tokens: number }> {
+  const map: Record<string, { input_tokens: number; output_tokens: number }> = {}
+  const uniqueIds = Array.from(new Set(sessionIds))
+  if (uniqueIds.length === 0) return map
+
+  const db = isSqliteAvailable() ? getDb() : null
+  if (db) {
+    // Comfortably below 999, the smallest default parameter limit of SQLite builds.
+    const chunkSize = 500
+    for (let start = 0; start < uniqueIds.length; start += chunkSize) {
+      const chunk = uniqueIds.slice(start, start + chunkSize)
+      const placeholders = chunk.map(() => '?').join(',')
+      const rows = db.prepare(
+        `SELECT session_id, input_tokens, output_tokens FROM ${TABLE} WHERE session_id IN (${placeholders})`,
+      ).all(...chunk) as Array<{ session_id: string; input_tokens: number; output_tokens: number }>
+      for (const r of rows) map[r.session_id] = { input_tokens: r.input_tokens, output_tokens: r.output_tokens }
+    }
+    return map
+  }
+
+  const all = jsonGetAll(TABLE)
+  for (const id of uniqueIds) {
+    const row = all[id]
+    if (row) map[id] = { input_tokens: row.input_tokens ?? 0, output_tokens: row.output_tokens ?? 0 }
+  }
+  return map
+}
+
+/** Remove the stored counters of a session from whichever backend is in use. */
+export function deleteUsage(sessionId: string): void {
+  if (!isSqliteAvailable()) {
+    jsonDelete(TABLE, sessionId)
+    return
+  }
+  const statement = getDb()!.prepare(`DELETE FROM ${TABLE} WHERE session_id = ?`)
+  statement.run(sessionId)
+}
@@ -0,0 +1,136 @@
+import { DatabaseSync } from 'node:sqlite'
+import { mkdirSync, readFileSync, writeFileSync, existsSync } from 'fs'
+import { resolve } from 'path'
+import { homedir } from 'os'
+
+const DB_DIR = resolve(homedir(), '.hermes-web-ui')
+const DB_PATH = resolve(DB_DIR, 'hermes-web-ui.db')
+const JSON_PATH = resolve(DB_DIR, 'hermes-web-ui.json')
+
+// --- SQLite availability check ---
+
+// Evaluated once at load time: true when the running Node.js version is 22.5 or newer.
+const SQLITE_AVAILABLE = (() => {
+  const parts = process.versions.node.split('.')
+  const major = Number(parts[0])
+  const minor = Number(parts[1])
+  if (major !== 22) return major > 22
+  return minor >= 5
+})()
+
+/** Report whether the SQLite backend is used (as opposed to the JSON fallback). */
+export function isSqliteAvailable(): boolean {
+  return SQLITE_AVAILABLE
+}
+
+// --- SQLite backend ---
+
+// Connection shared by every caller in this process; opened on first use.
+let _db: DatabaseSync | null = null
+
+/**
+ * Return the shared SQLite connection, opening and configuring it on the first call.
+ *
+ * @returns The open database, or `null` when the version check says SQLite is unavailable.
+ */
+export function getDb(): DatabaseSync | null {
+  if (!SQLITE_AVAILABLE) return null
+  if (_db) return _db
+
+  // Resolve `node:sqlite` only here, behind the version check. Using the class from an eager
+  // module-scope import makes loading this file fail on runtimes without the module, which
+  // would put the JSON fallback out of reach.
+  // NOTE(review): assumes a CommonJS build where `require` exists — confirm. The file-level
+  // `import { DatabaseSync }` is now needed for its type only and should become `import type`.
+  const sqlite = require('node:sqlite') as typeof import('node:sqlite')
+
+  mkdirSync(DB_DIR, { recursive: true })
+  const db = new sqlite.DatabaseSync(DB_PATH)
+  try {
+    db.exec('PRAGMA journal_mode=WAL')
+    db.exec('PRAGMA foreign_keys=ON')
+  } catch (err) {
+    // Do not keep (or leak) a connection that could not be configured.
+    db.close()
+    throw err
+  }
+  // Published only after configuration succeeded, so a failure is retried on the next call.
+  _db = db
+  return _db
+}
+
+/**
+ * Bring a table in line with `schema` (column name → SQL column definition).
+ *
+ * Steps, in order:
+ * 1. CREATE TABLE IF NOT EXISTS with every column of `schema`.
+ * 2. ALTER TABLE ADD COLUMN for each schema column the table lacks.
+ * 3. ALTER TABLE DROP COLUMN (SQLite 3.35+) for each table column absent from `schema`.
+ *
+ * Names and definitions are interpolated into the SQL text as given.
+ * Does nothing when SQLite is not available.
+ */
+export function ensureTable(tableName: string, schema: Record<string, string>): void {
+  const database = getDb()
+  if (!database) return
+
+  const wanted = Object.keys(schema)
+  const definitions = wanted.map(column => `"${column}" ${schema[column]}`).join(', ')
+  database.exec(`CREATE TABLE IF NOT EXISTS "${tableName}" (${definitions})`)
+
+  const info = database.prepare(`PRAGMA table_info("${tableName}")`).all() as Array<{ name: string }>
+  const present = new Set(info.map(row => row.name))
+
+  // Additions first, then removals.
+  wanted
+    .filter(column => !present.has(column))
+    .forEach(column => database.exec(`ALTER TABLE "${tableName}" ADD COLUMN "${column}" ${schema[column]}`))
+
+  const wantedSet = new Set(wanted)
+  Array.from(present)
+    .filter(column => !wantedSet.has(column))
+    .forEach(column => database.exec(`ALTER TABLE "${tableName}" DROP COLUMN "${column}"`))
+}
+
+// --- JSON fallback backend ---
+
+// Shape of the JSON fallback file: table name → primary key → record.
+type JsonData = Record<string, Record<string, Record<string, any>>>
+
+/**
+ * Load the whole JSON store from disk.
+ *
+ * @returns The parsed store, or an empty store when the file is missing, cannot be read or
+ *          parsed, or holds JSON whose top level is not a plain object (such as `null` or an
+ *          array), which callers could not index by table name.
+ */
+function readJsonStore(): JsonData {
+  if (!existsSync(JSON_PATH)) return {}
+  try {
+    const parsed: unknown = JSON.parse(readFileSync(JSON_PATH, 'utf-8'))
+    const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
+    return isPlainObject ? (parsed as JsonData) : {}
+  } catch {
+    return {}
+  }
+}
+
+/** Write the whole store to JSON_PATH as 2-space-indented JSON, creating DB_DIR if needed. */
+function writeJsonStore(data: JsonData): void {
+  mkdirSync(DB_DIR, { recursive: true })
+  const serialized = JSON.stringify(data, null, 2)
+  writeFileSync(JSON_PATH, serialized, 'utf-8')
+}
+
+/**
+ * Read one record from the JSON store.
+ * @param table  Table name (namespace)
+ * @param key    Primary key
+ * @returns The record, or `undefined` when the table or the key is absent.
+ */
+export function jsonGet(table: string, key: string): Record<string, any> | undefined {
+  const rows = readJsonStore()[table]
+  return rows?.[key]
+}
+
+/**
+ * Store a record in the JSON store, replacing any record already under the same key.
+ * The whole file is read, modified and written back.
+ * @param table  Table name (namespace); created when missing
+ * @param key    Primary key
+ * @param value  Record data
+ */
+export function jsonSet(table: string, key: string, value: Record<string, any>): void {
+  const store = readJsonStore()
+  const rows = store[table] || (store[table] = {})
+  rows[key] = value
+  writeJsonStore(store)
+}
+
+/**
+ * Read every record of a table from the JSON store.
+ * @returns Records keyed by primary key; an empty object when the table is absent.
+ */
+export function jsonGetAll(table: string): Record<string, Record<string, any>> {
+  const rows = readJsonStore()[table]
+  return rows ? rows : {}
+}
+
+/**
+ * Remove a record from the JSON store.
+ * Nothing is written when the table is absent; when the table exists the file is
+ * rewritten whether or not the key was present.
+ */
+export function jsonDelete(table: string, key: string): void {
+  const store = readJsonStore()
+  const rows = store[table]
+  if (!rows) return
+  delete rows[key]
+  writeJsonStore(store)
+}
+
+/**
+ * Path of the file backing the active backend (for debugging):
+ * the SQLite database when SQLite is available, otherwise the JSON file.
+ */
+export function getStoragePath(): string {
+  if (SQLITE_AVAILABLE) return DB_PATH
+  return JSON_PATH
+}
@@ -20,7 +20,7 @@ import { logger } from './services/logger'
 declare const __APP_VERSION__: string
 const APP_VERSION = typeof __APP_VERSION__ !== 'undefined'
  ? __APP_VERSION__
-  : (() => { try { return JSON.parse(readFileSync(resolve(__dirname, '../../package.json'), 'utf-8')).version } catch { return 'dev' } } )()
+  : (() => { try { return JSON.parse(readFileSync(resolve(__dirname, '../../package.json'), 'utf-8')).version } catch { return 'dev' } })()

 // Global error handlers
 process.on('uncaughtException', (err) => {
@@ -44,6 +44,12 @@ export async function bootstrap() {

  await initGatewayManager()
  console.log('[bootstrap] gateway manager initialized')
+
+  // Initialize web-ui SQLite tables
+  const { initUsageStore } = await import('./db/hermes/usage-store')
+  initUsageStore()
+  console.log('[bootstrap] usage store initialized')
+
  app.use(cors({ origin: config.corsOrigins }))
  app.use(bodyParser())
  console.log('[bootstrap] cors + bodyParser registered')
@@ -1,9 +1,26 @@
 import type { Context } from 'koa'
 import { config } from '../../config'
 import { getGatewayManagerInstance } from '../../services/gateway-bootstrap'
+import { updateUsage } from '../../db/hermes/usage-store'

 function getGatewayManager() { return getGatewayManagerInstance() }

+// --- run_id → session_id mapping (in-memory, ephemeral) ---
+
+// run_id → session_id for runs registered through setRunSession (in memory only).
+const runSessionMap = new Map<string, string>()
+// Pending expiry timer per run_id, so registering a run again replaces its timer.
+const runSessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
+// Mappings are dropped 30 minutes after they were last registered.
+const RUN_SESSION_TTL_MS = 30 * 60 * 1000
+
+/**
+ * Remember which session a run belongs to, for RUN_SESSION_TTL_MS.
+ *
+ * @param runId     Run identifier.
+ * @param sessionId Session the run was started for.
+ */
+export function setRunSession(runId: string, sessionId: string): void {
+  // Cancel an earlier expiry for the same run; otherwise it would delete this newer mapping early.
+  const previous = runSessionTimers.get(runId)
+  if (previous) clearTimeout(previous)
+
+  runSessionMap.set(runId, sessionId)
+  const timer = setTimeout(() => {
+    runSessionMap.delete(runId)
+    runSessionTimers.delete(runId)
+  }, RUN_SESSION_TTL_MS)
+  // A pending expiry must not keep the process alive on its own.
+  timer.unref()
+  runSessionTimers.set(runId, timer)
+}
+
+/** Return the session registered for `runId`, or `undefined` when none is known. */
+function getSessionForRun(runId: string): string | undefined {
+  return runSessionMap.get(runId)
+}
+
+// --- Helpers ---
+
 function isTransientGatewayError(err: any): boolean {
  const msg = String(err?.message || '')
  const causeCode = String(err?.cause?.code || '')
@@ -48,19 +65,7 @@ function resolveUpstream(ctx: Context): string {
  return config.upstream.replace(/\/$/, '')
 }

-export async function proxy(ctx: Context) {
-  const profile = resolveProfile(ctx)
-  const upstream = resolveUpstream(ctx)
-  // Rewrite path for upstream gateway:
-  //   /api/hermes/v1/* -> /v1/*  (upstream uses /v1/ prefix)
-  //   /api/hermes/*     -> /api/* (upstream uses /api/ prefix)
-  const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
-  const params = new URLSearchParams(ctx.search || '')
-  params.delete('token')
-  const search = params.toString()
-  const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
-
-  // Build headers — forward most, strip browser/web-ui specific ones
+function buildProxyHeaders(ctx: Context, upstream: string): Record<string, string> {
  const headers: Record<string, string> = {}
  for (const [key, value] of Object.entries(ctx.headers)) {
    if (value == null) continue
@@ -75,33 +80,118 @@ export async function proxy(ctx: Context) {
    }
  }

-  // Inject Hermes gateway API key from profile's .env
  const mgr = getGatewayManager()
  if (mgr) {
-    const apiKey = mgr.getApiKey(profile)
+    const apiKey = mgr.getApiKey(resolveProfile(ctx))
    if (apiKey) {
      headers['authorization'] = `Bearer ${apiKey}`
    }
  }

+  return headers
+}
+
+// --- SSE stream interception ---
+
+const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
+
+/**
+ * Scan a piece of decoded SSE text for a run.completed event and persist its token usage.
+ *
+ * The text is split on "\n" and only lines that begin with "data: " are considered; the rest
+ * of such a line is parsed as JSON. When the payload has event "run.completed", a truthy
+ * usage object and a run_id that maps to a known session, the usage is stored via
+ * updateUsage and scanning stops. Errors raised while handling a line are swallowed.
+ *
+ * @param chunk Decoded SSE text containing one or more lines.
+ * @returns The run_id of the first event whose usage was stored, otherwise null.
+ */
+function extractRunCompletedFromChunk(chunk: string): string | null {
+  // Payload lines have the form: data: {...}
+  const lines = chunk.split('\n')
+  for (const line of lines) {
+    // Anything without the exact "data: " prefix is ignored
+    if (!line.startsWith('data: ')) continue
    try {
-    // Build request body from raw body
-    let body: string | undefined
-    if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
-      body = (ctx as any).request.rawBody as string | undefined
+      // 6 === 'data: '.length
+      const data = JSON.parse(line.slice(6))
+      if (data.event === 'run.completed' && data.usage && data.run_id) {
+        // Only runs registered through setRunSession can be attributed to a session
+        const sessionId = getSessionForRun(data.run_id)
+        if (sessionId) {
+          updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
+          return data.run_id
+        }
+      }
+    } catch { /* not JSON, skip */ }
+  }
+  return null
+}
+
+/**
+ * Relay an upstream SSE response to the client unchanged while scanning it for
+ * run.completed events.
+ *
+ * Bytes are written to the client as soon as they arrive. In parallel the stream is decoded
+ * and handed to extractRunCompletedFromChunk one complete line at a time. Splitting on single
+ * newlines, rather than waiting for a blank "\n\n" separator, means streams with CRLF line
+ * endings are intercepted as well and at most one partial line is buffered.
+ *
+ * @param ctx Koa context; bytes go to the raw `ctx.res`, which is always ended on exit.
+ * @param res Upstream fetch response to relay.
+ */
+async function streamSSE(ctx: Context, res: Response): Promise<void> {
+  if (!res.body) {
+    ctx.res.end()
+    return
  }

-    const requestInit: RequestInit = {
-      method: ctx.req.method,
-      headers,
-      body,
+  const reader = res.body.getReader()
+  const decoder = new TextDecoder()
+  // Decoded text after the last newline seen so far, i.e. a line that is still incomplete.
+  let buffer = ''
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read()
+      if (done) break
+
+      // Forward raw bytes to client immediately
+      ctx.res.write(value)
+
+      // Also decode for interception
+      buffer += decoder.decode(value, { stream: true })
+
+      // Split off everything up to the last newline; the remainder stays buffered.
+      // Without any newline, lastIndexOf yields -1 and slice(0) keeps the buffer whole.
+      const lastNewline = buffer.lastIndexOf('\n')
+      const completeLines = lastNewline === -1 ? [] : buffer.slice(0, lastNewline).split('\n')
+      buffer = buffer.slice(lastNewline + 1)
+      for (const line of completeLines) {
+        extractRunCompletedFromChunk(line)
      }
+    }
+
+    // Flush bytes the decoder was still holding, then handle a final unterminated line
+    buffer += decoder.decode()
+    if (buffer.trim()) {
+      extractRunCompletedFromChunk(buffer)
+    }
+  } finally {
+    ctx.res.end()
+  }
+}
+
+// --- Main proxy function ---
+
+export async function proxy(ctx: Context) {
+  const profile = resolveProfile(ctx)
+  const upstream = resolveUpstream(ctx)
+  const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
+  const params = new URLSearchParams(ctx.search || '')
+  params.delete('token')
+  const search = params.toString()
+  const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
+
+  const headers = buildProxyHeaders(ctx, upstream)
+
+  try {
+    let body: string | undefined
+    if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
+      // @koa/bodyparser parses JSON into ctx.request.body but doesn't store rawBody
+      // by default. Re-serialize the parsed body to get the string form.
+      const parsed = (ctx as any).request.body
+      if (typeof parsed === 'string') {
+        body = parsed
+      } else if (parsed && typeof parsed === 'object') {
+        body = JSON.stringify(parsed)
+      }
+    }
+
+    const requestInit: RequestInit = { method: ctx.req.method, headers, body }

    let res: Response
    try {
      res = await fetch(url, requestInit)
    } catch (err: any) {
-      // Gateway may be restarting; wait briefly and retry once.
      if (isTransientGatewayError(err) && await waitForGatewayReady(upstream)) {
        res = await fetch(url, requestInit)
      } else {
@@ -116,10 +206,37 @@ export async function proxy(ctx: Context) {
        ctx.set(key, value)
      }
    })
-
    ctx.status = res.status

-    // Stream response body
+    // Intercept POST /v1/runs to capture run_id → session_id mapping
+    if (ctx.req.method === 'POST' && /\/v1\/runs$/.test(upstreamPath) && body) {
+      try {
+        const parsed = JSON.parse(body)
+        if (parsed.session_id) {
+          const resBody = await res.text()
+          ctx.res.write(resBody)
+          ctx.res.end()
+
+          try {
+            const result = JSON.parse(resBody)
+            if (result.run_id) {
+              setRunSession(result.run_id, parsed.session_id)
+            }
+          } catch { /* response not JSON, ignore */ }
+          return
+        }
+      } catch { /* body not JSON, fall through to normal stream */ }
+      // No session_id in body — fall through to normal response handling below
+    }
+
+    // Intercept SSE streams for /v1/runs/{id}/events
+    const sseMatch = upstreamPath.match(SSE_EVENTS_PATH)
+    if (sseMatch) {
+      await streamSSE(ctx, res)
+      return
+    }
+
+    // Default: pipe response body directly
    if (res.body) {
      const reader = res.body.getReader()
      const pump = async () => {
@@ -8,6 +8,9 @@ sessionRoutes.get('/api/hermes/sessions/conversations/:id/messages', ctrl.getCon
 sessionRoutes.get('/api/hermes/sessions', ctrl.list)
 sessionRoutes.get('/api/hermes/search/sessions', ctrl.search)
 sessionRoutes.get('/api/hermes/sessions/search', ctrl.search)
+sessionRoutes.get('/api/hermes/sessions/usage', ctrl.usageBatch) // literal path: registered ahead of '/sessions/:id' so "usage" is not captured as an :id (that ordering bug produced 404s)
+sessionRoutes.get('/api/hermes/sessions/context-length', ctrl.contextLength) // literal path: likewise kept ahead of '/sessions/:id'
 sessionRoutes.get('/api/hermes/sessions/:id', ctrl.get)
+sessionRoutes.get('/api/hermes/sessions/:id/usage', ctrl.usageSingle) // per-session usage; has an extra path segment, so it does not collide with '/sessions/:id'
 sessionRoutes.delete('/api/hermes/sessions/:id', ctrl.remove)
 sessionRoutes.post('/api/hermes/sessions/:id/rename', ctrl.rename)
@@ -0,0 +1,106 @@
+import { resolve, join } from 'path'
+import { homedir } from 'os'
+import { readFileSync, existsSync, statSync } from 'fs'
+
+const HERMES_BASE = resolve(homedir(), '.hermes') // ~/.hermes; also the directory used for the 'default' profile
+const MODELS_DEV_CACHE = resolve(HERMES_BASE, 'models_dev_cache.json') // model metadata file that per-model context limits are read from
+const DEFAULT_CONTEXT_LENGTH = 200_000 // returned when the profile's model or its context limit cannot be resolved
+
+interface ModelLimit { // limits recorded for one model in models_dev_cache.json
+  context?: number // context length; the only limit this module reads
+  output?: number // not read in this module; presumably the max output size, confirm against the cache schema
+  input?: number // not read in this module; presumably the max input size, confirm against the cache schema
+}
+
+interface ModelEntry { // one model record; only `limit` is consumed here
+  id?: string // not read in this module; lookups go through the key of the enclosing `models` map
+  limit?: ModelLimit
+}
+
+interface ProviderEntry { // one top-level value of the cache file
+  models?: Record<string, ModelEntry> // keyed by model name; may be absent
+}
+
+// --- In-memory cache: parsed models_dev_cache (1.7MB), invalidated by mtime ---
+
+let _cache: Record<string, ProviderEntry> | null = null // last successfully parsed file contents; null until the first successful load
+let _cacheMtime = 0 // mtimeMs of the file at the time _cache was parsed
+const CACHE_TTL_MS = 5 * 60 * 1000 // 5 minutes; after this the file is re-read even if its mtime is unchanged
+let _cacheLoadedAt = 0 // Date.now() captured when _cache was parsed
+
+/**
+ * Return the parsed contents of models_dev_cache.json.
+ *
+ * The parsed object is kept in module state and reused while the file's mtime
+ * is unchanged and the copy is younger than CACHE_TTL_MS; otherwise the file is
+ * read and parsed again.
+ *
+ * @returns the parsed cache; null when the file does not exist; the previous
+ *   in-memory copy (possibly null) when stat/read/parse throws.
+ */
+function loadModelsDevCache(): Record<string, ProviderEntry> | null {
+  if (!existsSync(MODELS_DEV_CACHE)) {
+    return null
+  }
+
+  try {
+    const { mtimeMs } = statSync(MODELS_DEV_CACHE)
+    const loadTime = Date.now()
+
+    const unchanged = mtimeMs === _cacheMtime
+    const withinTtl = loadTime - _cacheLoadedAt < CACHE_TTL_MS
+    if (_cache && unchanged && withinTtl) {
+      return _cache
+    }
+
+    // Parse first, publish second: a parse failure leaves the old copy intact.
+    const parsed = JSON.parse(readFileSync(MODELS_DEV_CACHE, 'utf-8')) as Record<string, ProviderEntry>
+    _cache = parsed
+    _cacheMtime = mtimeMs
+    _cacheLoadedAt = loadTime
+    return parsed
+  } catch {
+    // Best effort: serve whatever was loaded earlier rather than failing.
+    return _cache
+  }
+}
+
+// --- Profile helpers ---
+
+/**
+ * Resolve the Hermes directory that holds a profile's files.
+ *
+ * An omitted or empty profile and the literal name 'default' map to the base
+ * directory. Any other name maps to `<base>/profiles/<profile>`, falling back
+ * to the base directory when that folder does not exist.
+ *
+ * NOTE(review): `profile` is joined into the path without validation; confirm
+ * that callers only pass trusted profile names.
+ */
+function getProfileDir(profile?: string): string {
+  if (profile && profile !== 'default') {
+    const candidate = join(HERMES_BASE, 'profiles', profile)
+    if (existsSync(candidate)) return candidate
+  }
+  return HERMES_BASE
+}
+
+/**
+ * Read the default model name from `<profileDir>/config.yaml`.
+ *
+ * Only the shape
+ *
+ *   model:
+ *     default: <name>
+ *
+ * is recognised (a regex match, not a YAML parse). Surrounding single or
+ * double quotes and a trailing `# comment` are stripped from the value, so the
+ * result can be used directly as a lookup key.
+ *
+ * @param profileDir directory expected to contain config.yaml
+ * @returns the model name, or null when the file is missing or unreadable, the
+ *   key is absent, or its value is empty
+ */
+function getDefaultModel(profileDir: string): string | null {
+  const configPath = join(profileDir, 'config.yaml')
+  if (!existsSync(configPath)) return null
+  try {
+    const content = readFileSync(configPath, 'utf-8')
+    // `[ \t]*` (not `\s*`) after "default:" keeps an empty value from
+    // swallowing the newline and capturing the following line instead.
+    const match = content.match(/^model:\s*\n\s+default:[ \t]*(.*)$/m)
+    if (!match) return null
+
+    const raw = match[1].trim()
+    // Quoted scalar: take what is between the quotes, ignore anything after.
+    const quoted = raw.match(/^(["'])(.*?)\1/)
+    // Plain scalar: a '#' at the start or after whitespace begins a comment.
+    const value = quoted ? quoted[2].trim() : raw.replace(/(^|\s)#.*$/, '').trim()
+    return value || null
+  } catch {
+    return null
+  }
+}
+
+// --- Context lookup ---
+
+/**
+ * Find a model's context limit in the models cache.
+ *
+ * Every provider is scanned for an exact key match first. Only if none yields
+ * a truthy `limit.context` is a second, case-insensitive scan made.
+ *
+ * @param modelName model key to look for
+ * @returns the first truthy context limit found, or null when the cache is
+ *   unavailable or no matching model carries one
+ */
+function lookupContextFromCache(modelName: string): number | null {
+  const data = loadModelsDevCache()
+  if (!data) return null
+
+  const providers = Object.values(data)
+
+  // Pass 1: exact key.
+  for (const provider of providers) {
+    const exact = (provider.models || {})[modelName]?.limit?.context
+    if (exact) return exact
+  }
+
+  // Pass 2: same key ignoring case.
+  const wanted = modelName.toLowerCase()
+  for (const provider of providers) {
+    for (const [candidate, entry] of Object.entries(provider.models || {})) {
+      if (candidate.toLowerCase() !== wanted) continue
+      const context = entry?.limit?.context
+      if (context) return context
+    }
+  }
+
+  return null
+}
+
+/**
+ * Get the context length for a profile's default model.
+ *
+ * The model name comes from the profile's config.yaml, which is read on each
+ * call. The limit comes from the models cache, which is held in memory (5min
+ * TTL, refreshed when the file's mtime changes).
+ *
+ * @param profile profile name; omitted or 'default' selects the base directory
+ * @returns the model's context limit, or DEFAULT_CONTEXT_LENGTH when the model
+ *   or its limit cannot be resolved
+ */
+export function getModelContextLength(profile?: string): number {
+  const model = getDefaultModel(getProfileDir(profile))
+  const resolved = model ? lookupContextFromCache(model) : null
+  return resolved || DEFAULT_CONTEXT_LENGTH
+}
@@ -0,0 +1,116 @@
+import { describe, it, expect, vi } from 'vitest'
+
+// Force JSON fallback by mocking isSqliteAvailable
+vi.mock('../../packages/server/src/db/index', async (importOriginal) => {
+  const actual = await importOriginal() as any
+  return {
+    ...actual,
+    isSqliteAvailable: () => false,
+    getDb: () => null,
+  }
+})
+
+import {
+  jsonGet,
+  jsonSet,
+  jsonGetAll,
+  jsonDelete,
+} from '../../packages/server/src/db/index'
+
+describe('JSON fallback store', () => {
+  // Export smoke test only: nothing is written or read back here, so the title
+  // must not promise a round-trip.
+  it('exports the JSON fallback helpers as functions', () => {
+    expect(typeof jsonSet).toBe('function')
+    expect(typeof jsonGet).toBe('function')
+    expect(typeof jsonGetAll).toBe('function')
+    expect(typeof jsonDelete).toBe('function')
+  })
+})
+
+// Test ensureTable with a real in-memory SQLite (Node 22+)
+describe('SQLite ensureTable', () => {
+  it('creates table with correct columns and handles migration', () => {
+    // This test requires Node 22.5+ for node:sqlite
+    const nodeVersion = process.versions.node.split('.').map(Number)
+    const isAvailable = nodeVersion[0] > 22 || (nodeVersion[0] === 22 && nodeVersion[1] >= 5)
+
+    if (!isAvailable) {
+      console.log('Skipping SQLite test — Node < 22.5')
+      return
+    }
+
+    const { DatabaseSync } = require('node:sqlite')
+    const db = new DatabaseSync(':memory:')
+
+    // Simulate ensureTable logic
+    // Create the table if missing, then add columns the schema gained and
+    // drop columns it no longer lists.
+    function ensureTable(tableName: string, schema: Record<string, string>): void {
+      const wanted = Object.keys(schema)
+      const columnSql = wanted.map((col) => `"${col}" ${schema[col]}`).join(', ')
+      db.exec(`CREATE TABLE IF NOT EXISTS "${tableName}" (${columnSql})`)
+
+      // Snapshot the live columns before altering anything.
+      const info = db.prepare(`PRAGMA table_info("${tableName}")`).all() as Array<{ name: string }>
+      const present = info.map((row) => row.name)
+
+      for (const col of wanted.filter((name) => !present.includes(name))) {
+        db.exec(`ALTER TABLE "${tableName}" ADD COLUMN "${col}" ${schema[col]}`)
+      }
+      for (const col of present.filter((name) => !wanted.includes(name))) {
+        db.exec(`ALTER TABLE "${tableName}" DROP COLUMN "${col}"`)
+      }
+    }
+
+    // Initial schema
+    const schema: Record<string, string> = {
+      session_id: 'TEXT PRIMARY KEY',
+      input_tokens: 'INTEGER NOT NULL DEFAULT 0',
+      output_tokens: 'INTEGER NOT NULL DEFAULT 0',
+      updated_at: 'INTEGER NOT NULL',
+    }
+    ensureTable('session_usage', schema)
+
+    // Verify columns
+    const cols = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
+    const colNames = cols.map(c => c.name)
+    expect(colNames).toContain('session_id')
+    expect(colNames).toContain('input_tokens')
+    expect(colNames).toContain('output_tokens')
+    expect(colNames).toContain('updated_at')
+
+    // Add a column
+    schema['cost_usd'] = 'REAL DEFAULT 0'
+    ensureTable('session_usage', schema)
+    const cols2 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
+    const colNames2 = cols2.map(c => c.name)
+    expect(colNames2).toContain('cost_usd')
+
+    // Remove a column
+    delete schema['cost_usd']
+    ensureTable('session_usage', schema)
+    const cols3 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
+    const colNames3 = cols3.map(c => c.name)
+    expect(colNames3).not.toContain('cost_usd')
+
+    // Verify INSERT works
+    db.prepare(
+      `INSERT INTO session_usage (session_id, input_tokens, output_tokens, updated_at)
+       VALUES (?, ?, ?, ?)`,
+    ).run('test-session', 100, 50, Date.now())
+
+    const row = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session') as any
+    expect(row.session_id).toBe('test-session')
+    expect(row.input_tokens).toBe(100)
+    expect(row.output_tokens).toBe(50)
+
+    // Verify DELETE works
+    db.prepare('DELETE FROM session_usage WHERE session_id = ?').run('test-session')
+    const deleted = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session')
+    expect(deleted).toBeUndefined()
+
+    db.close()
+  })
+})
@@ -9,10 +9,18 @@ vi.mock('../../packages/server/src/services/gateway-bootstrap', () => ({
  getGatewayManagerInstance: () => null,
 }))

+// Mock updateUsage so we can assert calls without real DB
+const { mockUpdateUsage } = vi.hoisted(() => ({ // vi.hoisted: the spy must exist before the hoisted vi.mock factory below runs
+  mockUpdateUsage: vi.fn(),
+}))
+vi.mock('../../packages/server/src/db/hermes/usage-store', () => ({ // replaces the whole usage-store module; only updateUsage is provided
+  updateUsage: mockUpdateUsage,
+}))
+
 const mockFetch = vi.fn()
 vi.stubGlobal('fetch', mockFetch)

-import { proxy } from '../../packages/server/src/routes/hermes/proxy-handler'
+import { proxy, setRunSession } from '../../packages/server/src/routes/hermes/proxy-handler'

 function createMockCtx(overrides: Record<string, any> = {}) {
  const ctx: any = {
@@ -42,6 +50,25 @@ function createMockCtx(overrides: Record<string, any> = {}) {
  return ctx
 }

+/**
+ * Helper: build a ReadableStream that yields the given strings one at a time.
+ * Each string is UTF-8 encoded and enqueued as its own Uint8Array chunk on a
+ * separate pull; the stream closes once the list is exhausted.
+ */
+function createSSEBody(events: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder()
+  const pending = events[Symbol.iterator]()
+  return new ReadableStream<Uint8Array>({
+    pull(controller) {
+      const next = pending.next()
+      if (next.done) {
+        controller.close()
+        return
+      }
+      controller.enqueue(encoder.encode(next.value))
+    },
+  })
+}
+
 describe('Proxy Handler', () => {
  beforeEach(() => {
    vi.clearAllMocks()
@@ -130,9 +157,6 @@ describe('Proxy Handler', () => {
  })

  it('returns 502 on connection failure', async () => {
-    // waitForGatewayReady loops calling fetch(healthUrl) until res.ok or timeout.
-    // Return ok:true for health checks so the loop exits immediately (gateway
-    // "ready"), then the retry fetch also fails with ECONNREFUSED → 502.
    mockFetch.mockImplementation((url: string) => {
      if (typeof url === 'string' && url.includes('/health')) {
        return Promise.resolve({ ok: true })
@@ -161,3 +185,233 @@ describe('Proxy Handler', () => {
    expect(ctx.status).toBe(404)
  })
 })
+
+describe('POST /v1/runs — session_id capture', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('captures run_id → session_id mapping from POST /v1/runs', async () => {
+    const runId = 'run-abc-123'
+    const sessionId = 'session-xyz'
+    const responseBody = JSON.stringify({ run_id: runId, status: 'queued' })
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      text: () => Promise.resolve(responseBody),
+      body: null,
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: {
+        body: { session_id: sessionId, input: 'hello', model: 'gpt-4' },
+      },
+    })
+
+    await proxy(ctx)
+
+    // Verify the response was forwarded to client
+    expect(ctx.res.write).toHaveBeenCalledWith(responseBody)
+    expect(ctx.res.end).toHaveBeenCalled()
+
+    // Verify the mapping itself: it is only observable through its effect, so
+    // replay a run.completed event for the same run and check that usage is
+    // attributed to the session id sent in the POST body.
+    const completed = { event: 'run.completed', run_id: runId, usage: { input_tokens: 7, output_tokens: 3, total_tokens: 10 } }
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody([`data: ${JSON.stringify(completed)}\n\n`]),
+    })
+
+    await proxy(createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    }))
+
+    expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 7, 3)
+  })
+
+  it('falls through to normal stream when POST body has no session_id', async () => {
+    const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      text: () => Promise.resolve(responseBody),
+      body: null,
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: { body: { input: 'hello' } }, // no session_id
+    })
+
+    await proxy(ctx)
+
+    // Should still forward the response
+    expect(ctx.res.end).toHaveBeenCalled()
+  })
+
+  it('serializes parsed JSON body when rawBody is not available', async () => {
+    const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'application/json' }),
+      body: {
+        getReader: () => {
+          const encoder = new TextEncoder()
+          let done = false
+          return {
+            read: () => {
+              if (done) return Promise.resolve({ done: true, value: undefined })
+              done = true
+              return Promise.resolve({ done: false, value: encoder.encode(responseBody) })
+            },
+          }
+        },
+      },
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs',
+      req: { method: 'POST' },
+      request: { body: { session_id: 's1', input: 'test' } },
+    })
+
+    await proxy(ctx)
+
+    // Verify fetch was called with stringified body
+    const [, options] = mockFetch.mock.calls[0]
+    expect(typeof options.body).toBe('string')
+    const parsed = JSON.parse(options.body)
+    expect(parsed.session_id).toBe('s1')
+    expect(parsed.input).toBe('test')
+  })
+})
+
+describe('SSE stream interception — run.completed', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('intercepts run.completed and calls updateUsage', async () => {
+    const runId = 'run-test-1'
+    const sessionId = 'session-test-1'
+
+    // Pre-populate the run → session mapping
+    setRunSession(runId, sessionId)
+
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hello' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 13949, output_tokens: 45, total_tokens: 13994 } })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: `?token=test&profile=default`,
+    })
+
+    await proxy(ctx)
+
+    // Verify updateUsage was called with correct values
+    expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 13949, 45)
+    // Verify SSE data was forwarded to client
+    expect(ctx.res.write).toHaveBeenCalled()
+    expect(ctx.res.end).toHaveBeenCalled()
+  })
+
+  it('does not call updateUsage when no mapping exists', async () => {
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: 'unknown-run', usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 } })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: '/api/hermes/v1/runs/unknown-run/events',
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).not.toHaveBeenCalled()
+  })
+
+  it('does not call updateUsage for non-run.completed events', async () => {
+    const runId = 'run-no-complete'
+    setRunSession(runId, 'session-x')
+
+    const sseData = [
+      `data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hi' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.failed', run_id: runId, error: 'timeout' })}\n\n`,
+    ]
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody(sseData),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).not.toHaveBeenCalled()
+  })
+
+  it('handles SSE with multiple events in a single chunk', async () => {
+    const runId = 'run-multi'
+    setRunSession(runId, 'session-multi')
+
+    // All events in one chunk
+    const singleChunk = [
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'A' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'B' })}\n\n`,
+      `data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 500, output_tokens: 100, total_tokens: 600 } })}\n\n`,
+    ].join('')
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody([singleChunk]),
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).toHaveBeenCalledWith('session-multi', 500, 100)
+  })
+
+  it('handles SSE split across multiple chunks', async () => {
+    const runId = 'run-split'
+    setRunSession(runId, 'session-split') // pre-register the run → session mapping the interceptor looks up
+
+    const completedJson = JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 200, output_tokens: 50, total_tokens: 250 } })
+    const sseEvent = `data: ${completedJson}\n\n`
+
+    // Split the event across two chunks: the cut at offset 30 falls inside the JSON payload, so neither chunk alone holds a complete event
+    const chunk1 = sseEvent.slice(0, 30)
+    const chunk2 = sseEvent.slice(30)
+
+    mockFetch.mockResolvedValue({
+      status: 200,
+      headers: new Headers({ 'content-type': 'text/event-stream' }),
+      body: createSSEBody([chunk1, chunk2]), // delivered as two separate reads
+    })
+
+    const ctx = createMockCtx({
+      path: `/api/hermes/v1/runs/${runId}/events`,
+      search: '',
+    })
+
+    await proxy(ctx)
+
+    expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50) // only passes if chunk1 was buffered until chunk2 completed the event
+  })
+})
@@ -63,7 +63,7 @@ describe('session DB summaries', () => {
      },
    ])

-    const mod = await import('../../packages/server/src/services/hermes/sessions-db')
+    const mod = await import('../../packages/server/src/db/hermes/sessions-db')
    const rows = await mod.listSessionSummaries(undefined, 50)

    expect(databaseSyncMock).toHaveBeenCalledWith('/tmp/hermes-profile/state.db', { open: true, readOnly: true })
@@ -124,7 +124,7 @@ describe('session DB summaries', () => {
      },
    ])

-    const mod = await import('../../packages/server/src/services/hermes/sessions-db')
+    const mod = await import('../../packages/server/src/db/hermes/sessions-db')
    const rows = await mod.listSessionSummaries('telegram', 2)

    expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('AND s.source = ?'))
@@ -218,7 +218,7 @@ describe('session DB summaries', () => {
      },
    ])

-    const mod = await import('../../packages/server/src/services/hermes/sessions-db')
+    const mod = await import('../../packages/server/src/db/hermes/sessions-db')
    const rows = await mod.searchSessionSummaries('docker', undefined, 10)

    expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('messages_fts MATCH'))
@@ -265,7 +265,7 @@ describe('session DB summaries', () => {
      },
    ])

-    const mod = await import('../../packages/server/src/services/hermes/sessions-db')
+    const mod = await import('../../packages/server/src/db/hermes/sessions-db')
    const rows = await mod.searchSessionSummaries('记忆断裂', undefined, 10)

    expect(likeAllMock).toHaveBeenCalledWith('记忆断裂', '%记忆断裂%')
@@ -7,6 +7,9 @@ const searchMock = vi.fn(async (ctx: any) => { ctx.body = { results: [{ id: 'sea
 const getMock = vi.fn(async (ctx: any) => { ctx.body = { session: { id: ctx.params.id } } })
 const removeMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
 const renameMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
+const usageBatchMock = vi.fn(async (ctx: any) => { ctx.body = {} })
+const usageSingleMock = vi.fn(async (ctx: any) => { ctx.body = { input_tokens: 0, output_tokens: 0 } })
+const contextLengthMock = vi.fn(async (ctx: any) => { ctx.body = { context_length: 200000 } })

 vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
  listConversations: listConversationsMock,
@@ -16,6 +19,9 @@ vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
  get: getMock,
  remove: removeMock,
  rename: renameMock,
+  usageBatch: usageBatchMock,
+  usageSingle: usageSingleMock,
+  contextLength: contextLengthMock,
 }))

 describe('session routes', () => {
@@ -40,7 +46,10 @@ describe('session routes', () => {
      '/api/hermes/sessions',
      '/api/hermes/search/sessions',
      '/api/hermes/sessions/search',
+      '/api/hermes/sessions/usage',
+      '/api/hermes/sessions/context-length',
      '/api/hermes/sessions/:id',
+      '/api/hermes/sessions/:id/usage',
      '/api/hermes/sessions/:id/rename',
    ]))
  })
@@ -0,0 +1,159 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+
+// Mock the db index module so we can test usage-store in isolation
+const { mockEnsureTable, mockJsonSet, mockJsonGet, mockJsonGetAll, mockJsonDelete } = vi.hoisted(() => ({
+  mockEnsureTable: vi.fn(),
+  mockJsonSet: vi.fn(),
+  mockJsonGet: vi.fn(),
+  mockJsonGetAll: vi.fn(),
+  mockJsonDelete: vi.fn(),
+}))
+
+vi.mock('../../packages/server/src/db/index', () => ({
+  isSqliteAvailable: () => false, // Force JSON fallback path
+  ensureTable: mockEnsureTable,
+  getDb: () => null,
+  jsonSet: mockJsonSet,
+  jsonGet: mockJsonGet,
+  jsonGetAll: mockJsonGetAll,
+  jsonDelete: mockJsonDelete,
+}))
+
+import {
+  initUsageStore,
+  updateUsage,
+  getUsage,
+  getUsageBatch,
+  deleteUsage,
+} from '../../packages/server/src/db/hermes/usage-store'
+
+describe('Usage Store (JSON fallback)', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('initUsageStore skips ensureTable when SQLite is unavailable', () => {
+    // The module mock above pins isSqliteAvailable() to false, so no table setup
+    // may be attempted on the JSON fallback path.
+    initUsageStore()
+    expect(mockEnsureTable).not.toHaveBeenCalled()
+  })
+
+  it('updateUsage writes via jsonSet', () => {
+    updateUsage('session-1', 100, 50)
+    expect(mockJsonSet).toHaveBeenCalledWith(
+      'session_usage',
+      'session-1',
+      expect.objectContaining({
+        input_tokens: 100,
+        output_tokens: 50,
+        updated_at: expect.any(Number),
+      }),
+    )
+  })
+
+  it('getUsage reads via jsonGet', () => {
+    mockJsonGet.mockReturnValue({ input_tokens: 200, output_tokens: 80 })
+    const result = getUsage('session-1')
+    expect(result).toEqual({ input_tokens: 200, output_tokens: 80 })
+    expect(mockJsonGet).toHaveBeenCalledWith('session_usage', 'session-1')
+  })
+
+  it('getUsage returns undefined when jsonGet returns nothing', () => {
+    mockJsonGet.mockReturnValue(undefined)
+    const result = getUsage('nonexistent')
+    expect(result).toBeUndefined()
+  })
+
+  it('getUsageBatch returns empty map for empty input', () => {
+    const result = getUsageBatch([])
+    expect(result).toEqual({})
+    expect(mockJsonGetAll).not.toHaveBeenCalled()
+  })
+
+  it('getUsageBatch returns matching records', () => {
+    mockJsonGetAll.mockReturnValue({
+      'session-1': { input_tokens: 100, output_tokens: 50 },
+      'session-2': { input_tokens: 200, output_tokens: 80 },
+      'session-3': { input_tokens: 300, output_tokens: 120 },
+    })
+    const result = getUsageBatch(['session-1', 'session-3', 'session-missing'])
+    expect(result).toEqual({
+      'session-1': { input_tokens: 100, output_tokens: 50 },
+      'session-3': { input_tokens: 300, output_tokens: 120 },
+    })
+  })
+
+  it('deleteUsage calls jsonDelete', () => {
+    deleteUsage('session-1')
+    expect(mockJsonDelete).toHaveBeenCalledWith('session_usage', 'session-1')
+  })
+})
+
+// Test with SQLite available (mocked)
+describe('Usage Store (SQLite path)', () => {
+  let runMock: ReturnType<typeof vi.fn>
+  let getMock: ReturnType<typeof vi.fn>
+  let allMock: ReturnType<typeof vi.fn>
+  let deleteMock: ReturnType<typeof vi.fn>
+
+  beforeEach(() => {
+    vi.resetModules()
+
+    runMock = vi.fn()
+    getMock = vi.fn()
+    allMock = vi.fn()
+    deleteMock = vi.fn()
+
+    vi.doMock('../../packages/server/src/db/index', () => ({
+      isSqliteAvailable: () => true,
+      ensureTable: vi.fn(),
+      getDb: () => ({
+        prepare: vi.fn((sql: string) => {
+          if (sql.includes('INSERT') || sql.includes('UPDATE')) return { run: runMock }
+          if (sql.includes('SELECT') && sql.includes('WHERE session_id = ?')) return { get: getMock }
+          if (sql.includes('SELECT') && sql.includes('IN')) return { all: allMock }
+          if (sql.includes('DELETE')) return { run: deleteMock }
+          return { run: runMock, get: getMock, all: allMock }
+        }),
+      }),
+      jsonSet: vi.fn(),
+      jsonGet: vi.fn(),
+      jsonGetAll: vi.fn(),
+      jsonDelete: vi.fn(),
+    }))
+  })
+
+  it('updateUsage runs INSERT ... ON CONFLICT query', async () => {
+    const { updateUsage } = await import('../../packages/server/src/db/hermes/usage-store')
+    updateUsage('s1', 500, 200)
+    expect(runMock).toHaveBeenCalledWith('s1', 500, 200, expect.any(Number))
+  })
+
+  it('getUsage queries by session_id', async () => {
+    getMock.mockReturnValue({ input_tokens: 999, output_tokens: 111 })
+    const { getUsage } = await import('../../packages/server/src/db/hermes/usage-store')
+    const result = getUsage('s1')
+    expect(getMock).toHaveBeenCalledWith('s1')
+    expect(result).toEqual({ input_tokens: 999, output_tokens: 111 })
+  })
+
+  it('getUsageBatch queries with IN clause', async () => {
+    allMock.mockReturnValue([
+      { session_id: 'a', input_tokens: 1, output_tokens: 2 },
+      { session_id: 'b', input_tokens: 3, output_tokens: 4 },
+    ])
+    const { getUsageBatch } = await import('../../packages/server/src/db/hermes/usage-store')
+    const result = getUsageBatch(['a', 'b', 'c'])
+    expect(allMock).toHaveBeenCalledWith('a', 'b', 'c')
+    expect(result).toEqual({
+      a: { input_tokens: 1, output_tokens: 2 },
+      b: { input_tokens: 3, output_tokens: 4 },
+    })
+  })
+
+  it('deleteUsage runs DELETE query', async () => {
+    const { deleteUsage } = await import('../../packages/server/src/db/hermes/usage-store')
+    deleteUsage('s1')
+    expect(deleteMock).toHaveBeenCalledWith('s1')
+  })
+})