feat: add token usage tracking, context display, and dynamic context length (#132)

* fix: specify TS_NODE_PROJECT for dev:server script

ts-node/register resolves tsconfig from the entry file upward,
finding the root solution-style tsconfig.json (no compilerOptions).
This causes target to default to ES3, breaking MapIterator spread
syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server
tsconfig which targets ES2024.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

- Intercept SSE proxy to capture run.completed events and persist token
  usage (input_tokens, output_tokens) per session to SQLite/JSON store
- Display context usage bar in ChatInput showing used/total/remaining tokens
- Resolve the actual context length from Hermes' models_dev_cache.json based
  on the active profile's default model (falling back to 200K), with a 5-minute in-memory cache
- Move sessions-db.ts to db/hermes/ for unified database layer
- Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
- Fix proxy SSE path regex to match rewritten upstream path
- Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
- Fetch usage per session when a session is entered, instead of in a batch
- Add unit tests for usage-store, db index, and proxy SSE interception

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ekko
2026-04-22 16:14:50 +08:00
committed by GitHub
parent ce3bf5f3eb
commit 6f69c69802
26 changed files with 1203 additions and 144 deletions
+5
View File
@@ -28,6 +28,11 @@ export interface RunEvent {
preview?: string
timestamp?: number
error?: string
usage?: {
input_tokens: number
output_tokens: number
total_tokens: number
}
}
export async function startRun(body: StartRunRequest): Promise<StartRunResponse> {
@@ -94,3 +94,26 @@ export async function renameSession(id: string, title: string): Promise<boolean>
return false
}
}
/**
 * Fetch stored token usage for several sessions in one request.
 *
 * @param ids Session identifiers; sent as a single comma-joined `ids` query parameter.
 * @returns A map from session id to its usage counters. An empty input short-circuits
 *          to `{}` without issuing a request.
 */
export async function fetchSessionUsage(ids: string[]): Promise<Record<string, { input_tokens: number; output_tokens: number }>> {
  if (!ids.length) return {}

  const query = new URLSearchParams({ ids: ids.join(',') })
  return request(`/api/hermes/sessions/usage?${query}`)
}
/**
 * Fetch the stored token usage for a single session.
 *
 * @param id Session identifier. It is percent-encoded before being placed in the
 *           URL path so that characters such as `/`, `?` or `#` cannot change which
 *           endpoint is hit.
 * @returns The usage counters, or `null` when the request fails for any reason
 *          (usage display is best-effort).
 */
export async function fetchSessionUsageSingle(id: string): Promise<{ input_tokens: number; output_tokens: number } | null> {
  try {
    const path = `/api/hermes/sessions/${encodeURIComponent(id)}/usage`
    return await request<{ input_tokens: number; output_tokens: number }>(path)
  } catch {
    return null
  }
}
/**
 * Ask the server for the context length to display.
 *
 * @param profile Optional profile name; when given it is sent as the `profile`
 *                query parameter, otherwise no query string is appended.
 * @returns The `context_length` field of the response.
 */
export async function fetchContextLength(profile?: string): Promise<number> {
  const search = new URLSearchParams()
  if (profile) search.set('profile', profile)

  const qs = search.toString()
  const suffix = qs ? `?${qs}` : ''
  const { context_length } = await request<{ context_length: number }>(`/api/hermes/sessions/context-length${suffix}`)
  return context_length
}
@@ -1,8 +1,11 @@
<script setup lang="ts">
import type { Attachment } from '@/stores/hermes/chat'
import { useChatStore } from '@/stores/hermes/chat'
import { useAppStore } from '@/stores/hermes/app'
import { useProfilesStore } from '@/stores/hermes/profiles'
import { fetchContextLength } from '@/api/hermes/sessions'
import { NButton, NTooltip } from 'naive-ui'
import { computed, ref } from 'vue'
import { computed, ref, onMounted, watch } from 'vue'
import { useI18n } from 'vue-i18n'
const chatStore = useChatStore()
@@ -17,25 +20,41 @@ const isComposing = ref(false)
const canSend = computed(() => inputText.value.trim() || attachments.value.length > 0)
// --- Voice input (Web Speech API) ---
// TODO: re-enable when needed — browser-native speech-to-text
// const hasSpeechRecognition = ref(false)
// let recognition: SpeechRecognition | null = null
// let finalTranscript = ''
// let prefixText = ''
// onMounted(() => {
// const SR = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
// if (!SR) return
// recognition = new SR()
// recognition.continuous = false
// recognition.interimResults = true
// recognition.lang = 'en-US'
// hasSpeechRecognition.value = true
// recognition.onresult = (event: SpeechRecognitionEvent) => { ... }
// recognition.onend = () => { ... }
// recognition.onerror = (event: SpeechRecognitionErrorEvent) => { ... }
// })
// onUnmounted(() => { if (recognition && isRecording.value) recognition.stop() })
// --- Context info ---
// Context length used until (or whenever) the server cannot supply a usable one.
const FALLBACK_CONTEXT = 200000
const contextLength = ref(FALLBACK_CONTEXT)

// Monotonic id of the most recent load; lets a slow, older response be ignored
// when the profile/model watchers fire several loads in quick succession.
let latestContextRequest = 0

/**
 * Refresh `contextLength` for the currently active profile.
 *
 * Falls back to FALLBACK_CONTEXT when the request fails or returns a value that
 * is not a positive finite number (the value is later used as a divisor).
 * Only the most recently started load is allowed to write the result.
 */
async function loadContextLength() {
  const requestId = ++latestContextRequest
  let resolved = FALLBACK_CONTEXT
  try {
    const profile = useProfilesStore().activeProfileName || undefined
    const fetched = await fetchContextLength(profile)
    if (Number.isFinite(fetched) && fetched > 0) resolved = fetched
  } catch {
    // Non-critical: keep the fallback value.
  }
  if (requestId === latestContextRequest) contextLength.value = resolved
}

onMounted(loadContextLength)
watch(() => useProfilesStore().activeProfileName, loadContextLength)
watch(() => useAppStore().selectedModel, loadContextLength)
// Input + output tokens recorded on the active session (0 when none is active).
const totalTokens = computed(() => {
  const session = chatStore.activeSession
  return (session?.inputTokens ?? 0) + (session?.outputTokens ?? 0)
})

// Tokens left in the window; clamped so an over-full session shows 0 rather than
// a negative count.
const remainingTokens = computed(() => Math.max(contextLength.value - totalTokens.value, 0))

// Share of the window in use, as a percentage capped at 100. A non-positive or
// non-numeric window is reported as full when any tokens are used, instead of NaN.
const usagePercent = computed(() => {
  const limit = contextLength.value
  if (!(limit > 0)) return totalTokens.value > 0 ? 100 : 0
  return Math.min((totalTokens.value / limit) * 100, 100)
})
/**
 * Format a token count compactly: plain digits below 1000, one-decimal `k`
 * for thousands and one-decimal `M` for millions.
 *
 * `toFixed(1)` rounds, so counts just under one million would otherwise be
 * rendered as "1000.0k"; those are promoted to "1.0M".
 *
 * @param n Token count.
 * @returns Human-readable string such as "950", "12.3k" or "1.0M".
 */
function formatTokens(n: number): string {
  if (n >= 1000000) return (n / 1000000).toFixed(1) + 'M'
  if (n >= 1000) {
    const thousands = (n / 1000).toFixed(1)
    // Rounding pushed the value across the unit boundary.
    if (thousands === '1000.0') return '1.0M'
    return thousands + 'k'
  }
  return String(n)
}
// --- File attachment helpers ---
@@ -176,6 +195,33 @@ function isImage(type: string): boolean {
<template>
<div class="chat-input-area">
<!-- Top bar: attach + context info -->
<div class="input-top-bar">
<NTooltip trigger="hover">
<template #trigger>
<NButton quaternary size="tiny" @click="handleAttachClick" circle>
<template #icon>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
</template>
</NButton>
</template>
{{ t('chat.attachFiles') }}
</NTooltip>
<span v-if="totalTokens > 0" class="context-info" :class="{ 'context-warning': usagePercent > 80 }">
{{ formatTokens(totalTokens) }} / {{ formatTokens(contextLength) }} · {{ t('chat.contextRemaining') }} {{ formatTokens(remainingTokens) }}
</span>
<div v-if="totalTokens > 0" class="context-bar">
<div
class="context-bar-fill"
:class="{
'context-bar-warn': usagePercent > 60 && usagePercent <= 80,
'context-bar-danger': usagePercent > 80,
}"
:style="{ width: `${usagePercent}%` }"
/>
</div>
</div>
<!-- Attachment previews -->
<div v-if="attachments.length > 0" class="attachment-previews">
<div
@@ -228,16 +274,6 @@ function isImage(type: string): boolean {
@paste="handlePaste"
></textarea>
<div class="input-actions">
<NTooltip trigger="hover">
<template #trigger>
<NButton quaternary size="small" @click="handleAttachClick" circle>
<template #icon>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
</template>
</NButton>
</template>
{{ t('chat.attachFiles') }}
</NTooltip>
<NButton
v-if="chatStore.isStreaming"
size="small"
@@ -271,6 +307,45 @@ function isImage(type: string): boolean {
flex-shrink: 0;
}
// Row above the textarea holding the attach button and the context usage readout.
.input-top-bar {
display: flex;
align-items: center;
gap: 8px;
padding: 0 0 6px;
}
// Textual "used / total · remaining" readout.
.context-info {
font-size: 11px;
color: $text-muted;
// Applied by the template when usagePercent exceeds 80.
&.context-warning {
color: #e8a735;
}
}
// Fixed-size track of the usage bar; the fill is clipped to the rounded corners.
.context-bar {
width: 60px;
height: 4px;
background: rgba(128, 128, 128, 0.2);
border-radius: 2px;
overflow: hidden;
}
// Fill whose width is bound to usagePercent; neutral grey by default.
.context-bar-fill {
height: 100%;
background: linear-gradient(90deg, rgba(128, 128, 128, 0.3), rgba(128, 128, 128, 0.6));
border-radius: 2px;
transition: width 0.3s ease;
// Amber: applied by the template for usage above 60 and up to 80 percent.
&.context-bar-warn {
background: linear-gradient(90deg, #c98a1a, #e8a735);
}
// Red: applied by the template for usage above 80 percent.
&.context-bar-danger {
background: linear-gradient(90deg, #c43a2a, #e85d4a);
}
}
.attachment-previews {
display: flex;
flex-wrap: wrap;
@@ -169,57 +169,6 @@ const headerTitle = computed(() =>
currentMode.value === 'live' ? t('chat.liveSessions') : activeSessionTitle.value,
)
// Sum of the active session's input and output token counters (missing values count as 0).
const totalTokens = computed(() => {
  const session = chatStore.activeSession
  return (session?.inputTokens ?? 0) + (session?.outputTokens ?? 0)
})
// Context window sizes keyed by model-name fragment. `contextWindow` matches these
// keys with `model.includes(key)` in insertion order and returns the first hit, so
// a more specific key must appear before any shorter key it contains
// (e.g. 'gpt-4o' before 'gpt-4').
const MODEL_CONTEXT: Record<string, number> = {
'claude-opus-4': 200000,
'claude-sonnet-4': 200000,
'claude-haiku-4': 200000,
'claude-3.5-sonnet': 200000,
'claude-3.5-haiku': 200000,
'claude-3-opus': 200000,
'claude-3-sonnet': 200000,
'claude-3-haiku': 200000,
'gpt-4o': 128000,
'gpt-4o-mini': 128000,
'gpt-4-turbo': 128000,
'gpt-4': 8192,
'gpt-3.5-turbo': 16385,
'o1': 200000,
'o1-mini': 128000,
'o3': 200000,
'o3-mini': 200000,
'o4-mini': 200000,
'deepseek-chat': 65536,
'deepseek-reasoner': 65536,
'gemini-2.5-pro': 1000000,
'gemini-2.5-flash': 1000000,
'gemini-2.0-flash': 1000000,
'glm-4-plus': 128000,
'glm-4': 128000,
'qwen-max': 128000,
'qwen-plus': 128000,
'qwen-turbo': 128000,
}
// Context window for the active session's model: the value of the first
// MODEL_CONTEXT key contained in the model name, or null when none matches.
const contextWindow = computed(() => {
  const model = chatStore.activeSession?.model || ''
  const match = Object.entries(MODEL_CONTEXT).find(([key]) => model.includes(key))
  return match ? match[1] : null
})
/**
 * Format a token count compactly: plain digits below 1000, one-decimal `k`
 * for thousands and one-decimal `M` for millions.
 *
 * `toFixed(1)` rounds, so counts just under one million would otherwise be
 * rendered as "1000.0k"; those are promoted to "1.0M".
 *
 * @param n Token count.
 * @returns Human-readable string such as "950", "12.3k" or "1.0M".
 */
function formatTokens(n: number): string {
  if (n >= 1000000) return (n / 1000000).toFixed(1) + 'M'
  if (n >= 1000) {
    const thousands = (n / 1000).toFixed(1)
    // Rounding pushed the value across the unit boundary.
    if (thousands === '1000.0') return '1.0M'
    return thousands + 'k'
  }
  return String(n)
}
const activeSessionSource = computed(() =>
currentMode.value === 'chat' ? (chatStore.activeSession?.source || '') : '',
)
@@ -446,9 +395,6 @@ async function handleRenameConfirm() {
<template v-if="currentMode === 'chat'">
<MessageList />
<div v-if="contextWindow !== null" class="context-info">
<span>{{ formatTokens(totalTokens) }} / {{ formatTokens(contextWindow) }}</span>
</div>
<ChatInput />
</template>
<ConversationMonitorPane v-else :human-only="sessionBrowserPrefsStore.humanOnly" />
@@ -799,20 +745,9 @@ async function handleRenameConfirm() {
margin-right: 4px;
}
.context-info {
padding: 0 20px 4px;
font-size: 11px;
color: $text-muted;
flex-shrink: 0;
}
@media (max-width: $breakpoint-mobile) {
.chat-header {
padding: 16px 12px 16px 52px;
}
.context-info {
padding: 0 12px 4px;
}
}
</style>
+1
View File
@@ -61,6 +61,7 @@ export default {
// Chat
chat: {
contextRemaining: 'übrig',
emptyState: 'Starten Sie eine Konversation mit Hermes Agent',
inputPlaceholder: 'Nachricht eingeben... (Enter zum Senden, Shift+Enter fur neue Zeile)',
attachFiles: 'Dateien anhangen',
+1
View File
@@ -73,6 +73,7 @@ export default {
// Chat
chat: {
contextRemaining: 'remaining',
emptyState: 'Start a conversation with Hermes Agent',
inputPlaceholder: 'Type a message... (Enter to send, Shift+Enter for new line)',
attachFiles: 'Attach files',
+1
View File
@@ -61,6 +61,7 @@ export default {
// Chat
chat: {
contextRemaining: 'restante',
emptyState: 'Inicia una conversacion con Hermes Agent',
inputPlaceholder: 'Escribe un mensaje... (Enter para enviar, Shift+Enter para nueva linea)',
attachFiles: 'Adjuntar archivos',
+1
View File
@@ -61,6 +61,7 @@ export default {
// Chat
chat: {
contextRemaining: 'restant',
emptyState: 'Demarrer une conversation avec Hermes Agent',
inputPlaceholder: 'Tapez un message... (Entree pour envoyer, Shift+Entree pour un saut de ligne)',
attachFiles: 'Joindre des fichiers',
+1
View File
@@ -61,6 +61,7 @@ export default {
// チャット
chat: {
contextRemaining: '残り',
emptyState: 'Hermes Agent と会話を開始しましょう',
inputPlaceholder: 'メッセージを入力... (Enter で送信、Shift+Enter で改行)',
attachFiles: 'ファイルを添付',
+1
View File
@@ -61,6 +61,7 @@ export default {
// 채팅
chat: {
contextRemaining: '남음',
emptyState: 'Hermes Agent와 대화를 시작하세요',
inputPlaceholder: '메시지를 입력하세요... (Enter로 전송, Shift+Enter로 줄바꿈)',
attachFiles: '파일 첨부',
+1
View File
@@ -61,6 +61,7 @@ export default {
// Chat
chat: {
contextRemaining: 'restante',
emptyState: 'Inicie uma conversa com o Hermes Agent',
inputPlaceholder: 'Digite uma mensagem... (Enter para enviar, Shift+Enter para nova linha)',
attachFiles: 'Anexar arquivos',
+1
View File
@@ -73,6 +73,7 @@ export default {
// 对话
chat: {
contextRemaining: '剩余',
emptyState: '开始与 Hermes Agent 对话',
inputPlaceholder: '输入消息... (Enter 发送,Shift+Enter 换行)',
attachFiles: '添加附件',
+17 -12
View File
@@ -1,5 +1,5 @@
import { startRun, streamRunEvents, type ChatMessage, type RunEvent } from '@/api/hermes/chat'
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
import { defineStore } from 'pinia'
import { ref, computed } from 'vue'
import { useAppStore } from './app'
@@ -155,8 +155,6 @@ function mapHermesSession(s: SessionSummary): Session {
model: s.model,
provider: (s as any).billing_provider || '',
messageCount: s.message_count,
inputTokens: s.input_tokens,
outputTokens: s.output_tokens,
}
}
@@ -340,8 +338,6 @@ export const useChatStore = defineStore('chat', () => {
|| (serverUsers === localUsers && serverAssistantLen >= localAssistantLen)
if (serverIsAhead) {
target.messages = mapped
target.inputTokens = detail.input_tokens
target.outputTokens = detail.output_tokens
if (detail.title && !target.title) target.title = detail.title
if (sid === activeSessionId.value) persistActiveMessages()
}
@@ -363,8 +359,6 @@ export const useChatStore = defineStore('chat', () => {
// our "don't retreat" guard above skipped it — the server is
// now the authoritative source of truth.
target.messages = mapped
target.inputTokens = detail.input_tokens
target.outputTokens = detail.output_tokens
if (detail.title) target.title = detail.title
if (sid === activeSessionId.value) persistActiveMessages()
clearInFlight(sid)
@@ -447,8 +441,6 @@ export const useChatStore = defineStore('chat', () => {
if (!target) return false
const mapped = mapHermesMessages(detail.messages || [])
target.messages = mapped
target.inputTokens = detail.input_tokens
target.outputTokens = detail.output_tokens
if (detail.title) target.title = detail.title
persistActiveMessages()
return true
@@ -531,8 +523,6 @@ export const useChatStore = defineStore('chat', () => {
if (serverIsAhead) {
activeSession.value.messages = mapped
}
activeSession.value.inputTokens = detail.input_tokens
activeSession.value.outputTokens = detail.output_tokens
// Update title: use Hermes title, or fallback to first user message
if (detail.title) {
activeSession.value.title = detail.title
@@ -557,6 +547,15 @@ export const useChatStore = defineStore('chat', () => {
if (readInFlight(sessionId) && !streamStates.value.has(sessionId)) {
startPolling(sessionId)
}
// Fetch token usage for this session from web-ui DB
try {
const usage = await fetchSessionUsageSingle(sessionId)
if (usage) {
activeSession.value.inputTokens = usage.input_tokens
activeSession.value.outputTokens = usage.output_tokens
}
} catch { /* non-critical */ }
}
function newChat() {
@@ -785,9 +784,15 @@ export const useChatStore = defineStore('chat', () => {
if (lastMsg?.isStreaming) {
updateMessage(sid, lastMsg.id, { isStreaming: false })
}
if (evt.usage) {
const target = sessions.value.find(s => s.id === sid)
if (target) {
target.inputTokens = evt.usage.input_tokens
target.outputTokens = evt.usage.output_tokens
}
}
cleanup()
updateSessionTitle(sid)
// IMPORTANT ordering: persist the final cache BEFORE clearing
// the in-flight marker. If the browser is reloading right now
// and kills us between the two localStorage writes, we want
// the next page load to still see in-flight === true (so
@@ -1,6 +1,8 @@
import * as hermesCli from '../../services/hermes/hermes-cli'
import { getConversationDetail, listConversationSummaries } from '../../services/hermes/conversations'
import { listSessionSummaries, searchSessionSummaries } from '../../services/hermes/sessions-db'
import { listSessionSummaries, searchSessionSummaries } from '../../db/hermes/sessions-db'
import { deleteUsage, getUsage, getUsageBatch } from '../../db/hermes/usage-store'
import { getModelContextLength } from '../../services/hermes/model-context'
import { logger } from '../../services/logger'
function parseHumanOnly(value: unknown): boolean {
@@ -84,9 +86,29 @@ export async function remove(ctx: any) {
ctx.body = { error: 'Failed to delete session' }
return
}
deleteUsage(ctx.params.id)
ctx.body = { ok: true }
}
/**
 * GET handler: return stored usage for the sessions named in the `ids` query parameter.
 *
 * `ids` is a comma-separated list. A repeated parameter (`?ids=a&ids=b`), which the
 * query parser may surface as an array, is accepted too instead of throwing on
 * `.split`. Empty entries and duplicates are dropped before the lookup. Responds
 * with `{}` when no ids remain.
 */
export async function usageBatch(ctx: any) {
  const raw: unknown = ctx.query.ids
  const parts: unknown[] = Array.isArray(raw) ? raw : [raw]
  const idList = Array.from(new Set(
    parts
      .filter((part): part is string => typeof part === 'string')
      .flatMap(part => part.split(','))
      .filter(Boolean),
  ))
  if (idList.length === 0) {
    ctx.body = {}
    return
  }
  ctx.body = getUsageBatch(idList)
}
/**
 * GET handler: return stored usage for the session in `ctx.params.id`,
 * or zeroed counters when nothing is stored for it.
 */
export async function usageSingle(ctx: any) {
  const usage = getUsage(ctx.params.id)
  ctx.body = usage || { input_tokens: 0, output_tokens: 0 }
}
export async function rename(ctx: any) {
const { title } = ctx.request.body as { title?: string }
if (!title || typeof title !== 'string') {
@@ -102,3 +124,8 @@ export async function rename(ctx: any) {
}
ctx.body = { ok: true }
}
/**
 * GET handler: respond with `{ context_length }` for the requested profile.
 *
 * The optional `profile` query parameter is only forwarded when it is a non-empty
 * string. If the parameter is repeated (surfacing as an array) the first value is
 * used, so an array is never handed to code that expects a string.
 */
export async function contextLength(ctx: any) {
  const raw: unknown = ctx.query.profile
  const candidate: unknown = Array.isArray(raw) ? raw[0] : raw
  const profile = typeof candidate === 'string' && candidate ? candidate : undefined
  ctx.body = { context_length: getModelContextLength(profile) }
}
@@ -1,4 +1,4 @@
import { getActiveProfileDir } from './hermes-profile'
import { getActiveProfileDir } from '../../services/hermes/hermes-profile'
const SQLITE_AVAILABLE = (() => {
const [major, minor] = process.versions.node.split('.').map(Number)
@@ -0,0 +1,75 @@
import { isSqliteAvailable, ensureTable, getDb, jsonSet, jsonGet, jsonGetAll, jsonDelete } from '../index'
// Table name in SQLite; also used as the namespace key in the JSON fallback store.
const TABLE = 'session_usage'
// Column name -> SQL column definition, passed to ensureTable() by initUsageStore().
const SCHEMA = {
session_id: 'TEXT PRIMARY KEY',
input_tokens: 'INTEGER NOT NULL DEFAULT 0',
output_tokens: 'INTEGER NOT NULL DEFAULT 0',
// Written by updateUsage() from Date.now().
updated_at: 'INTEGER NOT NULL',
}
/**
 * Prepare the usage table when SQLite is in use.
 * Does nothing when the JSON fallback is active.
 */
export function initUsageStore(): void {
  if (!isSqliteAvailable()) return
  ensureTable(TABLE, SCHEMA)
}
/**
 * Insert or overwrite the usage counters for a session, stamping the current time.
 *
 * @param sessionId    Session the counters belong to (primary key).
 * @param inputTokens  Input token count to store.
 * @param outputTokens Output token count to store.
 */
export function updateUsage(sessionId: string, inputTokens: number, outputTokens: number): void {
  const updatedAt = Date.now()

  if (!isSqliteAvailable()) {
    // JSON fallback: the whole record is replaced under the session id.
    jsonSet(TABLE, sessionId, {
      input_tokens: inputTokens,
      output_tokens: outputTokens,
      updated_at: updatedAt,
    })
    return
  }

  // Upsert keyed on session_id.
  const upsertSql = [
    `INSERT INTO ${TABLE} (session_id, input_tokens, output_tokens, updated_at)`,
    'VALUES (?, ?, ?, ?)',
    'ON CONFLICT(session_id) DO UPDATE SET',
    'input_tokens = excluded.input_tokens,',
    'output_tokens = excluded.output_tokens,',
    'updated_at = excluded.updated_at',
  ].join('\n')
  const statement = getDb()!.prepare(upsertSql)
  statement.run(sessionId, inputTokens, outputTokens, updatedAt)
}
/**
 * Look up the stored usage counters for one session.
 *
 * @param sessionId Session to look up.
 * @returns The counters, or `undefined` when nothing is stored. In the JSON
 *          fallback, counters missing from a stored record are reported as 0.
 */
export function getUsage(sessionId: string): { input_tokens: number; output_tokens: number } | undefined {
  if (!isSqliteAvailable()) {
    const stored = jsonGet(TABLE, sessionId)
    if (!stored) return undefined
    return { input_tokens: stored.input_tokens ?? 0, output_tokens: stored.output_tokens ?? 0 }
  }

  const statement = getDb()!.prepare(
    `SELECT input_tokens, output_tokens FROM ${TABLE} WHERE session_id = ?`,
  )
  return statement.get(sessionId) as { input_tokens: number; output_tokens: number } | undefined
}
/**
 * Look up stored usage counters for several sessions at once.
 *
 * Duplicate ids are collapsed. On SQLite the ids are queried in fixed-size chunks
 * so that an arbitrarily long list never exceeds the engine's limit on bound
 * parameters per statement. In the JSON fallback only records that are own
 * properties of the table object are returned, so an id such as "constructor"
 * cannot match an inherited member.
 *
 * @param sessionIds Session ids to look up.
 * @returns Map from session id to counters; ids with no stored record are omitted.
 */
export function getUsageBatch(
  sessionIds: string[],
): Record<string, { input_tokens: number; output_tokens: number }> {
  const map: Record<string, { input_tokens: number; output_tokens: number }> = {}
  if (sessionIds.length === 0) return map

  const uniqueIds = Array.from(new Set(sessionIds))

  if (isSqliteAvailable()) {
    // Kept well below SQLite's historical default of 999 bound parameters.
    const CHUNK_SIZE = 500
    const db = getDb()!
    for (let start = 0; start < uniqueIds.length; start += CHUNK_SIZE) {
      const chunk = uniqueIds.slice(start, start + CHUNK_SIZE)
      const placeholders = chunk.map(() => '?').join(',')
      const rows = db.prepare(
        `SELECT session_id, input_tokens, output_tokens FROM ${TABLE} WHERE session_id IN (${placeholders})`,
      ).all(...chunk) as Array<{ session_id: string; input_tokens: number; output_tokens: number }>
      for (const r of rows) {
        map[r.session_id] = { input_tokens: r.input_tokens, output_tokens: r.output_tokens }
      }
    }
    return map
  }

  const all = jsonGetAll(TABLE)
  for (const id of uniqueIds) {
    if (!Object.prototype.hasOwnProperty.call(all, id)) continue
    const row = all[id]
    if (row) map[id] = { input_tokens: row.input_tokens ?? 0, output_tokens: row.output_tokens ?? 0 }
  }
  return map
}
/**
 * Remove the stored usage record for a session from whichever backend is active.
 *
 * @param sessionId Session whose record should be removed.
 */
export function deleteUsage(sessionId: string): void {
  if (!isSqliteAvailable()) {
    jsonDelete(TABLE, sessionId)
    return
  }
  const statement = getDb()!.prepare(`DELETE FROM ${TABLE} WHERE session_id = ?`)
  statement.run(sessionId)
}
+136
View File
@@ -0,0 +1,136 @@
import { DatabaseSync } from 'node:sqlite'
import { mkdirSync, readFileSync, writeFileSync, existsSync } from 'fs'
import { resolve } from 'path'
import { homedir } from 'os'
const DB_DIR = resolve(homedir(), '.hermes-web-ui')
const DB_PATH = resolve(DB_DIR, 'hermes-web-ui.db')
const JSON_PATH = resolve(DB_DIR, 'hermes-web-ui.json')
// --- SQLite availability check ---

/** True when the running Node version is 22.5 or newer. */
function detectSqliteSupport(): boolean {
  const [major, minor] = process.versions.node.split('.').map(Number)
  if (major > 22) return true
  return major === 22 && minor >= 5
}

// Evaluated once at module load.
const SQLITE_AVAILABLE = detectSqliteSupport()

/** Whether the SQLite backend is used (otherwise the JSON fallback applies). */
export function isSqliteAvailable(): boolean {
  return SQLITE_AVAILABLE
}
// --- SQLite backend ---
let _db: DatabaseSync | null = null

/**
 * Return the shared SQLite handle, opening it on first use.
 *
 * On first call the storage directory is created, the database file is opened,
 * and WAL journaling plus foreign-key enforcement are switched on.
 *
 * `node:sqlite` is loaded lazily here rather than used through the top-level
 * import: the module does not exist before Node 22.5, so loading it eagerly would
 * fail at module load on exactly the Node versions the JSON fallback is meant to
 * serve. With `DatabaseSync` now referenced only as a type in this block, the
 * top-level import can be dropped from the emitted JavaScript.
 * NOTE(review): confirm the server build does not enable `verbatimModuleSyntax`
 * (or similar), which would keep the runtime import.
 *
 * @returns The open database, or `null` when SQLite is not available.
 */
export function getDb(): DatabaseSync | null {
  if (!SQLITE_AVAILABLE) return null
  if (!_db) {
    mkdirSync(DB_DIR, { recursive: true })
    const sqlite = require('node:sqlite') as typeof import('node:sqlite')
    _db = new sqlite.DatabaseSync(DB_PATH)
    _db.exec('PRAGMA journal_mode=WAL')
    _db.exec('PRAGMA foreign_keys=ON')
  }
  return _db
}
/**
 * Bring a table in line with the expected column set.
 *
 * Steps, in order:
 * 1. `CREATE TABLE IF NOT EXISTS` with every column in `schema`.
 * 2. Read the current columns via `PRAGMA table_info`.
 * 3. `ALTER TABLE ... ADD COLUMN` for each expected column that is missing.
 * 4. `ALTER TABLE ... DROP COLUMN` for each existing column not in `schema`
 *    (SQLite 3.35+).
 *
 * Returns immediately when SQLite is not available.
 *
 * @param tableName Table to create or migrate.
 * @param schema    Column name -> SQL column definition.
 */
export function ensureTable(tableName: string, schema: Record<string, string>): void {
  const db = getDb()
  if (!db) return

  const expected = Object.keys(schema)
  const columnSql = expected.map(col => `"${col}" ${schema[col]}`).join(', ')
  db.exec(`CREATE TABLE IF NOT EXISTS "${tableName}" (${columnSql})`)

  // Snapshot of the columns as they were before any ALTER below.
  const info = db.prepare(`PRAGMA table_info("${tableName}")`).all() as Array<{ name: string }>
  const existing = Array.from(new Set(info.map(row => row.name)))

  expected
    .filter(col => !existing.includes(col))
    .forEach(col => {
      db.exec(`ALTER TABLE "${tableName}" ADD COLUMN "${col}" ${schema[col]}`)
    })

  existing
    .filter(col => !expected.includes(col))
    .forEach(col => {
      db.exec(`ALTER TABLE "${tableName}" DROP COLUMN "${col}"`)
    })
}
// --- JSON fallback backend ---
// Shape of the JSON fallback file: table name -> primary key -> record.
type JsonData = Record<string, Record<string, Record<string, any>>>

/**
 * Load the whole JSON fallback store from disk.
 *
 * @returns The parsed store, or an empty store when the file is missing,
 *          unreadable, not valid JSON, or valid JSON whose top level is not a
 *          plain object (e.g. `null` or an array), so callers can always index
 *          into the result.
 */
function readJsonStore(): JsonData {
  if (!existsSync(JSON_PATH)) return {}
  try {
    const parsed: unknown = JSON.parse(readFileSync(JSON_PATH, 'utf-8'))
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) return {}
    return parsed as JsonData
  } catch {
    return {}
  }
}
/**
 * Persist the whole JSON fallback store, creating the storage directory if needed.
 * The file is written pretty-printed with two-space indentation.
 */
function writeJsonStore(data: JsonData): void {
  mkdirSync(DB_DIR, { recursive: true })
  const serialized = JSON.stringify(data, null, 2)
  writeFileSync(JSON_PATH, serialized, 'utf-8')
}
/**
 * Read one record from the JSON store.
 * @param table Table name (namespace)
 * @param key Primary key
 * @returns The record, or `undefined` when the table or key is absent.
 */
export function jsonGet(table: string, key: string): Record<string, any> | undefined {
  const bucket = readJsonStore()[table]
  return bucket == null ? undefined : bucket[key]
}
/**
 * Write one record to the JSON store, creating the table namespace on demand,
 * then persist the whole store.
 * @param table Table name (namespace)
 * @param key Primary key
 * @param value Record data
 */
export function jsonSet(table: string, key: string, value: Record<string, any>): void {
  const store = readJsonStore()
  const bucket = store[table] || (store[table] = {})
  bucket[key] = value
  writeJsonStore(store)
}
/**
 * Read every record of one table from the JSON store.
 * @returns Map of primary key -> record; empty when the table is absent.
 */
export function jsonGetAll(table: string): Record<string, Record<string, any>> {
  return readJsonStore()[table] || {}
}
/**
 * Remove one record from the JSON store and persist the result.
 * Nothing is written when the table itself does not exist.
 */
export function jsonDelete(table: string, key: string): void {
  const store = readJsonStore()
  const bucket = store[table]
  if (!bucket) return

  delete bucket[key]
  writeJsonStore(store)
}
/**
 * Path of the file backing the active store (for debugging):
 * the SQLite database when SQLite is available, otherwise the JSON file.
 */
export function getStoragePath(): string {
  if (SQLITE_AVAILABLE) return DB_PATH
  return JSON_PATH
}
+7 -1
View File
@@ -20,7 +20,7 @@ import { logger } from './services/logger'
declare const __APP_VERSION__: string
const APP_VERSION = typeof __APP_VERSION__ !== 'undefined'
? __APP_VERSION__
: (() => { try { return JSON.parse(readFileSync(resolve(__dirname, '../../package.json'), 'utf-8')).version } catch { return 'dev' } } )()
: (() => { try { return JSON.parse(readFileSync(resolve(__dirname, '../../package.json'), 'utf-8')).version } catch { return 'dev' } })()
// Global error handlers
process.on('uncaughtException', (err) => {
@@ -44,6 +44,12 @@ export async function bootstrap() {
await initGatewayManager()
console.log('[bootstrap] gateway manager initialized')
// Initialize web-ui SQLite tables
const { initUsageStore } = await import('./db/hermes/usage-store')
initUsageStore()
console.log('[bootstrap] usage store initialized')
app.use(cors({ origin: config.corsOrigins }))
app.use(bodyParser())
console.log('[bootstrap] cors + bodyParser registered')
@@ -1,9 +1,26 @@
import type { Context } from 'koa'
import { config } from '../../config'
import { getGatewayManagerInstance } from '../../services/gateway-bootstrap'
import { updateUsage } from '../../db/hermes/usage-store'
// Thin accessor so callers always get the current gateway manager instance.
function getGatewayManager() {
  const manager = getGatewayManagerInstance()
  return manager
}
// --- run_id → session_id mapping (in-memory, ephemeral) ---
const runSessionMap = new Map<string, string>()

/**
 * Remember which session a run belongs to.
 *
 * The entry is removed automatically after 30 minutes. The cleanup timer is
 * unref'd so that pending cleanups never keep the Node process alive by
 * themselves (e.g. during shutdown or in tests).
 *
 * @param runId     Run identifier.
 * @param sessionId Session the run belongs to.
 */
export function setRunSession(runId: string, sessionId: string): void {
  runSessionMap.set(runId, sessionId)
  // Auto-cleanup after 30 minutes
  setTimeout(() => runSessionMap.delete(runId), 30 * 60 * 1000).unref()
}

/** Look up the session recorded for a run, or `undefined` when none is known. */
function getSessionForRun(runId: string): string | undefined {
  return runSessionMap.get(runId)
}
// --- Helpers ---
function isTransientGatewayError(err: any): boolean {
const msg = String(err?.message || '')
const causeCode = String(err?.cause?.code || '')
@@ -48,19 +65,7 @@ function resolveUpstream(ctx: Context): string {
return config.upstream.replace(/\/$/, '')
}
export async function proxy(ctx: Context) {
const profile = resolveProfile(ctx)
const upstream = resolveUpstream(ctx)
// Rewrite path for upstream gateway:
// /api/hermes/v1/* -> /v1/* (upstream uses /v1/ prefix)
// /api/hermes/* -> /api/* (upstream uses /api/ prefix)
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
const params = new URLSearchParams(ctx.search || '')
params.delete('token')
const search = params.toString()
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
// Build headers — forward most, strip browser/web-ui specific ones
function buildProxyHeaders(ctx: Context, upstream: string): Record<string, string> {
const headers: Record<string, string> = {}
for (const [key, value] of Object.entries(ctx.headers)) {
if (value == null) continue
@@ -75,33 +80,118 @@ export async function proxy(ctx: Context) {
}
}
// Inject Hermes gateway API key from profile's .env
const mgr = getGatewayManager()
if (mgr) {
const apiKey = mgr.getApiKey(profile)
const apiKey = mgr.getApiKey(resolveProfile(ctx))
if (apiKey) {
headers['authorization'] = `Bearer ${apiKey}`
}
}
return headers
}
// --- SSE stream interception ---
const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
/**
* Parse SSE text chunks and extract run.completed events.
* Returns the run_id if a run.completed was found.
*/
function extractRunCompletedFromChunk(chunk: string): string | null {
// SSE format: each line is "data: {...}\n\n"
const lines = chunk.split('\n')
for (const line of lines) {
if (!line.startsWith('data: ')) continue
try {
// Build request body from raw body
let body: string | undefined
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
body = (ctx as any).request.rawBody as string | undefined
const data = JSON.parse(line.slice(6))
if (data.event === 'run.completed' && data.usage && data.run_id) {
const sessionId = getSessionForRun(data.run_id)
if (sessionId) {
updateUsage(sessionId, data.usage.input_tokens, data.usage.output_tokens)
return data.run_id
}
}
} catch { /* not JSON, skip */ }
}
return null
}
/**
* Stream an SSE response while intercepting run.completed events.
*/
async function streamSSE(ctx: Context, res: Response): Promise<void> {
if (!res.body) {
ctx.res.end()
return
}
const requestInit: RequestInit = {
method: ctx.req.method,
headers,
body,
const reader = res.body.getReader()
const decoder = new TextDecoder()
let buffer = ''
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
// Forward raw bytes to client immediately
ctx.res.write(value)
// Also decode for interception
buffer += decoder.decode(value, { stream: true })
// Process complete SSE lines (delimited by double newline)
let newlineIdx: number
while ((newlineIdx = buffer.indexOf('\n\n')) !== -1) {
const eventBlock = buffer.slice(0, newlineIdx)
buffer = buffer.slice(newlineIdx + 2)
extractRunCompletedFromChunk(eventBlock)
}
}
// Process remaining buffer
if (buffer.trim()) {
extractRunCompletedFromChunk(buffer)
}
} finally {
ctx.res.end()
}
}
// --- Main proxy function ---
export async function proxy(ctx: Context) {
const profile = resolveProfile(ctx)
const upstream = resolveUpstream(ctx)
const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
const params = new URLSearchParams(ctx.search || '')
params.delete('token')
const search = params.toString()
const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`
const headers = buildProxyHeaders(ctx, upstream)
try {
let body: string | undefined
if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
// @koa/bodyparser parses JSON into ctx.request.body but doesn't store rawBody
// by default. Re-serialize the parsed body to get the string form.
const parsed = (ctx as any).request.body
if (typeof parsed === 'string') {
body = parsed
} else if (parsed && typeof parsed === 'object') {
body = JSON.stringify(parsed)
}
}
const requestInit: RequestInit = { method: ctx.req.method, headers, body }
let res: Response
try {
res = await fetch(url, requestInit)
} catch (err: any) {
// Gateway may be restarting; wait briefly and retry once.
if (isTransientGatewayError(err) && await waitForGatewayReady(upstream)) {
res = await fetch(url, requestInit)
} else {
@@ -116,10 +206,37 @@ export async function proxy(ctx: Context) {
ctx.set(key, value)
}
})
ctx.status = res.status
// Stream response body
// Intercept POST /v1/runs to capture run_id → session_id mapping
if (ctx.req.method === 'POST' && /\/v1\/runs$/.test(upstreamPath) && body) {
try {
const parsed = JSON.parse(body)
if (parsed.session_id) {
const resBody = await res.text()
ctx.res.write(resBody)
ctx.res.end()
try {
const result = JSON.parse(resBody)
if (result.run_id) {
setRunSession(result.run_id, parsed.session_id)
}
} catch { /* response not JSON, ignore */ }
return
}
} catch { /* body not JSON, fall through to normal stream */ }
// No session_id in body — fall through to normal response handling below
}
// Intercept SSE streams for /v1/runs/{id}/events
const sseMatch = upstreamPath.match(SSE_EVENTS_PATH)
if (sseMatch) {
await streamSSE(ctx, res)
return
}
// Default: pipe response body directly
if (res.body) {
const reader = res.body.getReader()
const pump = async () => {
@@ -8,6 +8,9 @@ sessionRoutes.get('/api/hermes/sessions/conversations/:id/messages', ctrl.getCon
sessionRoutes.get('/api/hermes/sessions', ctrl.list)
sessionRoutes.get('/api/hermes/search/sessions', ctrl.search)
sessionRoutes.get('/api/hermes/sessions/search', ctrl.search)
// Fixed-path routes are registered before '/sessions/:id' so that 'usage' and
// 'context-length' are not captured as a session id.
sessionRoutes.get('/api/hermes/sessions/usage', ctrl.usageBatch)
sessionRoutes.get('/api/hermes/sessions/context-length', ctrl.contextLength)
sessionRoutes.get('/api/hermes/sessions/:id', ctrl.get)
// Token usage for a single session.
sessionRoutes.get('/api/hermes/sessions/:id/usage', ctrl.usageSingle)
sessionRoutes.delete('/api/hermes/sessions/:id', ctrl.remove)
sessionRoutes.post('/api/hermes/sessions/:id/rename', ctrl.rename)
@@ -0,0 +1,106 @@
import { resolve, join } from 'path'
import { homedir } from 'os'
import { readFileSync, existsSync, statSync } from 'fs'
const HERMES_BASE = resolve(homedir(), '.hermes')
const MODELS_DEV_CACHE = resolve(HERMES_BASE, 'models_dev_cache.json')
const DEFAULT_CONTEXT_LENGTH = 200_000
/** Per-model limits as found in the models cache file; every field is optional. */
interface ModelLimit {
// Read by lookupContextFromCache() as the model's context length.
context?: number
output?: number
input?: number
}
/** One model record in the models cache file. */
interface ModelEntry {
id?: string
limit?: ModelLimit
}
/** One provider record: its models, keyed by model name. */
interface ProviderEntry {
models?: Record<string, ModelEntry>
}
// --- In-memory cache: parsed models_dev_cache (1.7MB), invalidated by mtime ---
const CACHE_TTL_MS = 5 * 60 * 1000 // 5 minutes
let _cache: Record<string, ProviderEntry> | null = null
let _cacheMtime = 0
let _cacheLoadedAt = 0

/**
 * Return the parsed contents of the models cache file.
 *
 * The previously parsed value is reused while the file's mtime is unchanged
 * and the value is younger than CACHE_TTL_MS; otherwise the file is read and
 * parsed again.
 *
 * @returns The parsed cache, or `null` when the file does not exist. If
 *          stat/read/parse throws, whatever was cached before (possibly
 *          `null`) is returned instead.
 */
function loadModelsDevCache(): Record<string, ProviderEntry> | null {
  if (!existsSync(MODELS_DEV_CACHE)) return null

  try {
    const { mtimeMs } = statSync(MODELS_DEV_CACHE)
    const checkedAt = Date.now()

    const unchanged = mtimeMs === _cacheMtime
    const withinTtl = checkedAt - _cacheLoadedAt < CACHE_TTL_MS
    if (_cache && unchanged && withinTtl) {
      return _cache
    }

    // Parse before touching the bookkeeping so a failed parse keeps the old entry intact.
    const text = readFileSync(MODELS_DEV_CACHE, 'utf-8')
    _cache = JSON.parse(text) as Record<string, ProviderEntry>
    _cacheMtime = mtimeMs
    _cacheLoadedAt = checkedAt
    return _cache
  } catch {
    // Best effort: hand back the stale value rather than failing the caller.
    return _cache
  }
}
// --- Profile helpers ---

/**
 * Resolve the directory that holds a profile's files.
 *
 * A missing/empty profile or the name 'default' maps to HERMES_BASE. Any other
 * name maps to `<HERMES_BASE>/profiles/<profile>` when that path exists, and
 * falls back to HERMES_BASE when it does not.
 *
 * NOTE(review): `profile` is joined into the path as-is; if callers pass
 * request-supplied values, confirm they are validated upstream.
 */
function getProfileDir(profile?: string): string {
  if (profile && profile !== 'default') {
    const candidate = join(HERMES_BASE, 'profiles', profile)
    if (existsSync(candidate)) return candidate
  }
  return HERMES_BASE
}
/**
 * Read the default model name from a profile's `config.yaml`.
 *
 * Looks for a top-level `model:` mapping and returns the value of its direct
 * `default:` key. The file is scanned line by line rather than parsed as YAML,
 * so only the simple block form is recognised:
 *
 *     model:
 *       default: some-model
 *
 * Surrounding quotes and a trailing ` # comment` are stripped from the value,
 * and `default:` does not have to be the first key under `model:`. Keys nested
 * deeper than the first child of `model:` are ignored.
 *
 * @param profileDir Directory expected to contain `config.yaml`.
 * @returns The model name, or `null` when the file is missing, unreadable, or
 *          has no usable `model.default` entry.
 */
function getDefaultModel(profileDir: string): string | null {
  const configPath = join(profileDir, 'config.yaml')
  if (!existsSync(configPath)) return null

  let content: string
  try {
    content = readFileSync(configPath, 'utf-8')
  } catch {
    return null
  }

  let inModelSection = false
  let childIndent = -1 // indentation of the first key under `model:`; -1 = not seen yet
  for (const line of content.split(/\r?\n/)) {
    // Blank lines and comment-only lines never open or close a section.
    if (/^\s*(#.*)?$/.test(line)) continue

    const indent = line.length - line.trimStart().length
    if (indent === 0) {
      // Any top-level key ends the previous section; only `model:` opens ours.
      inModelSection = /^model:\s*(#.*)?$/.test(line)
      childIndent = -1
      continue
    }
    if (!inModelSection) continue

    if (childIndent < 0) childIndent = indent
    // Deeper lines belong to a nested mapping, not to `model:` itself.
    if (indent !== childIndent) continue

    const match = line.match(/^\s+default:\s*(.+)$/)
    if (match) {
      const value = stripYamlScalar(match[1])
      return value === '' ? null : value
    }
  }
  return null
}

/** Remove surrounding quotes, or a trailing ` # comment`, from a raw YAML scalar. */
function stripYamlScalar(raw: string): string {
  const value = raw.trim()
  const quote = value.charAt(0)
  if (quote === '"' || quote === "'") {
    const closing = value.indexOf(quote, 1)
    if (closing > 0) return value.slice(1, closing).trim()
  }
  // In YAML a `#` starts a comment only when preceded by whitespace.
  return value.replace(/\s+#.*$/, '').trim()
}
// --- Context lookup ---

/**
 * Find the context length recorded for `modelName` in the models cache.
 *
 * Every provider is checked for an exact key match first; only when none
 * yields a value is a case-insensitive key comparison tried. Entries without
 * a truthy `limit.context` are ignored in both passes.
 *
 * @returns The context length, or `null` when the cache is unavailable or no
 *          entry matches.
 */
function lookupContextFromCache(modelName: string): number | null {
  const cache = loadModelsDevCache()
  if (!cache) return null

  const providers = Object.values(cache)

  // Pass 1: exact key lookup in each provider.
  for (const provider of providers) {
    const context = (provider.models || {})[modelName]?.limit?.context
    if (context) return context
  }

  // Pass 2: compare keys ignoring case.
  const wanted = modelName.toLowerCase()
  for (const provider of providers) {
    for (const [key, entry] of Object.entries(provider.models || {})) {
      if (key.toLowerCase() !== wanted) continue
      const context = entry?.limit?.context
      if (context) return context
    }
  }

  return null
}
/**
 * Resolve the context length of the default model configured for a profile.
 *
 * The profile's directory is located, its config is read for the default
 * model, and that model is looked up in the models cache. Whenever any step
 * yields nothing (or a falsy length), DEFAULT_CONTEXT_LENGTH is returned.
 *
 * @param profile Optional profile name; omitted or 'default' uses the base directory.
 */
export function getModelContextLength(profile?: string): number {
  const model = getDefaultModel(getProfileDir(profile))
  const resolved = model ? lookupContextFromCache(model) : null
  return resolved || DEFAULT_CONTEXT_LENGTH
}
+116
View File
@@ -0,0 +1,116 @@
import { describe, it, expect, vi } from 'vitest'
// Force the JSON fallback path: keep every real export of db/index (the
// spread of `actual`) and override only the two SQLite hooks below.
vi.mock('../../packages/server/src/db/index', async (importOriginal) => {
const actual = await importOriginal() as any
return {
...actual,
// Report SQLite as unavailable and hand out no database handle.
isSqliteAvailable: () => false,
getDb: () => null,
}
})
import {
jsonGet,
jsonSet,
jsonGetAll,
jsonDelete,
} from '../../packages/server/src/db/index'
describe('JSON fallback store', () => {
  // Smoke check only: verifies that db/index still exports the four JSON
  // helpers as functions. No data is written or read here, so the title
  // describes an export check rather than a round-trip.
  it('exports the JSON fallback helper functions', () => {
    expect(typeof jsonSet).toBe('function')
    expect(typeof jsonGet).toBe('function')
    expect(typeof jsonGetAll).toBe('function')
    expect(typeof jsonDelete).toBe('function')
  })
})
// Test ensureTable with a real in-memory SQLite (Node 22.5+)
describe('SQLite ensureTable', () => {
  it('creates table with correct columns and handles migration', () => {
    // node:sqlite is only loaded on Node 22.5+; older runtimes return early.
    const nodeVersion = process.versions.node.split('.').map(Number)
    const isAvailable = nodeVersion[0] > 22 || (nodeVersion[0] === 22 && nodeVersion[1] >= 5)
    if (!isAvailable) {
      console.log('Skipping SQLite test — Node < 22.5')
      return
    }

    const { DatabaseSync } = require('node:sqlite')
    const db = new DatabaseSync(':memory:')

    // Simulate ensureTable logic: create the table if missing, then add
    // columns the schema expects and drop columns it no longer lists.
    // NOTE(review): this is a local copy, not the ensureTable exported by
    // db/index — presumably kept in sync by hand; confirm.
    function ensureTable(tableName: string, schema: Record<string, string>): void {
      const colDefs = Object.entries(schema)
        .map(([col, def]) => `"${col}" ${def}`)
        .join(', ')
      db.exec(`CREATE TABLE IF NOT EXISTS "${tableName}" (${colDefs})`)

      const rows = db.prepare(`PRAGMA table_info("${tableName}")`).all() as Array<{ name: string }>
      const existingCols = new Set(rows.map(r => r.name))
      const expectedCols = new Set(Object.keys(schema))

      for (const col of expectedCols) {
        if (!existingCols.has(col)) {
          db.exec(`ALTER TABLE "${tableName}" ADD COLUMN "${col}" ${schema[col]}`)
        }
      }
      for (const col of existingCols) {
        if (!expectedCols.has(col)) {
          db.exec(`ALTER TABLE "${tableName}" DROP COLUMN "${col}"`)
        }
      }
    }

    // try/finally so the in-memory database is closed even when an assertion fails.
    try {
      // Initial schema
      const schema: Record<string, string> = {
        session_id: 'TEXT PRIMARY KEY',
        input_tokens: 'INTEGER NOT NULL DEFAULT 0',
        output_tokens: 'INTEGER NOT NULL DEFAULT 0',
        updated_at: 'INTEGER NOT NULL',
      }
      ensureTable('session_usage', schema)

      // Verify columns
      const cols = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
      const colNames = cols.map(c => c.name)
      expect(colNames).toContain('session_id')
      expect(colNames).toContain('input_tokens')
      expect(colNames).toContain('output_tokens')
      expect(colNames).toContain('updated_at')

      // Add a column
      schema['cost_usd'] = 'REAL DEFAULT 0'
      ensureTable('session_usage', schema)
      const cols2 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
      const colNames2 = cols2.map(c => c.name)
      expect(colNames2).toContain('cost_usd')

      // Remove a column
      delete schema['cost_usd']
      ensureTable('session_usage', schema)
      const cols3 = db.prepare(`PRAGMA table_info("session_usage")`).all() as Array<{ name: string }>
      const colNames3 = cols3.map(c => c.name)
      expect(colNames3).not.toContain('cost_usd')

      // Verify INSERT works
      db.prepare(
        `INSERT INTO session_usage (session_id, input_tokens, output_tokens, updated_at)
VALUES (?, ?, ?, ?)`,
      ).run('test-session', 100, 50, Date.now())
      const row = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session') as any
      expect(row.session_id).toBe('test-session')
      expect(row.input_tokens).toBe(100)
      expect(row.output_tokens).toBe(50)

      // Verify DELETE works
      db.prepare('DELETE FROM session_usage WHERE session_id = ?').run('test-session')
      const deleted = db.prepare('SELECT * FROM session_usage WHERE session_id = ?').get('test-session')
      expect(deleted).toBeUndefined()
    } finally {
      db.close()
    }
  })
})
+258 -4
View File
@@ -9,10 +9,18 @@ vi.mock('../../packages/server/src/services/gateway-bootstrap', () => ({
getGatewayManagerInstance: () => null,
}))
// Mock updateUsage so we can assert calls without real DB.
// The spy is created inside vi.hoisted() so that it already exists when the
// vi.mock() factory below references it.
const { mockUpdateUsage } = vi.hoisted(() => ({
mockUpdateUsage: vi.fn(),
}))
// Replace the usage-store module with a stub exposing only updateUsage.
vi.mock('../../packages/server/src/db/hermes/usage-store', () => ({
updateUsage: mockUpdateUsage,
}))
const mockFetch = vi.fn()
vi.stubGlobal('fetch', mockFetch)
import { proxy } from '../../packages/server/src/routes/hermes/proxy-handler'
import { proxy, setRunSession } from '../../packages/server/src/routes/hermes/proxy-handler'
function createMockCtx(overrides: Record<string, any> = {}) {
const ctx: any = {
@@ -42,6 +50,25 @@ function createMockCtx(overrides: Record<string, any> = {}) {
return ctx
}
/**
 * Test helper: expose a list of strings as a byte stream.
 * Every string is UTF-8 encoded and delivered as its own chunk, one per pull
 * and in array order; the stream closes once all strings have been handed out.
 */
function createSSEBody(events: string[]): ReadableStream<Uint8Array> {
  const utf8 = new TextEncoder()
  const remaining = events[Symbol.iterator]()
  return new ReadableStream<Uint8Array>({
    pull(controller) {
      const next = remaining.next()
      if (next.done) {
        controller.close()
        return
      }
      controller.enqueue(utf8.encode(next.value))
    },
  })
}
describe('Proxy Handler', () => {
beforeEach(() => {
vi.clearAllMocks()
@@ -130,9 +157,6 @@ describe('Proxy Handler', () => {
})
it('returns 502 on connection failure', async () => {
// waitForGatewayReady loops calling fetch(healthUrl) until res.ok or timeout.
// Return ok:true for health checks so the loop exits immediately (gateway
// "ready"), then the retry fetch also fails with ECONNREFUSED → 502.
mockFetch.mockImplementation((url: string) => {
if (typeof url === 'string' && url.includes('/health')) {
return Promise.resolve({ ok: true })
@@ -161,3 +185,233 @@ describe('Proxy Handler', () => {
expect(ctx.status).toBe(404)
})
})
// Exercises proxy() for POST /api/hermes/v1/runs with a mocked upstream fetch.
describe('POST /v1/runs — session_id capture', () => {
beforeEach(() => {
vi.clearAllMocks()
})
// Upstream answers with JSON containing run_id; request body carries session_id.
// NOTE(review): only the forwarding of the response is asserted; the
// run_id → session_id mapping named in the title is not checked here.
it('captures run_id → session_id mapping from POST /v1/runs', async () => {
const runId = 'run-abc-123'
const sessionId = 'session-xyz'
const responseBody = JSON.stringify({ run_id: runId, status: 'queued' })
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
text: () => Promise.resolve(responseBody),
body: null,
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: {
body: { session_id: sessionId, input: 'hello', model: 'gpt-4' },
},
})
await proxy(ctx)
// Verify the response was forwarded to client
expect(ctx.res.write).toHaveBeenCalledWith(responseBody)
expect(ctx.res.end).toHaveBeenCalled()
})
// Same upstream response, but the request body has no session_id.
it('falls through to normal stream when POST body has no session_id', async () => {
const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
text: () => Promise.resolve(responseBody),
body: null,
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: { body: { input: 'hello' } }, // no session_id
})
await proxy(ctx)
// Should still forward the response
expect(ctx.res.end).toHaveBeenCalled()
})
// Checks the outgoing fetch body: the already-parsed request body must reach
// upstream as a JSON string.
// NOTE(review): this upstream mock exposes body.getReader() but no text(),
// while the request does include session_id. Presumably the handler's
// session-capture branch fails on res.text() and falls back to the default
// pipe path — confirm that is the intended route for this test.
it('serializes parsed JSON body when rawBody is not available', async () => {
const responseBody = JSON.stringify({ run_id: 'r1', status: 'queued' })
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'application/json' }),
body: {
getReader: () => {
const encoder = new TextEncoder()
let done = false
return {
// Yields the response once, then reports the stream as finished.
read: () => {
if (done) return Promise.resolve({ done: true, value: undefined })
done = true
return Promise.resolve({ done: false, value: encoder.encode(responseBody) })
},
}
},
},
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs',
req: { method: 'POST' },
request: { body: { session_id: 's1', input: 'test' } },
})
await proxy(ctx)
// Verify fetch was called with stringified body
const [, options] = mockFetch.mock.calls[0]
expect(typeof options.body).toBe('string')
const parsed = JSON.parse(options.body)
expect(parsed.session_id).toBe('s1')
expect(parsed.input).toBe('test')
})
})
// Exercises proxy() for GET /api/hermes/v1/runs/{id}/events with a mocked
// text/event-stream upstream, asserting on the mocked updateUsage.
describe('SSE stream interception — run.completed', () => {
beforeEach(() => {
vi.clearAllMocks()
})
// Happy path: mapping registered, stream ends with run.completed + usage.
it('intercepts run.completed and calls updateUsage', async () => {
const runId = 'run-test-1'
const sessionId = 'session-test-1'
// Pre-populate the run → session mapping
setRunSession(runId, sessionId)
const sseData = [
`data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hello' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 13949, output_tokens: 45, total_tokens: 13994 } })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: `?token=test&profile=default`,
})
await proxy(ctx)
// Verify updateUsage was called with correct values
// (session id, input_tokens, output_tokens — total_tokens is not passed on)
expect(mockUpdateUsage).toHaveBeenCalledWith(sessionId, 13949, 45)
// Verify SSE data was forwarded to client
expect(ctx.res.write).toHaveBeenCalled()
expect(ctx.res.end).toHaveBeenCalled()
})
// No setRunSession call for 'unknown-run' in this test, so usage must be dropped.
it('does not call updateUsage when no mapping exists', async () => {
const sseData = [
`data: ${JSON.stringify({ event: 'run.completed', run_id: 'unknown-run', usage: { input_tokens: 100, output_tokens: 50, total_tokens: 150 } })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: '/api/hermes/v1/runs/unknown-run/events',
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).not.toHaveBeenCalled()
})
// Mapping exists, but the stream ends with run.failed and carries no usage.
it('does not call updateUsage for non-run.completed events', async () => {
const runId = 'run-no-complete'
setRunSession(runId, 'session-x')
const sseData = [
`data: ${JSON.stringify({ event: 'run.started', run_id: runId })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'Hi' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.failed', run_id: runId, error: 'timeout' })}\n\n`,
]
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody(sseData),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).not.toHaveBeenCalled()
})
// Three events are joined and delivered as one stream chunk.
it('handles SSE with multiple events in a single chunk', async () => {
const runId = 'run-multi'
setRunSession(runId, 'session-multi')
// All events in one chunk
const singleChunk = [
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'A' })}\n\n`,
`data: ${JSON.stringify({ event: 'message.delta', run_id: runId, delta: 'B' })}\n\n`,
`data: ${JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 500, output_tokens: 100, total_tokens: 600 } })}\n\n`,
].join('')
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody([singleChunk]),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).toHaveBeenCalledWith('session-multi', 500, 100)
})
// One run.completed event is cut after 30 characters, i.e. inside its JSON
// payload, so the event is only complete once the second chunk arrives.
it('handles SSE split across multiple chunks', async () => {
const runId = 'run-split'
setRunSession(runId, 'session-split')
const completedJson = JSON.stringify({ event: 'run.completed', run_id: runId, usage: { input_tokens: 200, output_tokens: 50, total_tokens: 250 } })
const sseEvent = `data: ${completedJson}\n\n`
// Split the event across two chunks
const chunk1 = sseEvent.slice(0, 30)
const chunk2 = sseEvent.slice(30)
mockFetch.mockResolvedValue({
status: 200,
headers: new Headers({ 'content-type': 'text/event-stream' }),
body: createSSEBody([chunk1, chunk2]),
})
const ctx = createMockCtx({
path: `/api/hermes/v1/runs/${runId}/events`,
search: '',
})
await proxy(ctx)
expect(mockUpdateUsage).toHaveBeenCalledWith('session-split', 200, 50)
})
})
+4 -4
View File
@@ -63,7 +63,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.listSessionSummaries(undefined, 50)
expect(databaseSyncMock).toHaveBeenCalledWith('/tmp/hermes-profile/state.db', { open: true, readOnly: true })
@@ -124,7 +124,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.listSessionSummaries('telegram', 2)
expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('AND s.source = ?'))
@@ -218,7 +218,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.searchSessionSummaries('docker', undefined, 10)
expect(prepareMock).toHaveBeenCalledWith(expect.stringContaining('messages_fts MATCH'))
@@ -265,7 +265,7 @@ describe('session DB summaries', () => {
},
])
const mod = await import('../../packages/server/src/services/hermes/sessions-db')
const mod = await import('../../packages/server/src/db/hermes/sessions-db')
const rows = await mod.searchSessionSummaries('记忆断裂', undefined, 10)
expect(likeAllMock).toHaveBeenCalledWith('记忆断裂', '%记忆断裂%')
+9
View File
@@ -7,6 +7,9 @@ const searchMock = vi.fn(async (ctx: any) => { ctx.body = { results: [{ id: 'sea
const getMock = vi.fn(async (ctx: any) => { ctx.body = { session: { id: ctx.params.id } } })
const removeMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
const renameMock = vi.fn(async (ctx: any) => { ctx.body = { ok: true } })
const usageBatchMock = vi.fn(async (ctx: any) => { ctx.body = {} })
const usageSingleMock = vi.fn(async (ctx: any) => { ctx.body = { input_tokens: 0, output_tokens: 0 } })
const contextLengthMock = vi.fn(async (ctx: any) => { ctx.body = { context_length: 200000 } })
vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
listConversations: listConversationsMock,
@@ -16,6 +19,9 @@ vi.mock('../../packages/server/src/controllers/hermes/sessions', () => ({
get: getMock,
remove: removeMock,
rename: renameMock,
usageBatch: usageBatchMock,
usageSingle: usageSingleMock,
contextLength: contextLengthMock,
}))
describe('session routes', () => {
@@ -40,7 +46,10 @@ describe('session routes', () => {
'/api/hermes/sessions',
'/api/hermes/search/sessions',
'/api/hermes/sessions/search',
'/api/hermes/sessions/usage',
'/api/hermes/sessions/context-length',
'/api/hermes/sessions/:id',
'/api/hermes/sessions/:id/usage',
'/api/hermes/sessions/:id/rename',
]))
})
+159
View File
@@ -0,0 +1,159 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
// Mock the db index module so we can test usage-store in isolation.
// The spies are created inside vi.hoisted() so that they already exist when
// the vi.mock() factory below references them.
const { mockEnsureTable, mockJsonSet, mockJsonGet, mockJsonGetAll, mockJsonDelete } = vi.hoisted(() => ({
mockEnsureTable: vi.fn(),
mockJsonSet: vi.fn(),
mockJsonGet: vi.fn(),
mockJsonGetAll: vi.fn(),
mockJsonDelete: vi.fn(),
}))
// Full replacement of db/index: SQLite reported as unavailable, no db handle,
// and every JSON helper routed to a spy.
vi.mock('../../packages/server/src/db/index', () => ({
isSqliteAvailable: () => false, // Force JSON fallback path
ensureTable: mockEnsureTable,
getDb: () => null,
jsonSet: mockJsonSet,
jsonGet: mockJsonGet,
jsonGetAll: mockJsonGetAll,
jsonDelete: mockJsonDelete,
}))
import {
initUsageStore,
updateUsage,
getUsage,
getUsageBatch,
deleteUsage,
} from '../../packages/server/src/db/hermes/usage-store'
// Covers usage-store when db/index reports SQLite as unavailable: every
// operation is expected to go through the mocked JSON helpers.
describe('Usage Store (JSON fallback)', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  // Title matches the assertion: with isSqliteAvailable() mocked to false,
  // initUsageStore must not touch ensureTable.
  it('initUsageStore does not call ensureTable when SQLite is unavailable', () => {
    initUsageStore()
    expect(mockEnsureTable).not.toHaveBeenCalled()
  })

  it('updateUsage writes via jsonSet', () => {
    updateUsage('session-1', 100, 50)
    expect(mockJsonSet).toHaveBeenCalledWith(
      'session_usage',
      'session-1',
      expect.objectContaining({
        input_tokens: 100,
        output_tokens: 50,
        updated_at: expect.any(Number),
      }),
    )
  })

  it('getUsage reads via jsonGet', () => {
    mockJsonGet.mockReturnValue({ input_tokens: 200, output_tokens: 80 })
    const result = getUsage('session-1')
    expect(result).toEqual({ input_tokens: 200, output_tokens: 80 })
    expect(mockJsonGet).toHaveBeenCalledWith('session_usage', 'session-1')
  })

  it('getUsage returns undefined when jsonGet returns nothing', () => {
    mockJsonGet.mockReturnValue(undefined)
    const result = getUsage('nonexistent')
    expect(result).toBeUndefined()
  })

  // An empty id list must short-circuit without reading the store at all.
  it('getUsageBatch returns empty map for empty input', () => {
    const result = getUsageBatch([])
    expect(result).toEqual({})
    expect(mockJsonGetAll).not.toHaveBeenCalled()
  })

  // The store holds three sessions; only requested ids that exist come back.
  it('getUsageBatch returns matching records', () => {
    mockJsonGetAll.mockReturnValue({
      'session-1': { input_tokens: 100, output_tokens: 50 },
      'session-2': { input_tokens: 200, output_tokens: 80 },
      'session-3': { input_tokens: 300, output_tokens: 120 },
    })
    const result = getUsageBatch(['session-1', 'session-3', 'session-missing'])
    expect(result).toEqual({
      'session-1': { input_tokens: 100, output_tokens: 50 },
      'session-3': { input_tokens: 300, output_tokens: 120 },
    })
  })

  it('deleteUsage calls jsonDelete', () => {
    deleteUsage('session-1')
    expect(mockJsonDelete).toHaveBeenCalledWith('session_usage', 'session-1')
  })
})
// Test with SQLite available (mocked).
// Each test resets the module registry, installs a db/index mock that reports
// SQLite as available, and then imports usage-store dynamically so the import
// happens after the mock is registered.
describe('Usage Store (SQLite path)', () => {
let runMock: ReturnType<typeof vi.fn>
let getMock: ReturnType<typeof vi.fn>
let allMock: ReturnType<typeof vi.fn>
let deleteMock: ReturnType<typeof vi.fn>
beforeEach(() => {
vi.resetModules()
runMock = vi.fn()
getMock = vi.fn()
allMock = vi.fn()
deleteMock = vi.fn()
vi.doMock('../../packages/server/src/db/index', () => ({
isSqliteAvailable: () => true,
ensureTable: vi.fn(),
getDb: () => ({
// Fake prepare(): picks the statement stub from the SQL text. Checks run
// top to bottom, so any SQL containing INSERT or UPDATE gets runMock.
prepare: vi.fn((sql: string) => {
if (sql.includes('INSERT') || sql.includes('UPDATE')) return { run: runMock }
if (sql.includes('SELECT') && sql.includes('WHERE session_id = ?')) return { get: getMock }
if (sql.includes('SELECT') && sql.includes('IN')) return { all: allMock }
if (sql.includes('DELETE')) return { run: deleteMock }
return { run: runMock, get: getMock, all: allMock }
}),
}),
jsonSet: vi.fn(),
jsonGet: vi.fn(),
jsonGetAll: vi.fn(),
jsonDelete: vi.fn(),
}))
})
// Bound parameters expected: session id, input tokens, output tokens, timestamp.
it('updateUsage runs INSERT ... ON CONFLICT query', async () => {
const { updateUsage } = await import('../../packages/server/src/db/hermes/usage-store')
updateUsage('s1', 500, 200)
expect(runMock).toHaveBeenCalledWith('s1', 500, 200, expect.any(Number))
})
it('getUsage queries by session_id', async () => {
getMock.mockReturnValue({ input_tokens: 999, output_tokens: 111 })
const { getUsage } = await import('../../packages/server/src/db/hermes/usage-store')
const result = getUsage('s1')
expect(getMock).toHaveBeenCalledWith('s1')
expect(result).toEqual({ input_tokens: 999, output_tokens: 111 })
})
// Ids are passed as individual bound parameters; returned rows are keyed by
// session_id in the result, with session_id itself left out of each value.
it('getUsageBatch queries with IN clause', async () => {
allMock.mockReturnValue([
{ session_id: 'a', input_tokens: 1, output_tokens: 2 },
{ session_id: 'b', input_tokens: 3, output_tokens: 4 },
])
const { getUsageBatch } = await import('../../packages/server/src/db/hermes/usage-store')
const result = getUsageBatch(['a', 'b', 'c'])
expect(allMock).toHaveBeenCalledWith('a', 'b', 'c')
expect(result).toEqual({
a: { input_tokens: 1, output_tokens: 2 },
b: { input_tokens: 3, output_tokens: 4 },
})
})
it('deleteUsage runs DELETE query', async () => {
const { deleteUsage } = await import('../../packages/server/src/db/hermes/usage-store')
deleteUsage('s1')
expect(deleteMock).toHaveBeenCalledWith('s1')
})
})