feat: add token usage tracking, context display, and dynamic context length (#132)
* fix: specify TS_NODE_PROJECT for dev:server script

  ts-node/register resolves tsconfig from the entry file upward, finding the root solution-style tsconfig.json (no compilerOptions). This causes target to default to ES3, breaking MapIterator spread syntax (TS2802). Set TS_NODE_PROJECT env var to point to the server tsconfig which targets ES2024.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: add token usage tracking, context display, and dynamic context length

  - Intercept SSE proxy to capture run.completed events and persist token usage (input_tokens, output_tokens) per session to SQLite/JSON store
  - Display context usage bar in ChatInput showing used/total/remaining tokens
  - Resolve actual context length from Hermes models_dev_cache.json based on the active profile's default model (fallback 200K), with 5min in-memory cache
  - Move sessions-db.ts to db/hermes/ for unified database layer
  - Add usage store with SQLite + JSON fallback (auto-migration via ensureTable)
  - Fix proxy SSE path regex to match rewritten upstream path
  - Fix route ordering: /sessions/usage before /sessions/:id to avoid 404
  - Fetch per-session usage on session enter instead of batch
  - Add unit tests for usage-store, db index, and proxy SSE interception

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,11 @@ export interface RunEvent {
|
||||
preview?: string
|
||||
timestamp?: number
|
||||
error?: string
|
||||
usage?: {
|
||||
input_tokens: number
|
||||
output_tokens: number
|
||||
total_tokens: number
|
||||
}
|
||||
}
|
||||
|
||||
export async function startRun(body: StartRunRequest): Promise<StartRunResponse> {
|
||||
|
||||
@@ -94,3 +94,26 @@ export async function renameSession(id: string, title: string): Promise<boolean>
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Requests token usage for several sessions with a single call.
 *
 * @param ids - Session ids to look up; sent comma-joined in the `ids` query parameter.
 * @returns The endpoint's response, typed as a record of token counts
 *   (presumably keyed by session id — confirm against the server route).
 *   An empty `ids` array resolves to `{}` without issuing a request.
 */
export async function fetchSessionUsage(ids: string[]): Promise<Record<string, { input_tokens: number; output_tokens: number }>> {
  if (!ids.length) {
    return {}
  }
  const query = new URLSearchParams({ ids: ids.join(',') })
  return request(`/api/hermes/sessions/usage?${query}`)
}
|
||||
|
||||
/**
 * Requests the token usage recorded for one session.
 *
 * @param id - Session id; it is percent-encoded before being placed in the URL path.
 * @returns The usage counters, or `null` when the request fails for any reason.
 */
export async function fetchSessionUsageSingle(id: string): Promise<{ input_tokens: number; output_tokens: number } | null> {
  try {
    // Encode the id so characters such as `/`, `?` or `#` cannot change which route is hit.
    const path = `/api/hermes/sessions/${encodeURIComponent(id)}/usage`
    return await request<{ input_tokens: number; output_tokens: number }>(path)
  } catch {
    // Best-effort lookup: every failure is reported to the caller as "no usage available".
    return null
  }
}
|
||||
|
||||
/**
 * Asks the server for the context length to display.
 *
 * @param profile - Optional profile name; when truthy it is sent as the `profile` query parameter,
 *   otherwise the request carries no query string.
 * @returns The `context_length` field of the response.
 */
export async function fetchContextLength(profile?: string): Promise<number> {
  const search = profile ? `?${new URLSearchParams({ profile })}` : ''
  const { context_length } = await request<{ context_length: number }>(`/api/hermes/sessions/context-length${search}`)
  return context_length
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
<script setup lang="ts">
|
||||
import type { Attachment } from '@/stores/hermes/chat'
|
||||
import { useChatStore } from '@/stores/hermes/chat'
|
||||
import { useAppStore } from '@/stores/hermes/app'
|
||||
import { useProfilesStore } from '@/stores/hermes/profiles'
|
||||
import { fetchContextLength } from '@/api/hermes/sessions'
|
||||
import { NButton, NTooltip } from 'naive-ui'
|
||||
import { computed, ref } from 'vue'
|
||||
import { computed, ref, onMounted, watch } from 'vue'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
|
||||
const chatStore = useChatStore()
|
||||
@@ -17,25 +20,41 @@ const isComposing = ref(false)
|
||||
|
||||
const canSend = computed(() => inputText.value.trim() || attachments.value.length > 0)
|
||||
|
||||
// --- Voice input (Web Speech API) ---
|
||||
// TODO: re-enable when needed — browser-native speech-to-text
|
||||
// const hasSpeechRecognition = ref(false)
|
||||
// let recognition: SpeechRecognition | null = null
|
||||
// let finalTranscript = ''
|
||||
// let prefixText = ''
|
||||
// onMounted(() => {
|
||||
// const SR = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
|
||||
// if (!SR) return
|
||||
// recognition = new SR()
|
||||
// recognition.continuous = false
|
||||
// recognition.interimResults = true
|
||||
// recognition.lang = 'en-US'
|
||||
// hasSpeechRecognition.value = true
|
||||
// recognition.onresult = (event: SpeechRecognitionEvent) => { ... }
|
||||
// recognition.onend = () => { ... }
|
||||
// recognition.onerror = (event: SpeechRecognitionErrorEvent) => { ... }
|
||||
// })
|
||||
// onUnmounted(() => { if (recognition && isRecording.value) recognition.stop() })
|
||||
// --- Context info ---
|
||||
|
||||
// Context window assumed until the server reports one, and whenever the lookup fails.
const FALLBACK_CONTEXT = 200000

const contextLength = ref(FALLBACK_CONTEXT)

// Incremented for every lookup so a slow, superseded response cannot overwrite a newer one.
let contextLengthRequestId = 0

/**
 * Refreshes `contextLength` from the server, passing the active profile name when one is set.
 * Falls back to `FALLBACK_CONTEXT` when the request throws or returns a value that is not a
 * positive finite number (the value is later used as a divisor for the usage bar).
 */
async function loadContextLength() {
  const requestId = ++contextLengthRequestId
  let resolved = FALLBACK_CONTEXT
  try {
    const profile = useProfilesStore().activeProfileName || undefined
    const fetched = await fetchContextLength(profile)
    if (Number.isFinite(fetched) && fetched > 0) {
      resolved = fetched
    }
  } catch {
    // Non-critical: keep the fallback value.
  }
  // Only the most recently started lookup may publish its result.
  if (requestId === contextLengthRequestId) {
    contextLength.value = resolved
  }
}

onMounted(loadContextLength)
// Reload whenever the active profile or the selected model changes.
watch(() => useProfilesStore().activeProfileName, loadContextLength)
watch(() => useAppStore().selectedModel, loadContextLength)
|
||||
|
||||
// Input plus output tokens of the active session; a missing session or counter counts as 0.
const totalTokens = computed(() => {
  const session = chatStore.activeSession
  return (session?.inputTokens ?? 0) + (session?.outputTokens ?? 0)
})
|
||||
|
||||
// Tokens left in the context window; clamped at 0 so an over-full session never shows a negative amount.
const remainingTokens = computed(() => Math.max(contextLength.value - totalTokens.value, 0))

// Share of the context window in use, as a percentage capped at 100 (bound to the bar's CSS width).
const usagePercent = computed(() => {
  const limit = contextLength.value
  // A non-positive or NaN limit would turn the ratio into NaN/Infinity and produce an invalid width.
  if (!(limit > 0)) {
    return totalTokens.value > 0 ? 100 : 0
  }
  return Math.min((totalTokens.value / limit) * 100, 100)
})
|
||||
|
||||
/**
 * Formats a token count compactly: values below 1000 are printed as-is, thousands get a
 * one-decimal `k` suffix and millions a one-decimal `M` suffix.
 *
 * Values that would round up to "1000.0k" are promoted to the `M` form, and negative values
 * are abbreviated by magnitude with the sign kept in front.
 *
 * @param n - Token count to format.
 * @returns The abbreviated string, e.g. `"1.5k"` or `"2.5M"`.
 */
function formatTokens(n: number): string {
  const magnitude = Math.abs(n)
  // Written as a negated `>=` so NaN also takes the plain path, as it always did.
  if (!(magnitude >= 1000)) return String(n)

  const sign = n < 0 ? '-' : ''
  const thousands = (magnitude / 1000).toFixed(1)
  if (magnitude < 1000000 && thousands !== '1000.0') {
    return `${sign}${thousands}k`
  }
  return `${sign}${(magnitude / 1000000).toFixed(1)}M`
}
|
||||
|
||||
// --- File attachment helpers ---
|
||||
|
||||
@@ -176,6 +195,33 @@ function isImage(type: string): boolean {
|
||||
|
||||
<template>
|
||||
<div class="chat-input-area">
|
||||
<!-- Top bar: attach + context info -->
|
||||
<div class="input-top-bar">
|
||||
<NTooltip trigger="hover">
|
||||
<template #trigger>
|
||||
<NButton quaternary size="tiny" @click="handleAttachClick" circle>
|
||||
<template #icon>
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
|
||||
</template>
|
||||
</NButton>
|
||||
</template>
|
||||
{{ t('chat.attachFiles') }}
|
||||
</NTooltip>
|
||||
<span v-if="totalTokens > 0" class="context-info" :class="{ 'context-warning': usagePercent > 80 }">
|
||||
{{ formatTokens(totalTokens) }} / {{ formatTokens(contextLength) }} · {{ t('chat.contextRemaining') }} {{ formatTokens(remainingTokens) }}
|
||||
</span>
|
||||
<div v-if="totalTokens > 0" class="context-bar">
|
||||
<div
|
||||
class="context-bar-fill"
|
||||
:class="{
|
||||
'context-bar-warn': usagePercent > 60 && usagePercent <= 80,
|
||||
'context-bar-danger': usagePercent > 80,
|
||||
}"
|
||||
:style="{ width: `${usagePercent}%` }"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Attachment previews -->
|
||||
<div v-if="attachments.length > 0" class="attachment-previews">
|
||||
<div
|
||||
@@ -228,16 +274,6 @@ function isImage(type: string): boolean {
|
||||
@paste="handlePaste"
|
||||
></textarea>
|
||||
<div class="input-actions">
|
||||
<NTooltip trigger="hover">
|
||||
<template #trigger>
|
||||
<NButton quaternary size="small" @click="handleAttachClick" circle>
|
||||
<template #icon>
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
|
||||
</template>
|
||||
</NButton>
|
||||
</template>
|
||||
{{ t('chat.attachFiles') }}
|
||||
</NTooltip>
|
||||
<NButton
|
||||
v-if="chatStore.isStreaming"
|
||||
size="small"
|
||||
@@ -271,6 +307,45 @@ function isImage(type: string): boolean {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
// Row above the textarea holding the attach button and the context usage readout.
.input-top-bar {
  display: flex;
  gap: 8px;
  align-items: center;
  padding: 0 0 6px;
}

// Textual "used / total · remaining" readout.
.context-info {
  color: $text-muted;
  font-size: 11px;

  // Highlight applied through the `context-warning` class.
  &.context-warning {
    color: #e8a735;
  }
}

// Track of the small usage bar.
.context-bar {
  height: 4px;
  width: 60px;
  overflow: hidden;
  border-radius: 2px;
  background: rgba(128, 128, 128, 0.2);
}

// Filled part of the usage bar; its width is set inline by the component.
.context-bar-fill {
  height: 100%;
  border-radius: 2px;
  background: linear-gradient(90deg, rgba(128, 128, 128, 0.3), rgba(128, 128, 128, 0.6));
  transition: width 0.3s ease;

  // Amber gradient for the `context-bar-warn` state.
  &.context-bar-warn {
    background: linear-gradient(90deg, #c98a1a, #e8a735);
  }

  // Red gradient for the `context-bar-danger` state.
  &.context-bar-danger {
    background: linear-gradient(90deg, #c43a2a, #e85d4a);
  }
}
|
||||
|
||||
.attachment-previews {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
|
||||
@@ -169,57 +169,6 @@ const headerTitle = computed(() =>
|
||||
currentMode.value === 'live' ? t('chat.liveSessions') : activeSessionTitle.value,
|
||||
)
|
||||
|
||||
// Sum of the active session's input and output token counters; absent values count as 0.
const totalTokens = computed(() => {
  const active = chatStore.activeSession
  const inputCount = active?.inputTokens ?? 0
  const outputCount = active?.outputTokens ?? 0
  return inputCount + outputCount
})
|
||||
|
||||
// Known context window sizes (in tokens), keyed by a substring of the model identifier.
const MODEL_CONTEXT: Record<string, number> = {
  'claude-opus-4': 200000,
  'claude-sonnet-4': 200000,
  'claude-haiku-4': 200000,
  'claude-3.5-sonnet': 200000,
  'claude-3.5-haiku': 200000,
  'claude-3-opus': 200000,
  'claude-3-sonnet': 200000,
  'claude-3-haiku': 200000,
  'gpt-4o': 128000,
  'gpt-4o-mini': 128000,
  'gpt-4-turbo': 128000,
  'gpt-4': 8192,
  'gpt-3.5-turbo': 16385,
  'o1': 200000,
  'o1-mini': 128000,
  'o3': 200000,
  'o3-mini': 200000,
  'o4-mini': 200000,
  'deepseek-chat': 65536,
  'deepseek-reasoner': 65536,
  'gemini-2.5-pro': 1000000,
  'gemini-2.5-flash': 1000000,
  'gemini-2.0-flash': 1000000,
  'glm-4-plus': 128000,
  'glm-4': 128000,
  'qwen-max': 128000,
  'qwen-plus': 128000,
  'qwen-turbo': 128000,
}

/**
 * Context window of the active session's model, or `null` when no table key occurs in the
 * model name.
 *
 * The longest matching key wins, so a specific entry such as `o1-mini` is not shadowed by a
 * shorter key (`o1`) that merely appears earlier in the table.
 */
const contextWindow = computed(() => {
  const model = chatStore.activeSession?.model || ''
  let bestKey = ''
  let bestSize: number | null = null
  for (const [key, size] of Object.entries(MODEL_CONTEXT)) {
    if (key.length > bestKey.length && model.includes(key)) {
      bestKey = key
      bestSize = size
    }
  }
  return bestSize
})
|
||||
|
||||
/**
 * Renders a token count in compact form: plain below 1000, one-decimal `k` for thousands,
 * one-decimal `M` for millions.
 *
 * Counts that would otherwise round to "1000.0k" are shown in the `M` form instead, and
 * negative counts are abbreviated by magnitude with a leading minus sign.
 *
 * @param n - Token count to format.
 * @returns The abbreviated string, e.g. `"8.2k"` or `"1.0M"`.
 */
function formatTokens(n: number): string {
  const size = Math.abs(n)
  // Negated comparison keeps NaN on the plain `String(n)` path.
  if (!(size >= 1000)) return String(n)

  const prefix = n < 0 ? '-' : ''
  const inThousands = (size / 1000).toFixed(1)
  const fitsThousands = size < 1000000 && inThousands !== '1000.0'
  return fitsThousands
    ? `${prefix}${inThousands}k`
    : `${prefix}${(size / 1000000).toFixed(1)}M`
}
|
||||
|
||||
// Source of the active session while in chat mode; an empty string in any other mode
// or when the session has no source.
const activeSessionSource = computed(() => {
  if (currentMode.value !== 'chat') {
    return ''
  }
  return chatStore.activeSession?.source || ''
})
|
||||
@@ -446,9 +395,6 @@ async function handleRenameConfirm() {
|
||||
|
||||
<template v-if="currentMode === 'chat'">
|
||||
<MessageList />
|
||||
<div v-if="contextWindow !== null" class="context-info">
|
||||
<span>{{ formatTokens(totalTokens) }} / {{ formatTokens(contextWindow) }}</span>
|
||||
</div>
|
||||
<ChatInput />
|
||||
</template>
|
||||
<ConversationMonitorPane v-else :human-only="sessionBrowserPrefsStore.humanOnly" />
|
||||
@@ -799,20 +745,9 @@ async function handleRenameConfirm() {
|
||||
margin-right: 4px;
|
||||
}
|
||||
|
||||
// Token usage line rendered between the message list and the input.
.context-info {
  flex-shrink: 0;
  padding: 0 20px 4px;
  color: $text-muted;
  font-size: 11px;
}

// Tighter horizontal padding at or below the mobile breakpoint.
@media (max-width: $breakpoint-mobile) {
  .chat-header {
    padding: 16px 12px 16px 52px;
  }

  .context-info {
    padding: 0 12px 4px;
  }
}
|
||||
</style>
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// Chat
|
||||
chat: {
|
||||
contextRemaining: 'übrig',
|
||||
emptyState: 'Starten Sie eine Konversation mit Hermes Agent',
|
||||
inputPlaceholder: 'Nachricht eingeben... (Enter zum Senden, Shift+Enter fur neue Zeile)',
|
||||
attachFiles: 'Dateien anhangen',
|
||||
|
||||
@@ -73,6 +73,7 @@ export default {
|
||||
|
||||
// Chat
|
||||
chat: {
|
||||
contextRemaining: 'remaining',
|
||||
emptyState: 'Start a conversation with Hermes Agent',
|
||||
inputPlaceholder: 'Type a message... (Enter to send, Shift+Enter for new line)',
|
||||
attachFiles: 'Attach files',
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// Chat
|
||||
chat: {
|
||||
contextRemaining: 'restante',
|
||||
emptyState: 'Inicia una conversacion con Hermes Agent',
|
||||
inputPlaceholder: 'Escribe un mensaje... (Enter para enviar, Shift+Enter para nueva linea)',
|
||||
attachFiles: 'Adjuntar archivos',
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// Chat
|
||||
chat: {
|
||||
contextRemaining: 'restant',
|
||||
emptyState: 'Demarrer une conversation avec Hermes Agent',
|
||||
inputPlaceholder: 'Tapez un message... (Entree pour envoyer, Shift+Entree pour un saut de ligne)',
|
||||
attachFiles: 'Joindre des fichiers',
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// チャット
|
||||
chat: {
|
||||
contextRemaining: '残り',
|
||||
emptyState: 'Hermes Agent と会話を開始しましょう',
|
||||
inputPlaceholder: 'メッセージを入力... (Enter で送信、Shift+Enter で改行)',
|
||||
attachFiles: 'ファイルを添付',
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// 채팅
|
||||
chat: {
|
||||
contextRemaining: '남음',
|
||||
emptyState: 'Hermes Agent와 대화를 시작하세요',
|
||||
inputPlaceholder: '메시지를 입력하세요... (Enter로 전송, Shift+Enter로 줄바꿈)',
|
||||
attachFiles: '파일 첨부',
|
||||
|
||||
@@ -61,6 +61,7 @@ export default {
|
||||
|
||||
// Chat
|
||||
chat: {
|
||||
contextRemaining: 'restante',
|
||||
emptyState: 'Inicie uma conversa com o Hermes Agent',
|
||||
inputPlaceholder: 'Digite uma mensagem... (Enter para enviar, Shift+Enter para nova linha)',
|
||||
attachFiles: 'Anexar arquivos',
|
||||
|
||||
@@ -73,6 +73,7 @@ export default {
|
||||
|
||||
// 对话
|
||||
chat: {
|
||||
contextRemaining: '剩余',
|
||||
emptyState: '开始与 Hermes Agent 对话',
|
||||
inputPlaceholder: '输入消息... (Enter 发送,Shift+Enter 换行)',
|
||||
attachFiles: '添加附件',
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { startRun, streamRunEvents, type ChatMessage, type RunEvent } from '@/api/hermes/chat'
|
||||
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
|
||||
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref, computed } from 'vue'
|
||||
import { useAppStore } from './app'
|
||||
@@ -155,8 +155,6 @@ function mapHermesSession(s: SessionSummary): Session {
|
||||
model: s.model,
|
||||
provider: (s as any).billing_provider || '',
|
||||
messageCount: s.message_count,
|
||||
inputTokens: s.input_tokens,
|
||||
outputTokens: s.output_tokens,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -340,8 +338,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
|| (serverUsers === localUsers && serverAssistantLen >= localAssistantLen)
|
||||
if (serverIsAhead) {
|
||||
target.messages = mapped
|
||||
target.inputTokens = detail.input_tokens
|
||||
target.outputTokens = detail.output_tokens
|
||||
if (detail.title && !target.title) target.title = detail.title
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
}
|
||||
@@ -363,8 +359,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
// our "don't retreat" guard above skipped it — the server is
|
||||
// now the authoritative source of truth.
|
||||
target.messages = mapped
|
||||
target.inputTokens = detail.input_tokens
|
||||
target.outputTokens = detail.output_tokens
|
||||
if (detail.title) target.title = detail.title
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
clearInFlight(sid)
|
||||
@@ -447,8 +441,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (!target) return false
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
target.messages = mapped
|
||||
target.inputTokens = detail.input_tokens
|
||||
target.outputTokens = detail.output_tokens
|
||||
if (detail.title) target.title = detail.title
|
||||
persistActiveMessages()
|
||||
return true
|
||||
@@ -531,8 +523,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (serverIsAhead) {
|
||||
activeSession.value.messages = mapped
|
||||
}
|
||||
activeSession.value.inputTokens = detail.input_tokens
|
||||
activeSession.value.outputTokens = detail.output_tokens
|
||||
// Update title: use Hermes title, or fallback to first user message
|
||||
if (detail.title) {
|
||||
activeSession.value.title = detail.title
|
||||
@@ -557,6 +547,15 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (readInFlight(sessionId) && !streamStates.value.has(sessionId)) {
|
||||
startPolling(sessionId)
|
||||
}
|
||||
|
||||
// Fetch token usage for this session from web-ui DB
|
||||
try {
|
||||
const usage = await fetchSessionUsageSingle(sessionId)
|
||||
if (usage) {
|
||||
activeSession.value.inputTokens = usage.input_tokens
|
||||
activeSession.value.outputTokens = usage.output_tokens
|
||||
}
|
||||
} catch { /* non-critical */ }
|
||||
}
|
||||
|
||||
function newChat() {
|
||||
@@ -785,9 +784,15 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (lastMsg?.isStreaming) {
|
||||
updateMessage(sid, lastMsg.id, { isStreaming: false })
|
||||
}
|
||||
if (evt.usage) {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (target) {
|
||||
target.inputTokens = evt.usage.input_tokens
|
||||
target.outputTokens = evt.usage.output_tokens
|
||||
}
|
||||
}
|
||||
cleanup()
|
||||
updateSessionTitle(sid)
|
||||
// IMPORTANT ordering: persist the final cache BEFORE clearing
|
||||
// the in-flight marker. If the browser is reloading right now
|
||||
// and kills us between the two localStorage writes, we want
|
||||
// the next page load to still see in-flight === true (so
|
||||
|
||||
Reference in New Issue
Block a user