add bridge performance monitoring
This commit is contained in:
@@ -0,0 +1,63 @@
|
|||||||
|
import { request } from '../client'
|
||||||
|
|
||||||
|
/**
 * Resource usage of a single process, as carried inside a
 * {@link PerformanceRuntimeSnapshot}.
 */
export interface ProcessUsage {
  /** Operating-system process ID. */
  pid: number
  /** Which kind of process this entry describes. */
  role: 'web' | 'broker' | 'worker'
  /** Profile name attached to the process, when there is one. */
  profile?: string
  /** Whether the process is running. */
  running: boolean
  /** CPU usage as a percentage (the performance view renders it with a `%` suffix). */
  cpuPercent: number
  /** Resident set size in bytes, per the field name. */
  memoryRssBytes: number
  /** NOTE(review): presumably the process command line — confirm against the backend. */
  command?: string
  /** NOTE(review): presumably set when sampling this process failed — confirm against the backend. */
  error?: string
}
|
||||||
|
|
||||||
|
/**
 * Point-in-time runtime metrics returned by `/api/hermes/performance/runtime`:
 * host system, the web process, the bridge (broker + workers) and sessions.
 */
export interface PerformanceRuntimeSnapshot {
  /** Capture time; the performance view passes it straight to `new Date(...)`, i.e. treats it as epoch milliseconds. */
  timestamp: number
  /** Host-level metrics. */
  system: {
    platform: string
    arch: string
    uptimeSeconds: number
    cpuCount: number
    /** Percentage value; the view uses it directly as a meter width. */
    cpuPercent: number
    loadAverage: number[]
    totalMemoryBytes: number
    freeMemoryBytes: number
    usedMemoryBytes: number
    /** Percentage value; the view uses it directly as a meter width. */
    memoryPercent: number
  }
  /** Metrics of the web process itself. */
  web: {
    pid: number
    uptimeSeconds: number
    /** Named memory counters in bytes; the view reads the `rss` entry. */
    memory: Record<string, number>
    cpuPercent: number
  }
  /** Bridge state: broker process plus its workers. */
  bridge: {
    endpoint: string
    reachable: boolean
    /** Error text; the view shows it below the process list when present. */
    error?: string
    broker: {
      running: boolean
      ready: boolean
      pid?: number
      /** Usage sample of the broker process, when available. */
      process?: ProcessUsage
      restartScheduled: boolean
      restartAttempts: number
    }
    /** One entry per worker: process usage plus session bookkeeping. */
    workers: Array<ProcessUsage & {
      endpoint?: string
      /** The view multiplies this by 1000 before building a `Date`, i.e. treats it as epoch seconds. */
      lastUsedAt?: number
      sessionCount: number
      runningSessionCount: number
    }>
    totalWorkerMemoryRssBytes: number
  }
  /** Session counters. */
  sessions: {
    active: number
    running: number
    /** Session count keyed by profile name. */
    byProfile: Record<string, number>
  }
}
|
||||||
|
|
||||||
|
/**
 * Fetches the current runtime performance snapshot from
 * `/api/hermes/performance/runtime` via the shared `request` client.
 *
 * @returns A promise for the snapshot; failures surface however `request` reports them.
 */
export function fetchPerformanceRuntime(): Promise<PerformanceRuntimeSnapshot> {
  // `request` already returns a promise, so no extra `async` wrapper is needed.
  return request<PerformanceRuntimeSnapshot>('/api/hermes/performance/runtime')
}
|
||||||
@@ -226,10 +226,17 @@ function openChangelog() {
|
|||||||
</svg>
|
</svg>
|
||||||
<span>{{ t("sidebar.usage") }}</span>
|
<span>{{ t("sidebar.usage") }}</span>
|
||||||
</button>
|
</button>
|
||||||
<button class="nav-item" :class="{ active: selectedKey === 'hermes.skillsUsage' }" @click="handleNav('hermes.skillsUsage')">
|
<button class="nav-item" :class="{ active: selectedKey === 'hermes.performance' }" @click="handleNav('hermes.performance')">
|
||||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
|
||||||
<polyline points="22 12 18 12 15 21 9 3 6 12 2 12" />
|
<polyline points="22 12 18 12 15 21 9 3 6 12 2 12" />
|
||||||
</svg>
|
</svg>
|
||||||
|
<span>{{ t("sidebar.performance") }}</span>
|
||||||
|
</button>
|
||||||
|
<button class="nav-item" :class="{ active: selectedKey === 'hermes.skillsUsage' }" @click="handleNav('hermes.skillsUsage')">
|
||||||
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
|
||||||
|
<path d="M21.21 15.89A10 10 0 1 1 8.11 2.79" />
|
||||||
|
<path d="M22 12A10 10 0 0 0 12 2v10z" />
|
||||||
|
</svg>
|
||||||
<span>{{ t("sidebar.skillsUsage") }}</span>
|
<span>{{ t("sidebar.skillsUsage") }}</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: 'Gedachtnis',
|
memory: 'Gedachtnis',
|
||||||
logs: 'Protokolle',
|
logs: 'Protokolle',
|
||||||
usage: 'Nutzung',
|
usage: 'Nutzung',
|
||||||
|
performance: 'Leistung',
|
||||||
skillsUsage: 'Skill-Nutzung',
|
skillsUsage: 'Skill-Nutzung',
|
||||||
channels: 'Kanale',
|
channels: 'Kanale',
|
||||||
terminal: 'Konsole',
|
terminal: 'Konsole',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: 'Menü einklappen',
|
collapse: 'Menü einklappen',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'Leistung',
|
||||||
|
subtitle: 'Systemressourcen, Bridge Broker, Workers und aktive Sitzungen überwachen',
|
||||||
|
refresh: 'Aktualisieren',
|
||||||
|
autoRefreshOn: 'Automatisch aktualisieren',
|
||||||
|
autoRefreshOff: 'Manuell aktualisieren',
|
||||||
|
loadFailed: 'Leistungsdaten konnten nicht geladen werden',
|
||||||
|
systemCpu: 'System-CPU',
|
||||||
|
systemMemory: 'Systemspeicher',
|
||||||
|
activeSessions: 'Aktive Sitzungen',
|
||||||
|
runningSessions: 'Laufend {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker-Gesamtspeicher',
|
||||||
|
processes: 'Prozesse',
|
||||||
|
uptime: 'Laufzeit',
|
||||||
|
running: 'Läuft',
|
||||||
|
stopped: 'Gestoppt',
|
||||||
|
workerMemory: 'Worker-Speicher',
|
||||||
|
lastUpdated: 'Aktualisiert',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'Speicher',
|
||||||
|
sessions: 'Sitzungen',
|
||||||
|
runningActiveSessions: 'Laufend / Aktiv',
|
||||||
|
lastUsed: 'Zuletzt verwendet',
|
||||||
|
status: 'Status',
|
||||||
|
noWorkers: 'Keine Workers',
|
||||||
|
sessionsByProfile: 'Sitzungen nach Profile',
|
||||||
|
noActiveSessions: 'Keine aktiven Sitzungen',
|
||||||
|
},
|
||||||
|
|
||||||
// Drawer
|
// Drawer
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'Konsole',
|
terminal: 'Konsole',
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ export default {
|
|||||||
memory: 'Memory',
|
memory: 'Memory',
|
||||||
logs: 'Logs',
|
logs: 'Logs',
|
||||||
usage: 'Usage',
|
usage: 'Usage',
|
||||||
|
performance: 'Performance',
|
||||||
skillsUsage: 'Skills Usage',
|
skillsUsage: 'Skills Usage',
|
||||||
channels: 'Channels',
|
channels: 'Channels',
|
||||||
gateways: 'Gateways',
|
gateways: 'Gateways',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
noChangelog: 'No changelog available',
|
noChangelog: 'No changelog available',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'Performance',
|
||||||
|
subtitle: 'Inspect system resources, bridge broker, workers, and active sessions',
|
||||||
|
refresh: 'Refresh',
|
||||||
|
autoRefreshOn: 'Auto refresh',
|
||||||
|
autoRefreshOff: 'Manual refresh',
|
||||||
|
loadFailed: 'Failed to load performance metrics',
|
||||||
|
systemCpu: 'System CPU',
|
||||||
|
systemMemory: 'System Memory',
|
||||||
|
activeSessions: 'Active Sessions',
|
||||||
|
runningSessions: 'Running {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker memory',
|
||||||
|
processes: 'Processes',
|
||||||
|
uptime: 'Uptime',
|
||||||
|
running: 'Running',
|
||||||
|
stopped: 'Stopped',
|
||||||
|
workerMemory: 'Worker Memory',
|
||||||
|
lastUpdated: 'Updated',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'Memory',
|
||||||
|
sessions: 'Sessions',
|
||||||
|
runningActiveSessions: 'Running / Active',
|
||||||
|
lastUsed: 'Last Used',
|
||||||
|
status: 'Status',
|
||||||
|
noWorkers: 'No workers',
|
||||||
|
sessionsByProfile: 'Sessions by Profile',
|
||||||
|
noActiveSessions: 'No active sessions',
|
||||||
|
},
|
||||||
|
|
||||||
// Drawer
|
// Drawer
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: 'Memoria',
|
memory: 'Memoria',
|
||||||
logs: 'Registros',
|
logs: 'Registros',
|
||||||
usage: 'Uso',
|
usage: 'Uso',
|
||||||
|
performance: 'Rendimiento',
|
||||||
skillsUsage: 'Uso de habilidades',
|
skillsUsage: 'Uso de habilidades',
|
||||||
channels: 'Canales',
|
channels: 'Canales',
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: 'Contraer menú',
|
collapse: 'Contraer menú',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'Rendimiento',
|
||||||
|
subtitle: 'Supervisa recursos del sistema, Bridge Broker, Workers y sesiones activas',
|
||||||
|
refresh: 'Actualizar',
|
||||||
|
autoRefreshOn: 'Actualización automática',
|
||||||
|
autoRefreshOff: 'Actualización manual',
|
||||||
|
loadFailed: 'No se pudieron cargar las métricas de rendimiento',
|
||||||
|
systemCpu: 'CPU del sistema',
|
||||||
|
systemMemory: 'Memoria del sistema',
|
||||||
|
activeSessions: 'Sesiones activas',
|
||||||
|
runningSessions: 'En ejecución {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Memoria total de Worker',
|
||||||
|
processes: 'Procesos',
|
||||||
|
uptime: 'Tiempo activo',
|
||||||
|
running: 'En ejecución',
|
||||||
|
stopped: 'Detenido',
|
||||||
|
workerMemory: 'Memoria de Worker',
|
||||||
|
lastUpdated: 'Actualizado',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'Memoria',
|
||||||
|
sessions: 'Sesiones',
|
||||||
|
runningActiveSessions: 'En ejecución / Activas',
|
||||||
|
lastUsed: 'Último uso',
|
||||||
|
status: 'Estado',
|
||||||
|
noWorkers: 'Sin Workers',
|
||||||
|
sessionsByProfile: 'Sesiones por Profile',
|
||||||
|
noActiveSessions: 'No hay sesiones activas',
|
||||||
|
},
|
||||||
|
|
||||||
// Drawer
|
// Drawer
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: 'Memoire',
|
memory: 'Memoire',
|
||||||
logs: 'Journaux',
|
logs: 'Journaux',
|
||||||
usage: 'Utilisation',
|
usage: 'Utilisation',
|
||||||
|
performance: 'Performance',
|
||||||
skillsUsage: 'Utilisation des compétences',
|
skillsUsage: 'Utilisation des compétences',
|
||||||
channels: 'Canaux',
|
channels: 'Canaux',
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: 'Replier le menu',
|
collapse: 'Replier le menu',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'Performance',
|
||||||
|
subtitle: 'Surveiller les ressources système, Bridge Broker, Workers et sessions actives',
|
||||||
|
refresh: 'Actualiser',
|
||||||
|
autoRefreshOn: 'Actualisation auto',
|
||||||
|
autoRefreshOff: 'Actualisation manuelle',
|
||||||
|
loadFailed: 'Échec du chargement des métriques de performance',
|
||||||
|
systemCpu: 'CPU système',
|
||||||
|
systemMemory: 'Mémoire système',
|
||||||
|
activeSessions: 'Sessions actives',
|
||||||
|
runningSessions: 'En cours {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Mémoire totale Worker',
|
||||||
|
processes: 'Processus',
|
||||||
|
uptime: 'Disponibilité',
|
||||||
|
running: 'En cours',
|
||||||
|
stopped: 'Arrêté',
|
||||||
|
workerMemory: 'Mémoire Worker',
|
||||||
|
lastUpdated: 'Mis à jour',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'Mémoire',
|
||||||
|
sessions: 'Sessions',
|
||||||
|
runningActiveSessions: 'En cours / Actives',
|
||||||
|
lastUsed: 'Dernière utilisation',
|
||||||
|
status: 'Statut',
|
||||||
|
noWorkers: 'Aucun Worker',
|
||||||
|
sessionsByProfile: 'Sessions par Profile',
|
||||||
|
noActiveSessions: 'Aucune session active',
|
||||||
|
},
|
||||||
|
|
||||||
// Drawer
|
// Drawer
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: 'メモリ',
|
memory: 'メモリ',
|
||||||
logs: 'ログ',
|
logs: 'ログ',
|
||||||
usage: '使用量',
|
usage: '使用量',
|
||||||
|
performance: 'パフォーマンス',
|
||||||
skillsUsage: 'スキル使用状況',
|
skillsUsage: 'スキル使用状況',
|
||||||
channels: 'チャンネル',
|
channels: 'チャンネル',
|
||||||
terminal: 'ターミナル',
|
terminal: 'ターミナル',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: 'メニューを折りたたむ',
|
collapse: 'メニューを折りたたむ',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'パフォーマンス',
|
||||||
|
subtitle: 'システムリソース、Bridge Broker、Workers、アクティブセッションを確認',
|
||||||
|
refresh: '更新',
|
||||||
|
autoRefreshOn: '自動更新',
|
||||||
|
autoRefreshOff: '手動更新',
|
||||||
|
loadFailed: 'パフォーマンスデータの読み込みに失敗しました',
|
||||||
|
systemCpu: 'システム CPU',
|
||||||
|
systemMemory: 'システムメモリ',
|
||||||
|
activeSessions: 'アクティブセッション',
|
||||||
|
runningSessions: '実行中 {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker 合計メモリ',
|
||||||
|
processes: 'プロセス',
|
||||||
|
uptime: '稼働時間',
|
||||||
|
running: '実行中',
|
||||||
|
stopped: '停止',
|
||||||
|
workerMemory: 'Worker メモリ',
|
||||||
|
lastUpdated: '更新時刻',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'メモリ',
|
||||||
|
sessions: 'セッション',
|
||||||
|
runningActiveSessions: '実行中 / アクティブ',
|
||||||
|
lastUsed: '最終使用',
|
||||||
|
status: '状態',
|
||||||
|
noWorkers: 'Worker はありません',
|
||||||
|
sessionsByProfile: 'Profile 別セッション',
|
||||||
|
noActiveSessions: 'アクティブセッションはありません',
|
||||||
|
},
|
||||||
|
|
||||||
// ドロワー
|
// ドロワー
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'ターミナル',
|
terminal: 'ターミナル',
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: '메모리',
|
memory: '메모리',
|
||||||
logs: '로그',
|
logs: '로그',
|
||||||
usage: '사용량',
|
usage: '사용량',
|
||||||
|
performance: '성능 모니터링',
|
||||||
skillsUsage: '스킬 사용량',
|
skillsUsage: '스킬 사용량',
|
||||||
channels: '채널',
|
channels: '채널',
|
||||||
terminal: '터미널',
|
terminal: '터미널',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: '메뉴 접기',
|
collapse: '메뉴 접기',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: '성능 모니터링',
|
||||||
|
subtitle: '시스템 리소스, Bridge Broker, Workers, 활성 세션 확인',
|
||||||
|
refresh: '새로고침',
|
||||||
|
autoRefreshOn: '자동 새로고침',
|
||||||
|
autoRefreshOff: '수동 새로고침',
|
||||||
|
loadFailed: '성능 데이터를 불러오지 못했습니다',
|
||||||
|
systemCpu: '시스템 CPU',
|
||||||
|
systemMemory: '시스템 메모리',
|
||||||
|
activeSessions: '활성 세션',
|
||||||
|
runningSessions: '실행 중 {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker 총 메모리',
|
||||||
|
processes: '프로세스',
|
||||||
|
uptime: '실행 시간',
|
||||||
|
running: '실행 중',
|
||||||
|
stopped: '중지됨',
|
||||||
|
workerMemory: 'Worker 메모리',
|
||||||
|
lastUpdated: '업데이트 시간',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: '메모리',
|
||||||
|
sessions: '세션',
|
||||||
|
runningActiveSessions: '실행 중 / 활성',
|
||||||
|
lastUsed: '마지막 사용',
|
||||||
|
status: '상태',
|
||||||
|
noWorkers: 'Worker 없음',
|
||||||
|
sessionsByProfile: 'Profile별 세션',
|
||||||
|
noActiveSessions: '활성 세션 없음',
|
||||||
|
},
|
||||||
|
|
||||||
// 서랍
|
// 서랍
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: '터미널',
|
terminal: '터미널',
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ export default {
|
|||||||
memory: 'Memoria',
|
memory: 'Memoria',
|
||||||
logs: 'Logs',
|
logs: 'Logs',
|
||||||
usage: 'Uso',
|
usage: 'Uso',
|
||||||
|
performance: 'Desempenho',
|
||||||
skillsUsage: 'Uso de habilidades',
|
skillsUsage: 'Uso de habilidades',
|
||||||
channels: 'Canais',
|
channels: 'Canais',
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
collapse: 'Recolher menu',
|
collapse: 'Recolher menu',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: 'Desempenho',
|
||||||
|
subtitle: 'Monitore recursos do sistema, Bridge Broker, Workers e sessões ativas',
|
||||||
|
refresh: 'Atualizar',
|
||||||
|
autoRefreshOn: 'Atualização automática',
|
||||||
|
autoRefreshOff: 'Atualização manual',
|
||||||
|
loadFailed: 'Falha ao carregar métricas de desempenho',
|
||||||
|
systemCpu: 'CPU do sistema',
|
||||||
|
systemMemory: 'Memória do sistema',
|
||||||
|
activeSessions: 'Sessões ativas',
|
||||||
|
runningSessions: 'Em execução {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Memória total de Worker',
|
||||||
|
processes: 'Processos',
|
||||||
|
uptime: 'Tempo ativo',
|
||||||
|
running: 'Em execução',
|
||||||
|
stopped: 'Parado',
|
||||||
|
workerMemory: 'Memória de Worker',
|
||||||
|
lastUpdated: 'Atualizado',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: 'Memória',
|
||||||
|
sessions: 'Sessões',
|
||||||
|
runningActiveSessions: 'Em execução / Ativas',
|
||||||
|
lastUsed: 'Último uso',
|
||||||
|
status: 'Status',
|
||||||
|
noWorkers: 'Nenhum Worker',
|
||||||
|
sessionsByProfile: 'Sessões por Profile',
|
||||||
|
noActiveSessions: 'Nenhuma sessão ativa',
|
||||||
|
},
|
||||||
|
|
||||||
// Gaveta
|
// Gaveta
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: 'Terminal',
|
terminal: 'Terminal',
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ export default {
|
|||||||
memory: '記憶',
|
memory: '記憶',
|
||||||
logs: '日誌',
|
logs: '日誌',
|
||||||
usage: '用量',
|
usage: '用量',
|
||||||
|
performance: '效能監控',
|
||||||
skillsUsage: '技能用量',
|
skillsUsage: '技能用量',
|
||||||
channels: '頻道',
|
channels: '頻道',
|
||||||
gateways: '閘道',
|
gateways: '閘道',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
noChangelog: '目前無更新日誌',
|
noChangelog: '目前無更新日誌',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: '效能監控',
|
||||||
|
subtitle: '查看系統資源、Bridge Broker、Workers 和活躍會話',
|
||||||
|
refresh: '重新整理',
|
||||||
|
autoRefreshOn: '自動重新整理',
|
||||||
|
autoRefreshOff: '手動重新整理',
|
||||||
|
loadFailed: '效能資料載入失敗',
|
||||||
|
systemCpu: '系統 CPU',
|
||||||
|
systemMemory: '系統記憶體',
|
||||||
|
activeSessions: '活躍會話',
|
||||||
|
runningSessions: '執行中 {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker 總記憶體',
|
||||||
|
processes: '程序',
|
||||||
|
uptime: '執行',
|
||||||
|
running: '執行中',
|
||||||
|
stopped: '已停止',
|
||||||
|
workerMemory: 'Worker 記憶體',
|
||||||
|
lastUpdated: '更新時間',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: '記憶體',
|
||||||
|
sessions: '會話',
|
||||||
|
runningActiveSessions: '執行中 / 活躍',
|
||||||
|
lastUsed: '最後使用',
|
||||||
|
status: '狀態',
|
||||||
|
noWorkers: '暫無 Worker',
|
||||||
|
sessionsByProfile: '按 Profile 統計會話',
|
||||||
|
noActiveSessions: '暫無活躍會話',
|
||||||
|
},
|
||||||
|
|
||||||
// 抽屜
|
// 抽屜
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: '終端機',
|
terminal: '終端機',
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ export default {
|
|||||||
memory: '记忆',
|
memory: '记忆',
|
||||||
logs: '日志',
|
logs: '日志',
|
||||||
usage: '用量',
|
usage: '用量',
|
||||||
|
performance: '性能监控',
|
||||||
skillsUsage: '技能用量',
|
skillsUsage: '技能用量',
|
||||||
channels: '频道',
|
channels: '频道',
|
||||||
gateways: '网关',
|
gateways: '网关',
|
||||||
@@ -116,6 +117,36 @@ export default {
|
|||||||
noChangelog: '暂无更新日志',
|
noChangelog: '暂无更新日志',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
performance: {
|
||||||
|
title: '性能监控',
|
||||||
|
subtitle: '查看系统资源、Bridge Broker、Workers 和活跃会话',
|
||||||
|
refresh: '刷新',
|
||||||
|
autoRefreshOn: '自动刷新',
|
||||||
|
autoRefreshOff: '手动刷新',
|
||||||
|
loadFailed: '性能数据加载失败',
|
||||||
|
systemCpu: '系统 CPU',
|
||||||
|
systemMemory: '系统内存',
|
||||||
|
activeSessions: '活跃会话',
|
||||||
|
runningSessions: '运行中 {count}',
|
||||||
|
workers: 'Workers',
|
||||||
|
totalWorkerMemory: 'Worker 总内存',
|
||||||
|
processes: '进程',
|
||||||
|
uptime: '运行',
|
||||||
|
running: '运行中',
|
||||||
|
stopped: '已停止',
|
||||||
|
workerMemory: 'Worker 内存',
|
||||||
|
lastUpdated: '更新时间',
|
||||||
|
profile: 'Profile',
|
||||||
|
memory: '内存',
|
||||||
|
sessions: '会话',
|
||||||
|
runningActiveSessions: '运行中 / 活跃',
|
||||||
|
lastUsed: '最后使用',
|
||||||
|
status: '状态',
|
||||||
|
noWorkers: '暂无 Worker',
|
||||||
|
sessionsByProfile: '按 Profile 统计会话',
|
||||||
|
noActiveSessions: '暂无活跃会话',
|
||||||
|
},
|
||||||
|
|
||||||
// 抽屉
|
// 抽屉
|
||||||
drawer: {
|
drawer: {
|
||||||
terminal: '终端',
|
terminal: '终端',
|
||||||
|
|||||||
@@ -50,6 +50,11 @@ const router = createRouter({
|
|||||||
name: 'hermes.usage',
|
name: 'hermes.usage',
|
||||||
component: () => import('@/views/hermes/UsageView.vue'),
|
component: () => import('@/views/hermes/UsageView.vue'),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: '/hermes/performance',
|
||||||
|
name: 'hermes.performance',
|
||||||
|
component: () => import('@/views/hermes/PerformanceView.vue'),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
path: '/hermes/skills-usage',
|
path: '/hermes/skills-usage',
|
||||||
name: 'hermes.skillsUsage',
|
name: 'hermes.skillsUsage',
|
||||||
|
|||||||
@@ -0,0 +1,486 @@
|
|||||||
|
<script setup lang="ts">
|
||||||
|
import { computed, onBeforeUnmount, onMounted, ref } from 'vue'
|
||||||
|
import { NButton, NSpin, useMessage } from 'naive-ui'
|
||||||
|
import { useI18n } from 'vue-i18n'
|
||||||
|
import { fetchPerformanceRuntime, type PerformanceRuntimeSnapshot } from '@/api/hermes/performance-monitor'
|
||||||
|
|
||||||
|
const { t } = useI18n()
const message = useMessage()

// Latest snapshot from the backend; stays null until the first successful load.
const snapshot = ref<PerformanceRuntimeSnapshot | null>(null)
// True while a runtime request is in flight.
const loading = ref(false)
// Whether the periodic refresh is switched on (mirrors the header toggle).
const autoRefresh = ref(true)
// Handle of the active refresh interval; undefined when no interval is scheduled.
let timer: ReturnType<typeof setInterval> | undefined

// Broker RSS in bytes, or null when there is no snapshot or no broker process sample.
const brokerMemory = computed(() => snapshot.value?.bridge.broker.process?.memoryRssBytes ?? null)
// Web process RSS in bytes, read from the `rss` entry of the memory map.
const webRssMemory = computed(() => snapshot.value?.web.memory.rss ?? null)
// Total number of workers in the snapshot (0 before the first load).
const workerCount = computed(() => snapshot.value?.bridge.workers.length ?? 0)
// Number of workers whose `running` flag is set.
const runningWorkerCount = computed(() => snapshot.value?.bridge.workers.filter(worker => worker.running).length ?? 0)
|
||||||
|
|
||||||
|
/**
 * Formats a byte count with 1024-based units (B, KB, MB, GB, TB).
 *
 * @param value Byte count; `null`, `undefined` and non-finite numbers render as `-`.
 * @returns Whole bytes without decimals, larger units with one decimal (e.g. `1.5 KB`).
 */
function formatBytes(value?: number | null): string {
  if (value == null || !Number.isFinite(value)) return '-'
  const units = ['B', 'KB', 'MB', 'GB', 'TB']
  let size = value
  let unit = 0
  // Stop at the largest known unit so oversized values still get a label.
  while (size >= 1024 && unit < units.length - 1) {
    size /= 1024
    unit += 1
  }
  return `${size.toFixed(unit === 0 ? 0 : 1)} ${units[unit]}`
}
|
||||||
|
|
||||||
|
/**
 * Formats a percentage with one decimal place and a `%` suffix.
 *
 * @param value Percentage value; `null`, `undefined` and non-finite numbers render as `-`.
 */
function formatPercent(value?: number | null): string {
  if (value == null || !Number.isFinite(value)) return '-'
  return `${value.toFixed(1)}%`
}
|
||||||
|
|
||||||
|
/**
 * Formats a duration in seconds using its two most significant units.
 *
 * @param seconds Duration in seconds; `null`, `undefined` and non-finite numbers render as `-`.
 * @returns `Xd Yh`, `Xh Ym`, `Xm`, or `Xs` for durations under one minute.
 */
function formatDuration(seconds?: number | null): string {
  if (seconds == null || !Number.isFinite(seconds)) return '-'
  // Clamp negatives: flooring a negative value would otherwise yield output like "-1m".
  const total = Math.max(0, Math.floor(seconds))
  const days = Math.floor(total / 86400)
  const hours = Math.floor((total % 86400) / 3600)
  const minutes = Math.floor((total % 3600) / 60)
  if (days > 0) return `${days}d ${hours}h`
  if (hours > 0) return `${hours}h ${minutes}m`
  if (minutes > 0) return `${minutes}m`
  // Under a minute, show seconds rather than an uninformative "0m".
  return `${total}s`
}
|
||||||
|
|
||||||
|
/**
 * Formats an epoch timestamp given in seconds as a locale date-time string.
 *
 * @param seconds Epoch seconds; missing, zero, non-finite or out-of-range values render as `-`.
 */
function formatTime(seconds?: number | null): string {
  // Zero is treated as "never", matching the original truthiness check.
  if (seconds == null || !Number.isFinite(seconds) || seconds === 0) return '-'
  const date = new Date(seconds * 1000)
  // Finite but out-of-range inputs still produce an invalid Date; avoid rendering "Invalid Date".
  if (Number.isNaN(date.getTime())) return '-'
  return date.toLocaleString()
}
|
||||||
|
|
||||||
|
/**
 * Returns the localized status label for a process.
 *
 * @param running Whether the process is running.
 * @returns The `performance.running` or `performance.stopped` translation.
 */
function statusText(running: boolean): string {
  const key = running ? 'performance.running' : 'performance.stopped'
  return t(key)
}
|
||||||
|
|
||||||
|
/**
 * Loads a fresh runtime snapshot into `snapshot`.
 *
 * @param showError When true (default), a failed request shows an error toast;
 *   periodic refreshes pass false to fail silently.
 */
async function loadRuntime(showError = true): Promise<void> {
  // Skip when a request is already in flight: the 3 s timer could otherwise stack
  // overlapping requests, and a slow older response could overwrite a newer snapshot.
  if (loading.value) return
  loading.value = true
  try {
    snapshot.value = await fetchPerformanceRuntime()
  } catch (err: unknown) {
    if (showError) {
      // Accept any thrown object carrying a string `message`, not only Error instances.
      const detail =
        typeof err === 'object' && err !== null && 'message' in err && typeof err.message === 'string'
          ? err.message
          : ''
      message.error(detail || t('performance.loadFailed'))
    }
  } finally {
    loading.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Switches periodic refreshing on or off.
 *
 * Any existing interval is always cleared first, so repeated calls never
 * leave more than one timer running.
 *
 * @param enabled Whether the snapshot should be reloaded periodically.
 */
function setAutoRefresh(enabled: boolean): void {
  const refreshIntervalMs = 3000
  autoRefresh.value = enabled
  if (timer !== undefined) {
    clearInterval(timer)
    timer = undefined
  }
  if (enabled) {
    // Background refreshes stay silent on failure; `void` marks the promise as intentionally unawaited.
    timer = setInterval(() => {
      void loadRuntime(false)
    }, refreshIntervalMs)
  }
}
|
||||||
|
|
||||||
|
// On mount: load once immediately (with error toast), then start periodic refreshing.
onMounted(() => {
  void loadRuntime()
  setAutoRefresh(true)
})

// Before unmount: stop the refresh interval so it does not keep firing after the view is gone.
onBeforeUnmount(() => {
  if (timer !== undefined) {
    clearInterval(timer)
    timer = undefined
  }
})
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<template>
  <div class="performance-view">
    <!-- Header: title plus auto-refresh toggle and manual refresh button -->
    <header class="page-header">
      <h2 class="header-title">{{ t('performance.title') }}</h2>
      <div class="header-actions">
        <NButton size="small" :type="autoRefresh ? 'primary' : 'default'" secondary @click="setAutoRefresh(!autoRefresh)">
          {{ autoRefresh ? t('performance.autoRefreshOn') : t('performance.autoRefreshOff') }}
        </NButton>
        <NButton size="small" :loading="loading" @click="loadRuntime()">{{ t('performance.refresh') }}</NButton>
      </div>
    </header>

    <!-- The spinner only shows for the very first load; later refreshes update in place -->
    <NSpin :show="loading && !snapshot" class="performance-spin">
      <main v-if="snapshot" class="performance-content">
        <!-- Summary cards -->
        <section class="summary-grid">
          <div class="summary-item">
            <span class="summary-label">{{ t('performance.systemCpu') }}</span>
            <strong>{{ formatPercent(snapshot.system.cpuPercent) }}</strong>
            <!-- Width is clamped to 0–100 so out-of-range values cannot produce an invalid or overflowing bar -->
            <div class="meter"><span :style="{ width: `${Math.min(100, Math.max(0, snapshot.system.cpuPercent || 0))}%` }" /></div>
          </div>
          <div class="summary-item">
            <span class="summary-label">{{ t('performance.systemMemory') }}</span>
            <strong>{{ formatPercent(snapshot.system.memoryPercent) }}</strong>
            <small>{{ formatBytes(snapshot.system.usedMemoryBytes) }} / {{ formatBytes(snapshot.system.totalMemoryBytes) }}</small>
            <div class="meter"><span :style="{ width: `${Math.min(100, Math.max(0, snapshot.system.memoryPercent || 0))}%` }" /></div>
          </div>
          <div class="summary-item">
            <span class="summary-label">{{ t('performance.activeSessions') }}</span>
            <strong>{{ snapshot.sessions.active }}</strong>
            <small>{{ t('performance.runningSessions', { count: snapshot.sessions.running }) }}</small>
          </div>
          <div class="summary-item">
            <span class="summary-label">{{ t('performance.workers') }}</span>
            <strong>{{ runningWorkerCount }} / {{ workerCount }}</strong>
            <small>{{ t('performance.totalWorkerMemory') }} {{ formatBytes(snapshot.bridge.totalWorkerMemoryRssBytes) }}</small>
          </div>
        </section>

        <!-- Web UI and bridge broker processes -->
        <section class="runtime-section">
          <div class="section-header">
            <h3>{{ t('performance.processes') }}</h3>
            <span>{{ snapshot.system.platform }} {{ snapshot.system.arch }} · {{ snapshot.system.cpuCount }} CPU · {{ t('performance.uptime') }} {{ formatDuration(snapshot.system.uptimeSeconds) }}</span>
          </div>
          <div class="process-grid">
            <div class="process-row">
              <div>
                <strong>Web UI</strong>
                <span>PID {{ snapshot.web.pid }}</span>
              </div>
              <span>{{ formatPercent(snapshot.web.cpuPercent) }}</span>
              <span>{{ formatBytes(webRssMemory) }}</span>
              <span class="status running">{{ statusText(true) }}</span>
            </div>
            <div class="process-row">
              <div>
                <strong>Bridge Broker</strong>
                <span>{{ snapshot.bridge.endpoint }}</span>
              </div>
              <span>{{ formatPercent(snapshot.bridge.broker.process?.cpuPercent) }}</span>
              <span>{{ formatBytes(brokerMemory) }}</span>
              <!-- The broker counts as running only when the bridge is reachable as well -->
              <span class="status" :class="{ running: snapshot.bridge.reachable && snapshot.bridge.broker.running }">
                {{ snapshot.bridge.reachable && snapshot.bridge.broker.running ? statusText(true) : statusText(false) }}
              </span>
            </div>
          </div>
          <div v-if="snapshot.bridge.error" class="runtime-error">{{ snapshot.bridge.error }}</div>
        </section>

        <!-- Per-worker usage table -->
        <section class="runtime-section">
          <div class="section-header">
            <h3>{{ t('performance.workerMemory') }}</h3>
            <span>{{ t('performance.lastUpdated') }} {{ new Date(snapshot.timestamp).toLocaleTimeString() }}</span>
          </div>
          <div class="worker-table-wrap">
            <table class="worker-table">
              <thead>
                <tr>
                  <th>{{ t('performance.profile') }}</th>
                  <th>PID</th>
                  <th>CPU</th>
                  <th>{{ t('performance.memory') }}</th>
                  <th>{{ t('performance.runningActiveSessions') }}</th>
                  <th>{{ t('performance.lastUsed') }}</th>
                  <th>{{ t('performance.status') }}</th>
                </tr>
              </thead>
              <tbody>
                <tr v-if="snapshot.bridge.workers.length === 0">
                  <td colspan="7" class="empty-cell">{{ t('performance.noWorkers') }}</td>
                </tr>
                <tr v-for="worker in snapshot.bridge.workers" :key="worker.profile || worker.pid">
                  <td>{{ worker.profile || '-' }}</td>
                  <td>{{ worker.pid || '-' }}</td>
                  <td>{{ formatPercent(worker.cpuPercent) }}</td>
                  <td>{{ formatBytes(worker.memoryRssBytes) }}</td>
                  <td>{{ worker.runningSessionCount }} / {{ worker.sessionCount }}</td>
                  <td>{{ formatTime(worker.lastUsedAt) }}</td>
                  <td><span class="status" :class="{ running: worker.running }">{{ statusText(worker.running) }}</span></td>
                </tr>
              </tbody>
            </table>
          </div>
        </section>

        <!-- Session counts grouped by profile -->
        <section class="runtime-section">
          <div class="section-header">
            <h3>{{ t('performance.sessionsByProfile') }}</h3>
          </div>
          <div class="session-list">
            <div v-if="Object.keys(snapshot.sessions.byProfile).length === 0" class="session-empty">
              {{ t('performance.noActiveSessions') }}
            </div>
            <div v-for="(count, profile) in snapshot.sessions.byProfile" :key="profile" class="session-row">
              <span>{{ profile }}</span>
              <strong>{{ count }}</strong>
            </div>
          </div>
        </section>
      </main>
    </NSpin>
  </div>
</template>
|
||||||
|
|
||||||
|
<style scoped lang="scss">
|
||||||
|
@use '@/styles/variables' as *;
|
||||||
|
|
||||||
|
.performance-view {
|
||||||
|
height: 100%;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-header {
|
||||||
|
display: flex;
|
||||||
|
flex-shrink: 0;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 12px;
|
||||||
|
padding: 21px 20px;
|
||||||
|
border-bottom: 1px solid $border-color;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-title {
|
||||||
|
margin: 0;
|
||||||
|
color: $text-primary;
|
||||||
|
font-size: 16px;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-actions {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.performance-spin {
|
||||||
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.performance-content {
|
||||||
|
height: 100%;
|
||||||
|
overflow-y: auto;
|
||||||
|
padding: 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(4, minmax(0, 1fr));
|
||||||
|
gap: 12px;
|
||||||
|
margin-bottom: 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-item,
|
||||||
|
.runtime-section {
|
||||||
|
border: 1px solid $border-color;
|
||||||
|
border-radius: $radius-sm;
|
||||||
|
background: $bg-card;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-item {
|
||||||
|
min-height: 108px;
|
||||||
|
padding: 14px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-label,
|
||||||
|
.summary-item small,
|
||||||
|
.section-header span,
|
||||||
|
.process-row div span {
|
||||||
|
color: $text-muted;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-item strong {
|
||||||
|
color: $text-primary;
|
||||||
|
font-size: 24px;
|
||||||
|
font-weight: 650;
|
||||||
|
}
|
||||||
|
|
||||||
|
.meter {
|
||||||
|
height: 6px;
|
||||||
|
overflow: hidden;
|
||||||
|
border-radius: 999px;
|
||||||
|
background: $bg-secondary;
|
||||||
|
|
||||||
|
span {
|
||||||
|
display: block;
|
||||||
|
height: 100%;
|
||||||
|
border-radius: inherit;
|
||||||
|
background: $accent-primary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.runtime-section {
|
||||||
|
margin-top: 12px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-header {
|
||||||
|
min-height: 46px;
|
||||||
|
padding: 12px 14px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 12px;
|
||||||
|
border-bottom: 1px solid $border-light;
|
||||||
|
|
||||||
|
h3 {
|
||||||
|
margin: 0;
|
||||||
|
color: $text-primary;
|
||||||
|
font-size: 14px;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.process-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
.process-row {
|
||||||
|
min-height: 56px;
|
||||||
|
padding: 10px 14px;
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: minmax(0, 1fr) 80px 110px 86px;
|
||||||
|
align-items: center;
|
||||||
|
gap: 12px;
|
||||||
|
border-bottom: 1px solid $border-light;
|
||||||
|
color: $text-secondary;
|
||||||
|
font-size: 13px;
|
||||||
|
|
||||||
|
&:last-child {
|
||||||
|
border-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div {
|
||||||
|
min-width: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
strong {
|
||||||
|
color: $text-primary;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
|
||||||
|
span {
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.status {
|
||||||
|
width: fit-content;
|
||||||
|
max-width: 100%;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border: 1px solid $border-color;
|
||||||
|
border-radius: 999px;
|
||||||
|
color: $text-muted;
|
||||||
|
font-size: 12px;
|
||||||
|
|
||||||
|
&.running {
|
||||||
|
border-color: rgba(var(--success-rgb), 0.35);
|
||||||
|
color: $success;
|
||||||
|
background: rgba(var(--success-rgb), 0.08);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.runtime-error {
|
||||||
|
padding: 10px 14px;
|
||||||
|
border-top: 1px solid $border-light;
|
||||||
|
color: $error;
|
||||||
|
font-size: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.worker-table-wrap {
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.worker-table {
|
||||||
|
width: 100%;
|
||||||
|
min-width: 760px;
|
||||||
|
border-collapse: collapse;
|
||||||
|
color: $text-secondary;
|
||||||
|
font-size: 13px;
|
||||||
|
|
||||||
|
th,
|
||||||
|
td {
|
||||||
|
padding: 11px 14px;
|
||||||
|
border-bottom: 1px solid $border-light;
|
||||||
|
text-align: left;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
color: $text-muted;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
td:first-child {
|
||||||
|
color: $text-primary;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr:last-child td {
|
||||||
|
border-bottom: 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.empty-cell,
|
||||||
|
.session-empty {
|
||||||
|
color: $text-muted;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.session-list {
|
||||||
|
padding: 6px 14px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.session-row {
|
||||||
|
min-height: 34px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 12px;
|
||||||
|
border-bottom: 1px solid $border-light;
|
||||||
|
color: $text-secondary;
|
||||||
|
font-size: 13px;
|
||||||
|
|
||||||
|
&:last-child {
|
||||||
|
border-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
strong {
|
||||||
|
color: $text-primary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.session-empty {
|
||||||
|
padding: 18px 0;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 960px) {
|
||||||
|
.summary-grid {
|
||||||
|
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: $breakpoint-mobile) {
|
||||||
|
.page-header,
|
||||||
|
.header-actions,
|
||||||
|
.section-header {
|
||||||
|
align-items: flex-start;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.header-actions {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.summary-grid {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
.process-row {
|
||||||
|
grid-template-columns: 1fr 72px;
|
||||||
|
|
||||||
|
> span:nth-child(3),
|
||||||
|
> span:nth-child(4) {
|
||||||
|
justify-self: start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
import { createEmptyOpsRuntimeSnapshot, getOpsRuntimeSnapshot } from '../../services/hermes/ops-monitor'
|
||||||
|
|
||||||
|
/**
 * Koa handler that responds with the current ops runtime snapshot.
 *
 * The handler never lets a failure propagate: when `getOpsRuntimeSnapshot()` rejects, the response
 * body is the placeholder built by `createEmptyOpsRuntimeSnapshot(message)` instead.
 *
 * @param ctx Koa context; only `ctx.body` is written.
 */
export async function runtime(ctx: any) {
  try {
    ctx.body = await getOpsRuntimeSnapshot()
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances, so read `message` defensively and
    // only forward it when it is a non-empty string.
    const detail =
      typeof err === 'object' && err !== null && 'message' in err
        ? (err as { message: unknown }).message
        : undefined
    const message =
      typeof detail === 'string' && detail !== '' ? detail : 'Failed to read performance metrics'
    ctx.body = createEmptyOpsRuntimeSnapshot(message)
  }
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
import Router from '@koa/router'
|
||||||
|
import * as ctrl from '../../controllers/hermes/performance-monitor'
|
||||||
|
|
||||||
|
// Router for the performance monitor API. Route registration is chained onto the constructor;
// the verb helpers of @koa/router return the router instance itself.
export const performanceMonitorRoutes = new Router().get(
  '/api/hermes/performance/runtime',
  ctrl.runtime,
)
|
||||||
@@ -31,6 +31,7 @@ import { ttsRoutes } from './hermes/tts'
|
|||||||
import { mediaRoutes } from './hermes/media'
|
import { mediaRoutes } from './hermes/media'
|
||||||
import { proxyRoutes, proxyMiddleware } from './hermes/proxy'
|
import { proxyRoutes, proxyMiddleware } from './hermes/proxy'
|
||||||
import { groupChatRoutes, setGroupChatServer } from './hermes/group-chat'
|
import { groupChatRoutes, setGroupChatServer } from './hermes/group-chat'
|
||||||
|
import { performanceMonitorRoutes } from './hermes/performance-monitor'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register all routes on the Koa app.
|
* Register all routes on the Koa app.
|
||||||
@@ -72,6 +73,7 @@ export function registerRoutes(app: any, requireAuth: (ctx: Context, next: Next)
|
|||||||
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
app.use(cronHistoryRoutes.routes()) // Must be before proxy
|
||||||
app.use(kanbanRoutes.routes()) // Must be before proxy
|
app.use(kanbanRoutes.routes()) // Must be before proxy
|
||||||
app.use(mediaRoutes.routes()) // Must be before proxy
|
app.use(mediaRoutes.routes()) // Must be before proxy
|
||||||
|
app.use(performanceMonitorRoutes.routes()) // Must be before proxy
|
||||||
app.use(proxyRoutes.routes())
|
app.use(proxyRoutes.routes())
|
||||||
|
|
||||||
// Proxy catch-all middleware (must be last)
|
// Proxy catch-all middleware (must be last)
|
||||||
|
|||||||
@@ -10,13 +10,16 @@ delimited JSON request/response protocol over a local socket.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import atexit
|
||||||
import copy
|
import copy
|
||||||
|
import errno
|
||||||
import hashlib
|
import hashlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
import queue
|
import queue
|
||||||
|
import signal
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -38,12 +41,100 @@ DEFAULT_AGENT_ROOT = "~/.hermes/hermes-agent"
|
|||||||
DEFAULT_HERMES_HOME = "~/.hermes"
|
DEFAULT_HERMES_HOME = "~/.hermes"
|
||||||
APPROVAL_TIMEOUT_SECONDS = 120
|
APPROVAL_TIMEOUT_SECONDS = 120
|
||||||
APPROVAL_TIMEOUT_MS = APPROVAL_TIMEOUT_SECONDS * 1000
|
APPROVAL_TIMEOUT_MS = APPROVAL_TIMEOUT_SECONDS * 1000
|
||||||
|
PARENT_WATCHDOG_INTERVAL_SECONDS = 2.0
|
||||||
|
|
||||||
|
|
||||||
def _bridge_platform() -> str:
|
def _bridge_platform() -> str:
|
||||||
return os.environ.get("HERMES_AGENT_BRIDGE_PLATFORM", "cli").strip() or "cli"
|
return os.environ.get("HERMES_AGENT_BRIDGE_PLATFORM", "cli").strip() or "cli"
|
||||||
|
|
||||||
|
|
||||||
|
def _positive_int(value: str | None) -> int | None:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
parsed = int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
return parsed if parsed > 0 else None
|
||||||
|
|
||||||
|
|
||||||
|
def _process_exists(pid: int) -> bool:
|
||||||
|
if pid <= 0:
|
||||||
|
return False
|
||||||
|
if os.name == "nt":
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["tasklist.exe", "/FI", f"PID eq {pid}", "/NH"],
|
||||||
|
check=False,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
return str(pid) in (result.stdout or "")
|
||||||
|
except Exception:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
os.kill(pid, 0)
|
||||||
|
return True
|
||||||
|
except ProcessLookupError:
|
||||||
|
return False
|
||||||
|
except PermissionError:
|
||||||
|
return True
|
||||||
|
except OSError as exc:
|
||||||
|
return exc.errno != errno.ESRCH
|
||||||
|
|
||||||
|
|
||||||
|
def _start_parent_process_watchdog(
    parent_pid: int | None,
    stop_event: threading.Event,
    label: str,
    interval: float = PARENT_WATCHDOG_INTERVAL_SECONDS,
) -> None:
    """Start a daemon thread that sets ``stop_event`` once the parent process is gone.

    Nothing is started when ``parent_pid`` is falsy or equals this process's own
    pid. Otherwise the thread checks ``_process_exists(parent_pid)`` every
    ``interval`` seconds; on the first negative answer it logs to stderr, sets
    ``stop_event`` and exits. It also exits if ``stop_event`` is set by anyone
    else. ``label`` appears in the log line and in the thread name.
    """
    if not parent_pid or parent_pid == os.getpid():
        return

    def _watch() -> None:
        # Event.wait() returns True as soon as the event is set, which ends the loop.
        while not stop_event.wait(interval):
            if not _process_exists(parent_pid):
                print(
                    f"[hermes-bridge] parent pid {parent_pid} exited; stopping {label}",
                    file=sys.stderr,
                    flush=True,
                )
                stop_event.set()
                break

    watcher = threading.Thread(
        target=_watch,
        daemon=True,
        name=f"hermes-bridge-parent-watchdog-{label}",
    )
    watcher.start()
|
||||||
|
|
||||||
|
|
||||||
|
def _install_stop_signal_handlers(stop_event: threading.Event) -> Callable[[], None]:
|
||||||
|
if threading.current_thread() is not threading.main_thread():
|
||||||
|
return lambda: None
|
||||||
|
|
||||||
|
previous: list[tuple[signal.Signals, Any]] = []
|
||||||
|
|
||||||
|
def handle_signal(signum: int, _frame: Any) -> None:
|
||||||
|
stop_event.set()
|
||||||
|
|
||||||
|
for signum in (signal.SIGINT, signal.SIGTERM):
|
||||||
|
try:
|
||||||
|
sig = signal.Signals(signum)
|
||||||
|
previous.append((sig, signal.getsignal(sig)))
|
||||||
|
signal.signal(sig, handle_signal)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def restore() -> None:
|
||||||
|
for sig, handler in previous:
|
||||||
|
try:
|
||||||
|
signal.signal(sig, handler)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return restore
|
||||||
|
|
||||||
|
|
||||||
def _suppress_bridge_platform_hint() -> None:
|
def _suppress_bridge_platform_hint() -> None:
|
||||||
raw = os.environ.get("HERMES_BRIDGE_SUPPRESS_PLATFORM_HINT", "cli").strip()
|
raw = os.environ.get("HERMES_BRIDGE_SUPPRESS_PLATFORM_HINT", "cli").strip()
|
||||||
if raw.lower() in {"0", "false", "no", "off"}:
|
if raw.lower() in {"0", "false", "no", "off"}:
|
||||||
@@ -1452,12 +1543,18 @@ class BridgeServer:
|
|||||||
raise ValueError("action is required")
|
raise ValueError("action is required")
|
||||||
|
|
||||||
if action == "ping":
|
if action == "ping":
|
||||||
|
with self.pool._lock:
|
||||||
|
sessions = list(self.pool._sessions.values())
|
||||||
|
running_sessions = sum(1 for session in sessions if session.running)
|
||||||
return {
|
return {
|
||||||
"pong": True,
|
"pong": True,
|
||||||
"time": time.time(),
|
"time": time.time(),
|
||||||
|
"pid": os.getpid(),
|
||||||
"agent_root": str(_agent_root()),
|
"agent_root": str(_agent_root()),
|
||||||
"profile": _worker_profile() or "default",
|
"profile": _worker_profile() or "default",
|
||||||
"hermes_home": str(_hermes_home()),
|
"hermes_home": str(_hermes_home()),
|
||||||
|
"session_count": len(sessions),
|
||||||
|
"running_session_count": running_sessions,
|
||||||
}
|
}
|
||||||
|
|
||||||
if action == "chat":
|
if action == "chat":
|
||||||
@@ -1588,46 +1685,54 @@ class BridgeServer:
|
|||||||
|
|
||||||
def serve_forever(self) -> None:
|
def serve_forever(self) -> None:
|
||||||
server = self._make_server_socket()
|
server = self._make_server_socket()
|
||||||
server.listen(16)
|
restore_signals = _install_stop_signal_handlers(self._stop)
|
||||||
server.settimeout(0.2)
|
_start_parent_process_watchdog(
|
||||||
print(json.dumps({"event": "ready", "endpoint": self.endpoint}), flush=True)
|
_positive_int(os.environ.get("HERMES_AGENT_BRIDGE_BROKER_PID")),
|
||||||
|
self._stop,
|
||||||
|
f"worker:{_worker_profile() or 'default'}",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
server.listen(16)
|
||||||
|
server.settimeout(0.2)
|
||||||
|
print(json.dumps({"event": "ready", "endpoint": self.endpoint}), flush=True)
|
||||||
|
|
||||||
while not self._stop.is_set():
|
while not self._stop.is_set():
|
||||||
conn: socket.socket | None = None
|
conn: socket.socket | None = None
|
||||||
try:
|
|
||||||
try:
|
try:
|
||||||
conn, _addr = server.accept()
|
|
||||||
except socket.timeout:
|
|
||||||
self._gc_idle_sessions()
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
req = self._read_request(conn)
|
|
||||||
data = self.handle(req)
|
|
||||||
resp = {"ok": True, **_jsonable(data)}
|
|
||||||
except Exception as exc:
|
|
||||||
resp = {
|
|
||||||
"ok": False,
|
|
||||||
"error": str(exc),
|
|
||||||
"error_type": exc.__class__.__name__,
|
|
||||||
}
|
|
||||||
self._write_response(conn, resp)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
break
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"[hermes-bridge] server loop error: {exc}", file=sys.stderr, flush=True)
|
|
||||||
finally:
|
|
||||||
if conn is not None:
|
|
||||||
try:
|
try:
|
||||||
conn.close()
|
conn, _addr = server.accept()
|
||||||
except OSError:
|
except socket.timeout:
|
||||||
pass
|
self._gc_idle_sessions()
|
||||||
|
continue
|
||||||
server.close()
|
try:
|
||||||
if self.endpoint.startswith("ipc://"):
|
req = self._read_request(conn)
|
||||||
try:
|
data = self.handle(req)
|
||||||
Path(self.endpoint.removeprefix("ipc://")).unlink(missing_ok=True)
|
resp = {"ok": True, **_jsonable(data)}
|
||||||
except OSError:
|
except Exception as exc:
|
||||||
pass
|
resp = {
|
||||||
|
"ok": False,
|
||||||
|
"error": str(exc),
|
||||||
|
"error_type": exc.__class__.__name__,
|
||||||
|
}
|
||||||
|
self._write_response(conn, resp)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
break
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"[hermes-bridge] server loop error: {exc}", file=sys.stderr, flush=True)
|
||||||
|
finally:
|
||||||
|
if conn is not None:
|
||||||
|
try:
|
||||||
|
conn.close()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
restore_signals()
|
||||||
|
server.close()
|
||||||
|
if self.endpoint.startswith("ipc://"):
|
||||||
|
try:
|
||||||
|
Path(self.endpoint.removeprefix("ipc://")).unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class WorkerProcess:
|
class WorkerProcess:
|
||||||
@@ -1647,6 +1752,10 @@ class WorkerProcess:
|
|||||||
def running(self) -> bool:
|
def running(self) -> bool:
|
||||||
return self.process is not None and self.process.poll() is None
|
return self.process is not None and self.process.poll() is None
|
||||||
|
|
||||||
|
@property
def pid(self) -> int | None:
    """Pid of the worker's subprocess object, or ``None`` when there is no subprocess object."""
    process = self.process
    if process is None:
        return None
    return process.pid
|
||||||
|
|
||||||
def start(self) -> None:
|
def start(self) -> None:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if self.running:
|
if self.running:
|
||||||
@@ -1668,6 +1777,7 @@ class WorkerProcess:
|
|||||||
**os.environ,
|
**os.environ,
|
||||||
"HERMES_AGENT_BRIDGE_ENDPOINT": self.endpoint,
|
"HERMES_AGENT_BRIDGE_ENDPOINT": self.endpoint,
|
||||||
"HERMES_AGENT_BRIDGE_WORKER_PROFILE": self.profile,
|
"HERMES_AGENT_BRIDGE_WORKER_PROFILE": self.profile,
|
||||||
|
"HERMES_AGENT_BRIDGE_BROKER_PID": str(os.getpid()),
|
||||||
}
|
}
|
||||||
self.process = subprocess.Popen(
|
self.process = subprocess.Popen(
|
||||||
args,
|
args,
|
||||||
@@ -2019,6 +2129,18 @@ class BridgeBroker:
|
|||||||
if event.get("event") in {"bridge.compression.completed", "bridge.compression.failed"} and request_id:
|
if event.get("event") in {"bridge.compression.completed", "bridge.compression.failed"} and request_id:
|
||||||
self._compression_profile.pop(request_id, None)
|
self._compression_profile.pop(request_id, None)
|
||||||
|
|
||||||
|
def stop(self) -> None:
    """Signal the broker to stop, forget all routing state and stop every worker.

    Under the broker lock the worker table and the run/session/approval/
    compression routing maps are emptied. The workers taken from the table are
    then stopped outside the lock. A worker whose ``stop()`` raises is logged
    and skipped so the remaining workers are still stopped. Calling this again
    is harmless because the worker table is already empty.
    """
    self._stop.set()
    with self._lock:
        workers = list(self._workers.values())
        self._workers.clear()
        self._run_profile.clear()
        self._session_profile.clear()
        self._approval_profile.clear()
        self._compression_profile.clear()
    for worker in workers:
        try:
            worker.stop()
        except Exception as exc:
            # One misbehaving worker must not leave the others running.
            print(f"[hermes-bridge-broker] failed to stop worker: {exc}", file=sys.stderr, flush=True)
|
||||||
|
|
||||||
def _forward(self, profile: str, req: dict[str, Any]) -> dict[str, Any]:
|
def _forward(self, profile: str, req: dict[str, Any]) -> dict[str, Any]:
|
||||||
worker = self._worker_for_profile(profile)
|
worker = self._worker_for_profile(profile)
|
||||||
forwarded = dict(req)
|
forwarded = dict(req)
|
||||||
@@ -2034,8 +2156,33 @@ class BridgeBroker:
|
|||||||
|
|
||||||
if action == "ping":
|
if action == "ping":
|
||||||
with self._lock:
|
with self._lock:
|
||||||
workers = {profile: worker.running for profile, worker in self._workers.items()}
|
worker_details = {
|
||||||
return {"pong": True, "time": time.time(), "mode": "broker", "workers": workers}
|
profile: {
|
||||||
|
"running": worker.running,
|
||||||
|
"pid": worker.pid,
|
||||||
|
"endpoint": worker.endpoint,
|
||||||
|
"last_used_at": worker.last_used_at,
|
||||||
|
}
|
||||||
|
for profile, worker in self._workers.items()
|
||||||
|
}
|
||||||
|
workers = {profile: details["running"] for profile, details in worker_details.items()}
|
||||||
|
sessions_by_profile: dict[str, int] = {}
|
||||||
|
for profile in self._session_profile.values():
|
||||||
|
sessions_by_profile[profile] = sessions_by_profile.get(profile, 0) + 1
|
||||||
|
active_sessions = len(self._session_profile)
|
||||||
|
return {
|
||||||
|
"pong": True,
|
||||||
|
"time": time.time(),
|
||||||
|
"mode": "broker",
|
||||||
|
"broker": {
|
||||||
|
"pid": os.getpid(),
|
||||||
|
"endpoint": self.endpoint,
|
||||||
|
},
|
||||||
|
"workers": workers,
|
||||||
|
"worker_details": worker_details,
|
||||||
|
"active_sessions": active_sessions,
|
||||||
|
"sessions_by_profile": sessions_by_profile,
|
||||||
|
}
|
||||||
|
|
||||||
if action == "worker_ping":
|
if action == "worker_ping":
|
||||||
profile = self._normalize_profile(req.get("profile"))
|
profile = self._normalize_profile(req.get("profile"))
|
||||||
@@ -2145,17 +2292,7 @@ class BridgeBroker:
|
|||||||
return {"sessions": sessions}
|
return {"sessions": sessions}
|
||||||
|
|
||||||
if action == "shutdown":
|
if action == "shutdown":
|
||||||
self._stop.set()
|
self.stop()
|
||||||
with self._lock:
|
|
||||||
workers = list(self._workers.values())
|
|
||||||
for worker in workers:
|
|
||||||
if not worker.running:
|
|
||||||
worker.stop()
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
worker.request({"action": "shutdown"})
|
|
||||||
except Exception:
|
|
||||||
worker.stop()
|
|
||||||
return {"status": "shutting_down"}
|
return {"status": "shutting_down"}
|
||||||
|
|
||||||
raise ValueError(f"unknown action: {action}")
|
raise ValueError(f"unknown action: {action}")
|
||||||
@@ -2187,51 +2324,55 @@ class BridgeBroker:
|
|||||||
|
|
||||||
def serve_forever(self) -> None:
|
def serve_forever(self) -> None:
|
||||||
server = self._make_server_socket()
|
server = self._make_server_socket()
|
||||||
server.listen(64)
|
restore_signals = _install_stop_signal_handlers(self._stop)
|
||||||
server.settimeout(0.2)
|
atexit.register(self.stop)
|
||||||
print(json.dumps({"event": "ready", "endpoint": self.endpoint, "mode": "broker"}), flush=True)
|
try:
|
||||||
|
server.listen(64)
|
||||||
|
server.settimeout(0.2)
|
||||||
|
print(json.dumps({"event": "ready", "endpoint": self.endpoint, "mode": "broker"}), flush=True)
|
||||||
|
|
||||||
while not self._stop.is_set():
|
while not self._stop.is_set():
|
||||||
conn: socket.socket | None = None
|
conn: socket.socket | None = None
|
||||||
try:
|
|
||||||
try:
|
try:
|
||||||
conn, _addr = server.accept()
|
|
||||||
except socket.timeout:
|
|
||||||
self._gc_idle_workers()
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
req = self._read_request(conn)
|
|
||||||
data = self.handle(req)
|
|
||||||
resp = {"ok": True, **_jsonable(data)}
|
|
||||||
except Exception as exc:
|
|
||||||
resp = {
|
|
||||||
"ok": False,
|
|
||||||
"error": str(exc),
|
|
||||||
"error_type": exc.__class__.__name__,
|
|
||||||
}
|
|
||||||
self._write_response(conn, resp)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
break
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"[hermes-bridge-broker] server loop error: {exc}", file=sys.stderr, flush=True)
|
|
||||||
finally:
|
|
||||||
if conn is not None:
|
|
||||||
try:
|
try:
|
||||||
conn.close()
|
conn, _addr = server.accept()
|
||||||
except OSError:
|
except socket.timeout:
|
||||||
pass
|
self._gc_idle_workers()
|
||||||
|
continue
|
||||||
with self._lock:
|
try:
|
||||||
workers = list(self._workers.values())
|
req = self._read_request(conn)
|
||||||
self._workers.clear()
|
data = self.handle(req)
|
||||||
for worker in workers:
|
resp = {"ok": True, **_jsonable(data)}
|
||||||
worker.stop()
|
except Exception as exc:
|
||||||
server.close()
|
resp = {
|
||||||
if self.endpoint.startswith("ipc://"):
|
"ok": False,
|
||||||
|
"error": str(exc),
|
||||||
|
"error_type": exc.__class__.__name__,
|
||||||
|
}
|
||||||
|
self._write_response(conn, resp)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
break
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"[hermes-bridge-broker] server loop error: {exc}", file=sys.stderr, flush=True)
|
||||||
|
finally:
|
||||||
|
if conn is not None:
|
||||||
|
try:
|
||||||
|
conn.close()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
restore_signals()
|
||||||
try:
|
try:
|
||||||
Path(self.endpoint.removeprefix("ipc://")).unlink(missing_ok=True)
|
atexit.unregister(self.stop)
|
||||||
except OSError:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
self.stop()
|
||||||
|
server.close()
|
||||||
|
if self.endpoint.startswith("ipc://"):
|
||||||
|
try:
|
||||||
|
Path(self.endpoint.removeprefix("ipc://")).unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str] | None = None) -> int:
|
def main(argv: list[str] | None = None) -> int:
|
||||||
|
|||||||
@@ -25,6 +25,17 @@ export interface BridgeCommand {
|
|||||||
hermesHome: string
|
hermesHome: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Plain-data view of an `AgentBridgeManager`, as returned by its `getRuntimeState()` method. */
export interface AgentBridgeManagerRuntimeState {
  /** The manager's bridge endpoint string. */
  endpoint: string
  /** Value of the manager's `running` accessor at the time of the call. */
  running: boolean
  /** Value of the manager's `ready` flag. */
  ready: boolean
  /** Pid of the managed child process; absent when there is no child. */
  pid?: number
  /** True when the manager's `starting` field is set (presumably an in-flight start). */
  starting: boolean
  /** Value of the manager's `stopping` flag. */
  stopping: boolean
  /** True when a restart timer is currently held by the manager. */
  restartScheduled: boolean
  /** The manager's restart attempt counter. */
  restartAttempts: number
}
|
||||||
|
|
||||||
function envPositiveInt(name: string): number | undefined {
|
function envPositiveInt(name: string): number | undefined {
|
||||||
const raw = process.env[name]
|
const raw = process.env[name]
|
||||||
if (!raw) return undefined
|
if (!raw) return undefined
|
||||||
@@ -308,6 +319,19 @@ export class AgentBridgeManager {
|
|||||||
return !!this.child && !this.child.killed && this.ready
|
return !!this.child && !this.child.killed && this.ready
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns a plain-object description of the manager's current state.
 *
 * `starting` and `restartScheduled` are reported as booleans derived from whether the
 * corresponding internal field is set; `pid` is undefined when there is no child process.
 */
getRuntimeState(): AgentBridgeManagerRuntimeState {
  const { endpoint, running, ready, stopping, restartAttempts } = this
  const pid = this.child?.pid
  const starting = Boolean(this.starting)
  const restartScheduled = Boolean(this.restartTimer)
  // Key order is kept stable so the serialised form does not change.
  return {
    endpoint,
    running,
    ready,
    pid,
    starting,
    stopping,
    restartScheduled,
    restartAttempts,
  }
}
|
||||||
|
|
||||||
async start(): Promise<void> {
|
async start(): Promise<void> {
|
||||||
if (this.running) return
|
if (this.running) return
|
||||||
if (this.starting) return this.starting
|
if (this.starting) return this.starting
|
||||||
|
|||||||
@@ -0,0 +1,551 @@
|
|||||||
|
import { execFileSync } from 'child_process'
|
||||||
|
import { readFileSync } from 'fs'
|
||||||
|
import { cpus, freemem, loadavg, platform, totalmem, uptime } from 'os'
|
||||||
|
import { AgentBridgeClient } from './agent-bridge'
|
||||||
|
import { getAgentBridgeManager } from './agent-bridge/manager'
|
||||||
|
|
||||||
|
/** Resource usage record for one monitored process. */
export interface ProcessUsage {
  /** Operating-system process id. */
  pid: number
  /** Which part of the stack the process belongs to. */
  role: 'web' | 'broker' | 'worker'
  /** Profile name associated with the process, when there is one. */
  profile?: string
  /** Whether the process is considered running. */
  running: boolean
  /** CPU usage as a percentage. */
  cpuPercent: number
  /** Resident set size; bytes, per the field name. */
  memoryRssBytes: number
  /** Command line of the process, when known. */
  command?: string
  /** Error text recorded for this process, when present. */
  error?: string
}
|
||||||
|
|
||||||
|
/**
 * Runtime metrics snapshot covering the host, the web process, the agent bridge and sessions.
 *
 * Units are those spelled out in the field names (seconds, bytes, percent).
 */
export interface OpsRuntimeSnapshot {
  /** When the snapshot was taken (NOTE(review): presumably epoch milliseconds; confirm at the producer). */
  timestamp: number
  /** Host-wide figures. */
  system: {
    platform: NodeJS.Platform
    arch: string
    uptimeSeconds: number
    cpuCount: number
    cpuPercent: number
    loadAverage: number[]
    totalMemoryBytes: number
    freeMemoryBytes: number
    usedMemoryBytes: number
    memoryPercent: number
  }
  /** Figures for the web server process itself. */
  web: {
    pid: number
    uptimeSeconds: number
    memory: NodeJS.MemoryUsage
    cpuPercent: number
  }
  /** State of the agent bridge: its broker process and the worker processes. */
  bridge: {
    endpoint: string
    reachable: boolean
    /** Error text for the bridge, when present. */
    error?: string
    broker: {
      running: boolean
      ready: boolean
      pid?: number
      /** Usage record for the broker process, when one is available. */
      process?: ProcessUsage
      restartScheduled: boolean
      restartAttempts: number
    }
    /** One entry per worker: its process usage plus per-worker session counters. */
    workers: Array<
      ProcessUsage & {
        endpoint?: string
        lastUsedAt?: number
        sessionCount: number
        runningSessionCount: number
      }
    >
    totalWorkerMemoryRssBytes: number
  }
  /** Session counters, overall and keyed by profile name. */
  sessions: {
    active: number
    running: number
    byProfile: Record<string, number>
  }
}
|
||||||
|
|
||||||
|
/** Cumulative CPU time counters summed over all cores at one instant. */
interface CpuTimesSample {
  /** Sum of every core's `idle` time. */
  idle: number
  /** Sum of every time bucket of every core. */
  total: number
}
|
||||||
|
|
||||||
|
/** CPU usage of this process paired with the time at which it was read. */
interface WebCpuSample {
  /** `Date.now()` value at sampling time. */
  at: number
  /** Result of `process.cpuUsage()` at sampling time. */
  usage: NodeJS.CpuUsage
}
|
||||||
|
|
||||||
|
/** Host memory figures in the shape used by the snapshot's `system` section. */
interface SystemMemoryUsage {
  /** Total physical memory, in bytes. */
  totalMemoryBytes: number
  /** Memory reported as free, in bytes. */
  freeMemoryBytes: number
  /** Memory counted as used, in bytes. */
  usedMemoryBytes: number
  /** Used memory as a percentage of the total. */
  memoryPercent: number
}
|
||||||
|
|
||||||
|
// Module-level sampling state. CPU percentages are computed as the difference between two
// consecutive samples, so the most recent sample of each kind is remembered here.
// Both start as null, meaning no sample has been taken yet.
let previousSystemCpu: CpuTimesSample | null = null
let previousWebCpu: WebCpuSample | null = null
|
||||||
|
|
||||||
|
/** Returns `os.cpus()`, or an empty list if that call throws. */
function safeCpus(): ReturnType<typeof cpus> {
  let cores: ReturnType<typeof cpus> = []
  try {
    cores = cpus()
  } catch {
    // Best effort: report no cores rather than fail the whole snapshot.
  }
  return cores
}
|
||||||
|
|
||||||
|
/** Returns `os.loadavg()`, or `[0, 0, 0]` if that call throws. */
function safeLoadAverage(): number[] {
  let averages: number[]
  try {
    averages = loadavg()
  } catch {
    averages = [0, 0, 0]
  }
  return averages
}
|
||||||
|
|
||||||
|
/** Returns the host uptime from `os.uptime()`, or 0 if that call throws. */
function safeUptime(): number {
  let seconds = 0
  try {
    seconds = uptime()
  } catch {
    // Best effort: keep the zero default.
  }
  return seconds
}
|
||||||
|
|
||||||
|
/** Returns `process.uptime()` for the current process, or 0 if that call throws. */
function safeProcessUptime(): number {
  let seconds = 0
  try {
    seconds = process.uptime()
  } catch {
    // Best effort: keep the zero default.
  }
  return seconds
}
|
||||||
|
|
||||||
|
/** Returns `process.memoryUsage()`, or a record with every counter set to 0 if that call throws. */
function safeProcessMemoryUsage(): NodeJS.MemoryUsage {
  try {
    return process.memoryUsage()
  } catch {
    const zeroed: NodeJS.MemoryUsage = {
      rss: 0,
      heapTotal: 0,
      heapUsed: 0,
      external: 0,
      arrayBuffers: 0,
    }
    return zeroed
  }
}
|
||||||
|
|
||||||
|
/**
 * Sums the CPU time counters of all cores reported by `safeCpus()`.
 *
 * @returns The summed `idle` time and the sum of every time bucket; both are 0 when no cores are
 *   reported.
 */
function readCpuTimes(): CpuTimesSample {
  return safeCpus().reduce<CpuTimesSample>(
    (acc, core) => {
      let coreTotal = 0
      for (const bucket of Object.values(core.times)) coreTotal += bucket
      return { idle: acc.idle + core.times.idle, total: acc.total + coreTotal }
    },
    { idle: 0, total: 0 },
  )
}
|
||||||
|
|
||||||
|
/**
 * Computes host CPU utilisation since the previous call.
 *
 * Each call reads the cumulative CPU times, stores them as the new baseline and compares them
 * with the previous baseline.
 *
 * @returns The busy share of the elapsed CPU time as a clamped percentage, or `null` on the first
 *   call, when no CPU time has elapsed, or when sampling throws.
 */
function sampleSystemCpuPercent(): number | null {
  try {
    const latest = readCpuTimes()
    const baseline = previousSystemCpu
    // Always advance the baseline, even when this call cannot produce a value.
    previousSystemCpu = latest
    if (!baseline) return null

    const elapsed = latest.total - baseline.total
    if (elapsed <= 0) return null
    const idle = latest.idle - baseline.idle
    return clampPercent(((elapsed - idle) / elapsed) * 100)
  } catch {
    return null
  }
}
|
||||||
|
|
||||||
|
/**
 * Computes this process's CPU utilisation since the previous call.
 *
 * The CPU time consumed (user plus system) between two calls is divided by the wall-clock time
 * between them and by the number of cores, so the result is a share of the whole machine.
 *
 * @returns A clamped percentage, or `null` on the first call, when no wall-clock time has elapsed,
 *   when the CPU counters went backwards, or when sampling throws.
 */
function sampleWebCpuPercent(): number | null {
  try {
    const latest: WebCpuSample = { at: Date.now(), usage: process.cpuUsage() }
    const baseline = previousWebCpu
    // Always advance the baseline, even when this call cannot produce a value.
    previousWebCpu = latest
    if (!baseline) return null

    // process.cpuUsage() reports microseconds; Date.now() reports milliseconds.
    const wallMicros = (latest.at - baseline.at) * 1000
    const userMicros = latest.usage.user - baseline.usage.user
    const systemMicros = latest.usage.system - baseline.usage.system
    const cpuMicros = userMicros + systemMicros
    if (wallMicros <= 0 || cpuMicros < 0) return null

    const coreCount = Math.max(safeCpus().length, 1)
    return clampPercent((cpuMicros / wallMicros / coreCount) * 100)
  } catch {
    return null
  }
}
|
||||||
|
|
||||||
|
/**
 * Rounds a percentage to one decimal place and clamps it to the range 0–100.
 *
 * @param value Raw percentage; may be out of range or not a number.
 * @returns The rounded value limited to [0, 100]. `NaN` input yields 0 so a
 *          failed computation never leaks `NaN` into a snapshot.
 */
function clampPercent(value: number): number {
  // Math.min/Math.max propagate NaN, so it has to be handled explicitly.
  if (Number.isNaN(value)) return 0
  const rounded = Math.round(value * 10) / 10
  return Math.max(0, Math.min(100, rounded))
}
|
||||||
|
|
||||||
|
/**
 * Converts an arbitrary value to a finite number.
 *
 * @param value Anything, typically a field from parsed command output.
 * @returns The numeric value, or `null` when the input is missing (`null`,
 *          `undefined`, blank string), is a symbol, or does not convert to a
 *          finite number.
 */
function numberOrNull(value: unknown): number | null {
  // Number(null) and Number('') are both 0, which would report missing data
  // as a real measurement; Number(symbol) throws.
  if (value == null || typeof value === 'symbol') return null
  if (typeof value === 'string' && value.trim() === '') return null
  const parsed = Number(value)
  return Number.isFinite(parsed) ? parsed : null
}
|
||||||
|
|
||||||
|
/**
 * Derives system memory usage from `totalmem()` and `freemem()`.
 *
 * @returns Total, free and used bytes plus the used share as a percentage.
 *          Values that could not be read stay at 0, and the percentage is 0
 *          when the total is not positive.
 */
function fallbackSystemMemoryUsage(): SystemMemoryUsage {
  const sizes = { total: 0, free: 0 }
  try {
    sizes.total = totalmem()
    sizes.free = freemem()
  } catch {}

  const used = sizes.total - sizes.free
  const percent = sizes.total > 0 ? clampPercent((used / sizes.total) * 100) : 0
  return {
    totalMemoryBytes: sizes.total,
    freeMemoryBytes: sizes.free,
    usedMemoryBytes: used,
    memoryPercent: percent,
  }
}
|
||||||
|
|
||||||
|
/**
 * Extracts the trailing page count from one `vm_stat` output line.
 *
 * The count is the run of digits and dots after the last `: ` on the line;
 * all dots (including the trailing one `vm_stat` prints) are removed before
 * conversion.
 *
 * @param line A single, already trimmed line.
 * @returns The count, or `null` when the line does not end in such a value.
 */
function parseVmStatPageCount(line: string): number | null {
  const digits = /:\s+([\d.]+)\.?$/.exec(line)?.[1]
  if (digits === undefined) return null
  const count = Number(digits.split('.').join(''))
  return Number.isFinite(count) ? count : null
}
|
||||||
|
|
||||||
|
/**
 * Derives memory usage from the text output of macOS `vm_stat`.
 *
 * Used memory is counted as active + wired-down + compressor-occupied pages
 * multiplied by the page size announced in the output header, capped at
 * `totalMemoryBytes`.
 *
 * @param vmStatOutput Raw `vm_stat` output.
 * @param totalMemoryBytes Physical memory size in bytes.
 * @returns The usage record, or `null` when the total is not a finite
 *          positive number, the page size cannot be found, or no used pages
 *          were parsed.
 */
export function parseMacVmStatMemory(vmStatOutput: string, totalMemoryBytes: number): SystemMemoryUsage | null {
  // NaN slips past a plain `<= 0` comparison, so check finiteness explicitly;
  // otherwise every derived field would become NaN.
  if (!Number.isFinite(totalMemoryBytes) || totalMemoryBytes <= 0) return null

  const pageSize = Number(vmStatOutput.match(/page size of\s+(\d+)\s+bytes/i)?.[1])
  if (!Number.isFinite(pageSize) || pageSize <= 0) return null

  const pages: Record<string, number> = {}
  for (const line of vmStatOutput.split(/\r?\n/)) {
    const count = parseVmStatPageCount(line.trim())
    if (count == null) continue
    if (line.includes('Pages active')) pages.active = count
    else if (line.includes('Pages wired down')) pages.wired = count
    else if (line.includes('Pages occupied by compressor')) pages.compressed = count
  }

  const usedPages = (pages.active || 0) + (pages.wired || 0) + (pages.compressed || 0)
  if (usedPages <= 0) return null

  const usedMemory = Math.min(totalMemoryBytes, usedPages * pageSize)
  const freeMemory = Math.max(0, totalMemoryBytes - usedMemory)

  return {
    totalMemoryBytes,
    freeMemoryBytes: freeMemory,
    usedMemoryBytes: usedMemory,
    memoryPercent: clampPercent((usedMemory / totalMemoryBytes) * 100),
  }
}
|
||||||
|
|
||||||
|
/**
 * Collects memory usage on macOS by running `sysctl -n hw.memsize` and
 * `vm_stat` (3 second timeout each) and handing the results to
 * `parseMacVmStatMemory`.
 *
 * @returns The parsed usage, or `null` when either command fails or the
 *          output cannot be parsed.
 */
function collectMacSystemMemoryUsage(): SystemMemoryUsage | null {
  try {
    const memsizeText = execFileSync('sysctl', ['-n', 'hw.memsize'], { encoding: 'utf-8', timeout: 3000 })
    const totalMemoryBytes = Number(memsizeText.trim())
    const vmStatText = execFileSync('vm_stat', { encoding: 'utf-8', timeout: 3000 })
    return parseMacVmStatMemory(vmStatText, totalMemoryBytes)
  } catch {
    return null
  }
}
|
||||||
|
|
||||||
|
/**
 * Picks the memory collector for the current platform.
 *
 * On `darwin` the `vm_stat`-based collector is tried first; everywhere else,
 * or when that collector yields nothing, `fallbackSystemMemoryUsage()` is used.
 */
function collectSystemMemoryUsage(): SystemMemoryUsage {
  const macUsage = platform() === 'darwin' ? collectMacSystemMemoryUsage() : null
  return macUsage || fallbackSystemMemoryUsage()
}
|
||||||
|
|
||||||
|
/**
 * Collects CPU and memory metrics for the given pids on POSIX systems.
 *
 * Starts from whatever `collectProcfsProcessMetrics` could read, then
 * overwrites each entry with the row `ps` reports for that pid. If `ps`
 * fails, the procfs-only map is returned.
 *
 * @param pids Process ids to inspect.
 * @returns Map from pid to the metrics that could be collected.
 */
function collectPosixProcessMetrics(pids: number[]): Map<number, Partial<ProcessUsage>> {
  const metrics = collectProcfsProcessMetrics(pids)
  if (pids.length === 0) return metrics

  try {
    const psOutput = execFileSync('ps', ['-o', 'pid=,pcpu=,rss=,comm=', '-p', pids.join(',')], {
      encoding: 'utf-8',
      timeout: 3000,
    })
    const rows = psOutput
      .split(/\r?\n/)
      .map(row => row.trim())
      .filter(row => row.length > 0)

    for (const row of rows) {
      const [pidField, cpuField, rssField, ...commandFields] = row.split(/\s+/)
      const pid = Number(pidField)
      if (!Number.isFinite(pid)) continue

      // `ps` reports rss in kilobytes; keep the procfs value if it is unusable.
      const rssKb = numberOrNull(rssField)
      const memoryRssBytes = rssKb == null ? metrics.get(pid)?.memoryRssBytes : rssKb * 1024
      metrics.set(pid, {
        cpuPercent: numberOrNull(cpuField) ?? 0,
        memoryRssBytes,
        command: commandFields.join(' ') || undefined,
      })
    }
  } catch {
    // Best effort: fall through with whatever was gathered so far.
  }
  return metrics
}
|
||||||
|
|
||||||
|
/**
 * Reads resident memory and process name from `/proc/<pid>/status`.
 *
 * Pids whose status file cannot be read are left out of the result. CPU usage
 * is not available from this source and is always reported as 0.
 *
 * @param pids Process ids to inspect.
 * @returns Map from pid to the metrics read from procfs.
 */
function collectProcfsProcessMetrics(pids: number[]): Map<number, Partial<ProcessUsage>> {
  const metrics = new Map<number, Partial<ProcessUsage>>()
  pids.forEach(pid => {
    try {
      const statusText = readFileSync(`/proc/${pid}/status`, 'utf-8')
      const rssMatch = statusText.match(/^VmRSS:\s+(\d+)\s+kB/im)
      const nameMatch = statusText.match(/^Name:\s+(.+)$/im)
      const rssKb = Number(rssMatch?.[1])
      const memoryRssBytes = Number.isFinite(rssKb) ? rssKb * 1024 : 0
      metrics.set(pid, { cpuPercent: 0, memoryRssBytes, command: nameMatch?.[1]?.trim() })
    } catch {}
  })
  return metrics
}
|
||||||
|
|
||||||
|
/**
 * Parses JSON text into an array.
 *
 * Blank output becomes an empty array, and a single non-array value is
 * wrapped so callers can always iterate.
 *
 * @param output JSON text.
 * @returns The parsed items.
 * @throws SyntaxError when non-blank output is not valid JSON.
 */
function parseWindowsJson(output: string): any[] {
  if (output.trim().length === 0) return []
  const decoded = JSON.parse(output)
  if (Array.isArray(decoded)) return decoded
  return [decoded]
}
|
||||||
|
|
||||||
|
/**
 * Collects CPU and memory metrics for the given pids on Windows.
 *
 * First tries a single PowerShell query against
 * `Win32_PerfFormattedData_PerfProc_Process`. If that fails for any reason,
 * falls back to one `tasklist.exe` call per pid, which provides memory and
 * image name but no CPU figure (reported as 0).
 *
 * @param pids Process ids to inspect.
 * @returns Map from pid to the metrics that could be collected; empty when
 *          `pids` is empty.
 */
function collectWindowsProcessMetrics(pids: number[]): Map<number, Partial<ProcessUsage>> {
  if (pids.length === 0) return new Map()

  const idList = pids.join(',')
  try {
    const scriptParts = [
      `$ids=@(${idList})`,
      'Get-CimInstance Win32_PerfFormattedData_PerfProc_Process',
      '| Where-Object { $ids -contains [int]$_.IDProcess }',
      '| Select-Object @{Name="pid";Expression={[int]$_.IDProcess}},@{Name="cpuPercent";Expression={[double]$_.PercentProcessorTime}},@{Name="memoryRssBytes";Expression={[double]$_.WorkingSet}},@{Name="command";Expression={$_.Name}}',
      '| ConvertTo-Json -Compress',
    ]
    const json = execFileSync('powershell.exe', ['-NoProfile', '-Command', scriptParts.join(' ')], {
      encoding: 'utf-8',
      timeout: 5000,
      windowsHide: true,
    })

    const fromPowerShell = new Map<number, Partial<ProcessUsage>>()
    for (const entry of parseWindowsJson(json)) {
      const pid = Number(entry?.pid)
      if (!Number.isFinite(pid)) continue
      const command = typeof entry?.command === 'string' ? entry.command : undefined
      fromPowerShell.set(pid, {
        cpuPercent: numberOrNull(entry?.cpuPercent) ?? 0,
        memoryRssBytes: numberOrNull(entry?.memoryRssBytes) ?? 0,
        command,
      })
    }
    return fromPowerShell
  } catch {}

  // PowerShell path failed: query each pid individually through tasklist.
  const fromTasklist = new Map<number, Partial<ProcessUsage>>()
  for (const pid of pids) {
    try {
      const csv = execFileSync('tasklist.exe', ['/FI', `PID eq ${pid}`, '/FO', 'CSV', '/NH'], {
        encoding: 'utf-8',
        timeout: 3000,
        windowsHide: true,
      })
      const row = csv.split(/\r?\n/).find(candidate => candidate.includes(`"${pid}"`))
      if (!row) continue

      const cells = (row.match(/(".*?"|[^",]+)(?=\s*,|\s*$)/g) || []).map(cell => cell.replace(/^"|"$/g, ''))
      // The fifth column holds the memory figure; strip everything but digits.
      const memoryKb = Number(cells[4]?.replace(/[^\d]/g, ''))
      fromTasklist.set(pid, {
        cpuPercent: 0,
        memoryRssBytes: Number.isFinite(memoryKb) ? memoryKb * 1024 : 0,
        command: cells[0],
      })
    } catch {}
  }
  return fromTasklist
}
|
||||||
|
|
||||||
|
/**
 * Collects per-process metrics using the collector for the current platform.
 *
 * Non-finite and non-positive pids are dropped and duplicates removed (first
 * occurrence wins the position) before delegating.
 *
 * @param pids Candidate process ids.
 * @returns Map from pid to the metrics that could be collected.
 */
function collectProcessMetrics(pids: number[]): Map<number, Partial<ProcessUsage>> {
  const seen = new Set<number>()
  for (const pid of pids) {
    if (Number.isFinite(pid) && pid > 0) seen.add(pid)
  }
  const uniquePids = Array.from(seen)

  if (platform() === 'win32') return collectWindowsProcessMetrics(uniquePids)
  return collectPosixProcessMetrics(uniquePids)
}
|
||||||
|
|
||||||
|
/**
 * Builds the usage record for one process from previously collected metrics.
 *
 * @param pid Process id; a missing or zero pid yields `undefined`.
 * @param role Role label copied into the record.
 * @param metrics Map produced by one of the metric collectors.
 * @param profile Optional profile name copied into the record.
 * @returns The usage record. `running` reflects whether `metrics` has an
 *          entry for the pid; absent CPU and memory figures default to 0.
 */
function processUsage(
  pid: number | undefined,
  role: ProcessUsage['role'],
  metrics: Map<number, Partial<ProcessUsage>>,
  profile?: string,
): ProcessUsage | undefined {
  if (!pid) return undefined

  const found = metrics.get(pid)
  const usage: ProcessUsage = {
    pid,
    role,
    profile,
    running: Boolean(found),
    cpuPercent: found?.cpuPercent ?? 0,
    memoryRssBytes: found?.memoryRssBytes ?? 0,
    command: found?.command,
  }
  return usage
}
|
||||||
|
|
||||||
|
/**
 * Normalises one worker entry from the bridge ping payload.
 *
 * A boolean is taken as the running flag alone; any other non-object value
 * means "not running". For objects, `running`, `pid`, `endpoint` and
 * `last_used_at` are read and coerced; a pid that is not a positive finite
 * number, a non-string endpoint, or a non-finite timestamp becomes `undefined`.
 *
 * @param raw Untyped worker descriptor.
 * @returns The normalised descriptor.
 */
function normalizeWorker(raw: unknown): {
  running: boolean
  pid?: number
  endpoint?: string
  lastUsedAt?: number
} {
  if (typeof raw === 'boolean') return { running: raw }
  if (raw === null || typeof raw !== 'object') return { running: false }

  const { running, pid, endpoint, last_used_at: lastUsedRaw } = raw as Record<string, unknown>
  const numericPid = Number(pid)
  const numericLastUsed = Number(lastUsedRaw)
  const hasValidPid = Number.isFinite(numericPid) && numericPid > 0

  return {
    running: Boolean(running),
    pid: hasValidPid ? numericPid : undefined,
    endpoint: typeof endpoint === 'string' ? endpoint : undefined,
    lastUsedAt: Number.isFinite(numericLastUsed) ? numericLastUsed : undefined,
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds a snapshot for when bridge and process metrics are unavailable.
 *
 * Host identity, uptimes, CPU count, load average and this process's memory
 * usage are still filled in; CPU percentages, system memory figures, bridge
 * state, workers and session counts are zero or empty.
 *
 * @param error Optional message stored as `bridge.error`.
 * @returns The placeholder snapshot.
 */
export function createEmptyOpsRuntimeSnapshot(error?: string): OpsRuntimeSnapshot {
  const timestamp = Date.now()

  const system = {
    platform: process.platform,
    arch: process.arch,
    uptimeSeconds: safeUptime(),
    cpuCount: safeCpus().length,
    cpuPercent: 0,
    loadAverage: safeLoadAverage(),
    totalMemoryBytes: 0,
    freeMemoryBytes: 0,
    usedMemoryBytes: 0,
    memoryPercent: 0,
  }

  const web = {
    pid: process.pid,
    uptimeSeconds: safeProcessUptime(),
    memory: safeProcessMemoryUsage(),
    cpuPercent: 0,
  }

  const broker = {
    running: false,
    ready: false,
    restartScheduled: false,
    restartAttempts: 0,
  }

  return {
    timestamp,
    system,
    web,
    bridge: {
      endpoint: '',
      reachable: false,
      error,
      broker,
      workers: [],
      totalWorkerMemoryRssBytes: 0,
    },
    sessions: { active: 0, running: 0, byProfile: {} },
  }
}
|
||||||
|
|
||||||
|
/**
 * Collects a full runtime snapshot: host metrics, this process's metrics,
 * bridge broker and worker state, and session counts.
 *
 * The bridge is pinged with a 2 second timeout and no connect retry. A failed
 * ping is recorded in `bridge.error` rather than thrown; a failed session
 * listing is ignored, in which case per-profile counts fall back to the
 * `sessions_by_profile` field of the ping payload when present.
 *
 * @returns The assembled snapshot.
 */
export async function getOpsRuntimeSnapshot(): Promise<OpsRuntimeSnapshot> {
  const manager = getAgentBridgeManager()
  const managerState = manager.getRuntimeState()
  let bridgeReachable = false
  let bridgeError: string | undefined
  let bridgePing: Record<string, any> = {}
  let sessions: Array<Record<string, any>> = []

  try {
    const client = new AgentBridgeClient({ endpoint: managerState.endpoint, timeoutMs: 2000, connectRetryMs: 0 })
    bridgePing = await client.ping() as Record<string, any>
    bridgeReachable = true
    try {
      const list = await client.list()
      const rawSessions: unknown = (list as any).sessions
      // Drop non-object entries so a malformed payload cannot throw below.
      sessions = Array.isArray(rawSessions)
        ? rawSessions.filter((entry: unknown) => entry !== null && typeof entry === 'object')
        : []
    } catch {}
  } catch (err: any) {
    bridgeError = err?.message || 'Agent bridge is not reachable'
  }

  const workerEntries = Object.entries((bridgePing.worker_details || {}) as Record<string, unknown>)
    .map(([profile, value]) => [profile, normalizeWorker(value)] as const)

  // Validate the broker pid once so `broker.pid` and `broker.process` agree.
  const rawBrokerPid = Number(bridgePing.broker?.pid || managerState.pid)
  const brokerPid = Number.isFinite(rawBrokerPid) && rawBrokerPid > 0 ? rawBrokerPid : undefined

  const pids = [
    process.pid,
    brokerPid,
    ...workerEntries.map(([, worker]) => worker.pid),
  ].filter((pid): pid is number => typeof pid === 'number' && pid > 0)
  const processMetrics = collectProcessMetrics(pids)

  // Single pass over sessions: total and running counts, overall and per profile.
  const sessionCountsByProfile: Record<string, number> = {}
  const runningCountsByProfile = new Map<string, number>()
  let runningSessions = 0
  for (const session of sessions) {
    const profileName = String(session.profile || 'default')
    sessionCountsByProfile[profileName] = (sessionCountsByProfile[profileName] || 0) + 1
    if (session.running) {
      runningSessions += 1
      runningCountsByProfile.set(profileName, (runningCountsByProfile.get(profileName) ?? 0) + 1)
    }
  }
  if (!sessions.length && bridgePing.sessions_by_profile && typeof bridgePing.sessions_by_profile === 'object') {
    for (const [profileName, count] of Object.entries(bridgePing.sessions_by_profile)) {
      const value = Number(count)
      if (Number.isFinite(value)) sessionCountsByProfile[profileName] = value
    }
  }

  const workers = workerEntries.map(([profileName, worker]) => {
    const usage = processUsage(worker.pid, 'worker', processMetrics, profileName)
    return {
      pid: worker.pid || 0,
      role: 'worker' as const,
      profile: profileName,
      // The running flag comes from the bridge's report, not from the metrics lookup.
      running: worker.running,
      cpuPercent: usage?.cpuPercent ?? 0,
      memoryRssBytes: usage?.memoryRssBytes ?? 0,
      command: usage?.command,
      endpoint: worker.endpoint,
      lastUsedAt: worker.lastUsedAt,
      sessionCount: sessionCountsByProfile[profileName] || 0,
      runningSessionCount: runningCountsByProfile.get(profileName) ?? 0,
    }
  })

  const systemMemory = collectSystemMemoryUsage()
  const totalWorkerMemory = workers.reduce((sum, worker) => sum + (worker.memoryRssBytes || 0), 0)

  return {
    timestamp: Date.now(),
    system: {
      platform: process.platform,
      arch: process.arch,
      uptimeSeconds: safeUptime(),
      cpuCount: safeCpus().length,
      cpuPercent: sampleSystemCpuPercent() ?? 0,
      loadAverage: safeLoadAverage(),
      totalMemoryBytes: systemMemory.totalMemoryBytes,
      freeMemoryBytes: systemMemory.freeMemoryBytes,
      usedMemoryBytes: systemMemory.usedMemoryBytes,
      memoryPercent: systemMemory.memoryPercent,
    },
    web: {
      pid: process.pid,
      uptimeSeconds: safeProcessUptime(),
      memory: safeProcessMemoryUsage(),
      cpuPercent: sampleWebCpuPercent() ?? 0,
    },
    bridge: {
      endpoint: managerState.endpoint,
      reachable: bridgeReachable,
      error: bridgeError,
      broker: {
        running: managerState.running,
        ready: managerState.ready,
        pid: brokerPid,
        process: processUsage(brokerPid, 'broker', processMetrics),
        restartScheduled: managerState.restartScheduled,
        restartAttempts: managerState.restartAttempts,
      },
      workers,
      totalWorkerMemoryRssBytes: totalWorkerMemory,
    },
    sessions: {
      active: sessions.length || Number(bridgePing.active_sessions || 0),
      running: runningSessions,
      byProfile: sessionCountsByProfile,
    },
  }
}
|
||||||
@@ -393,6 +393,80 @@ assert calls == []
|
|||||||
pool._run_context.session_id = "session-a"
|
pool._run_context.session_id = "session-a"
|
||||||
assert pool._approval_dispatcher("cmd", "desc", allow_permanent=False) == "once"
|
assert pool._approval_dispatcher("cmd", "desc", allow_permanent=False) == "once"
|
||||||
assert calls == [("cmd", "desc", False)]
|
assert calls == [("cmd", "desc", False)]
|
||||||
|
`)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('cleans broker workers and wires worker parent watchdog state', () => {
|
||||||
|
runPython(String.raw`
|
||||||
|
${harness}
|
||||||
|
|
||||||
|
class FakeWorker:
|
||||||
|
def __init__(self):
|
||||||
|
self.running = True
|
||||||
|
self.stopped = False
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
self.running = False
|
||||||
|
self.stopped = True
|
||||||
|
|
||||||
|
broker = bridge.BridgeBroker("ipc:///tmp/unused.sock")
|
||||||
|
worker = FakeWorker()
|
||||||
|
broker._workers["default"] = worker
|
||||||
|
broker._run_profile["run-a"] = "default"
|
||||||
|
broker._session_profile["session-a"] = "default"
|
||||||
|
broker._approval_profile["approval-a"] = "default"
|
||||||
|
broker._compression_profile["compression-a"] = "default"
|
||||||
|
|
||||||
|
broker.stop()
|
||||||
|
assert broker._stop.is_set()
|
||||||
|
assert worker.stopped
|
||||||
|
assert broker._workers == {}
|
||||||
|
assert broker._run_profile == {}
|
||||||
|
assert broker._session_profile == {}
|
||||||
|
assert broker._approval_profile == {}
|
||||||
|
assert broker._compression_profile == {}
|
||||||
|
|
||||||
|
created = {}
|
||||||
|
|
||||||
|
class FakeProcess:
|
||||||
|
stdout = None
|
||||||
|
stderr = None
|
||||||
|
|
||||||
|
def poll(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def fake_popen(args, **kwargs):
|
||||||
|
created["args"] = args
|
||||||
|
created["env"] = kwargs["env"]
|
||||||
|
return FakeProcess()
|
||||||
|
|
||||||
|
original_popen = bridge.subprocess.Popen
|
||||||
|
original_getpid = bridge.os.getpid
|
||||||
|
try:
|
||||||
|
bridge.subprocess.Popen = fake_popen
|
||||||
|
bridge.os.getpid = lambda: 4242
|
||||||
|
proc_worker = bridge.WorkerProcess("default", "ipc:///tmp/worker.sock", "/agent", "/home")
|
||||||
|
proc_worker._pipe_stderr = lambda: None
|
||||||
|
proc_worker._wait_ready = lambda: None
|
||||||
|
proc_worker.start()
|
||||||
|
finally:
|
||||||
|
bridge.subprocess.Popen = original_popen
|
||||||
|
bridge.os.getpid = original_getpid
|
||||||
|
|
||||||
|
assert created["env"]["HERMES_AGENT_BRIDGE_BROKER_PID"] == "4242"
|
||||||
|
assert created["env"]["HERMES_AGENT_BRIDGE_WORKER_PROFILE"] == "default"
|
||||||
|
|
||||||
|
stop_event = threading.Event()
|
||||||
|
seen_pids = []
|
||||||
|
original_process_exists = bridge._process_exists
|
||||||
|
try:
|
||||||
|
bridge._process_exists = lambda pid: seen_pids.append(pid) and False
|
||||||
|
bridge._start_parent_process_watchdog(12345, stop_event, "test", interval=0.01)
|
||||||
|
assert wait_for(stop_event.is_set, timeout=2)
|
||||||
|
finally:
|
||||||
|
bridge._process_exists = original_process_exists
|
||||||
|
|
||||||
|
assert seen_pids == [12345]
|
||||||
`)
|
`)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
|
// Stub shared between the module mock below and the individual tests.
const getOpsRuntimeSnapshot = vi.fn()

// Replace the ops-monitor service so the controller sees the stub and a
// minimal fallback snapshot factory.
vi.mock('../../packages/server/src/services/hermes/ops-monitor', () => {
  return {
    createEmptyOpsRuntimeSnapshot: (error?: string) => ({ timestamp: 0, error }),
    getOpsRuntimeSnapshot,
  }
})

describe('performance monitor controller', () => {
  // Loads the controller and runs its `runtime` handler against a bare context.
  const callRuntime = async (): Promise<any> => {
    const ctx: any = {}
    const controller = await import('../../packages/server/src/controllers/hermes/performance-monitor')
    await controller.runtime(ctx)
    return ctx
  }

  afterEach(() => {
    vi.clearAllMocks()
  })

  it('returns the runtime snapshot from the performance service', async () => {
    const snapshot = {
      timestamp: 1,
      bridge: { workers: [] },
      sessions: { active: 0 },
    }
    getOpsRuntimeSnapshot.mockResolvedValue(snapshot)

    const ctx = await callRuntime()

    expect(ctx.body).toBe(snapshot)
  })

  it('returns a zero snapshot when metrics collection fails', async () => {
    getOpsRuntimeSnapshot.mockRejectedValue(new Error('boom'))

    const ctx = await callRuntime()

    // The handler must not set an error status; it answers with the fallback body.
    expect(ctx.status).toBeUndefined()
    expect(ctx.body).toEqual({ timestamp: 0, error: 'boom' })
  })
})
|
||||||
Reference in New Issue
Block a user