2026-04-11 21:33:04 +08:00
|
|
|
import type { Context } from 'koa'
|
2026-04-16 08:38:18 +08:00
|
|
|
import { config } from '../../config'
|
2026-04-21 12:35:48 +08:00
|
|
|
import { getGatewayManagerInstance } from '../../services/gateway-bootstrap'
|
2026-04-22 16:14:50 +08:00
|
|
|
import { updateUsage } from '../../db/hermes/usage-store'
|
2026-04-21 12:35:48 +08:00
|
|
|
|
|
|
|
|
/**
 * Obtain the gateway manager through `getGatewayManagerInstance()`.
 * Callers in this file test the result for truthiness before using it.
 */
function getGatewayManager() {
  const manager = getGatewayManagerInstance()
  return manager
}
|
2026-04-11 21:33:04 +08:00
|
|
|
|
2026-04-22 16:14:50 +08:00
|
|
|
// --- run_id → session_id mapping (in-memory, ephemeral) ---
|
|
|
|
|
|
|
|
|
|
// Keys are run ids, values are session ids. Written by setRunSession() and
// read by getSessionForRun(); held only in this process's memory.
const runSessionMap = new Map<string, string>()
|
|
|
|
|
|
|
|
|
|
/** How long a run_id → session_id entry is kept before it is evicted (30 minutes). */
const RUN_SESSION_TTL_MS = 30 * 60 * 1000

/** Pending eviction timers keyed by run id, so a re-registration can replace the old timer. */
const runSessionTimers = new Map<string, ReturnType<typeof setTimeout>>()

/**
 * Record which session a run belongs to.
 *
 * The entry is evicted automatically after `RUN_SESSION_TTL_MS`. Registering
 * the same run id again restarts that countdown instead of leaving the
 * earlier timer in place, which would otherwise delete the newer entry early.
 *
 * @param runId - run identifier used as the lookup key
 * @param sessionId - session identifier to associate with the run
 */
export function setRunSession(runId: string, sessionId: string): void {
  // Drop the eviction scheduled by a previous registration of this run id.
  const pending = runSessionTimers.get(runId)
  if (pending !== undefined) clearTimeout(pending)

  runSessionMap.set(runId, sessionId)

  const timer = setTimeout(() => {
    runSessionMap.delete(runId)
    runSessionTimers.delete(runId)
  }, RUN_SESSION_TTL_MS)
  runSessionTimers.set(runId, timer)
}
|
|
|
|
|
|
2026-04-29 16:26:24 +08:00
|
|
|
/**
 * Look up the session id recorded for a run.
 *
 * @param runId - run identifier previously passed to `setRunSession`
 * @returns the stored session id, or `undefined` when the map has no entry
 */
export function getSessionForRun(runId: string): string | undefined {
  const sessionId = runSessionMap.get(runId)
  return sessionId
}
|
|
|
|
|
|
|
|
|
|
// --- Helpers ---
|
|
|
|
|
|
2026-04-17 03:13:24 +08:00
|
|
|
/**
 * Decide whether a failed upstream request looks like a transient connection
 * problem that is worth retrying.
 *
 * An error counts as transient when either:
 *  - it, or any error reachable through its `cause` chain, carries the code
 *    `ECONNREFUSED` or `ECONNRESET`; or
 *  - its message mentions `ECONNREFUSED`, `ECONNRESET`, `fetch failed` or
 *    `socket hang up` (case-insensitive).
 *
 * @param err - the thrown value; may be anything, including null/undefined
 * @returns true when the failure matches one of the patterns above
 */
function isTransientGatewayError(err: any): boolean {
  const transientCodes = new Set(['ECONNREFUSED', 'ECONNRESET'])

  // Walk err → err.cause → … with a small depth bound so a cyclic cause
  // chain cannot loop forever.
  let current: any = err
  for (let depth = 0; depth < 5 && current != null; depth++) {
    if (transientCodes.has(String(current.code || ''))) return true
    current = current.cause
  }

  const msg = String(err?.message || '')
  return /ECONNREFUSED|ECONNRESET|fetch failed|socket hang up/i.test(msg)
}
|
|
|
|
|
|
|
|
|
|
/**
 * Poll `${upstream}/health` until it answers with a 2xx status or the time
 * budget runs out.
 *
 * Each probe is aborted after at most 1200 ms and probes are spaced 250 ms
 * apart; both values are clamped to the time remaining so the call does not
 * run noticeably past `timeoutMs`.
 *
 * @param upstream - base URL of the gateway (no trailing slash expected)
 * @param timeoutMs - overall time budget in milliseconds (default 5000)
 * @returns true as soon as a probe succeeds, false when the budget is spent
 */
async function waitForGatewayReady(upstream: string, timeoutMs: number = 5000): Promise<boolean> {
  const probeTimeoutMs = 1200
  const pollIntervalMs = 250
  const deadline = Date.now() + timeoutMs
  const healthUrl = `${upstream}/health`

  while (true) {
    const remaining = deadline - Date.now()
    if (remaining <= 0) return false

    try {
      const res = await fetch(healthUrl, {
        method: 'GET',
        // AbortSignal.timeout needs an integer delay, hence the ceil.
        signal: AbortSignal.timeout(Math.ceil(Math.min(probeTimeoutMs, remaining))),
      })
      // Only the status matters; cancel the body so the connection is released.
      await res.body?.cancel().catch(() => { /* body already closed */ })
      if (res.ok) return true
    } catch {
      // Gateway not reachable yet (or probe timed out) — keep polling.
    }

    const pause = Math.min(pollIntervalMs, deadline - Date.now())
    if (pause <= 0) return false
    await new Promise(resolve => setTimeout(resolve, pause))
  }
}
|
|
|
|
|
|
2026-04-19 20:59:25 +08:00
|
|
|
/**
 * Resolve the profile name for a request.
 *
 * Precedence:
 *  1. the `x-hermes-profile` request header,
 *  2. the `profile` query parameter (first non-empty value when repeated),
 *  3. `getActiveProfileName()` from the hermes-profile service,
 *  4. the literal `'default'` when that lookup throws.
 *
 * @param ctx - Koa request context
 * @returns the profile name selected by the rules above
 */
function resolveProfile(ctx: Context): string {
  const headerProfile = ctx.get('x-hermes-profile')
  if (headerProfile) return headerProfile

  // A repeated key (?profile=a&profile=b) is parsed into string[]; pick the
  // first usable value instead of handing an array back as a "string".
  const rawQuery = ctx.query.profile
  const queryProfile = Array.isArray(rawQuery) ? rawQuery.find(Boolean) : rawQuery
  if (queryProfile) return queryProfile

  // Fallback: read from authoritative source (active_profile file).
  // NOTE(review): loaded with require() at call time — presumably to avoid an
  // import cycle; confirm before converting to a static import.
  try {
    const { getActiveProfileName } = require('../../services/hermes/hermes-profile')
    return getActiveProfileName()
  } catch {
    return 'default'
  }
}
|
|
|
|
|
|
2026-04-18 13:07:12 +08:00
|
|
|
/**
 * Pick the upstream base URL for a request.
 *
 * With a gateway manager available, a non-default profile is resolved through
 * `getUpstream(profile)` and everything else through `getUpstream()`. Without
 * a manager, `config.upstream` is used with one trailing slash removed.
 */
function resolveUpstream(ctx: Context): string {
  const manager = getGatewayManager()
  if (!manager) {
    return config.upstream.replace(/\/$/, '')
  }

  const profile = resolveProfile(ctx)
  const isNamedProfile = Boolean(profile) && profile !== 'default'
  return isNamedProfile ? manager.getUpstream(profile) : manager.getUpstream()
}
|
|
|
|
|
|
2026-04-22 16:14:50 +08:00
|
|
|
/**
 * Request headers that must not be copied onto the upstream request:
 *  - origin / referer / authorization: browser- and client-specific; the
 *    upstream credential is injected separately below
 *  - content-length: the caller re-serializes the body, so the original
 *    length may no longer match
 *  - the rest are hop-by-hop headers that describe the client connection,
 *    plus `expect`, which the local server has already dealt with
 */
const STRIPPED_REQUEST_HEADERS = new Set([
  'origin',
  'referer',
  'authorization',
  'content-length',
  'connection',
  'proxy-connection',
  'keep-alive',
  'te',
  'trailer',
  'transfer-encoding',
  'upgrade',
  'proxy-authorization',
  'expect',
])

/**
 * Build the header set for the upstream request.
 *
 * Copies the incoming headers except those in `STRIPPED_REQUEST_HEADERS`,
 * rewrites `host` to the upstream's host, keeps only the first value of a
 * multi-valued header, and — when a gateway manager supplies an API key for
 * the resolved profile — sets `authorization: Bearer <key>`.
 *
 * @param ctx - Koa request context whose headers are forwarded
 * @param upstream - upstream base URL; parsed only when a `host` header exists
 * @returns plain header map suitable for `fetch`
 */
function buildProxyHeaders(ctx: Context, upstream: string): Record<string, string> {
  const headers: Record<string, string> = {}

  for (const [key, value] of Object.entries(ctx.headers)) {
    if (value == null) continue
    const lower = key.toLowerCase()

    if (lower === 'host') {
      headers['host'] = new URL(upstream).host
      continue
    }
    if (STRIPPED_REQUEST_HEADERS.has(lower)) continue

    const v = Array.isArray(value) ? value[0] : value
    if (v) headers[key] = v
  }

  const mgr = getGatewayManager()
  if (mgr) {
    const apiKey = mgr.getApiKey(resolveProfile(ctx))
    if (apiKey) {
      headers['authorization'] = `Bearer ${apiKey}`
    }
  }

  return headers
}
|
|
|
|
|
|
|
|
|
|
// --- SSE stream interception ---
|
|
|
|
|
|
|
|
|
|
// Matches a path of the exact form /v1/runs/<id>/events; the single capture
// group is the <id> segment (one or more characters other than "/").
const SSE_EVENTS_PATH = /^\/v1\/runs\/([^/]+)\/events$/
|
|
|
|
|
|
|
|
|
|
/**
 * Scan one SSE event block for a `run.completed` payload and record its usage.
 *
 * Every `data:` line is parsed as JSON on its own. When a payload has
 * `event === 'run.completed'`, a `usage` object and a `run_id`, and a session
 * is known for that run, the token counts are passed to `updateUsage`.
 *
 * @param chunk - text of one event block (lines separated by `\n`)
 * @param profile - profile name stored alongside the usage record
 * @returns the run_id whose usage was recorded, or null when nothing was
 *          recorded (no matching event, or no session known for the run)
 */
function extractRunCompletedFromChunk(chunk: string, profile: string): string | null {
  for (const line of chunk.split('\n')) {
    // The space after "data:" is optional in the SSE format. JSON.parse
    // ignores surrounding whitespace, so slicing after the colon covers both
    // forms and also tolerates a trailing "\r".
    if (!line.startsWith('data:')) continue

    try {
      const data = JSON.parse(line.slice(5))
      if (data.event !== 'run.completed' || !data.usage || !data.run_id) continue

      const sessionId = getSessionForRun(data.run_id)
      if (!sessionId) continue

      updateUsage(sessionId, {
        inputTokens: data.usage.input_tokens,
        outputTokens: data.usage.output_tokens,
        cacheReadTokens: data.usage.cache_read_tokens,
        cacheWriteTokens: data.usage.cache_write_tokens,
        reasoningTokens: data.usage.reasoning_tokens,
        model: data.model || '',
        profile,
      })
      return data.run_id
    } catch {
      // Best effort: a non-JSON payload or a failed usage write is skipped so
      // that interception can never break the stream being relayed.
    }
  }
  return null
}
|
|
|
|
|
|
2026-04-22 16:14:50 +08:00
|
|
|
/**
 * Relay an upstream SSE response to the client while watching it for
 * `run.completed` events.
 *
 * Raw bytes are written to `ctx.res` as they arrive. In parallel the bytes
 * are decoded, split into event blocks on blank lines, and each block is
 * handed to `extractRunCompletedFromChunk`. The client response is always
 * ended, whether the stream finishes or fails.
 *
 * @param ctx - Koa context; the raw Node response `ctx.res` is written directly
 * @param res - upstream fetch response carrying the event stream
 * @param profile - profile name forwarded to the usage interception
 */
async function streamSSE(ctx: Context, res: Response, profile: string): Promise<void> {
  if (!res.body) {
    ctx.res.end()
    return
  }

  const reader = res.body.getReader()
  const decoder = new TextDecoder()
  let buffer = ''

  // Stop pulling from the upstream once the client connection is gone;
  // cancelling makes the pending read() resolve with done = true.
  const cancelUpstream = () => {
    reader.cancel().catch(() => { /* upstream already closed */ })
  }
  ctx.res.once('close', cancelUpstream)

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break

      // Forward raw bytes to client immediately
      ctx.res.write(value)

      // Decode for interception. CRLF is normalized on the whole buffer so a
      // "\r\n" split across two chunks is still recognized, and CRLF-delimited
      // streams do not accumulate unprocessed in memory.
      buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n')

      // Process complete event blocks (terminated by a blank line)
      let boundary: number
      while ((boundary = buffer.indexOf('\n\n')) !== -1) {
        const eventBlock = buffer.slice(0, boundary)
        buffer = buffer.slice(boundary + 2)
        extractRunCompletedFromChunk(eventBlock, profile)
      }
    }

    // Flush bytes still held by the decoder, then process what is left
    buffer = (buffer + decoder.decode()).replace(/\r\n/g, '\n')
    if (buffer.trim()) {
      extractRunCompletedFromChunk(buffer, profile)
    }
  } finally {
    ctx.res.off('close', cancelUpstream)
    ctx.res.end()
  }
}
|
|
|
|
|
|
|
|
|
|
// --- Main proxy function ---
|
|
|
|
|
|
|
|
|
|
/**
 * Forward the incoming request to the resolved upstream and relay the
 * upstream response onto the raw Node response (`ctx.res`).
 *
 * Steps:
 *  1. Rewrite the path (`/api/hermes/v1/*` → `/v1/*`, any other
 *     `/api/hermes/*` → `/api/*`) and drop the `token` query parameter.
 *  2. Send the request with `fetch`. On a transient connection error, wait
 *     for the upstream health endpoint and retry once.
 *  3. Copy status and headers, then relay the body:
 *     - `POST …/v1/runs` whose request body has a `session_id`: the response
 *       is buffered so its `run_id` can be recorded with `setRunSession`;
 *     - `/v1/runs/{id}/events`: streamed through `streamSSE`;
 *     - anything else: piped chunk by chunk.
 *
 * A failure before any byte was sent yields a 502 JSON error; a later failure
 * just ends the response.
 *
 * @param ctx - Koa request context
 */
export async function proxy(ctx: Context): Promise<void> {
  const profile = resolveProfile(ctx)
  const upstream = resolveUpstream(ctx)
  const upstreamPath = ctx.path.replace(/^\/api\/hermes\/v1/, '/v1').replace(/^\/api\/hermes/, '/api')
  const params = new URLSearchParams(ctx.search || '')
  params.delete('token')
  const search = params.toString()
  const url = `${upstream}${upstreamPath}${search ? `?${search}` : ''}`

  const headers = buildProxyHeaders(ctx, upstream)
  // Let fetch negotiate compression itself, so it only receives codings it
  // can decode; the decoded body is what gets relayed below.
  for (const name of Object.keys(headers)) {
    if (name.toLowerCase() === 'accept-encoding') delete headers[name]
  }

  try {
    let body: string | undefined
    if (ctx.req.method !== 'GET' && ctx.req.method !== 'HEAD') {
      // @koa/bodyparser parses JSON into ctx.request.body but doesn't store rawBody
      // by default. Re-serialize the parsed body to get the string form.
      const parsed = (ctx as any).request.body
      if (typeof parsed === 'string') {
        body = parsed
      } else if (parsed && typeof parsed === 'object') {
        body = JSON.stringify(parsed)
      }
    }

    const requestInit: RequestInit = { method: ctx.req.method, headers, body }

    let res: Response
    try {
      res = await fetch(url, requestInit)
    } catch (err) {
      // One retry, and only when the gateway reports healthy again.
      if (isTransientGatewayError(err) && await waitForGatewayReady(upstream)) {
        res = await fetch(url, requestInit)
      } else {
        throw err
      }
    }

    // Set response headers. fetch() hands back an already-decoded body, so an
    // upstream Content-Encoding (and the Content-Length of the encoded bytes)
    // no longer describes what is relayed to the client.
    const bodyWasDecoded = res.headers.has('content-encoding')
    res.headers.forEach((value, key) => {
      const lower = key.toLowerCase()
      if (lower === 'transfer-encoding' || lower === 'connection') return
      if (bodyWasDecoded && (lower === 'content-encoding' || lower === 'content-length')) return
      ctx.set(key, value)
    })
    ctx.status = res.status

    // Intercept POST /v1/runs to capture run_id → session_id mapping
    if (ctx.req.method === 'POST' && /\/v1\/runs$/.test(upstreamPath) && body) {
      // Only the request-body parse is guarded here, so that a failure while
      // reading the upstream response is reported instead of being mistaken
      // for "request body not JSON".
      let requestJson: any = null
      try {
        requestJson = JSON.parse(body)
      } catch { /* body not JSON, fall through to normal stream */ }

      const sessionId = requestJson?.session_id
      if (sessionId) {
        const resBody = await res.text()
        ctx.res.write(resBody)
        ctx.res.end()

        try {
          const result = JSON.parse(resBody)
          if (result.run_id) {
            setRunSession(result.run_id, sessionId)
          }
        } catch { /* response not JSON, ignore */ }
        return
      }
      // No session_id in body — fall through to normal response handling below
    }

    // Intercept SSE streams for /v1/runs/{id}/events
    if (SSE_EVENTS_PATH.test(upstreamPath)) {
      await streamSSE(ctx, res, profile)
      return
    }

    // Default: pipe response body directly
    if (!res.body) {
      ctx.res.end()
      return
    }

    const reader = res.body.getReader()
    // Stop reading from the upstream once the client has disconnected.
    const cancelUpstream = () => {
      reader.cancel().catch(() => { /* upstream already closed */ })
    }
    ctx.res.once('close', cancelUpstream)
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        ctx.res.write(value)
      }
    } finally {
      ctx.res.off('close', cancelUpstream)
    }
    ctx.res.end()
  } catch (err) {
    if (!ctx.res.headersSent) {
      const message = err instanceof Error ? err.message : String(err)
      ctx.status = 502
      ctx.set('Content-Type', 'application/json')
      ctx.body = { error: { message: `Proxy error: ${message}` } }
    } else {
      // Part of the response is already on the wire; all that is left is to close it.
      ctx.res.end()
    }
  }
}
|