Fix bridge history, profile models, and Windows gateway handling (#845)

* feat: support profile-aware group chat bridge flows

* feat: route cron jobs through hermes cli

* Fix group chat routing and isolate bridge tests

* Add Grok image-to-video media skill

* Default Grok videos to media directory

* Fix bridge profile fallback and cron repeat clearing

* Refine bridge chat and gateway platform handling

* Filter bridge tool-call text deltas

* Preserve structured bridge chat history

* Prepare beta release build artifacts

* Fix Windows run profile resolution

* Fix Windows path compatibility checks

* Fix profile-scoped model page display

* Hide Windows subprocess windows for jobs and updates

* Hide Windows file backend subprocess windows

* Avoid Windows gateway restart lock conflicts

* Treat Windows gateway lock as running on startup

* Force release Windows gateway lock on restart

* Tighten Windows gateway lock cleanup

* Update chat e2e source expectation

* Bump package version to 0.5.30

---------

Co-authored-by: Codex <codex@openai.com>
This commit is contained in:
ekko
2026-05-19 16:09:59 +08:00
committed by GitHub
parent 3d74d78698
commit 9a9416c99c
129 changed files with 7017 additions and 1838 deletions
@@ -127,7 +127,7 @@ export class ContextEngine {
// Under threshold — return summary + new messages directly
if (totalTokens <= config.triggerTokens) {
logger.debug(`[ContextEngine] [Path A] UNDER threshold — return summary + ${newMessages.length} verbatim msgs directly`)
const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId)
const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId, input.agentName)
this.logHistory('Path A (no compress)', history)
return { conversationHistory: history, instructions, meta }
}
@@ -155,7 +155,7 @@ export class ContextEngine {
meta.summaryTokenEstimate = this.countTokens(result.summary)
logger.debug(`[ContextEngine] [Path A] incremental compression DONE in ${elapsed}ms, newSummaryLen=${result.summary.length}, newLastMsgId=${lastMsg.id}`)
logger.debug(`[ContextEngine] [Path A] NEW SUMMARY (${result.summary.length} chars): ${result.summary.slice(0, 300)}`)
const history = this.buildHistory(result.summary, newMessages, input.agentSocketId)
const history = this.buildHistory(result.summary, newMessages, input.agentSocketId, input.agentName)
this.logHistory('Path A (after incremental compress)', history)
if (result.sessionId) this.sessionCleaner?.(result.sessionId)
return { conversationHistory: history, instructions, meta }
@@ -163,7 +163,7 @@ export class ContextEngine {
// Compression failed — degrade
logger.warn(`[ContextEngine] [Path A] incremental compression FAILED (${elapsed}ms) — degrading to summary + trimmed verbatim`)
const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId)
const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId, input.agentName)
this.trimToBudget(history, summaryTokens, config.maxHistoryTokens)
return { conversationHistory: history, instructions, meta }
}
@@ -177,7 +177,7 @@ export class ContextEngine {
// Under threshold — pass all messages verbatim
if (totalTokens <= config.triggerTokens) {
logger.debug(`[ContextEngine] [Path B] UNDER threshold — return all ${total} msgs verbatim`)
const history = messages.map(m => this.mapToHistory(m, input.agentSocketId))
const history = messages.map(m => this.mapToHistory(m, input.agentSocketId, input.agentName))
this.logHistory('Path B (no compress)', history)
return { conversationHistory: history, instructions, meta }
}
@@ -209,7 +209,7 @@ export class ContextEngine {
meta.summaryTokenEstimate = this.countTokens(result.summary)
logger.debug(`[ContextEngine] [Path B] full compression DONE in ${elapsed}ms, summaryLen=${result.summary.length}, compressed=${toCompress.length} msgs, keptTail=${tail.length} msgs, savedLastMsgId=${lastCompressedMsg.id}`)
logger.debug(`[ContextEngine] [Path B] COMPRESSED SUMMARY (${result.summary.length} chars): ${result.summary.slice(0, 300)}`)
const history = this.buildHistory(result.summary, tail, input.agentSocketId)
const history = this.buildHistory(result.summary, tail, input.agentSocketId, input.agentName)
this.logHistory('Path B (after full compress)', history)
if (result.sessionId) this.sessionCleaner?.(result.sessionId)
return { conversationHistory: history, instructions, meta }
@@ -217,7 +217,7 @@ export class ContextEngine {
// Compression failed — degrade
logger.warn(`[ContextEngine] [Path B] full compression FAILED (${elapsed}ms) — degrading to trimmed verbatim`)
const history = messages.map(m => this.mapToHistory(m, input.agentSocketId))
const history = messages.map(m => this.mapToHistory(m, input.agentSocketId, input.agentName))
this.trimToBudget(history, 0, config.maxHistoryTokens)
meta.verbatimCount = history.length
return { conversationHistory: history, instructions, meta }
@@ -265,6 +265,7 @@ export class ContextEngine {
summary: string,
messages: StoredMessage[],
agentSocketId: string,
agentName: string,
): Array<{ role: 'user' | 'assistant'; content: string }> {
const history: Array<{ role: 'user' | 'assistant'; content: string }> = []
@@ -275,7 +276,7 @@ export class ContextEngine {
)
}
history.push(...messages.map(m => this.mapToHistory(m, agentSocketId)))
history.push(...messages.map(m => this.mapToHistory(m, agentSocketId, agentName)))
return history
}
@@ -314,11 +315,51 @@ export class ContextEngine {
private mapToHistory(
msg: StoredMessage,
agentSocketId: string,
agentName: string,
): { role: 'user' | 'assistant'; content: string } {
if (msg.senderId === agentSocketId) {
return { role: 'assistant', content: msg.content }
const senderName = msg.senderName || 'unknown'
const isOwnAgent = msg.senderId === agentSocketId || senderName === agentName
if (msg.role === 'tool') {
const label = msg.tool_name ? `Tool result: ${msg.tool_name}` : 'Tool result'
return { role: 'user', content: `[${senderName}] [${label}]\n${msg.content || ''}` }
}
return { role: 'user', content: `[${msg.senderName}]: ${msg.content}` }
if (msg.role === 'assistant' && msg.tool_calls?.length) {
const toolsInfo = msg.tool_calls.map(tc => {
const name = tc.function?.name || 'unknown'
let args = tc.function?.arguments || '{}'
if (args.length > 4000) args = `${args.slice(0, 4000)}...`
return `[Calling tool: ${name} with arguments: ${args}]`
}).join('\n')
const content = msg.content?.trim()
return {
role: isOwnAgent ? 'assistant' : 'user',
content: content
? `${this.formatAttributedContent(senderName, content)}\n${this.formatAttributionPrefix(senderName, content)}${toolsInfo}`
: `${this.formatAttributionPrefix(senderName, content)}${toolsInfo}`,
}
}
return {
role: isOwnAgent ? 'assistant' : 'user',
content: this.formatAttributedContent(senderName, msg.content || ''),
}
}
/**
 * Builds the attributed text for one history entry: the sender prefix
 * followed by the message content with @mentions removed.
 *
 * @param senderName Display name placed inside the attribution prefix.
 * @param content Raw message text; mentions are stripped before it is appended.
 * @returns The prefix and the cleaned content joined with no extra separator.
 */
private formatAttributedContent(senderName: string, content: string): string {
  const prefix = this.formatAttributionPrefix(senderName)
  const body = this.stripMentions(content)
  return prefix + body
}
/**
 * Returns the attribution marker placed in front of a message, in the form
 * "[senderName]: " (including the trailing space).
 *
 * @param senderName Name written between the square brackets.
 * @param _content Accepted for call-site compatibility; not used by this method.
 * @returns The bracketed sender prefix.
 */
private formatAttributionPrefix(senderName: string, _content?: string): string {
  const prefix = '[' + senderName + ']: '
  return prefix
}
/**
 * Removes @mentions from a message and tidies the whitespace left behind.
 *
 * Steps, in order:
 *  1. Every "@" followed by one or more characters that are neither
 *     whitespace nor "@" is deleted.
 *  2. Runs of two or more spaces/tabs are collapsed into a single space.
 *  3. Leading whitespace is removed (trailing whitespace is left as is).
 *
 * @param content Message text; a falsy value is treated as an empty string.
 * @returns The text without mentions.
 */
private stripMentions(content: string): string {
  const text = String(content || '')
  const withoutMentions = text.replace(/@([^\s@]+)/g, '')
  const collapsed = withoutMentions.replace(/[ \t]{2,}/g, ' ')
  return collapsed.replace(/^\s+/, '')
}
private trimToBudget(
@@ -6,10 +6,11 @@ import {
} from './prompt'
import { updateUsage } from '../../../db/hermes/usage-store'
import { logger } from '../../logger'
import { AgentBridgeClient, type AgentBridgeRunResult } from '../agent-bridge'
/**
* Calls Hermes /v1/responses to produce LLM-generated summaries.
* The context engine owns history assembly; Responses storage/chaining is not used.
* Calls the local bridge to produce LLM-generated summaries.
* The context engine owns history assembly; gateway storage/chaining is not used.
*/
export class GatewaySummarizer implements GatewayCaller {
private timeoutMs: number
@@ -19,8 +20,8 @@ export class GatewaySummarizer implements GatewayCaller {
}
async summarize(
upstream: string,
apiKey: string | null,
_upstream: string,
_apiKey: string | null,
systemPrompt: string,
messages: StoredMessage[],
roomId: string,
@@ -29,7 +30,7 @@ export class GatewaySummarizer implements GatewayCaller {
): Promise<{ summary: string; sessionId: string }> {
const history: Array<{ role: string; content: string }> = messages.map(m => ({
role: 'user',
content: `[${m.senderName}]: ${m.content}`,
content: summarizeMessageForPrompt(m),
}))
if (previousSummary) {
@@ -43,132 +44,67 @@ export class GatewaySummarizer implements GatewayCaller {
? buildIncrementalUpdatePrompt()
: buildFullSummaryPrompt()
const res = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
},
body: JSON.stringify({
input: userPrompt,
const bridge = new AgentBridgeClient({ timeoutMs: this.timeoutMs + 15_000 })
const sessionId = `gc_compress_${roomId}_${profile}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`
.replace(/[^a-zA-Z0-9_-]/g, '_')
.slice(0, 160)
try {
const result = await bridge.request<AgentBridgeRunResult>({
action: 'chat',
session_id: sessionId,
message: userPrompt,
instructions: systemPrompt || buildSummarizationSystemPrompt(),
conversation_history: history,
stream: true,
store: false,
}),
signal: AbortSignal.timeout(this.timeoutMs),
})
profile,
source: 'api_server',
wait: true,
timeout: Math.ceil(this.timeoutMs / 1000),
}, { timeoutMs: this.timeoutMs + 15_000 })
if (!res.ok) {
throw new Error(`Summarization response failed: ${res.status}`)
}
if (!res.body) {
throw new Error('Summarization response stream missing')
}
let output = ''
for await (const frame of readSseFrames(res.body)) {
let parsed: any
try {
parsed = JSON.parse(frame.data)
} catch {
continue
}
const eventType = parsed.type || frame.event || parsed.event
if (eventType === 'response.output_text.delta' && parsed.delta) {
output += parsed.delta
continue
if (result.status === 'error') {
throw new Error(result.error || 'Summarization bridge run failed')
}
if (eventType === 'response.completed') {
const response = parsed.response || parsed
const finalText = extractResponseText(response)
if (!output && finalText) output = finalText
const payload = result.result as any
const output = String(payload?.final_response || result.output || '').trim()
if (!output) throw new Error('Empty summarization response')
const usage = response.usage || {}
const usage = payload?.usage || payload?.response?.usage
if (usage) {
updateUsage(roomId, {
inputTokens: usage.input_tokens ?? usage.inputTokens ?? 0,
outputTokens: usage.output_tokens ?? usage.outputTokens ?? 0,
cacheReadTokens: usage.cache_read_tokens ?? usage.cacheReadTokens ?? 0,
cacheWriteTokens: usage.cache_write_tokens ?? usage.cacheWriteTokens ?? 0,
reasoningTokens: usage.reasoning_tokens ?? usage.reasoningTokens ?? 0,
model: response.model || '',
model: payload?.model || payload?.response?.model || '',
profile,
})
logger.debug(`[GatewaySummarizer] Recorded response usage for compression room ${roomId} (profile=${profile}): input=${usage.input_tokens ?? 0}, output=${usage.output_tokens ?? 0}`)
if (!output || output.trim() === '') {
throw new Error('Empty summarization response')
}
return { summary: output.trim(), sessionId: '' }
}
if (eventType === 'response.failed') {
throw new Error(parsed.error?.message || parsed.error || 'Summarization response failed')
}
logger.debug(`[GatewaySummarizer] Bridge compression completed for room ${roomId} (profile=${profile})`)
return { summary: output, sessionId }
} finally {
await bridge.destroy(sessionId, profile).catch(() => undefined)
}
throw new Error('Summarization response stream ended without a terminal event')
}
}
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
const decoder = new TextDecoder()
const reader = stream.getReader()
let buffer = ''
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
buffer += decoder.decode(value, { stream: true })
let boundary = buffer.indexOf('\n\n')
while (boundary >= 0) {
const raw = buffer.slice(0, boundary)
buffer = buffer.slice(boundary + 2)
const frame = parseSseFrame(raw)
if (frame?.data) yield frame
boundary = buffer.indexOf('\n\n')
}
}
buffer += decoder.decode()
const frame = parseSseFrame(buffer)
if (frame?.data) yield frame
} finally {
reader.releaseLock()
function summarizeMessageForPrompt(message: StoredMessage): string {
if (message.role === 'tool') {
const label = message.tool_name ? `Tool result: ${message.tool_name}` : 'Tool result'
return `[${label}]\n${message.content || ''}`
}
}
function parseSseFrame(raw: string): { event?: string; data: string } | null {
let event: string | undefined
const data: string[] = []
for (const line of raw.split(/\r?\n/)) {
if (!line || line.startsWith(':')) continue
if (line.startsWith('event:')) {
event = line.slice(6).trim()
} else if (line.startsWith('data:')) {
data.push(line.slice(5).trimStart())
}
if (message.role === 'assistant' && message.tool_calls?.length) {
const toolsInfo = message.tool_calls.map(tc => {
const name = tc.function?.name || 'tool'
const args = tc.function?.arguments || '{}'
return `${name}(${args})`
}).join(', ')
const content = message.content?.trim()
return `[${message.senderName}]: ${content ? `${content}\n` : ''}[Tool calls: ${toolsInfo}]`
}
if (data.length === 0) return null
return { event, data: data.join('\n') }
}
function extractResponseText(response: any): string {
const output = Array.isArray(response?.output) ? response.output : []
const parts: string[] = []
for (const item of output) {
if (item.type !== 'message') continue
const content = Array.isArray(item.content) ? item.content : []
for (const part of content) {
if (part.type === 'output_text' || part.type === 'text') {
parts.push(part.text || '')
}
}
}
if (parts.length > 0) return parts.join('')
return typeof response?.output_text === 'string' ? response.output_text : ''
return `[${message.senderName}]: ${message.content}`
}
@@ -52,15 +52,23 @@ export function buildAgentInstructions(params: AgentInstructionsParams): string
${memberSection}
规则:
- 有人用 @${params.agentName} 提及你时才需要回复,重点回应提及你的人
- 禁止@自己
- 当你收到群聊任务时,说明系统已经判断你需要回复;请直接回应当前消息,不要因为消息里同时提及其他成员而拒绝回复或输出空回复
- 重点回应提及你的人
- 回答简洁、对群聊有帮助。
- 不要假装是人类,需要时明确表明自己是 AI。
- 对话历史中包含多个人的消息,每条消息前标有发送者名字。
- 对话开头可能包含之前的对话摘要,用于提供更早的上下文
- 回复最新一条提及你的消息
- 如果需要其他 agent 协作或明确回复某个人,使用 @名字 来提及对方
- 自行判断对话是否已经结束——如果问题已解决、达成共识、或对方只是陈述不需要回复,则不要再 @任何人,直接结束回复,避免产生无意义的循环对话。`
- 不要假装是人类,需要时明确表明自己是 AI。
- 对话历史中包含多个人的消息,每条消息前标有发送者名字。
- 历史消息里的"[发送者]: ..."只是系统添加的归属标记,用来帮助你理解谁说了这句话;不要在你的回复中复述或模仿这种方括号前缀
- 回复时使用自然语言即可;如果需要点名某人,只使用 @名字,不要输出"[${params.agentName}]:"这类格式
- 对话开头可能包含之前的对话摘要,用于提供更早的上下文
- 回复最新一条提及你的消息。
- 群聊系统支持 agent 之间通过 @名字 接力:当你在回复中写出 @某个成员,系统会把消息路由给对应成员。
- 如果用户明确要求你叫、让、请某个 agent 执行任务,不要自己代办,不要说你无法指挥其他 agent;请直接用 @名字 转交任务,并简短说明你已转交。
- 如果需要其他 agent 协作或明确回复某个人,使用 @名字 来提及对方,并把需要对方执行的任务写清楚。
- 不要主动 @ 任何人,除非最新消息明确要求你转交、邀请、询问某个具体成员。
- 如果只是回答提问,直接回答,不要在结尾 @ 其他成员继续接力。
- 不要为了活跃气氛、征求补充、让别人也看看而 @ 其他 agent 或用户。
- 只有在确实需要对方执行动作、提供信息、确认决策时,才可以 @名字。
- 自行判断对话是否已经结束——如果问题已解决、达成共识、或对方只是陈述不需要回复,则不要再 @任何人,直接结束回复,避免产生无意义的循环对话。`
return getSystemPrompt(basePrompt)
}
@@ -8,6 +8,11 @@ export interface StoredMessage {
senderName: string
content: string
timestamp: number
role?: string
tool_call_id?: string | null
tool_calls?: Array<{ id?: string; type?: string; function?: { name?: string; arguments?: string } }> | null
tool_name?: string | null
finish_reason?: string | null
}
// ─── Compression Config ────────────────────────────────────