Fix bridge history, profile models, and Windows gateway handling (#845)

* feat: support profile-aware group chat bridge flows
* feat: route cron jobs through hermes cli
* Fix group chat routing and isolate bridge tests
* Add Grok image-to-video media skill
* Default Grok videos to media directory
* Fix bridge profile fallback and cron repeat clearing
* Refine bridge chat and gateway platform handling
* Filter bridge tool-call text deltas
* Preserve structured bridge chat history
* Prepare beta release build artifacts
* Fix Windows run profile resolution
* Fix Windows path compatibility checks
* Fix profile-scoped model page display
* Hide Windows subprocess windows for jobs and updates
* Hide Windows file backend subprocess windows
* Avoid Windows gateway restart lock conflicts
* Treat Windows gateway lock as running on startup
* Force release Windows gateway lock on restart
* Tighten Windows gateway lock cleanup
* Update chat e2e source expectation
* Bump package version to 0.5.30

---------

Co-authored-by: Codex <codex@openai.com>
2026-05-19 16:09:59 +08:00
parent 3d74d78698
commit 9a9416c99c
129 changed files with 7017 additions and 1838 deletions
@@ -127,7 +127,7 @@ export class ContextEngine {
            // Under threshold — return summary + new messages directly
            if (totalTokens <= config.triggerTokens) {
                logger.debug(`[ContextEngine] [Path A] UNDER threshold — return summary + ${newMessages.length} verbatim msgs directly`)
-                const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId)
+                const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId, input.agentName)
                this.logHistory('Path A (no compress)', history)
                return { conversationHistory: history, instructions, meta }
            }
@@ -155,7 +155,7 @@ export class ContextEngine {
                meta.summaryTokenEstimate = this.countTokens(result.summary)
                logger.debug(`[ContextEngine] [Path A] incremental compression DONE in ${elapsed}ms, newSummaryLen=${result.summary.length}, newLastMsgId=${lastMsg.id}`)
                logger.debug(`[ContextEngine] [Path A] NEW SUMMARY (${result.summary.length} chars): ${result.summary.slice(0, 300)}`)
-                const history = this.buildHistory(result.summary, newMessages, input.agentSocketId)
+                const history = this.buildHistory(result.summary, newMessages, input.agentSocketId, input.agentName)
                this.logHistory('Path A (after incremental compress)', history)
                if (result.sessionId) this.sessionCleaner?.(result.sessionId)
                return { conversationHistory: history, instructions, meta }
@@ -163,7 +163,7 @@ export class ContextEngine {

            // Compression failed — degrade
            logger.warn(`[ContextEngine] [Path A] incremental compression FAILED (${elapsed}ms) — degrading to summary + trimmed verbatim`)
-            const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId)
+            const history = this.buildHistory(snapshot.summary, newMessages, input.agentSocketId, input.agentName)
            this.trimToBudget(history, summaryTokens, config.maxHistoryTokens)
            return { conversationHistory: history, instructions, meta }
        }
@@ -177,7 +177,7 @@ export class ContextEngine {
        // Under threshold — pass all messages verbatim
        if (totalTokens <= config.triggerTokens) {
            logger.debug(`[ContextEngine] [Path B] UNDER threshold — return all ${total} msgs verbatim`)
-            const history = messages.map(m => this.mapToHistory(m, input.agentSocketId))
+            const history = messages.map(m => this.mapToHistory(m, input.agentSocketId, input.agentName))
            this.logHistory('Path B (no compress)', history)
            return { conversationHistory: history, instructions, meta }
        }
@@ -209,7 +209,7 @@ export class ContextEngine {
            meta.summaryTokenEstimate = this.countTokens(result.summary)
            logger.debug(`[ContextEngine] [Path B] full compression DONE in ${elapsed}ms, summaryLen=${result.summary.length}, compressed=${toCompress.length} msgs, keptTail=${tail.length} msgs, savedLastMsgId=${lastCompressedMsg.id}`)
            logger.debug(`[ContextEngine] [Path B] COMPRESSED SUMMARY (${result.summary.length} chars): ${result.summary.slice(0, 300)}`)
-            const history = this.buildHistory(result.summary, tail, input.agentSocketId)
+            const history = this.buildHistory(result.summary, tail, input.agentSocketId, input.agentName)
            this.logHistory('Path B (after full compress)', history)
            if (result.sessionId) this.sessionCleaner?.(result.sessionId)
            return { conversationHistory: history, instructions, meta }
@@ -217,7 +217,7 @@ export class ContextEngine {

        // Compression failed — degrade
        logger.warn(`[ContextEngine] [Path B] full compression FAILED (${elapsed}ms) — degrading to trimmed verbatim`)
-        const history = messages.map(m => this.mapToHistory(m, input.agentSocketId))
+        const history = messages.map(m => this.mapToHistory(m, input.agentSocketId, input.agentName))
        this.trimToBudget(history, 0, config.maxHistoryTokens)
        meta.verbatimCount = history.length
        return { conversationHistory: history, instructions, meta }
@@ -265,6 +265,7 @@ export class ContextEngine {
        summary: string,
        messages: StoredMessage[],
        agentSocketId: string,
+        agentName: string,
    ): Array<{ role: 'user' | 'assistant'; content: string }> {
        const history: Array<{ role: 'user' | 'assistant'; content: string }> = []

@@ -275,7 +276,7 @@ export class ContextEngine {
            )
        }

-        history.push(...messages.map(m => this.mapToHistory(m, agentSocketId)))
+        history.push(...messages.map(m => this.mapToHistory(m, agentSocketId, agentName)))
        return history
    }

@@ -314,11 +315,51 @@ export class ContextEngine {
    private mapToHistory(
        msg: StoredMessage,
        agentSocketId: string,
+        agentName: string,
    ): { role: 'user' | 'assistant'; content: string } {
-        if (msg.senderId === agentSocketId) {
-            return { role: 'assistant', content: msg.content }
+        const senderName = msg.senderName || 'unknown'
+        const isOwnAgent = msg.senderId === agentSocketId || senderName === agentName
+
+        if (msg.role === 'tool') {
+            const label = msg.tool_name ? `Tool result: ${msg.tool_name}` : 'Tool result'
+            return { role: 'user', content: `[${senderName}] [${label}]\n${msg.content || ''}` }
        }
-        return { role: 'user', content: `[${msg.senderName}]: ${msg.content}` }
+
+        if (msg.role === 'assistant' && msg.tool_calls?.length) {
+            const toolsInfo = msg.tool_calls.map(tc => {
+                const name = tc.function?.name || 'unknown'
+                let args = tc.function?.arguments || '{}'
+                if (args.length > 4000) args = `${args.slice(0, 4000)}...`
+                return `[Calling tool: ${name} with arguments: ${args}]`
+            }).join('\n')
+            const content = msg.content?.trim()
+            return {
+                role: isOwnAgent ? 'assistant' : 'user',
+                content: content
+                    ? `${this.formatAttributedContent(senderName, content)}\n${this.formatAttributionPrefix(senderName, content)}${toolsInfo}`
+                    : `${this.formatAttributionPrefix(senderName, content)}${toolsInfo}`,
+            }
+        }
+
+        return {
+            role: isOwnAgent ? 'assistant' : 'user',
+            content: this.formatAttributedContent(senderName, msg.content || ''),
+        }
+    }
+
+    /**
+     * Builds one attributed history line: the sender prefix produced by
+     * formatAttributionPrefix followed by the content after stripMentions
+     * has been applied to it.
+     *
+     * @param senderName Name placed in the attribution prefix.
+     * @param content Message text to append after the prefix.
+     * @returns The prefix and the mention-free content, concatenated.
+     */
+    private formatAttributedContent(senderName: string, content: string): string {
+        const prefix = this.formatAttributionPrefix(senderName)
+        const body = this.stripMentions(content)
+        return prefix + body
+    }
+
+    /**
+     * Returns the attribution marker placed in front of a history entry,
+     * i.e. the sender name in square brackets followed by a colon and a space.
+     *
+     * @param senderName Name to place between the brackets.
+     * @param _content Accepted for call-site compatibility; not read here.
+     * @returns The attribution prefix string.
+     */
+    private formatAttributionPrefix(senderName: string, _content?: string): string {
+        return '[' + senderName + ']: '
+    }
+
+    /**
+     * Removes @mentions from message text before it is replayed as history.
+     *
+     * A mention is one or more back-to-back `@name` tokens whose first `@`
+     * does not directly follow an ASCII address character (letter, digit or
+     * one of `._%+-`). That keeps e-mail addresses such as
+     * `alice@example.com` intact, while `@bob`, `@bob@alice` and a mention
+     * glued to non-ASCII text are still removed.
+     *
+     * The horizontal whitespace that trails a removed mention is dropped
+     * together with it, so no double space is left behind and unrelated runs
+     * of spaces (for example the indentation of a pasted code snippet) are
+     * no longer collapsed.
+     *
+     * @param content Raw message text; null, undefined and '' are treated as ''.
+     * @returns The text without mentions and without leading whitespace.
+     */
+    private stripMentions(content: string): string {
+        return String(content || '')
+            .replace(/(?<![A-Za-z0-9._%+-])(?:@[^\s@]+)+[ \t]*/g, '')
+            // A mention at the very start must not leave leading whitespace behind.
+            .replace(/^\s+/, '')
    }

    private trimToBudget(
@@ -6,10 +6,11 @@ import {
 } from './prompt'
 import { updateUsage } from '../../../db/hermes/usage-store'
 import { logger } from '../../logger'
+import { AgentBridgeClient, type AgentBridgeRunResult } from '../agent-bridge'

 /**
- * Calls Hermes /v1/responses to produce LLM-generated summaries.
- * The context engine owns history assembly; Responses storage/chaining is not used.
+ * Calls the local bridge to produce LLM-generated summaries.
+ * The context engine owns history assembly; gateway storage/chaining is not used.
 */
 export class GatewaySummarizer implements GatewayCaller {
    private timeoutMs: number
@@ -19,8 +20,8 @@ export class GatewaySummarizer implements GatewayCaller {
    }

    async summarize(
-        upstream: string,
-        apiKey: string | null,
+        _upstream: string,
+        _apiKey: string | null,
        systemPrompt: string,
        messages: StoredMessage[],
        roomId: string,
@@ -29,7 +30,7 @@ export class GatewaySummarizer implements GatewayCaller {
    ): Promise<{ summary: string; sessionId: string }> {
        const history: Array<{ role: string; content: string }> = messages.map(m => ({
            role: 'user',
-            content: `[${m.senderName}]: ${m.content}`,
+            content: summarizeMessageForPrompt(m),
        }))

        if (previousSummary) {
@@ -43,132 +44,67 @@ export class GatewaySummarizer implements GatewayCaller {
            ? buildIncrementalUpdatePrompt()
            : buildFullSummaryPrompt()

-        const res = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
-            },
-            body: JSON.stringify({
-                input: userPrompt,
+        const bridge = new AgentBridgeClient({ timeoutMs: this.timeoutMs + 15_000 })
+        const sessionId = `gc_compress_${roomId}_${profile}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`
+            .replace(/[^a-zA-Z0-9_-]/g, '_')
+            .slice(0, 160)
+
+        try {
+            const result = await bridge.request<AgentBridgeRunResult>({
+                action: 'chat',
+                session_id: sessionId,
+                message: userPrompt,
                instructions: systemPrompt || buildSummarizationSystemPrompt(),
                conversation_history: history,
-                stream: true,
-                store: false,
-            }),
-            signal: AbortSignal.timeout(this.timeoutMs),
-        })
+                profile,
+                source: 'api_server',
+                wait: true,
+                timeout: Math.ceil(this.timeoutMs / 1000),
+            }, { timeoutMs: this.timeoutMs + 15_000 })

-        if (!res.ok) {
-            throw new Error(`Summarization response failed: ${res.status}`)
-        }
-        if (!res.body) {
-            throw new Error('Summarization response stream missing')
-        }
-
-        let output = ''
-        for await (const frame of readSseFrames(res.body)) {
-            let parsed: any
-            try {
-                parsed = JSON.parse(frame.data)
-            } catch {
-                continue
-            }
-            const eventType = parsed.type || frame.event || parsed.event
-
-            if (eventType === 'response.output_text.delta' && parsed.delta) {
-                output += parsed.delta
-                continue
+            if (result.status === 'error') {
+                throw new Error(result.error || 'Summarization bridge run failed')
            }

-            if (eventType === 'response.completed') {
-                const response = parsed.response || parsed
-                const finalText = extractResponseText(response)
-                if (!output && finalText) output = finalText
+            const payload = result.result as any
+            const output = String(payload?.final_response || result.output || '').trim()
+            if (!output) throw new Error('Empty summarization response')

-                const usage = response.usage || {}
+            const usage = payload?.usage || payload?.response?.usage
+            if (usage) {
                updateUsage(roomId, {
                    inputTokens: usage.input_tokens ?? usage.inputTokens ?? 0,
                    outputTokens: usage.output_tokens ?? usage.outputTokens ?? 0,
                    cacheReadTokens: usage.cache_read_tokens ?? usage.cacheReadTokens ?? 0,
                    cacheWriteTokens: usage.cache_write_tokens ?? usage.cacheWriteTokens ?? 0,
                    reasoningTokens: usage.reasoning_tokens ?? usage.reasoningTokens ?? 0,
-                    model: response.model || '',
+                    model: payload?.model || payload?.response?.model || '',
                    profile,
                })
-                logger.debug(`[GatewaySummarizer] Recorded response usage for compression room ${roomId} (profile=${profile}): input=${usage.input_tokens ?? 0}, output=${usage.output_tokens ?? 0}`)
-
-                if (!output || output.trim() === '') {
-                    throw new Error('Empty summarization response')
-                }
-                return { summary: output.trim(), sessionId: '' }
-            }
-
-            if (eventType === 'response.failed') {
-                throw new Error(parsed.error?.message || parsed.error || 'Summarization response failed')
            }
+            logger.debug(`[GatewaySummarizer] Bridge compression completed for room ${roomId} (profile=${profile})`)
+            return { summary: output, sessionId }
+        } finally {
+            await bridge.destroy(sessionId, profile).catch(() => undefined)
        }
-
-        throw new Error('Summarization response stream ended without a terminal event')
    }
 }

-async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
-    const decoder = new TextDecoder()
-    const reader = stream.getReader()
-    let buffer = ''
-
-    try {
-        while (true) {
-            const { done, value } = await reader.read()
-            if (done) break
-            buffer += decoder.decode(value, { stream: true })
-
-            let boundary = buffer.indexOf('\n\n')
-            while (boundary >= 0) {
-                const raw = buffer.slice(0, boundary)
-                buffer = buffer.slice(boundary + 2)
-                const frame = parseSseFrame(raw)
-                if (frame?.data) yield frame
-                boundary = buffer.indexOf('\n\n')
-            }
-        }
-
-        buffer += decoder.decode()
-        const frame = parseSseFrame(buffer)
-        if (frame?.data) yield frame
-    } finally {
-        reader.releaseLock()
+function summarizeMessageForPrompt(message: StoredMessage): string {
+    if (message.role === 'tool') {
+        const label = message.tool_name ? `Tool result: ${message.tool_name}` : 'Tool result'
+        return `[${label}]\n${message.content || ''}`
    }
-}

-function parseSseFrame(raw: string): { event?: string; data: string } | null {
-    let event: string | undefined
-    const data: string[] = []
-    for (const line of raw.split(/\r?\n/)) {
-        if (!line || line.startsWith(':')) continue
-        if (line.startsWith('event:')) {
-            event = line.slice(6).trim()
-        } else if (line.startsWith('data:')) {
-            data.push(line.slice(5).trimStart())
-        }
+    if (message.role === 'assistant' && message.tool_calls?.length) {
+        const toolsInfo = message.tool_calls.map(tc => {
+            const name = tc.function?.name || 'tool'
+            const args = tc.function?.arguments || '{}'
+            return `${name}(${args})`
+        }).join(', ')
+        const content = message.content?.trim()
+        return `[${message.senderName}]: ${content ? `${content}\n` : ''}[Tool calls: ${toolsInfo}]`
    }
-    if (data.length === 0) return null
-    return { event, data: data.join('\n') }
-}

-function extractResponseText(response: any): string {
-    const output = Array.isArray(response?.output) ? response.output : []
-    const parts: string[] = []
-    for (const item of output) {
-        if (item.type !== 'message') continue
-        const content = Array.isArray(item.content) ? item.content : []
-        for (const part of content) {
-            if (part.type === 'output_text' || part.type === 'text') {
-                parts.push(part.text || '')
-            }
-        }
-    }
-    if (parts.length > 0) return parts.join('')
-    return typeof response?.output_text === 'string' ? response.output_text : ''
+    return `[${message.senderName}]: ${message.content}`
 }
@@ -52,15 +52,23 @@ export function buildAgentInstructions(params: AgentInstructionsParams): string
 ${memberSection}

 规则：
- 有人用 @${params.agentName} 提及你时才需要回复，重点回应提及你的人。
- 禁止@自己。
+- 当你收到群聊任务时，说明系统已经判断你需要回复；请直接回应当前消息，不要因为消息里同时提及其他成员而拒绝回复或输出空回复。
+- 重点回应提及你的人。
 - 回答简洁、对群聊有帮助。
- 不要假装是人类，需要时明确表明自己是 AI。
- 对话历史中包含多个人的消息，每条消息前标有发送者名字。
- 对话开头可能包含之前的对话摘要，用于提供更早的上下文。
- 回复最新一条提及你的消息。
- 如果需要其他 agent 协作或明确回复某个人，使用 @名字 来提及对方。
- 自行判断对话是否已经结束——如果问题已解决、达成共识、或对方只是陈述不需要回复，则不要再 @任何人，直接结束回复，避免产生无意义的循环对话。`
+	- 不要假装是人类，需要时明确表明自己是 AI。
+	- 对话历史中包含多个人的消息，每条消息前标有发送者名字。
+	- 历史消息里的"[发送者]: ..."只是系统添加的归属标记，用来帮助你理解谁说了这句话；不要在你的回复中复述或模仿这种方括号前缀。
+	- 回复时使用自然语言即可；如果需要点名某人，只使用 @名字，不要输出"[${params.agentName}]:"这类格式。
+	- 对话开头可能包含之前的对话摘要，用于提供更早的上下文。
+	- 回复最新一条提及你的消息。
+	- 群聊系统支持 agent 之间通过 @名字 接力：当你在回复中写出 @某个成员，系统会把消息路由给对应成员。
+	- 如果用户明确要求你叫、让、请某个 agent 执行任务，不要自己代办，不要说你无法指挥其他 agent；请直接用 @名字 转交任务，并简短说明你已转交。
+	- 如果需要其他 agent 协作或明确回复某个人，使用 @名字 来提及对方，并把需要对方执行的任务写清楚。
+	- 不要主动 @ 任何人，除非最新消息明确要求你转交、邀请、询问某个具体成员。
+	- 如果只是回答提问，直接回答，不要在结尾 @ 其他成员继续接力。
+	- 不要为了活跃气氛、征求补充、让别人也看看而 @ 其他 agent 或用户。
+	- 只有在确实需要对方执行动作、提供信息、确认决策时，才可以 @名字。
+	- 自行判断对话是否已经结束——如果问题已解决、达成共识、或对方只是陈述不需要回复，则不要再 @任何人，直接结束回复，避免产生无意义的循环对话。`

    return getSystemPrompt(basePrompt)
 }
@@ -8,6 +8,11 @@ export interface StoredMessage {
    senderName: string
    content: string
    timestamp: number
+    role?: string
+    tool_call_id?: string | null
+    tool_calls?: Array<{ id?: string; type?: string; function?: { name?: string; arguments?: string } }> | null
+    tool_name?: string | null
+    finish_reason?: string | null
 }

 // ─── Compression Config ────────────────────────────────────