feat: v0.5.16 - migrate to Responses API (#586)
* refactor: migrate from /v1/runs to /v1/responses streaming API Replace EventSource-based polling with direct SSE streaming via the /v1/responses endpoint across all server-side callers (chat-run-socket, context-compressor, gateway-client, agent-clients). Messages are now written to DB in real-time during streaming, eliminating post-run sync. Frontend chat store adds tool_call_id tracking for deduplication. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * chore: bump version to 0.5.16 and add changelog - Persist real API usage to usage table on response.completed - Remove unused codex_reasoning_items field from message schema - Fix unused variable warnings in chat-run-socket - Bump version to 0.5.16 - Add changelog entries for 0.5.16 (8 locales) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -13,7 +13,6 @@
|
||||
* 6. Save snapshot: last_message_index = index where compression ends
|
||||
*/
|
||||
|
||||
import { EventSource } from 'eventsource'
|
||||
import { encodingForModel, getEncoding } from 'js-tiktoken'
|
||||
import { logger } from '../../services/logger'
|
||||
import {
|
||||
@@ -21,7 +20,6 @@ import {
|
||||
saveCompressionSnapshot,
|
||||
deleteCompressionSnapshot,
|
||||
} from '../../db/hermes/compression-snapshot'
|
||||
import { getDb } from '../../db/index'
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────
|
||||
|
||||
@@ -376,8 +374,6 @@ export async function callSummarizer(
|
||||
previousSummary?: string,
|
||||
profile?: string,
|
||||
): Promise<string> {
|
||||
const sessionId = `compress_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`
|
||||
|
||||
const convHistory: Array<{ role: string; content: string }> = [...history]
|
||||
|
||||
if (previousSummary) {
|
||||
@@ -390,88 +386,57 @@ export async function callSummarizer(
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`
|
||||
|
||||
const res = await fetch(`${upstream}/v1/runs`, {
|
||||
const res = await fetch(`${upstream.replace(/\/$/, '')}/v1/responses`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
input: prompt,
|
||||
conversation_history: convHistory,
|
||||
session_id: sessionId,
|
||||
stream: true,
|
||||
store: false,
|
||||
}),
|
||||
signal: AbortSignal.timeout(timeoutMs),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Summarization run failed: ${res.status}`)
|
||||
throw new Error(`Summarization response failed: ${res.status}`)
|
||||
}
|
||||
|
||||
const { run_id } = await res.json() as { run_id: string }
|
||||
if (!res.body) {
|
||||
throw new Error('Summarization response stream missing')
|
||||
}
|
||||
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
const timer = setTimeout(() => {
|
||||
source.close()
|
||||
reject(new Error('Summarization timed out'))
|
||||
}, timeoutMs)
|
||||
let output = ''
|
||||
for await (const frame of readSseFrames(res.body)) {
|
||||
let parsed: any
|
||||
try {
|
||||
parsed = JSON.parse(frame.data)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
const eventType = parsed.type || frame.event || parsed.event
|
||||
|
||||
const eventsUrl = new URL(`${upstream}/v1/runs/${run_id}/events`)
|
||||
|
||||
// Use Authorization header instead of query parameter for better compatibility
|
||||
const eventSourceInit: any = apiKey ? {
|
||||
fetch: (url: string, init: any = {}) => fetch(url, {
|
||||
...init,
|
||||
headers: {
|
||||
...(init.headers || {}),
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
}),
|
||||
} : {}
|
||||
|
||||
// @ts-ignore - eventsource library types are too strict
|
||||
const source = new EventSource(eventsUrl.toString(), eventSourceInit)
|
||||
|
||||
source.onmessage = (event: MessageEvent) => {
|
||||
try {
|
||||
const parsed = JSON.parse(event.data)
|
||||
if (parsed.event === 'run.completed') {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
const output = parsed.output
|
||||
if (!output || typeof output !== 'string' || output.trim() === '') {
|
||||
reject(new Error('Empty summarization response'))
|
||||
return
|
||||
}
|
||||
resolve(output.trim())
|
||||
} else if (parsed.event === 'run.failed') {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
reject(new Error(parsed.error || 'Summarization run failed'))
|
||||
}
|
||||
} catch { /* ignore parse errors */ }
|
||||
if (eventType === 'response.output_text.delta' && parsed.delta) {
|
||||
output += parsed.delta
|
||||
continue
|
||||
}
|
||||
|
||||
source.onerror = () => {
|
||||
clearTimeout(timer)
|
||||
source.close()
|
||||
deleteCompressSession(sessionId, profile).catch(() => { })
|
||||
reject(new Error('Summarization SSE connection error'))
|
||||
if (eventType === 'response.completed') {
|
||||
const response = parsed.response || parsed
|
||||
const finalText = extractResponseText(response)
|
||||
if (!output && finalText) output = finalText
|
||||
if (!output || output.trim() === '') {
|
||||
throw new Error('Empty summarization response')
|
||||
}
|
||||
return output.trim()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/** Enqueue compression session for later deletion instead of deleting immediately */
|
||||
async function deleteCompressSession(sessionId: string, profile?: string): Promise<void> {
|
||||
try {
|
||||
const db = getDb()
|
||||
if (!db) return
|
||||
const now = Date.now()
|
||||
db.prepare(
|
||||
`INSERT INTO gc_pending_session_deletes (session_id, profile_name, status, attempt_count, last_error, created_at, updated_at, next_attempt_at)
|
||||
VALUES (?, ?, 'pending', 0, NULL, ?, ?, 0)
|
||||
ON CONFLICT(session_id) DO NOTHING`,
|
||||
).run(sessionId, profile || 'default', now, now)
|
||||
} catch { /* best-effort */ }
|
||||
if (eventType === 'response.failed') {
|
||||
throw new Error(parsed.error?.message || parsed.error || 'Summarization response failed')
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Summarization response stream ended without a terminal event')
|
||||
}
|
||||
|
||||
// ─── Main Compressor ────────────────────────────────────
|
||||
@@ -665,3 +630,63 @@ export class ChatContextCompressor {
|
||||
deleteCompressionSnapshot(sessionId)
|
||||
}
|
||||
}
|
||||
|
||||
async function* readSseFrames(stream: ReadableStream<Uint8Array>): AsyncGenerator<{ event?: string; data: string }> {
|
||||
const decoder = new TextDecoder()
|
||||
const reader = stream.getReader()
|
||||
let buffer = ''
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
let boundary = buffer.indexOf('\n\n')
|
||||
while (boundary >= 0) {
|
||||
const raw = buffer.slice(0, boundary)
|
||||
buffer = buffer.slice(boundary + 2)
|
||||
const frame = parseSseFrame(raw)
|
||||
if (frame?.data) yield frame
|
||||
boundary = buffer.indexOf('\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
buffer += decoder.decode()
|
||||
const frame = parseSseFrame(buffer)
|
||||
if (frame?.data) yield frame
|
||||
} finally {
|
||||
reader.releaseLock()
|
||||
}
|
||||
}
|
||||
|
||||
function parseSseFrame(raw: string): { event?: string; data: string } | null {
|
||||
let event: string | undefined
|
||||
const data: string[] = []
|
||||
for (const line of raw.split(/\r?\n/)) {
|
||||
if (!line || line.startsWith(':')) continue
|
||||
if (line.startsWith('event:')) {
|
||||
event = line.slice(6).trim()
|
||||
} else if (line.startsWith('data:')) {
|
||||
data.push(line.slice(5).trimStart())
|
||||
}
|
||||
}
|
||||
if (data.length === 0) return null
|
||||
return { event, data: data.join('\n') }
|
||||
}
|
||||
|
||||
function extractResponseText(response: any): string {
|
||||
const output = Array.isArray(response?.output) ? response.output : []
|
||||
const parts: string[] = []
|
||||
for (const item of output) {
|
||||
if (item.type !== 'message') continue
|
||||
const content = Array.isArray(item.content) ? item.content : []
|
||||
for (const part of content) {
|
||||
if (part.type === 'output_text' || part.type === 'text') {
|
||||
parts.push(part.text || '')
|
||||
}
|
||||
}
|
||||
}
|
||||
if (parts.length > 0) return parts.join('')
|
||||
return typeof response?.output_text === 'string' ? response.output_text : ''
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user