Clean multimodal tool results before storage (#911)
This commit is contained in:
@@ -0,0 +1,104 @@
|
|||||||
|
/** Content-part `type` tags that denote an image and are replaced by a placeholder when summarizing. */
const IMAGE_PART_TYPES = new Set<string>([
  'image',
  'image_url',
  'input_image',
])
|
||||||
|
/**
 * Matches an inline base64 image data URI: `data:image/<subtype>;base64,` followed by a run of
 * base64 characters (CR/LF are allowed inside the run). The `g` flag makes `String.replace`
 * substitute every occurrence in a string, not just the first.
 */
const DATA_IMAGE_RE = /data:image\/[a-zA-Z0-9.+-]+;base64,[A-Za-z0-9+/=\r\n]+/g
|
||||||
|
|
||||||
|
/**
 * Type guard for plain key/value objects.
 *
 * Accepts object literals, `JSON.parse` output and null-prototype objects. Rejects `null`,
 * arrays and objects with any other prototype (`Date`, `Map`, class instances, ...): such
 * values do not round-trip when rebuilt from their own enumerable entries (a `Date` would
 * become `{}`), so they must not be treated as records.
 *
 * @param value - Candidate value of unknown shape.
 * @returns `true` when `value` can safely be handled as a `Record<string, unknown>`.
 */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) return false
  const proto: unknown = Object.getPrototypeOf(value)
  return proto === null || proto === Object.prototype
}
|
||||||
|
|
||||||
|
/**
 * Type guard for a message content part: a record that carries a string `type` tag.
 *
 * @param value - Candidate element of a content array.
 * @returns `true` when `value` is a record whose `type` property is a string.
 */
function isContentPart(value: unknown): value is Record<string, unknown> {
  if (!isPlainRecord(value)) return false
  const tag = value.type
  return typeof tag === 'string'
}
|
||||||
|
|
||||||
|
/**
 * Collapses an array of content parts into a plain-text summary.
 *
 * `text` parts contribute their `text` value (stringified, skipped when null/undefined);
 * image parts contribute the literal `[screenshot]`. Elements that are not content parts,
 * or whose type is neither text nor image, are ignored.
 *
 * @param parts - Candidate content parts.
 * @returns The non-empty pieces joined by newlines, or `null` when the array contains no
 *   text or image part at all.
 */
function summarizeContentParts(parts: unknown[]): string | null {
  const pieces: string[] = []
  let recognized = 0

  parts.forEach((candidate) => {
    if (!isContentPart(candidate)) return
    const kind = String(candidate.type)

    if (IMAGE_PART_TYPES.has(kind)) {
      recognized += 1
      pieces.push('[screenshot]')
      return
    }
    if (kind !== 'text') return

    recognized += 1
    const body = candidate.text
    if (body !== null && body !== undefined) pieces.push(String(body))
  })

  if (recognized === 0) return null
  // Empty strings are dropped so they do not produce blank lines in the summary.
  return pieces.filter((piece) => piece.length > 0).join('\n')
}
|
||||||
|
|
||||||
|
/**
 * Summarizes an envelope object whose `content` property holds an array of content parts.
 *
 * The `_multimodal` flag does not influence the result: an object without a non-empty
 * `content` array has nothing to summarize whether or not the flag is set.
 *
 * @param value - Candidate envelope.
 * @returns The summary of `value.content`, or `null` when `content` is missing, not an
 *   array, empty, or contains no text/image part.
 */
function summarizeMultimodalEnvelope(value: Record<string, unknown>): string | null {
  const { content } = value
  if (!Array.isArray(content) || content.length === 0) return null
  return summarizeContentParts(content)
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of `value` in which every inline base64 image data URI is replaced by
 * `[screenshot]`.
 *
 * Strings are rewritten directly, arrays and plain records are rebuilt recursively, and any
 * other value is returned unchanged. The input itself is never mutated.
 *
 * @param value - Arbitrary JSON-like data.
 * @returns The redacted copy (or the original value when it is neither a string, an array
 *   nor a plain record).
 */
function redactDataImages(value: unknown): unknown {
  if (typeof value === 'string') return value.replace(DATA_IMAGE_RE, '[screenshot]')
  if (Array.isArray(value)) return value.map((item) => redactDataImages(item))
  if (!isPlainRecord(value)) return value

  // Object.fromEntries defines own data properties. Plain assignment (`obj[key] = ...`) would
  // route an own "__proto__" key - which JSON.parse can produce - through the prototype
  // setter, silently dropping that field from the copy and changing the copy's prototype.
  return Object.fromEntries(
    Object.entries(value).map(([key, child]): [string, unknown] => [key, redactDataImages(child)]),
  )
}
|
||||||
|
|
||||||
|
/**
 * Dispatches to the matching summarizer for the recognized multimodal shapes.
 *
 * @param value - Parsed or structured message content.
 * @returns The content-part summary for arrays, the envelope summary for records, and `null`
 *   for every other value (or when the chosen summarizer finds nothing to summarize).
 */
function summarizeKnownMultimodalContent(value: unknown): string | null {
  if (Array.isArray(value)) return summarizeContentParts(value)
  return isPlainRecord(value) ? summarizeMultimodalEnvelope(value) : null
}
|
||||||
|
|
||||||
|
/**
 * Turns structured (non-string) message content into its storage string.
 *
 * @param value - Message content of unknown shape.
 * @returns The multimodal summary when one exists; otherwise, for arrays and records, the
 *   JSON serialization with inline image data redacted; `null` for any other value.
 */
function serializeStructuredMessageContent(value: unknown): string | null {
  const digest = summarizeKnownMultimodalContent(value)
  if (digest !== null) return digest

  const isStructured = Array.isArray(value) || isPlainRecord(value)
  return isStructured ? JSON.stringify(redactDataImages(value)) : null
}
|
||||||
|
|
||||||
|
/**
 * Cheap pre-check deciding whether a string is worth running through `JSON.parse`.
 *
 * The string must (after trimming) start with `{` or `[` and mention one of the markers
 * associated with image content. Anything else is left alone so unrelated JSON is never
 * parsed or re-serialized.
 *
 * @param value - Raw message content.
 * @returns `true` when the string looks like JSON carrying multimodal/image data.
 */
function shouldTryParseStructuredString(value: string): boolean {
  const trimmed = value.trim()
  const opener = trimmed.charAt(0)
  if (opener !== '{' && opener !== '[') return false

  const markers = ['_multimodal', 'data:image/', '"image_url"', '"input_image"']
  if (markers.some((marker) => trimmed.includes(marker))) return true

  // JSON permits arbitrary whitespace around the colon, so match `"type": "image"` in any
  // spacing rather than only the compact and single-space spellings.
  return /"type"\s*:\s*"image"/.test(trimmed)
}
|
||||||
|
|
||||||
|
/**
 * Converts message content into the string that is persisted, without inline base64 images.
 *
 * - Strings that look like JSON with image data are parsed; a multimodal summary is returned
 *   when one exists, otherwise the parsed value is re-serialized with data URIs redacted.
 *   If parsing (or processing the parsed value) throws, the raw string is redacted instead.
 * - Other strings only have data URIs replaced by `[screenshot]`.
 * - Arrays and records are summarized or serialized with redaction.
 * - Anything else is stringified, with `null`/`undefined` becoming the empty string.
 *
 * @param content - Message content in any shape.
 * @returns The storage-safe string.
 */
export function normalizeMessageContentForStorage(content: unknown): string {
  if (typeof content !== 'string') {
    return serializeStructuredMessageContent(content) ?? String(content ?? '')
  }

  if (shouldTryParseStructuredString(content)) {
    try {
      const decoded: unknown = JSON.parse(content.trim())
      return summarizeKnownMultimodalContent(decoded) ?? JSON.stringify(redactDataImages(decoded))
    } catch {
      // Not usable as JSON after all; the raw-text redaction below still applies.
    }
  }

  return content.replace(DATA_IMAGE_RE, '[screenshot]')
}
|
||||||
|
|
||||||
|
/**
 * Role-aware wrapper around `normalizeMessageContentForStorage`.
 *
 * @param role - Message role; only the exact value `'user'` bypasses normalization.
 * @param content - Message content as a string.
 * @returns `content` unchanged for user messages, the normalized content otherwise.
 */
export function normalizeMessageContentForStorageRole(role: string | undefined | null, content: string): string {
  if (role === 'user') return content
  return normalizeMessageContentForStorage(content)
}
|
||||||
@@ -4,6 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
import { isSqliteAvailable, getDb } from '../index'
|
import { isSqliteAvailable, getDb } from '../index'
|
||||||
import { SESSIONS_TABLE, MESSAGES_TABLE } from './schemas'
|
import { SESSIONS_TABLE, MESSAGES_TABLE } from './schemas'
|
||||||
|
import { normalizeMessageContentForStorageRole } from './message-content'
|
||||||
|
|
||||||
// Re-export types for compatibility with sessions-db.ts consumers
|
// Re-export types for compatibility with sessions-db.ts consumers
|
||||||
export interface HermesSessionRow {
|
export interface HermesSessionRow {
|
||||||
@@ -377,7 +378,7 @@ export function addMessage(msg: {
|
|||||||
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content)
|
`INSERT INTO ${MESSAGES_TABLE} (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, reasoning, reasoning_details, reasoning_content)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
).run(
|
).run(
|
||||||
msg.session_id, msg.role, msg.content,
|
msg.session_id, msg.role, normalizeMessageContentForStorageRole(msg.role, msg.content),
|
||||||
msg.tool_call_id ?? null, toolCallsJson, msg.tool_name ?? null,
|
msg.tool_call_id ?? null, toolCallsJson, msg.tool_name ?? null,
|
||||||
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
||||||
msg.token_count ?? null, msg.finish_reason ?? null,
|
msg.token_count ?? null, msg.finish_reason ?? null,
|
||||||
@@ -412,7 +413,7 @@ export function addMessages(msgs: Array<{
|
|||||||
for (const msg of msgs) {
|
for (const msg of msgs) {
|
||||||
const toolCallsJson = msg.tool_calls ? JSON.stringify(msg.tool_calls) : null
|
const toolCallsJson = msg.tool_calls ? JSON.stringify(msg.tool_calls) : null
|
||||||
insert.run(
|
insert.run(
|
||||||
msg.session_id, msg.role, msg.content,
|
msg.session_id, msg.role, normalizeMessageContentForStorageRole(msg.role, msg.content),
|
||||||
msg.tool_call_id ?? null, toolCallsJson, msg.tool_name ?? null,
|
msg.tool_call_id ?? null, toolCallsJson, msg.tool_name ?? null,
|
||||||
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
msg.timestamp ?? Math.floor(Date.now() / 1000),
|
||||||
msg.token_count ?? null, msg.finish_reason ?? null,
|
msg.token_count ?? null, msg.finish_reason ?? null,
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import type { Server as HttpServer } from 'http'
|
|||||||
import { getToken } from '../../../services/auth'
|
import { getToken } from '../../../services/auth'
|
||||||
import { logger } from '../../../services/logger'
|
import { logger } from '../../../services/logger'
|
||||||
import { getDb } from '../../../db'
|
import { getDb } from '../../../db'
|
||||||
|
import { normalizeMessageContentForStorage, normalizeMessageContentForStorageRole } from '../../../db/hermes/message-content'
|
||||||
import { AgentClients, GROUP_CHAT_AGENT_SOCKET_SECRET } from './agent-clients'
|
import { AgentClients, GROUP_CHAT_AGENT_SOCKET_SECRET } from './agent-clients'
|
||||||
import { ContextEngine } from '../context-engine/compressor'
|
import { ContextEngine } from '../context-engine/compressor'
|
||||||
import { SessionDeleter } from '../session-deleter'
|
import { SessionDeleter } from '../session-deleter'
|
||||||
@@ -34,6 +35,10 @@ function contentToStorageString(content: unknown): string {
|
|||||||
return JSON.stringify(content ?? '')
|
return JSON.stringify(content ?? '')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prepares a chat message's content for persistence.
 *
 * The insert that uses this helper stores the role as `msg.role || 'user'`, so a missing or
 * empty role is resolved the same way here. Otherwise a message saved with role `user`
 * would still have its content rewritten as if it were a non-user message.
 *
 * @param role - Role of the message, possibly undefined.
 * @param content - Message content as a string.
 * @returns The content to store: unchanged for user messages, normalized otherwise.
 */
function messageContentForStorage(role: string | undefined, content: string): string {
  const effectiveRole = role || 'user'
  return normalizeMessageContentForStorageRole(effectiveRole, content)
}
|
||||||
|
|
||||||
function contentToText(content: unknown): string {
|
function contentToText(content: unknown): string {
|
||||||
if (typeof content === 'string') {
|
if (typeof content === 'string') {
|
||||||
const trimmed = content.trim()
|
const trimmed = content.trim()
|
||||||
@@ -406,7 +411,7 @@ class ChatStorage {
|
|||||||
reasoning_details = excluded.reasoning_details,
|
reasoning_details = excluded.reasoning_details,
|
||||||
reasoning_content = excluded.reasoning_content`
|
reasoning_content = excluded.reasoning_content`
|
||||||
).run(
|
).run(
|
||||||
msg.id, msg.roomId, msg.senderId, msg.senderName, msg.content, msg.timestamp,
|
msg.id, msg.roomId, msg.senderId, msg.senderName, messageContentForStorage(msg.role, msg.content), msg.timestamp,
|
||||||
msg.role || 'user',
|
msg.role || 'user',
|
||||||
msg.tool_call_id ?? null,
|
msg.tool_call_id ?? null,
|
||||||
toolCallsJson,
|
toolCallsJson,
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
|
import {
|
||||||
|
normalizeMessageContentForStorage,
|
||||||
|
normalizeMessageContentForStorageRole,
|
||||||
|
} from '../../packages/server/src/db/hermes/message-content'
|
||||||
|
|
||||||
|
describe('message content normalization', () => {
  // A serialized envelope collapses to its text plus an image placeholder.
  it('summarizes multimodal envelopes without persisting base64 images', () => {
    const envelope = JSON.stringify({
      _multimodal: true,
      content: [
        { type: 'text', text: 'Image loaded into context.' },
        { type: 'image_url', image_url: { url: 'data:image/png;base64,AAAA' } },
      ],
    })

    const stored = normalizeMessageContentForStorage(envelope)

    expect(stored).toBe('Image loaded into context.\n[screenshot]')
    expect(stored).not.toContain('data:image/')
    expect(stored).not.toContain('AAAA')
  })

  // Content passed as an actual array (not a string) takes the structured path.
  it('summarizes OpenAI-style content part arrays', () => {
    const parts = [
      { type: 'text', text: 'Question: what is shown?' },
      { type: 'input_image', image_url: 'data:image/jpeg;base64,BBBB' },
    ]

    expect(normalizeMessageContentForStorage(parts)).toBe('Question: what is shown?\n[screenshot]')
  })

  // JSON that is not an envelope keeps its structure; only the data URI is replaced.
  it('redacts nested data images in non-envelope JSON without dropping other fields', () => {
    const raw = JSON.stringify({
      output: {
        url: 'data:image/png;base64,CCCC',
        status: 'ok',
      },
    })

    const stored = normalizeMessageContentForStorage(raw)

    expect(JSON.parse(stored)).toEqual({
      output: {
        url: '[screenshot]',
        status: 'ok',
      },
    })
  })

  // Without any image marker the string must come back byte-for-byte.
  it('does not parse or rewrite unrelated JSON strings', () => {
    const untouched = '{\n  "type": "event",\n  "payload": "ok"\n}'

    expect(normalizeMessageContentForStorage(untouched)).toBe(untouched)
  })

  // The role decides whether normalization runs at all.
  it('keeps user-authored image data untouched and only cleans non-user messages', () => {
    const payload = '{"content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,DDDD"}}]}'

    const asUser = normalizeMessageContentForStorageRole('user', payload)
    const asTool = normalizeMessageContentForStorageRole('tool', payload)

    expect(asUser).toBe(payload)
    expect(asTool).not.toContain('data:image/')
  })
})
|
||||||
Reference in New Issue
Block a user