feat: add robust LLM JSON parser and fix Group Chat schema (#388)

Add robust LLM JSON parsing utilities to handle unreliable model output:
- Parse tool arguments with tolerance for Python format (single quotes, trailing commas)
- Extract text from Anthropic-style content arrays in streaming events
- Normalize tool_result content to string format per Hermes spec
- Parse message.delta and run.completed output to avoid displaying JSON strings

Fix Group Chat database schema errors:
- Add id column as PRIMARY KEY to gc_room_agents and gc_room_members tables
- Change from composite primary keys to single-column id keys
- Update tests to match new schema structure

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 08:58:14 +08:00
parent 9325aa5482
commit 969c7c0e1a
5 changed files with 457 additions and 135 deletions
@@ -119,6 +119,7 @@ export const GC_MESSAGES_SCHEMA: Record<string, string> = {
 export const GC_ROOM_AGENTS_TABLE = 'gc_room_agents'

 export const GC_ROOM_AGENTS_SCHEMA: Record<string, string> = {
+  id: 'TEXT PRIMARY KEY',
  roomId: 'TEXT NOT NULL',
  agentId: 'TEXT NOT NULL',
  profile: 'TEXT NOT NULL',
@@ -140,6 +141,7 @@ export const GC_CONTEXT_SNAPSHOTS_SCHEMA: Record<string, string> = {
 export const GC_ROOM_MEMBERS_TABLE = 'gc_room_members'

 export const GC_ROOM_MEMBERS_SCHEMA: Record<string, string> = {
+  id: 'TEXT PRIMARY KEY',
  roomId: 'TEXT NOT NULL',
  userId: 'TEXT NOT NULL',
  userName: 'TEXT NOT NULL',
@@ -474,16 +476,14 @@ export function initAllHermesTables(retryCount = 0): void {
    syncTable(GC_PENDING_SESSION_DELETES_TABLE, GC_PENDING_SESSION_DELETES_SCHEMA)
    syncTable(GC_SESSION_PROFILES_TABLE, GC_SESSION_PROFILES_SCHEMA)

-    // Group chat - composite primary key tables
+    // Group chat - single-column primary key tables (PRIMARY KEY in column definition)
    syncTable(GC_ROOM_AGENTS_TABLE, GC_ROOM_AGENTS_SCHEMA, {
-      primaryKey: 'roomId, agentId',
      indexes: {
        idx_gc_room_agents_profile: 'CREATE INDEX idx_gc_room_agents_profile ON gc_room_agents(profile)',
      }
    })

    syncTable(GC_ROOM_MEMBERS_TABLE, GC_ROOM_MEMBERS_SCHEMA, {
-      primaryKey: 'roomId, userId',
      indexes: {
        idx_gc_room_members_user: 'CREATE INDEX idx_gc_room_members_user ON gc_room_members(userId)',
      }
@@ -0,0 +1,267 @@
+/**
+ * LLM JSON Parsing Utilities
+ *
+ * Handles unreliable JSON output from large language models.
+ * Provides extraction, tolerant parsing, and validation.
+ *
+ * Based on production-grade patterns for handling LLM JSON:
+ * - Extract JSON from text (code blocks, plain objects)
+ * - Fix common LLM mistakes (single quotes, missing quotes, trailing commas)
+ * - Validate against schema (zod)
+ * - Retry on failure
+ */
+
/**
 * Locate the end of the JSON-like value that opens at `start`.
 *
 * Brackets and braces are counted together; anything inside a single- or
 * double-quoted string (including backslash escapes) is skipped so that a
 * `}` or `]` inside a string value does not terminate the scan early.
 *
 * @returns the index one past the balancing closer, or -1 if the value never closes
 */
function findBalancedJSONEnd(text: string, start: number): number {
  let depth = 0
  let quote = ''
  for (let i = start; i < text.length; i++) {
    const ch = text.charAt(i)
    if (quote) {
      if (ch === '\\') {
        i++ // skip the escaped character
      } else if (ch === quote) {
        quote = ''
      }
    } else if (ch === '"' || ch === "'") {
      quote = ch
    } else if (ch === '{' || ch === '[') {
      depth++
    } else if (ch === '}' || ch === ']') {
      depth--
      if (depth === 0) return i + 1
    }
  }
  return -1
}

/**
 * Extract the JSON portion of an LLM text response.
 *
 * Lookup order:
 *   1. The body of the first fenced code block (``` or ```json).
 *   2. The first balanced `{...}` or `[...]` value, starting at whichever
 *      opener appears first. A top-level array of objects is therefore
 *      returned whole instead of being cut down to its inner objects.
 *   3. Greedy first-`{`-to-last-`}` / first-`[`-to-last-`]` spans, used only
 *      when no balanced value could be found (e.g. malformed nesting).
 *
 * The returned string is not validated; it may still need repair before
 * `JSON.parse` accepts it.
 *
 * @param text - raw model output
 * @returns the extracted JSON-like substring
 * @throws Error if `text` is empty / not a string, or contains nothing that looks like JSON
 */
export function extractJSON(text: string): string {
  if (!text || typeof text !== 'string') {
    throw new Error('Invalid text: must be non-empty string')
  }

  const trimmed = text.trim()

  // 1. Fenced code block
  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/)
  if (fenced) {
    return fenced[1].trim()
  }

  // 2. First balanced object or array, whichever opens first
  const opener = trimmed.search(/[{\[]/)
  if (opener >= 0) {
    const end = findBalancedJSONEnd(trimmed, opener)
    if (end > 0) {
      return trimmed.slice(opener, end)
    }
  }

  // 3. Greedy fallbacks for input whose brackets do not balance
  const objectMatch = trimmed.match(/\{[\s\S]*\}/)
  if (objectMatch) {
    return objectMatch[0]
  }

  const arrayMatch = trimmed.match(/\[[\s\S]*\]/)
  if (arrayMatch) {
    return arrayMatch[0]
  }

  throw new Error('No JSON found in text (no code blocks, objects, or arrays detected)')
}
+
/** Python literals and their JSON equivalents. */
const PYTHON_TO_JSON_LITERALS = new Map<string, string>([
  ['True', 'true'],
  ['False', 'false'],
  ['None', 'null'],
])

const LLM_JSON_WHITESPACE = /\s/
const LLM_JSON_WORD_CHAR = /[A-Za-z0-9_$]/

/**
 * Read the quoted string that starts at `start` (either quote style) and
 * re-emit it as a double-quoted JSON string.
 *
 * Existing backslash escapes are preserved, except `\'` which JSON does not
 * allow and is emitted as a bare apostrophe. Unescaped double quotes inside a
 * single-quoted string are escaped.
 *
 * @returns the JSON string literal and the index just past the closing quote
 *          (an unterminated string is emitted without a closing quote)
 */
function readQuotedAsJSONString(src: string, start: number): [string, number] {
  const quote = src.charAt(start)
  let out = '"'
  let i = start + 1
  while (i < src.length) {
    const ch = src.charAt(i)
    if (ch === '\\' && i + 1 < src.length) {
      const next = src.charAt(i + 1)
      out += next === "'" ? "'" : ch + next
      i += 2
    } else if (ch === quote) {
      return [out + '"', i + 1]
    } else {
      out += ch === '"' ? '\\"' : ch
      i++
    }
  }
  return [out, i]
}

/**
 * Repair common LLM / Python-repr deviations from strict JSON.
 *
 * Repairs, all applied in one string-aware pass so that the contents of
 * string values are never rewritten:
 *   - single-quoted strings -> double-quoted strings
 *   - Python `True` / `False` / `None` -> `true` / `false` / `null`
 *   - unquoted object keys (`{name: 1}`) -> quoted keys
 *   - trailing commas before `}` or `]` are dropped
 *   - text before the first `{` / `[` and after its balancing closer is dropped
 *
 * If the input contains no `{` or `[`, the whole string is repaired (so
 * `'hello'` becomes `"hello"` and `True` becomes `true`). If the first
 * container never closes, the repaired remainder is returned as-is and will
 * still fail `JSON.parse`.
 *
 * @param jsonStr - JSON-like text
 * @returns repaired text, intended to be passed to `JSON.parse`
 * @throws Error if `jsonStr` is empty or not a string
 */
export function fixLLMJSON(jsonStr: string): string {
  if (!jsonStr || typeof jsonStr !== 'string') {
    throw new Error('Invalid JSON string')
  }

  const firstContainer = jsonStr.search(/[{\[]/)
  const hasContainer = firstContainer >= 0
  const length = jsonStr.length

  let out = ''
  let depth = 0
  let i = hasContainer ? firstContainer : 0

  while (i < length) {
    const ch = jsonStr.charAt(i)

    if (ch === '"' || ch === "'") {
      const [literal, next] = readQuotedAsJSONString(jsonStr, i)
      out += literal
      i = next
    } else if (ch === '{' || ch === '[') {
      depth++
      out += ch
      i++
    } else if (ch === '}' || ch === ']') {
      depth--
      out += ch
      i++
      // The first container is complete: ignore any trailing text.
      if (hasContainer && depth <= 0) break
    } else if (ch === ',') {
      let j = i + 1
      while (j < length && LLM_JSON_WHITESPACE.test(jsonStr.charAt(j))) j++
      const next = jsonStr.charAt(j)
      if (next === '}' || next === ']') {
        i = j // trailing comma: drop it together with the whitespace
      } else {
        out += ch
        i++
      }
    } else if (LLM_JSON_WORD_CHAR.test(ch)) {
      let j = i
      while (j < length && LLM_JSON_WORD_CHAR.test(jsonStr.charAt(j))) j++
      const word = jsonStr.slice(i, j)
      const token = PYTHON_TO_JSON_LITERALS.get(word) ?? word
      let k = j
      while (k < length && LLM_JSON_WHITESPACE.test(jsonStr.charAt(k))) k++
      // A bare word directly before ':' is an unquoted object key.
      out += jsonStr.charAt(k) === ':' ? `"${token}"` : token
      i = j
    } else {
      out += ch
      i++
    }
  }

  return out
}
+
/**
 * Parse LLM output into a JavaScript value, tolerating common formatting mistakes.
 *
 * Strategies, each tried once and in this order:
 *   1. `JSON.parse` on the raw text.
 *   2. `extractJSON` -> `fixLLMJSON` -> `JSON.parse`.
 *   3. `fixLLMJSON` on the whole text -> `JSON.parse`.
 *
 * @param text - raw model output
 * @param retries - Kept for backward compatibility and ignored. Every strategy
 *   is a deterministic function of `text`, so running it again cannot change
 *   the outcome.
 * @returns the parsed value (untyped; validate before use)
 * @throws Error whose `cause` is the array of errors raised by each strategy
 */
export function parseLLMJSON(text: string, retries = 3): any {
  void retries

  const strategies: Array<() => string> = [
    () => text,
    () => fixLLMJSON(extractJSON(text)),
    () => fixLLMJSON(text),
  ]

  const errors: unknown[] = []
  for (const prepare of strategies) {
    try {
      return JSON.parse(prepare())
    } catch (e) {
      errors.push(e)
    }
  }

  // Object.assign keeps this compatible with lib targets that do not declare Error.cause.
  throw Object.assign(
    new Error(`Failed to parse LLM JSON after ${strategies.length} attempts`),
    { cause: errors }
  )
}
+
/**
 * Parse LLM output with `parseLLMJSON`, then validate it with a schema.
 *
 * The schema only needs a `parse(data)` method that returns the validated
 * value or throws (the shape zod schemas expose); nothing zod-specific is
 * imported or required here.
 *
 * @param text - raw model output
 * @param schema - validator whose `parse` throws on invalid data
 * @param retries - forwarded unchanged to `parseLLMJSON`
 * @returns a promise resolving to whatever `schema.parse` returns
 * @throws whatever `parseLLMJSON` throws when the text cannot be parsed, or an
 *   Error('LLM JSON schema validation failed') whose `cause` is the error
 *   thrown by `schema.parse`
 */
export async function parseLLMJSONWithSchema<T>(
  text: string,
  schema: { parse: (data: any) => T },
  retries = 3
): Promise<T> {
  const candidate = parseLLMJSON(text, retries)

  let validated: T
  try {
    validated = schema.parse(candidate)
  } catch (validationError) {
    // Wrap so callers can tell validation failures apart from parse failures.
    throw Object.assign(new Error('LLM JSON schema validation failed'), {
      cause: validationError,
    })
  }
  return validated
}
+
/**
 * Non-throwing variant of `parseLLMJSON`.
 *
 * Calls `parseLLMJSON(text, 1)` and converts any thrown error into `null`.
 * Note that a successfully parsed JSON `null` is indistinguishable from a
 * failure.
 *
 * @param text - raw model output
 * @returns the parsed value, or `null` if parsing threw
 */
export function safeParseLLMJSON(text: string): any | null {
  let parsed: any = null
  try {
    parsed = parseLLMJSON(text, 1)
  } catch {
    // Deliberate best-effort: failure is reported as null.
  }
  return parsed
}
+
/**
 * Normalize the `arguments` of a tool call into a parsed value.
 *
 * - `null` and empty / whitespace-only strings yield `{}`: a tool call with no
 *   arguments, which some providers send as `""`.
 * - Any other object (including arrays) is returned as-is, since it is already parsed.
 * - Strings are parsed with `JSON.parse`, falling back to `parseLLMJSON` for
 *   malformed or Python-style output.
 *
 * @param args - raw arguments string, or an already-parsed object
 * @returns the parsed arguments
 * @throws Error('Invalid arguments: must be string or object') for any other
 *   input type; errors from `parseLLMJSON` propagate when a string cannot be parsed
 */
export function parseToolArguments(args: string | object): any {
  if (args === null) {
    return {} // typeof null === 'object'; never hand null back as tool input
  }

  if (typeof args === 'object') {
    return args // Already parsed
  }

  if (typeof args !== 'string') {
    throw new Error('Invalid arguments: must be string or object')
  }

  const trimmed = args.trim()
  if (trimmed === '') {
    return {}
  }

  try {
    return JSON.parse(trimmed)
  } catch {
    // Not strict JSON: fall back to the tolerant parser.
    return parseLLMJSON(trimmed, 2)
  }
}
+
/**
 * Parse a stringified array of Anthropic-style content blocks
 * (`thinking` / `text` / `tool_use`).
 *
 * Accepts strict JSON, Python-repr style (single quotes, `True` / `False` /
 * `None`), and double-serialized input (a JSON string whose value is the array
 * text). Parsing attempts, in order:
 *   1. `JSON.parse` on the text as-is, so valid JSON containing apostrophes or
 *      the words True/False/None inside strings is never rewritten.
 *   2. Simple Python-to-JSON rewrite (quotes and whole-word literals).
 *   3. `fixLLMJSON` for everything else (unquoted keys, trailing commas, ...).
 *
 * Block shapes are not validated; the declared element type is a convenience
 * for callers.
 *
 * @param content - stringified content array
 * @returns the parsed blocks, or `[]` when `content` is empty or not a string
 * @throws Error('Content is not an array') when the text is not bracketed by
 *   `[` and `]`; otherwise the error from the last parsing attempt
 */
export function parseAnthropicContentArray(content: string): Array<{
  type: string
  text?: string
  thinking?: string
  id?: string
  name?: string
  input?: any
}> {
  if (!content || typeof content !== 'string') {
    return []
  }

  let unwrapped = content.trim()

  // Double-serialized content: "\"[{...}]\"" -> "[{...}]"
  if (unwrapped.length >= 2 && unwrapped.startsWith('"') && unwrapped.endsWith('"')) {
    let decoded: unknown
    try {
      decoded = JSON.parse(unwrapped) // proper unescaping of \" and \\
    } catch {
      decoded = undefined
    }
    // Fall back to stripping the outer quotes when the wrapper is not valid JSON.
    unwrapped = (typeof decoded === 'string' ? decoded : unwrapped.slice(1, -1)).trim()
  }

  if (!unwrapped.startsWith('[') || !unwrapped.endsWith(']')) {
    throw new Error('Content is not an array')
  }

  const source = unwrapped
  const attempts: Array<() => string> = [
    () => source,
    () =>
      source
        .replace(/'/g, '"') // Python single quotes
        .replace(/\bTrue\b/g, 'true')
        .replace(/\bFalse\b/g, 'false')
        .replace(/\bNone\b/g, 'null'),
    () => fixLLMJSON(source),
  ]

  let lastError: unknown = new Error('Parsed content is not an array')
  for (const build of attempts) {
    try {
      const parsed: unknown = JSON.parse(build())
      if (Array.isArray(parsed)) {
        return parsed
      }
      lastError = new Error('Parsed content is not an array')
    } catch (e) {
      lastError = e
    }
  }

  throw lastError
}
@@ -26,6 +26,7 @@ import { getSessionDetailFromDb } from '../../db/hermes/sessions-db'
 import { getModelContextLength } from './model-context'
 import { ChatContextCompressor, countTokens, SUMMARY_PREFIX } from '../../lib/context-compressor'
 import { getCompressionSnapshot } from '../../db/hermes/compression-snapshot'
+import { parseLLMJSON, parseToolArguments, parseAnthropicContentArray } from '../../lib/llm-json'
 import { logger } from '../logger'

 const compressor = new ChatContextCompressor()
@@ -59,18 +60,23 @@ function convertToAnthropicFormat(messages: any[]): any[] {
      if (m.tool_calls && Array.isArray(m.tool_calls)) {
        for (const tc of m.tool_calls) {
          if (tc.id && tc.function) {
-            let args = tc.function.arguments || '{}'
            try {
-              args = typeof args === 'string' ? JSON.parse(args) : args
-            } catch {
-              args = {}
+              const args = parseToolArguments(tc.function.arguments || '{}')
+              blocks.push({
+                type: 'tool_use',
+                id: tc.id,
+                name: tc.function.name,
+                input: args
+              })
+            } catch (e) {
+              logger.warn(e, '[chat-run-socket] failed to parse tool arguments for tool %s', tc.id)
+              blocks.push({
+                type: 'tool_use',
+                id: tc.id,
+                name: tc.function.name,
+                input: {}
+              })
            }
-            blocks.push({
-              type: 'tool_use',
-              id: tc.id,
-              name: tc.function.name,
-              input: args
-            })
          }
        }
      }
@@ -86,11 +92,34 @@ function convertToAnthropicFormat(messages: any[]): any[] {

    if (role === 'tool') {
      // Convert tool message to tool_result in user message
+      // Follow Hermes official format: content is a string (not array)
      const toolContent = content || '(no output)'
+
+      // Normalize tool_result content to string format
+      // Use robust LLM JSON parser if content looks like JSON
+      let resultContent: string
+      if (typeof toolContent === 'string') {
+        try {
+          // Try to parse as JSON first (handles Python format, single quotes, etc.)
+          const parsed = parseLLMJSON(toolContent, 2)
+          // Re-serialize to ensure clean JSON string
+          resultContent = JSON.stringify(parsed)
+        } catch {
+          // Not valid JSON, use as-is
+          resultContent = toolContent
+        }
+      } else if (typeof toolContent === 'object' && toolContent !== null) {
+        // Object or array, serialize to JSON string
+        resultContent = JSON.stringify(toolContent)
+      } else {
+        // Primitive type (null, undefined, number, boolean)
+        resultContent = String(toolContent !== null && toolContent !== undefined ? toolContent : '(no output)')
+      }
+
      const toolResult = {
        type: 'tool_result',
        tool_use_id: m.tool_call_id || '',
-        content: typeof toolContent === 'string' ? toolContent : JSON.stringify(toolContent)
+        content: resultContent
      }

      // Merge with previous user message if it ends with tool_result
@@ -110,15 +139,19 @@ function convertToAnthropicFormat(messages: any[]): any[] {

    // Regular user message
    if (role === 'user') {
+      // Format: { role: 'user', content: [{ type: 'text', text: '...' }] }
      if (typeof content === 'string') {
-        result.push({ role: 'user', content: content || '' })
+        result.push({ role: 'user', content: [{ type: 'text', text: content || '' }] })
      } else if (Array.isArray(content)) {
+        // Already in array format, assume it's correct
        result.push({ role: 'user', content })
+      } else if (content) {
+        // Fallback for other types
+        result.push({ role: 'user', content: [{ type: 'text', text: String(content) }] })
      }
      continue
    }
  }
-
  return result
 }

@@ -242,45 +275,38 @@ export class ChatRunSocket {

            if (contentToParse.startsWith('[') && contentToParse.endsWith(']')) {
              try {
-                // Parse stringified Python-like array to JSON
-                const parsedContent = JSON.parse(
-                  contentToParse
-                    .replace(/'/g, '"')  // Python single quotes to JSON double quotes
-                    .replace(/True/g, 'true')
-                    .replace(/False/g, 'false')
-                    .replace(/None/g, 'null')
-                )
-                if (Array.isArray(parsedContent)) {
-                  const textBlocks: string[] = []
-                  const toolCalls: any[] = []
-                  let reasoningContent: string | null = null
+                // Use robust LLM JSON parser
+                const parsedContent = parseAnthropicContentArray(contentToParse)
+                const textBlocks: string[] = []
+                const toolCalls: any[] = []
+                let reasoningContent: string | null = null

-                  for (const block of parsedContent) {
-                    if (block.type === 'thinking') {
-                      reasoningContent = block.thinking
-                    } else if (block.type === 'text') {
-                      textBlocks.push(block.text)
-                    } else if (block.type === 'tool_use') {
-                      toolCalls.push({
-                        id: block.id,
-                        type: 'function',
-                        function: {
-                          name: block.name,
-                          arguments: JSON.stringify(block.input)
-                        }
-                      })
-                    }
-                  }
-
-                  msg.content = textBlocks.join('') || ''
-                  if (toolCalls.length > 0) {
-                    msg.tool_calls = toolCalls
-                  }
-                  if (reasoningContent) {
-                    msg.reasoning = reasoningContent
+                for (const block of parsedContent) {
+                  if (block.type === 'thinking') {
+                    reasoningContent = block.thinking || null
+                  } else if (block.type === 'text') {
+                    textBlocks.push(block.text || '')
+                  } else if (block.type === 'tool_use') {
+                    toolCalls.push({
+                      id: block.id,
+                      type: 'function',
+                      function: {
+                        name: block.name,
+                        arguments: typeof block.input === 'object' ? JSON.stringify(block.input) : (block.input ?? '{}')
+                      }
+                    })
                  }
                }
+
+                msg.content = textBlocks.join('') || ''
+                if (toolCalls.length > 0) {
+                  msg.tool_calls = toolCalls
+                }
+                if (reasoningContent) {
+                  msg.reasoning = reasoningContent
+                }
              } catch (e) {
+                logger.warn(e, '[chat-run-socket] failed to parse array content for message %s, keeping original', m.id)
                // Parsing failed, keep original content
                msg.content = m.content
              }
@@ -301,7 +327,7 @@ export class ChatRunSocket {
                  type: 'function',
                  function: {
                    name: block.name,
-                    arguments: JSON.stringify(block.input)
+                    arguments: JSON.stringify(block.input ?? {})
                  }
                })
              }
@@ -467,14 +493,12 @@ export class ChatRunSocket {
        socket.emit(event, tagged)
      }
    }
-
    try {
      // Build upstream request body
      const body: Record<string, any> = { input }
      if (hermesSessionId) body.session_id = hermesSessionId
      if (model) body.model = model
      if (instructions) body.instructions = instructions
-
      // Inject workspace context if set for this session
      if (session_id) {
        const sessionRow = getSession(session_id)
@@ -485,7 +509,6 @@ export class ChatRunSocket {
            : workspaceCtx
        }
      }
-
      // Build conversation_history from DB if session_id is provided
      if (session_id) {
        try {
@@ -551,7 +574,6 @@ export class ChatRunSocket {
              return msg
            })
              .filter(m => m !== null)
-
            // Context compression with snapshot awareness
            const contextLength = getModelContextLength(profile)
            const triggerTokens = Math.floor(contextLength / 2)
@@ -795,7 +817,6 @@ export class ChatRunSocket {
        logger.info('[chat-run-socket] converted conversation_history to Anthropic format for session %s: %d messages, content: %s',
          session_id || '(new)', body.conversation_history.length, JSON.stringify(body.conversation_history, null, 2))
      }
-
      const res = await fetch(`${upstream}/v1/runs`, {
        method: 'POST',
        headers,
@@ -866,15 +887,30 @@ export class ChatRunSocket {

              switch (parsed.event) {
                case 'message.delta': {
+                  let deltaText = parsed.delta || ''
+
+                  // Try to extract text from JSON delta (e.g., "[{\"type\":\"text\",\"text\":\"hello\"}]")
+                  if (deltaText.trim().startsWith('[') && deltaText.trim().endsWith(']')) {
+                    try {
+                      const parsedDelta = parseAnthropicContentArray(deltaText)
+                      const textParts = parsedDelta
+                        .filter((b: any) => b.type === 'text')
+                        .map((b: any) => b.text || '')
+                      deltaText = textParts.join('')
+                    } catch {
+                      // If parsing fails, use delta as-is
+                    }
+                  }
+
                  if (last?.role === 'assistant' && last.finish_reason == null) {
-                    last.content += (parsed.delta || '')
+                    last.content += deltaText
                  } else {
                    msgs.push({
                      id: msgs.length + 1,
                      session_id,
                      hermesSessionId,
                      role: 'assistant',
-                      content: parsed.delta || '',
+                      content: deltaText,
                      timestamp: Math.floor(Date.now() / 1000),
                    })
                  }
@@ -934,21 +970,57 @@ export class ChatRunSocket {
                  logger.info('[chat-run-socket] run.completed keys: %s', Object.keys(parsed))
                  // Finalize assistant message — if no content was streamed, use output
                  if (parsed.output && !runProducedAssistantText(msgs)) {
+                    let outputContent = parsed.output
+
+                    // Parse output if it's a stringified array
+                    if (typeof outputContent === 'string' &&
+                      outputContent.trim().startsWith('[') &&
+                      outputContent.trim().endsWith(']')) {
+                      try {
+                        const parsedOutput = parseAnthropicContentArray(outputContent)
+                        const textParts = parsedOutput
+                          .filter((b: any) => b.type === 'text')
+                          .map((b: any) => b.text || '')
+                        outputContent = textParts.join('')
+                      } catch {
+                        // If parsing fails, use output as-is
+                      }
+                    }
+
                    if (last?.role === 'assistant') {
-                      last.content = parsed.output
+                      last.content = outputContent
                    } else {
                      msgs.push({
                        id: msgs.length + 1,
                        session_id,
                        hermesSessionId,
                        role: 'assistant',
-                        content: parsed.output,
+                        content: outputContent,
                        timestamp: Math.floor(Date.now() / 1000),
                      })
                    }
                  }

+                  // Always parse output if it's an array format (for parsed_content field)
+                  // Only extract text content (tool_calls and reasoning are already sent via other events)
+                  if (parsed.output && typeof parsed.output === 'string' &&
+                    parsed.output.trim().startsWith('[') && parsed.output.trim().endsWith(']')) {
+                    try {
+                      const parsedOutput = parseAnthropicContentArray(parsed.output)
+                      const textParts = parsedOutput
+                        .filter((b: any) => b.type === 'text')
+                        .map((b: any) => b.text || '')
+
+                      // Set parsed_content for frontend (only text content)
+                      parsed.parsed_content = textParts.join('') || ''
+                      logger.info('[chat-run-socket] parsed output from run.completed event')
+                    } catch (e) {
+                      logger.error(e, '[chat-run-socket] failed to parse output from run.completed')
+                    }
+                  }
+
                  // Parse stringified array content for all assistant messages
+                  // Only extract text content (tool_calls and reasoning are already in message fields)
                  let parsedCount = 0
                  for (const msg of msgs) {
                    if (msg.role === 'assistant' && typeof msg.content === 'string' &&
@@ -956,44 +1028,13 @@ export class ChatRunSocket {
                      try {
                        logger.info('[chat-run-socket] parsing array content for message %s, content preview: %s',
                          msg.id, msg.content.slice(0, 100))
-                        const parsedContent = JSON.parse(
-                          msg.content
-                            .replace(/'/g, '"')
-                            .replace(/True/g, 'true')
-                            .replace(/False/g, 'false')
-                            .replace(/None/g, 'null')
-                        )
-                        if (Array.isArray(parsedContent)) {
-                          const textBlocks: string[] = []
-                          const toolCalls: any[] = []
-                          let reasoningContent: string | null = null
+                        const parsedContent = parseAnthropicContentArray(msg.content)
+                        const textBlocks = parsedContent
+                          .filter((b: any) => b.type === 'text')
+                          .map((b: any) => b.text || '')

-                          for (const block of parsedContent) {
-                            if (block.type === 'thinking') {
-                              reasoningContent = block.thinking
-                            } else if (block.type === 'text') {
-                              textBlocks.push(block.text)
-                            } else if (block.type === 'tool_use') {
-                              toolCalls.push({
-                                id: block.id,
-                                type: 'function',
-                                function: {
-                                  name: block.name,
-                                  arguments: JSON.stringify(block.input)
-                                }
-                              })
-                            }
-                          }
-
-                          msg.content = textBlocks.join('') || ''
-                          if (toolCalls.length > 0) {
-                            msg.tool_calls = toolCalls
-                          }
-                          if (reasoningContent) {
-                            msg.reasoning = reasoningContent
-                          }
-                          parsedCount++
-                        }
+                        msg.content = textBlocks.join('') || ''
+                        parsedCount++
                      } catch (e) {
                        logger.error(e, '[chat-run-socket] failed to parse array content for message %s', msg.id)
                      }
@@ -1252,7 +1293,6 @@ export class ChatRunSocket {
    if (start === -1) return
    // 替换
    msg.splice(start, end - start + 1, ...newItems)
-    console.log(msg)
  }
  /** Enqueue an ephemeral Hermes session for deferred deletion */
  private enqueueEphemeralDelete(hermesSessionId: string, profile?: string) {