filter empty assistant history (#781)

2026-05-16 11:01:33 +08:00
parent 24e906998a
commit c5380c4ab5
3 changed files with 135 additions and 8 deletions
@@ -12,6 +12,7 @@ import { getModelContextLength } from '../model-context'
 import { logger } from '../../logger'
 import { bridgeLogger } from '../../logger'
 import { calcAndUpdateUsage, estimateUsageTokensFromMessages } from './usage'
+import { isAssistantMessageSendable } from './message-format'
 import type { ChatMessage } from '../../../lib/context-compressor'
 import type { SessionState, BridgeCompressionResult } from './types'

@@ -62,6 +63,10 @@ export async function buildDbHistory(
      msg.tool_call_id = callId
    }
    if (m.tool_name) msg.name = m.tool_name
+    if (m.role === 'assistant' && !isAssistantMessageSendable(msg)) {
+      logger.warn('[chat-run-socket] skipped empty assistant message while building history for session %s', sessionId)
+      return null
+    }
    return msg
  }).filter((m): m is ChatMessage => m !== null)
 }
@@ -2,6 +2,48 @@ import { parseAnthropicContentArray } from '../../../lib/llm-json'
 import { logger } from '../../logger'
 import type { SessionMessage } from './types'

+function cleanToolCalls(toolCalls: any): any[] { // Normalizes a tool_calls value into an array of { id, type, function } entries.
+  return Array.isArray(toolCalls)
+    ? toolCalls
+        .filter((tc: any) => tc?.id && String(tc.id).length > 0) // drop entries whose id is falsy or stringifies to empty
+        .map((tc: any) => ({ // copy only these three fields; any other properties on the entry are discarded
+          id: tc.id,
+          type: tc.type,
+          function: tc.function,
+        }))
+    : [] // any non-array input yields an empty list
+}
+
+function hasSendableContent(content: unknown): boolean { // True for a non-blank string, or an array holding at least one qualifying block.
+  if (typeof content === 'string') return content.trim().length > 0 // whitespace-only strings count as empty
+  if (Array.isArray(content)) {
+    return content.some((block: any) => {
+      if (!block || typeof block !== 'object') return false // null and primitive entries never qualify
+      if (block.type === 'text') return typeof block.text === 'string' && block.text.trim().length > 0 // text blocks need non-blank text
+      return Boolean(block.type && block.type !== 'thinking') // any other typed block qualifies, except thinking blocks
+    })
+  }
+  return false // content is neither a string nor an array
+}
+
+function toolCallsToText(toolCalls: any[]): string { // Renders each tool call as one bracketed description line, joined by newlines.
+  return toolCalls
+    .map((tc: any) => {
+      const name = tc?.function?.name || 'unknown' // fallback label when the function name is missing or empty
+      let args = typeof tc?.function?.arguments === 'string'
+        ? tc.function.arguments // string arguments are used verbatim
+        : JSON.stringify(tc?.function?.arguments ?? {}) // null/undefined serialize as an empty object. NOTE(review): JSON.stringify returns undefined for function/symbol values, which would make args.length throw; confirm arguments are always JSON-like
+      if (args.length > 4000) args = `${args.slice(0, 4000)}...` // cap argument text at 4000 characters and mark the truncation
+      return `[Calling tool: ${name} with arguments: ${args}]`
+    })
+    .join('\n')
+}
+
+export function isAssistantMessageSendable(message: { content?: unknown; tool_calls?: any }): boolean { // Sendable when the message has usable content or at least one tool call with an id.
+  if (hasSendableContent(message.content)) return true
+  return cleanToolCalls(message.tool_calls).length > 0 // content is empty at this point, so only surviving tool calls can make the message sendable
+}
+
 /**
 * Convert OpenAI format conversation history to Anthropic format.
 */
@@ -33,7 +75,18 @@ export function convertHistoryFormat(messages: any[]): any[] {
      continue
    }
    if (role === 'assistant') {
-      result.push({ ...m })
+      const toolCalls = cleanToolCalls(m.tool_calls)
+      const item = { ...m }
+      delete item.reasoning_content
+      if (toolCalls.length > 0 && !hasSendableContent(item.content)) {
+        item.content = toolCallsToText(toolCalls)
+      }
+      delete item.tool_calls
+      if (!isAssistantMessageSendable(item)) {
+        logger.warn('[chat-run-socket] skipped empty assistant message in conversation history')
+        continue
+      }
+      result.push(item)
      continue
    }
  }
@@ -127,16 +180,15 @@ export function handleMessage(messages: SessionMessage[], sid: string): any[] {
        }

        if (m.tool_calls?.length) {
-          const cleanedToolCalls = m.tool_calls
-            .filter((tc: any) => tc.id && tc.id.length > 0)
-            .map((tc: any) => ({
-              id: tc.id,
-              type: tc.type,
-              function: tc.function,
-            }))
+          const cleanedToolCalls = cleanToolCalls(m.tool_calls)
          if (cleanedToolCalls.length > 0) msg.tool_calls = cleanedToolCalls
        }

+        if (m.role === 'assistant' && !isAssistantMessageSendable(msg)) {
+          logger.warn('[chat-run-socket] skipped empty assistant message %s while loading session %s', m.id, sid)
+          return null
+        }
+
        // For tool messages, ensure tool_call_id exists
        if (m.role === 'tool') {
          let callId = m.tool_call_id
@@ -0,0 +1,70 @@
+import { describe, expect, it, vi } from 'vitest'
+
+vi.mock('../../packages/server/src/services/logger', () => ({ // replace the logger module with a stub for this test file
+  logger: {
+    info: vi.fn(), // mock function standing in for logger.info
+    warn: vi.fn(), // mock function standing in for logger.warn
+  },
+}))
+
+import {
+  convertHistoryFormat,
+  handleMessage,
+  isAssistantMessageSendable,
+} from '../../packages/server/src/services/hermes/run-chat/message-format'
+import type { SessionMessage } from '../../packages/server/src/services/hermes/run-chat/types'
+
+describe('run-chat message formatting', () => { // Exercises convertHistoryFormat, handleMessage and isAssistantMessageSendable.
+  it('drops empty assistant history messages without tool calls', () => {
+    const formatted = convertHistoryFormat([
+      { role: 'user', content: 'run a command' },
+      { role: 'assistant', content: '' }, // empty assistant turn with no tool calls; expected to be removed
+      { role: 'user', content: 'next turn' },
+    ])
+
+    expect(formatted).toEqual([
+      { role: 'user', content: 'run a command' },
+      { role: 'user', content: 'next turn' },
+    ])
+  })
+
+  it('converts empty assistant tool-call history messages to non-empty text', () => {
+    const toolCalls = [{
+      id: 'call_1',
+      type: 'function',
+      function: { name: 'terminal', arguments: '{}' },
+    }]
+    const formatted = convertHistoryFormat([
+      { role: 'assistant', content: '', tool_calls: toolCalls },
+    ])
+
+    expect(formatted).toEqual([
+      { role: 'assistant', content: '[Calling tool: terminal with arguments: {}]' }, // tool_calls is absent from the result; its description becomes the content
+    ])
+  })
+
+  it('drops stale empty assistant messages loaded from the session database', () => {
+    const messages: SessionMessage[] = [
+      { id: 1, session_id: 's1', role: 'user', content: 'first', timestamp: 1 },
+      { id: 2, session_id: 's1', role: 'assistant', content: '', timestamp: 2 }, // empty assistant row; expected to be filtered out
+      { id: 3, session_id: 's1', role: 'assistant', content: 'done', timestamp: 3 },
+    ]
+
+    expect(handleMessage(messages, 's1').map(m => ({ role: m.role, content: m.content }))).toEqual([ // compare only role and content of each returned message
+      { role: 'user', content: 'first' },
+      { role: 'assistant', content: 'done' },
+    ])
+  })
+
+  it('treats assistant tool-call messages as sendable even with empty text', () => {
+    expect(isAssistantMessageSendable({
+      content: '',
+      tool_calls: [{
+        id: 'call_1',
+        type: 'function',
+        function: { name: 'terminal', arguments: '{}' },
+      }],
+    })).toBe(true)
+    expect(isAssistantMessageSendable({ content: '' })).toBe(false) // empty content and no tool calls
+  })
+})