[codex] Add local tool trace toggle (#806)

* test: harden tool approval browser contract

* test: cover tool trace display edge cases

* test: cover resumed tool trace edge cases

* feat: hide tool traces by default

* Add local tool trace toggle

---------

Co-authored-by: Zhicheng Han <zhicheng.han@mathematik.uni-goettingen.de>
This commit is contained in:
ekko
2026-05-17 09:01:59 +08:00
committed by GitHub
parent 569ddc28da
commit 0c2bafc619
19 changed files with 975 additions and 29 deletions
@@ -8,10 +8,12 @@ import { setModelContext } from '@/api/hermes/model-context'
import { NButton, NTooltip, NSwitch, NModal, NInputNumber, useMessage } from 'naive-ui'
import { computed, ref, nextTick, onMounted, onUnmounted, watch } from 'vue'
import { useI18n } from 'vue-i18n'
import { useToolTraceVisibility } from '@/composables/useToolTraceVisibility'
const chatStore = useChatStore()
const { t } = useI18n()
const message = useMessage()
const { toolTraceVisible, toggleToolTraceVisible } = useToolTraceVisibility()
const inputText = ref('')
const textareaRef = ref<HTMLTextAreaElement>()
const commandDropdownRef = ref<HTMLDivElement>()
@@ -430,6 +432,24 @@ function isImage(type: string): boolean {
/>
</div>
<NTooltip trigger="hover">
<template #trigger>
<NButton
quaternary
size="tiny"
class="tool-trace-toggle"
:class="{ active: toolTraceVisible }"
:aria-label="toolTraceVisible ? t('chat.hideToolCalls') : t('chat.showToolCalls')"
@click="toggleToolTraceVisible"
>
<svg class="tool-trace-icon" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M14.7 6.3a4.5 4.5 0 0 0-5.8 5.8L3.5 17.5a2.1 2.1 0 0 0 3 3l5.4-5.4a4.5 4.5 0 0 0 5.8-5.8l-3 3-3-3 3-3z"/>
</svg>
</NButton>
</template>
{{ toolTraceVisible ? t('chat.hideToolCalls') : t('chat.showToolCalls') }}
</NTooltip>
<span v-if="totalTokens > 0" class="context-info" :class="{ 'context-warning': usagePercent > 80 }">
{{ formatTokens(totalTokens) }} /
<NTooltip trigger="hover">
@@ -614,20 +634,65 @@ function isImage(type: string): boolean {
display: flex;
align-items: center;
gap: 6px;
padding: 0 8px;
padding: 0 0 0 8px;
border-left: 1px solid $border-light;
margin-left: 4px;
.switch-label {
display: flex;
align-items: center;
color: $text-muted;
justify-content: center;
width: 16px;
height: 16px;
color: #999999;
font-size: 12px;
svg {
opacity: 0.7;
opacity: 1;
}
}
:deep(.n-switch),
:deep(.n-switch__rail) {
margin-right: 0;
}
}
// Compact icon-only button that toggles tool-trace visibility in the input toolbar.
// NOTE(review): the base rule, `&.active` and `&:hover` all set the same colour
// (#999999) and opacity (1), so these styles do not distinguish the on/off or
// hover states — confirm this is intentional.
.tool-trace-toggle {
display: inline-flex;
align-items: center;
justify-content: center;
color: #999999;
width: 24px;
min-width: 24px;
height: 22px;
margin-left: -4px;
padding: 0;
background: transparent !important;
opacity: 1;
// Hide the button's border, state-border and ripple layers.
:deep(.n-button__state-border),
:deep(.n-button__border),
:deep(.n-button__ripple) {
display: none;
}
// Pin the SVG to a fixed 16x16 box so the flex container cannot resize it.
.tool-trace-icon {
display: block;
flex: 0 0 16px;
width: 16px;
height: 16px;
}
&.active {
color: #999999;
opacity: 1;
}
&:hover {
color: #999999;
opacity: 1;
}
}
.context-info {
@@ -3,6 +3,7 @@ import { ref, computed, watch, nextTick } from "vue";
import { useI18n } from "vue-i18n";
import MessageItem from "./MessageItem.vue";
import { useChatStore } from "@/stores/hermes/chat";
import { useToolTraceVisibility } from "@/composables/useToolTraceVisibility";
import type { Session } from "@/stores/hermes/chat";
const props = defineProps<{
@@ -10,6 +11,7 @@ const props = defineProps<{
}>();
const chatStore = useChatStore();
const { toolTraceVisible } = useToolTraceVisibility();
const { t } = useI18n();
const listRef = ref<HTMLElement>();
@@ -18,10 +20,10 @@ const activeSession = computed(() => props.session || chatStore.activeSession);
const displayMessages = computed(() =>
(activeSession.value?.messages || []).filter((m) => {
// Filter out tool messages without name (internal use only)
if (m.role === 'tool' && !m.toolName) return false
// Filter out messages with empty content (except tool messages)
if (m.role !== 'tool' && !m.content?.trim()) return false
// Tool messages without a name are internal use only and remain hidden.
if (m.role === 'tool') return toolTraceVisible.value && !!m.toolName
// Filter out messages with empty content.
if (!m.content?.trim()) return false
return true
}),
);
@@ -19,7 +19,13 @@ import { useGlobalSpeech } from "@/composables/useSpeech";
import { useVoiceSettings } from "@/composables/useVoiceSettings";
import { speedToEdgeRate, hzToEdgePitch } from "@/utils/ttsHelpers";
const TOOL_PAYLOAD_DISPLAY_LIMIT = 2000;
const TOOL_PAYLOAD_DISPLAY_LIMIT = 1000;
const JSON_STRING_DISPLAY_LIMIT = 200;
const JSON_MAX_DEPTH = 6;
const JSON_MAX_NODES = 1000;
const JSON_MAX_KEYS_PER_OBJECT = 50;
const JSON_MAX_ITEMS_PER_ARRAY = 50;
const JSON_TRUNCATED_KEY = "__truncated__";
const props = defineProps<{ message: Message; highlight?: boolean }>();
const { t } = useI18n();
@@ -353,19 +359,96 @@ type ToolPayload = {
language?: string;
};
/**
 * Shorten a string for display, appending `marker` on a new line when it was cut.
 *
 * Strings of at most `limit` UTF-16 code units are returned unchanged. Longer
 * strings are cut at `limit`; if that cut would land between the two halves of
 * a surrogate pair, the dangling high surrogate is dropped as well so the
 * result never ends in half a character.
 *
 * @param value  The string to shorten.
 * @param marker Text appended (after a newline) to signal truncation.
 * @param limit  Maximum number of code units kept from `value`.
 * @returns The original string, or its prefix followed by "\n" and `marker`.
 */
function truncateLongString(
  value: string,
  marker: string,
  limit: number = JSON_STRING_DISPLAY_LIMIT,
): string {
  if (value.length <= limit) return value;

  let end = limit;
  const lastKeptUnit = value.charCodeAt(end - 1);
  // 0xD800–0xDBFF is the high-surrogate range: keeping it without its
  // partner would leave a lone surrogate at the end of the prefix.
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    end -= 1;
  }
  return value.slice(0, end) + "\n" + marker;
}
/**
 * Build a size-bounded copy of a parsed JSON value for display.
 *
 * The copy stays structurally valid (it can be serialized as JSON again) while
 * being limited in several ways:
 * - strings are shortened via `truncateLongString`;
 * - containers at `JSON_MAX_DEPTH` or deeper are replaced by a placeholder string;
 * - after `JSON_MAX_NODES` visited nodes, every further node becomes `marker`;
 * - arrays/objects keep at most `JSON_MAX_ITEMS_PER_ARRAY` /
 *   `JSON_MAX_KEYS_PER_OBJECT` entries, and fewer when their pretty-printed
 *   form would exceed `TOOL_PAYLOAD_DISPLAY_LIMIT` characters.
 *
 * Omitted entries are summarised by a trailing "<marker>: N more items" element
 * (arrays) or a `JSON_TRUNCATED_KEY` property (objects). The summary itself is
 * counted against the size budget: kept entries are dropped until the container
 * including its summary fits, so a container that was just under the limit does
 * not overflow because of the summary.
 *
 * @param value  Parsed JSON value (any JSON-compatible structure).
 * @param marker Text used to flag truncated parts.
 * @returns The truncated copy, or `{ [JSON_TRUNCATED_KEY]: marker }` when even
 *          the truncated copy does not fit the display limit.
 */
function truncateJsonValue(value: unknown, marker: string): unknown {
  let nodeCount = 0;
  // Tracks only the containers on the current descent path (entries are removed
  // on the way back up), so a reference is reported as circular only when it is
  // its own ancestor, not when it merely appears twice.
  const seen = new WeakSet<object>();

  function stringifyLength(candidate: unknown): number {
    // JSON.stringify yields undefined for values with no JSON form (e.g. a bare
    // undefined); treat that as empty instead of throwing on `.length`.
    const serialized: string | undefined = JSON.stringify(candidate, null, 2);
    return serialized === undefined ? 0 : serialized.length;
  }

  function visit(current: unknown, depth: number): unknown {
    nodeCount += 1;
    if (nodeCount > JSON_MAX_NODES) {
      return marker;
    }
    if (typeof current === "string") return truncateLongString(current, marker);
    if (current === null || typeof current !== "object") return current;
    if (seen.has(current)) return `[Circular ${marker}]`;
    if (depth >= JSON_MAX_DEPTH) {
      return Array.isArray(current) ? `[Array ${marker}]` : `[Object ${marker}]`;
    }
    seen.add(current);

    if (Array.isArray(current)) {
      const result: unknown[] = [];
      const maxItems = Math.min(current.length, JSON_MAX_ITEMS_PER_ARRAY);
      // Number of source items not represented in `result`.
      let omitted = current.length - maxItems;
      for (let i = 0; i < maxItems; i += 1) {
        result.push(visit(current[i], depth + 1));
        if (stringifyLength(result) > TOOL_PAYLOAD_DISPLAY_LIMIT) {
          // The item just added blew the budget: it and everything after it is omitted.
          result.pop();
          omitted = current.length - i;
          break;
        }
      }
      if (omitted > 0) {
        result.push(`${marker}: ${omitted} more items`);
        // Make room for the summary element by giving up trailing kept items.
        while (result.length > 1 && stringifyLength(result) > TOOL_PAYLOAD_DISPLAY_LIMIT) {
          result.splice(result.length - 2, 2);
          omitted += 1;
          result.push(`${marker}: ${omitted} more items`);
        }
      }
      seen.delete(current);
      return result;
    }

    const entries = Object.entries(current as Record<string, unknown>);
    const result: Record<string, unknown> = {};
    const keptKeys: string[] = [];
    const maxKeys = Math.min(entries.length, JSON_MAX_KEYS_PER_OBJECT);
    // Number of source keys not represented in `result`.
    let omitted = entries.length - maxKeys;
    for (let i = 0; i < maxKeys; i += 1) {
      const [key, val] = entries[i];
      result[key] = visit(val, depth + 1);
      if (stringifyLength(result) > TOOL_PAYLOAD_DISPLAY_LIMIT) {
        // The key just added blew the budget: it and everything after it is omitted.
        delete result[key];
        omitted = entries.length - i;
        break;
      }
      keptKeys.push(key);
    }
    if (omitted > 0) {
      result[JSON_TRUNCATED_KEY] = `${marker}: ${omitted} more keys`;
      // Make room for the summary property by giving up trailing kept keys.
      while (keptKeys.length > 0 && stringifyLength(result) > TOOL_PAYLOAD_DISPLAY_LIMIT) {
        const dropped = keptKeys.pop();
        // A source key may coincide with the summary key; never delete the summary.
        if (dropped !== undefined && dropped !== JSON_TRUNCATED_KEY) {
          delete result[dropped];
        }
        omitted += 1;
        result[JSON_TRUNCATED_KEY] = `${marker}: ${omitted} more keys`;
      }
    }
    seen.delete(current);
    return result;
  }

  const truncated = visit(value, 0);
  if (stringifyLength(truncated) <= TOOL_PAYLOAD_DISPLAY_LIMIT) return truncated;
  // Last resort (e.g. an oversized scalar or marker): show only the marker.
  return { [JSON_TRUNCATED_KEY]: marker };
}
function formatToolPayload(raw?: string): ToolPayload {
if (!raw) {
return { full: "", display: "" };
}
try {
const full = JSON.stringify(JSON.parse(raw), null, 2);
const parsed = JSON.parse(raw);
const full = JSON.stringify(parsed, null, 2);
const display = full.length > TOOL_PAYLOAD_DISPLAY_LIMIT
? JSON.stringify(truncateJsonValue(parsed, t("chat.truncated")), null, 2)
: full;
return {
full,
display:
full.length > TOOL_PAYLOAD_DISPLAY_LIMIT
? full.slice(0, TOOL_PAYLOAD_DISPLAY_LIMIT) + "\n" + t("chat.truncated")
: full,
display,
language: "json",
};
} catch {
@@ -6,10 +6,12 @@ import { useChatStore } from "@/stores/hermes/chat";
import thinkingVideoLight from "@/assets/thinking-light.mp4";
import thinkingVideoDark from "@/assets/thinking-dark.mp4";
import { useTheme } from "@/composables/useTheme";
import { useToolTraceVisibility } from "@/composables/useToolTraceVisibility";
const chatStore = useChatStore();
const { t } = useI18n();
const { isDark } = useTheme();
const { toolTraceVisible } = useToolTraceVisibility();
const listRef = ref<HTMLElement>();
function formatTokens(n: number): string {
@@ -41,9 +43,16 @@ const currentToolCalls = computed(() => {
return [...tools].reverse();
});
const displayMessages = computed(() =>
chatStore.messages.filter((m) => {
if (m.role === "tool") return false;
// Tool calls shown in the live panel: nothing while traces are hidden,
// otherwise only the current calls that carry a tool name.
const visibleToolCalls = computed(() => {
  if (!toolTraceVisible.value) return [];
  return currentToolCalls.value.filter((tool) => Boolean(tool.toolName));
});
const displayMessages = computed(() => {
const currentToolIds = new Set(currentToolCalls.value.map((tool) => tool.id));
return chatStore.messages.filter((m) => {
if (m.role === "tool") {
return toolTraceVisible.value && !!m.toolName && !(chatStore.isRunActive && currentToolIds.has(m.id));
}
if (
m.role === "assistant" &&
m.isStreaming &&
@@ -54,8 +63,8 @@ const displayMessages = computed(() =>
return false;
}
return true;
}),
);
});
});
const queuedMessages = computed(() => {
const sid = chatStore.activeSessionId;
@@ -171,7 +180,7 @@ watch(currentToolCalls, () => {
playsinline
class="thinking-video"
/>
<div v-if="currentToolCalls.length > 0 || chatStore.compressionState || chatStore.abortState" class="tool-calls-panel">
<div v-if="visibleToolCalls.length > 0 || chatStore.compressionState || chatStore.abortState" class="tool-calls-panel">
<!-- Abort indicator -->
<div v-if="chatStore.abortState" class="tool-call-item compression-item">
<svg
@@ -254,7 +263,7 @@ watch(currentToolCalls, () => {
</div>
<!-- Tool calls -->
<div
v-for="tc in currentToolCalls"
v-for="tc in visibleToolCalls"
:key="tc.id"
class="tool-call-item"
>
@@ -0,0 +1,34 @@
import { ref } from 'vue'
// localStorage key under which the tool-trace visibility flag is stored
// as the string 'true' or 'false'.
const STORAGE_KEY = 'hermes_show_tool_calls'
/**
 * Read the persisted visibility flag.
 *
 * Traces are visible unless the stored value is exactly the string 'false';
 * a missing entry or a failing storage access both yield `true`.
 */
function readInitialValue(): boolean {
  let stored: string | null = null
  try {
    stored = localStorage.getItem(STORAGE_KEY)
  } catch {
    return true
  }
  return stored !== 'false'
}
// Module-level ref: every caller of useToolTraceVisibility() receives this same
// instance, initialised once from localStorage when the module is first loaded.
const toolTraceVisible = ref(readInitialValue())
/**
 * Set the visibility flag and persist it.
 *
 * The in-memory ref is updated first, so the toggle keeps working even when
 * writing to localStorage fails.
 */
function setToolTraceVisible(value: boolean) {
  toolTraceVisible.value = value
  const serialized = String(value)
  try {
    localStorage.setItem(STORAGE_KEY, serialized)
  } catch {
    // Ignore storage failures; the in-memory toggle still works for this tab.
  }
}
/** Flip the current visibility flag and persist the new value. */
function toggleToolTraceVisible() {
  const next = !toolTraceVisible.value
  setToolTraceVisible(next)
}
/**
 * Composable exposing the shared tool-trace visibility state.
 *
 * @returns The module-level `toolTraceVisible` ref together with its setter
 *          and toggle helpers.
 */
export function useToolTraceVisibility() {
  const api = { toolTraceVisible, setToolTraceVisible, toggleToolTraceVisible }
  return api
}
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: 'Bridge-Agent für diese Sitzung freigeben',
},
attachFiles: 'Dateien anhängen',
showToolCalls: 'Tool-Aufrufe anzeigen',
hideToolCalls: 'Tool-Aufrufe ausblenden',
messageQueue: 'Nachrichtenwarteschlange',
removeQueuedMessage: 'Nachricht aus Warteschlange entfernen',
stop: 'Stopp',
+2
View File
@@ -153,6 +153,8 @@ export default {
},
attachFiles: 'Attach files',
autoPlaySpeech: 'Auto-play voice',
showToolCalls: 'Show tool calls',
hideToolCalls: 'Hide tool calls',
messageQueue: 'Message queue',
removeQueuedMessage: 'Remove queued message',
stop: 'Stop',
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: 'Liberar el agente Bridge de esta sesión',
},
attachFiles: 'Adjuntar archivos',
showToolCalls: 'Mostrar llamadas de herramientas',
hideToolCalls: 'Ocultar llamadas de herramientas',
messageQueue: 'Cola de mensajes',
removeQueuedMessage: 'Quitar mensaje de la cola',
stop: 'Detener',
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: "Libérer l'agent Bridge de cette session",
},
attachFiles: 'Joindre des fichiers',
showToolCalls: "Afficher les appels d'outils",
hideToolCalls: "Masquer les appels d'outils",
messageQueue: 'File de messages',
removeQueuedMessage: 'Retirer le message de la file',
stop: 'Arrêter',
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: 'このセッションの Bridge Agent を解放',
},
attachFiles: 'ファイルを添付',
showToolCalls: 'ツール呼び出しを表示',
hideToolCalls: 'ツール呼び出しを非表示',
messageQueue: 'メッセージキュー',
removeQueuedMessage: 'キューのメッセージを削除',
stop: '停止',
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: '이 세션의 Bridge Agent 해제',
},
attachFiles: '파일 첨부',
showToolCalls: '도구 호출 표시',
hideToolCalls: '도구 호출 숨기기',
messageQueue: '메시지 대기열',
removeQueuedMessage: '대기열 메시지 제거',
stop: '중지',
+2
View File
@@ -139,6 +139,8 @@ export default {
destroy: 'Liberar o Bridge Agent desta sessão',
},
attachFiles: 'Anexar arquivos',
showToolCalls: 'Mostrar chamadas de ferramentas',
hideToolCalls: 'Ocultar chamadas de ferramentas',
messageQueue: 'Fila de mensagens',
removeQueuedMessage: 'Remover mensagem da fila',
stop: 'Parar',
@@ -152,6 +152,8 @@ export default {
},
attachFiles: '新增附件',
autoPlaySpeech: '自動播放語音',
showToolCalls: '顯示工具呼叫',
hideToolCalls: '隱藏工具呼叫',
messageQueue: '訊息佇列',
removeQueuedMessage: '移除佇列訊息',
stop: '停止',
+2
View File
@@ -153,6 +153,8 @@ export default {
},
attachFiles: '添加附件',
autoPlaySpeech: '自动播放语音',
showToolCalls: '显示工具调用',
hideToolCalls: '隐藏工具调用',
messageQueue: '消息队列',
removeQueuedMessage: '移除队列消息',
stop: '停止',
+5 -4
View File
@@ -136,10 +136,11 @@ async function buildContentBlocks(
}
function mapHermesMessages(msgs: HermesMessage[]): Message[] {
// Filter out assistant messages with empty content
// Filter out assistant messages with no display content unless they carry tool call metadata
// needed to name later tool result rows when resuming persisted history.
const filteredMsgs = msgs.filter(m => {
if (m.role === 'assistant') {
return m.content && m.content.trim() !== ''
return (m.tool_calls?.length || 0) > 0 || (m.content && m.content.trim() !== '')
}
return true
})
@@ -169,7 +170,7 @@ function mapHermesMessages(msgs: HermesMessage[]): Message[] {
role: 'tool',
content: '',
timestamp: Math.round(msg.timestamp * 1000),
toolName: tc.function?.name || 'tool',
toolName: tc.function?.name || undefined,
toolCallId: tc.id,
toolArgs: tc.function?.arguments || undefined,
toolStatus: 'done',
@@ -181,7 +182,7 @@ function mapHermesMessages(msgs: HermesMessage[]): Message[] {
// Tool result messages
if (msg.role === 'tool') {
const tcId = msg.tool_call_id || ''
const toolName = msg.tool_name || toolNameMap.get(tcId) || 'tool'
const toolName = msg.tool_name || toolNameMap.get(tcId) || undefined
const toolArgs = toolArgsMap.get(tcId) || undefined
// Extract a short preview from the content
let preview = ''
+39 -4
View File
@@ -120,9 +120,11 @@ describe('MessageItem tool details', () => {
const expected = JSON.stringify(message, null, 2)
const code = wrapper.find('.tool-details code.hljs')
const displayed = JSON.parse(code.text())
expect(wrapper.find('.tool-details .code-lang').text()).toBe('json')
expect(wrapper.html()).toContain('chat.truncated')
expect(code.findAll('span')).toHaveLength(0)
expect(displayed.content).toContain('chat.truncated')
expect(code.findAll('span').length).toBeGreaterThan(0)
await wrapper.find('.tool-details [data-copy-code="true"]').trigger('click')
expect(writeText).toHaveBeenCalledWith(expected)
@@ -150,14 +152,45 @@ describe('MessageItem tool details', () => {
await wrapper.find('.tool-line').trigger('click')
const code = wrapper.find('.tool-details code.hljs')
const displayed = JSON.parse(code.text())
expect(wrapper.find('.tool-details .code-lang').text()).toBe('json')
expect(wrapper.html()).toContain('chat.truncated')
expect(wrapper.find('.tool-details code.hljs').findAll('span')).toHaveLength(0)
expect(displayed.content).toContain('chat.truncated')
expect(code.findAll('span').length).toBeGreaterThan(0)
await wrapper.find('.tool-details [data-copy-code="true"]').trigger('click')
expect(writeText).toHaveBeenCalledWith(JSON.stringify(fullResult, null, 2))
})
// A 100-element array whose pretty-printed form exceeds the display limit must
// be cut between elements, so the rendered text is still parseable JSON.
it('truncates large JSON arrays at item boundaries so display remains parseable JSON', async () => {
const fullResult = Array.from({ length: 100 }, (_, index) => ({
index,
value: `item-${index}-${'x'.repeat(80)}`,
}))
const wrapper = mount(MessageItem, {
props: {
message: {
id: 'tool-array',
role: 'tool',
content: '',
timestamp: Date.now(),
toolName: 'browser_snapshot',
toolResult: JSON.stringify(fullResult),
toolStatus: 'done',
} satisfies Message,
},
})
// Expand the tool row so its details block is rendered.
await wrapper.find('.tool-line').trigger('click')
const code = wrapper.find('.tool-details code.hljs')
// JSON.parse throws, failing the test, if the displayed text was cut mid-token.
const displayed = JSON.parse(code.text())
expect(Array.isArray(displayed)).toBe(true)
// The final element is the truncation summary, not a data item.
expect(displayed.at(-1)).toContain('chat.truncated')
expect(code.text().length).toBeLessThanOrEqual(1000)
})
it('copies the full large raw tool result even when the display is truncated', async () => {
const writeText = vi.mocked(navigator.clipboard.writeText)
const fullResult = 'line\n'.repeat(1200)
@@ -177,9 +210,11 @@ describe('MessageItem tool details', () => {
await wrapper.find('.tool-line').trigger('click')
const displayedResult = fullResult.slice(0, 1000) + '\nchat.truncated'
const code = wrapper.find('.tool-details code.hljs')
expect(wrapper.find('.tool-details .code-lang').text()).toBe('text')
expect(wrapper.html()).toContain('chat.truncated')
expect(wrapper.find('.tool-details code.hljs').findAll('span')).toHaveLength(0)
expect(code.text()).toBe(displayedResult)
expect(code.findAll('span')).toHaveLength(0)
await wrapper.find('.tool-details [data-copy-code="true"]').trigger('click')
expect(writeText).toHaveBeenCalledWith(fullResult)
+118
View File
@@ -0,0 +1,118 @@
// @vitest-environment jsdom
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { mount } from '@vue/test-utils'
import { createPinia, setActivePinia } from 'pinia'
import { defineComponent } from 'vue'
vi.mock('vue-i18n', () => ({
useI18n: () => ({
t: (key: string) => key,
}),
}))
vi.mock('@/composables/useTheme', () => ({
useTheme: () => ({ isDark: false }),
}))
import MessageList from '@/components/hermes/chat/MessageList.vue'
import HistoryMessageList from '@/components/hermes/chat/HistoryMessageList.vue'
import { useChatStore, type Message, type Session } from '@/stores/hermes/chat'
import { useToolTraceVisibility } from '@/composables/useToolTraceVisibility'
// Lightweight stand-in for MessageItem: renders the tool name (or the content
// when there is none) and exposes the message role and id as data attributes
// so tests can assert which messages the lists decided to render.
const MessageItemStub = defineComponent({
name: 'MessageItem',
props: {
message: { type: Object, required: true },
highlight: { type: Boolean, default: false },
},
template: '<div class="stub-message" :data-role="message.role" :data-id="message.id">{{ message.toolName || message.content }}</div>',
})
/** Build a minimal session fixture with a fixed id and title around `messages`. */
function makeSession(messages: Message[]): Session {
  const session: Session = {
    id: 'session-1',
    title: 'Tool trace visibility',
    messages,
    createdAt: Date.now(),
    updatedAt: Date.now(),
  }
  return session
}
// Transcript fixture: a user message, one named tool message, one tool message
// without a toolName (the "internal" case that must stay hidden), and an
// assistant reply.
const sampleMessages: Message[] = [
{ id: 'user-1', role: 'user', content: 'inspect repo', timestamp: 1 },
{ id: 'tool-named', role: 'tool', content: '', timestamp: 2, toolName: 'read_file', toolResult: 'ok', toolStatus: 'done' },
{ id: 'tool-internal', role: 'tool', content: '', timestamp: 3, toolResult: 'internal', toolStatus: 'done' },
{ id: 'assistant-1', role: 'assistant', content: 'done', timestamp: 4 },
]
// Verifies that the live and history message lists honour the tool-trace
// visibility toggle and always hide tool messages that have no toolName.
describe('tool trace visibility', () => {
beforeEach(() => {
// Fresh store per test; also clear the persisted flag and force the toggle
// back to visible so a previous test's setToolTraceVisible(false) cannot leak.
setActivePinia(createPinia())
localStorage.removeItem('hermes_show_tool_calls')
useToolTraceVisibility().setToolTraceVisible(true)
})
// Mounts the live MessageList against a store seeded with sampleMessages.
function mountLiveList() {
const chatStore = useChatStore()
chatStore.activeSessionId = 'session-1'
chatStore.activeSession = makeSession(sampleMessages)
// NOTE(review): presumably set so the tool-calls panel renders — confirm against MessageList.
chatStore.abortState = { aborting: true, synced: false }
return mount(MessageList, {
global: {
stubs: {
MessageItem: MessageItemStub,
Transition: false,
},
},
})
}
it('shows named transcript and live tool traces by default while keeping unnamed internal tools hidden', () => {
const wrapper = mountLiveList()
// 'tool-internal' has no toolName and must not be rendered.
expect(wrapper.findAll('.stub-message').map(node => node.attributes('data-id'))).toEqual([
'user-1',
'tool-named',
'assistant-1',
])
expect(wrapper.findAll('.tool-call-name').map(node => node.text())).toContain('read_file')
})
it('applies the same default-visible rule to history sessions', () => {
const wrapper = mount(HistoryMessageList, {
props: { session: makeSession(sampleMessages) },
global: {
stubs: { MessageItem: MessageItemStub },
},
})
expect(wrapper.findAll('.stub-message').map(node => node.attributes('data-id'))).toEqual([
'user-1',
'tool-named',
'assistant-1',
])
})
it('hides named live and history tool traces when the localStorage toggle is off', () => {
// The toggle is switched off through the composable's setter before mounting.
useToolTraceVisibility().setToolTraceVisible(false)
const liveWrapper = mountLiveList()
expect(liveWrapper.findAll('.stub-message').map(node => node.attributes('data-id'))).toEqual([
'user-1',
'assistant-1',
])
expect(liveWrapper.findAll('.tool-call-name').map(node => node.text())).not.toContain('read_file')
const historyWrapper = mount(HistoryMessageList, {
props: { session: makeSession(sampleMessages) },
global: {
stubs: { MessageItem: MessageItemStub },
},
})
expect(historyWrapper.findAll('.stub-message').map(node => node.attributes('data-id'))).toEqual([
'user-1',
'assistant-1',
])
})
})
+572
View File
@@ -246,3 +246,575 @@ test('surfaces an empty completed run as an error instead of leaving chat stalle
await expect(page.getByRole('button', { name: 'Stop' })).toHaveCount(0)
expect(api.unexpectedRequests).toEqual([])
})
// End-to-end approval flow: a running tool appears in the live tool-calls panel,
// an approval prompt is shown, the user's choice is emitted on the socket, and
// after the run completes the tool appears once as a transcript row.
test('renders tool trace and sends explicit approval decisions over the chat-run socket', async ({ page }) => {
await authenticate(page, TEST_ACCESS_KEY, 'research')
const api = await mockHermesApi(page)
await mockChatSocket(page)
await page.goto('/#/hermes/chat')
await sendChatMessage(page, 'Use write_file with approval')
const { run } = await waitForRun(page)
// Phase 1: start the run, start a tool, and request approval for it.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('run.started', { event: 'run.started', session_id: sid, run_id: 'run-approval' })
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-approval',
tool_call_id: 'tool-call-1',
tool: 'write_file',
preview: 'Writing approved file',
arguments: JSON.stringify({ path: '/tmp/approved.txt', content: 'hello' }),
})
socket.__trigger('approval.requested', {
event: 'approval.requested',
session_id: sid,
run_id: 'run-approval',
approval_id: 'approval-1',
command: 'write_file /tmp/approved.txt',
description: 'Allow write_file to create /tmp/approved.txt',
choices: ['once', 'deny'],
allow_permanent: false,
})
}, run.session_id)
// While the run is active the tool is shown in the panel only, not as a transcript row.
await expect(page.getByText('write_file', { exact: true })).toBeVisible()
await expect(page.getByText('Writing approved file')).toBeVisible()
await expect(page.locator('.message.tool .tool-line')).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'write_file' })).toBeVisible()
await expect(page.getByText('Allow write_file to create /tmp/approved.txt')).toBeVisible()
await expect(page.getByText('write_file /tmp/approved.txt')).toBeVisible()
// Only the offered choices ('once', 'deny') get buttons.
await expect(page.getByRole('button', { name: 'Allow once' })).toBeVisible()
await expect(page.getByRole('button', { name: 'Allow session' })).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Deny' })).toBeVisible()
// Phase 2: a resolution for a different approval id must not dismiss this prompt.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('approval.resolved', {
event: 'approval.resolved',
session_id: sid,
run_id: 'run-approval',
approval_id: 'approval-other',
choice: 'deny',
resolved: true,
})
}, run.session_id)
await expect(page.getByText('Allow write_file to create /tmp/approved.txt')).toBeVisible()
await expect(page.getByRole('button', { name: 'Allow once' })).toBeVisible()
// Phase 3: the user approves; the prompt disappears and exactly one response is emitted.
await page.getByRole('button', { name: 'Allow once' }).click()
await expect(page.getByText('Allow write_file to create /tmp/approved.txt')).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Allow once' })).toHaveCount(0)
await expect.poll(async () => page.evaluate(() => {
const emitted = (window as any).__PW_CHAT_SOCKET__.emitted
return emitted.filter((item: any) => item.event === 'approval.respond')
})).toEqual([
{
event: 'approval.respond',
payload: {
session_id: run.session_id,
approval_id: 'approval-1',
choice: 'once',
},
},
])
// Phase 4: server confirms the approval, the tool completes, and the run finishes.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('approval.resolved', {
event: 'approval.resolved',
session_id: sid,
run_id: 'run-approval',
approval_id: 'approval-1',
choice: 'once',
resolved: true,
})
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-approval',
tool_call_id: 'tool-call-1',
tool: 'write_file',
output: JSON.stringify({ ok: true, path: '/tmp/approved.txt' }),
duration: 42,
})
socket.__trigger('message.delta', {
event: 'message.delta',
session_id: sid,
run_id: 'run-approval',
delta: 'Delta-only approved tool result.',
})
socket.__trigger('run.completed', {
event: 'run.completed',
session_id: sid,
run_id: 'run-approval',
output: 'Completion fallback should stay hidden.',
})
}, run.session_id)
// After completion the tool moves from the live panel to a single transcript row.
const persistedToolTrace = page.locator('.message.tool .tool-line').filter({ hasText: 'write_file' })
await expect(persistedToolTrace).toHaveCount(1)
await persistedToolTrace.click()
const toolDetails = page.locator('.message.tool .tool-details')
await expect(toolDetails).toContainText('/tmp/approved.txt')
await expect(toolDetails).toContainText('ok')
// The streamed delta is shown; the run.completed output is not rendered.
await expect(page.getByText('Delta-only approved tool result.')).toBeVisible()
await expect(page.getByText('Completion fallback should stay hidden.')).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'write_file' })).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Stop' })).toHaveCount(0)
expect(api.unexpectedRequests).toEqual([])
})
// Two consecutive runs: the first run's tool row must remain in the transcript
// while the second run's tool is shown only in the live panel until that run ends.
test('keeps prior tool trace visible while hiding only the active run tool trace', async ({ page }) => {
await authenticate(page, TEST_ACCESS_KEY, 'research')
const api = await mockHermesApi(page)
await mockChatSocket(page)
await page.goto('/#/hermes/chat')
await sendChatMessage(page, 'First tool trace')
const first = await waitForRun(page)
// First run: start, run one tool to completion, stream an answer, finish.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('run.started', { event: 'run.started', session_id: sid, run_id: 'run-history-1' })
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-history-1',
tool_call_id: 'tool-history-1',
tool: 'read_file',
preview: 'Read historical file',
arguments: JSON.stringify({ path: '/tmp/history.txt' }),
})
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-history-1',
tool_call_id: 'tool-history-1',
tool: 'read_file',
output: JSON.stringify({ ok: true, path: '/tmp/history.txt' }),
duration: 12,
})
socket.__trigger('message.delta', {
event: 'message.delta',
session_id: sid,
run_id: 'run-history-1',
delta: 'First tool answer.',
})
socket.__trigger('run.completed', {
event: 'run.completed',
session_id: sid,
run_id: 'run-history-1',
output: 'First fallback should stay hidden.',
})
}, first.run.session_id)
const transcriptTools = page.locator('.message.tool .tool-line')
await expect(transcriptTools.filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'read_file' })).toHaveCount(0)
// Second run: start a tool but leave it running.
await sendChatMessage(page, 'Second tool trace')
const second = await waitForRun(page, 1)
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('run.started', { event: 'run.started', session_id: sid, run_id: 'run-history-2' })
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-history-2',
tool_call_id: 'tool-history-2',
tool: 'write_file',
preview: 'Write current file',
arguments: JSON.stringify({ path: '/tmp/current.txt', content: 'now' }),
})
}, second.run.session_id)
// The earlier row stays in the transcript; the active tool appears only in the panel.
await expect(transcriptTools.filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(transcriptTools.filter({ hasText: 'write_file' })).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'read_file' })).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'write_file' })).toHaveCount(1)
// Finish the second run.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-history-2',
tool_call_id: 'tool-history-2',
tool: 'write_file',
output: JSON.stringify({ ok: true, path: '/tmp/current.txt' }),
duration: 15,
})
socket.__trigger('message.delta', {
event: 'message.delta',
session_id: sid,
run_id: 'run-history-2',
delta: 'Second tool answer.',
})
socket.__trigger('run.completed', {
event: 'run.completed',
session_id: sid,
run_id: 'run-history-2',
output: 'Second fallback should stay hidden.',
})
}, second.run.session_id)
// Both tools are now transcript rows, one each.
await expect(transcriptTools).toHaveCount(2)
await expect(transcriptTools.filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(transcriptTools.filter({ hasText: 'write_file' })).toHaveCount(1)
await expect(page.getByText('First fallback should stay hidden.')).toHaveCount(0)
await expect(page.getByText('Second fallback should stay hidden.')).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Stop' })).toHaveCount(0)
expect(api.unexpectedRequests).toEqual([])
})
// Within one run, tools that have already completed stay in the live panel and
// out of the transcript until run.completed arrives; afterwards they appear as
// transcript rows, including the error badge for the failed tool.
test('keeps completed same-run tool traces hidden until the run finishes', async ({ page }) => {
await authenticate(page, TEST_ACCESS_KEY, 'research')
const api = await mockHermesApi(page)
await mockChatSocket(page)
await page.goto('/#/hermes/chat')
await sendChatMessage(page, 'Run multiple tools')
const { run } = await waitForRun(page)
// Start the run and two tools.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('run.started', { event: 'run.started', session_id: sid, run_id: 'run-multi-tool' })
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-multi-tool',
tool_call_id: 'tool-multi-1',
tool: 'read_file',
preview: 'Read config',
arguments: JSON.stringify({ path: '/tmp/config.json' }),
})
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-multi-tool',
tool_call_id: 'tool-multi-2',
tool: 'shell_exec',
preview: 'Run command',
arguments: JSON.stringify({ command: 'false' }),
})
}, run.session_id)
const transcriptTools = page.locator('.message.tool .tool-line')
await expect(transcriptTools).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'shell_exec' })).toHaveCount(1)
// Complete both tools (the second with error: true) while the run is still active.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-multi-tool',
tool_call_id: 'tool-multi-1',
tool: 'read_file',
output: JSON.stringify({ ok: true, path: '/tmp/config.json' }),
duration: 11,
})
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-multi-tool',
tool_call_id: 'tool-multi-2',
tool: 'shell_exec',
output: 'exit status 1',
error: true,
duration: 13,
})
}, run.session_id)
// Still panel-only: completion of individual tools does not move them to the transcript.
await expect(transcriptTools).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'shell_exec' })).toHaveCount(1)
// Finish the run.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('message.delta', {
event: 'message.delta',
session_id: sid,
run_id: 'run-multi-tool',
delta: 'Multiple tools finished.',
})
socket.__trigger('run.completed', {
event: 'run.completed',
session_id: sid,
run_id: 'run-multi-tool',
output: 'Multi-tool fallback should stay hidden.',
})
}, run.session_id)
// Both tools are now transcript rows and the panel no longer lists them.
await expect(transcriptTools).toHaveCount(2)
await expect(transcriptTools.filter({ hasText: 'read_file' })).toHaveCount(1)
await expect(transcriptTools.filter({ hasText: 'shell_exec' })).toHaveCount(1)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'read_file' })).toHaveCount(0)
await expect(page.locator('.tool-calls-panel .tool-call-name').filter({ hasText: 'shell_exec' })).toHaveCount(0)
// Exactly one row (the failed shell_exec) carries the error badge.
await expect(page.locator('.message.tool .tool-error-badge')).toHaveCount(1)
await transcriptTools.filter({ hasText: 'shell_exec' }).click()
await expect(page.locator('.message.tool .tool-details')).toContainText('exit status 1')
await expect(page.getByText('Multi-tool fallback should stay hidden.')).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Stop' })).toHaveCount(0)
expect(api.unexpectedRequests).toEqual([])
})
// Tool events that carry no `tool` name must never produce a transcript row,
// even after the run has completed.
test('keeps unnamed tool trace messages out of the transcript after completion', async ({ page }) => {
await authenticate(page, TEST_ACCESS_KEY, 'research')
const api = await mockHermesApi(page)
await mockChatSocket(page)
await page.goto('/#/hermes/chat')
await sendChatMessage(page, 'Run internal unnamed tool')
const { run } = await waitForRun(page)
// Full run in one batch; note that tool.started / tool.completed omit the `tool` field.
await page.evaluate((sid) => {
const socket = (window as any).__PW_CHAT_SOCKET__.latest
socket.__trigger('run.started', { event: 'run.started', session_id: sid, run_id: 'run-unnamed-tool' })
socket.__trigger('tool.started', {
event: 'tool.started',
session_id: sid,
run_id: 'run-unnamed-tool',
tool_call_id: 'tool-unnamed-1',
preview: 'Internal unnamed work',
arguments: JSON.stringify({ internal: true }),
})
socket.__trigger('tool.completed', {
event: 'tool.completed',
session_id: sid,
run_id: 'run-unnamed-tool',
tool_call_id: 'tool-unnamed-1',
output: JSON.stringify({ internal: true, ok: true }),
duration: 9,
})
socket.__trigger('message.delta', {
event: 'message.delta',
session_id: sid,
run_id: 'run-unnamed-tool',
delta: 'Unnamed internal tool finished.',
})
socket.__trigger('run.completed', {
event: 'run.completed',
session_id: sid,
run_id: 'run-unnamed-tool',
output: 'Unnamed fallback should stay hidden.',
})
}, run.session_id)
// No tool row is rendered; only the streamed assistant text is visible.
await expect(page.locator('.message.tool .tool-line')).toHaveCount(0)
await expect(page.getByText('Unnamed internal tool finished.')).toBeVisible()
await expect(page.getByText('Unnamed fallback should stay hidden.')).toHaveCount(0)
await expect(page.getByRole('button', { name: 'Stop' })).toHaveCount(0)
expect(api.unexpectedRequests).toEqual([])
})
// Loads a stored session whose history holds a tool call without a function name
// and checks that the resumed transcript shows the final answer but no tool message.
test('keeps unnamed resumed tool traces hidden after session reload', async ({ page }) => {
  const sessionId = 'session-history-unnamed-tool'
  const storedSession = {
    id: sessionId,
    source: 'api_server',
    model: 'test-model',
    title: 'Unnamed tool history',
    preview: 'History answer visible.',
    started_at: 1,
    ended_at: 4,
    last_active: 4,
    message_count: 4,
    tool_call_count: 1,
    input_tokens: 0,
    output_tokens: 0,
    cache_read_tokens: 0,
    cache_write_tokens: 0,
    reasoning_tokens: 0,
    billing_provider: 'test-provider',
    estimated_cost_usd: 0,
    actual_cost_usd: null,
    cost_status: 'none',
    workspace: null,
  }

  await authenticate(page, TEST_ACCESS_KEY, 'research')

  await page.addInitScript((sid) => {
    // Builds one history row. The timestamp mirrors the id, and overrides replace
    // fields in place so the key order of every row stays the same.
    const row = (id: number, role: string, content: string, overrides: Record<string, unknown> = {}) => ({
      id,
      session_id: sid,
      role,
      content,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      timestamp: id,
      token_count: null,
      finish_reason: null,
      reasoning: null,
      ...overrides,
    })

    // The tool call deliberately has no `function.name`.
    const unnamedCall = {
      id: 'tool-resume-unnamed-1',
      type: 'function',
      function: { arguments: JSON.stringify({ internal: true }) },
    }

    ;(window as any).__PW_CHAT_SOCKET_RESUMES__ = {
      [sid]: {
        session_id: sid,
        isWorking: false,
        events: [],
        messages: [
          row(1, 'user', 'Resume unnamed internal tool'),
          row(2, 'assistant', '', { tool_calls: [unnamedCall], finish_reason: 'tool_calls' }),
          row(3, 'tool', JSON.stringify({ internal: true, ok: true }), { tool_call_id: 'tool-resume-unnamed-1' }),
          row(4, 'assistant', 'History answer visible.', { finish_reason: 'stop' }),
        ],
      },
    }
  }, sessionId)

  const api = await mockHermesApi(page, { sessions: [storedSession] })
  await mockChatSocket(page)
  await page.goto('/#/hermes/chat')

  await expect(page.getByText('History answer visible.')).toBeVisible()
  const toolMessages = page.locator('.message.tool')
  await expect(page.locator('.message.tool .tool-line')).toHaveCount(0)
  await expect(toolMessages).toHaveCount(0)

  // Confirm the client actually asked the socket to resume this session.
  const resumeEmitted = await page.waitForFunction((sid) => {
    const emitted = (window as any).__PW_CHAT_SOCKET__?.emitted
    return emitted?.some((entry: any) => entry.event === 'resume' && entry.payload?.session_id === sid)
  }, sessionId)
  expect(await resumeEmitted.jsonValue()).toBe(true)
  expect(api.unexpectedRequests).toEqual([])
})
// Loads a stored session whose assistant message names the tool (`read_file`) in
// its tool call and checks that the trace row is restored and can be expanded.
test('restores named resumed tool traces from assistant tool calls after session reload', async ({ page }) => {
  const sessionId = 'session-history-named-tool'
  const storedSession = {
    id: sessionId,
    source: 'api_server',
    model: 'test-model',
    title: 'Named tool history',
    preview: 'Named history answer visible.',
    started_at: 1,
    ended_at: 4,
    last_active: 4,
    message_count: 4,
    tool_call_count: 1,
    input_tokens: 0,
    output_tokens: 0,
    cache_read_tokens: 0,
    cache_write_tokens: 0,
    reasoning_tokens: 0,
    billing_provider: 'test-provider',
    estimated_cost_usd: 0,
    actual_cost_usd: null,
    cost_status: 'none',
    workspace: null,
  }

  await authenticate(page, TEST_ACCESS_KEY, 'research')

  await page.addInitScript((sid) => {
    // Builds one history row. The timestamp mirrors the id, and overrides replace
    // fields in place so the key order of every row stays the same.
    const row = (id: number, role: string, content: string, overrides: Record<string, unknown> = {}) => ({
      id,
      session_id: sid,
      role,
      content,
      tool_call_id: null,
      tool_calls: null,
      tool_name: null,
      timestamp: id,
      token_count: null,
      finish_reason: null,
      reasoning: null,
      ...overrides,
    })

    // The tool name lives only on the assistant's tool call; the tool row itself has tool_name: null.
    const namedCall = {
      id: 'tool-resume-named-1',
      type: 'function',
      function: { name: 'read_file', arguments: JSON.stringify({ path: '/tmp/history.txt' }) },
    }

    ;(window as any).__PW_CHAT_SOCKET_RESUMES__ = {
      [sid]: {
        session_id: sid,
        isWorking: false,
        events: [],
        messages: [
          row(1, 'user', 'Resume named tool'),
          row(2, 'assistant', '', { tool_calls: [namedCall], finish_reason: 'tool_calls' }),
          row(3, 'tool', JSON.stringify({ ok: true, path: '/tmp/history.txt' }), { tool_call_id: 'tool-resume-named-1' }),
          row(4, 'assistant', 'Named history answer visible.', { finish_reason: 'stop' }),
        ],
      },
    }
  }, sessionId)

  const api = await mockHermesApi(page, { sessions: [storedSession] })
  await mockChatSocket(page)
  await page.goto('/#/hermes/chat')

  await expect(page.getByText('Named history answer visible.')).toBeVisible()

  const readFileTrace = page.locator('.message.tool .tool-line').filter({ hasText: 'read_file' })
  await expect(readFileTrace).toHaveCount(1)

  // Expanding the row must reveal the stored path.
  await readFileTrace.click()
  const traceDetails = page.locator('.message.tool .tool-details')
  await expect(traceDetails).toContainText('/tmp/history.txt')
  expect(api.unexpectedRequests).toEqual([])
})
+10 -1
View File
@@ -13,6 +13,7 @@ export interface MockedRequest {
/** Optional knobs accepted by `mockHermesApi` to tailor the mocked backend for a test. */
interface MockHermesApiOptions {
  // NOTE(review): presumably the HTTP status used for token-validation responses;
  // how it is defaulted is outside this view — confirm.
  tokenValidationStatus?: number
  // NOTE(review): presumably the profile the mocked backend starts on; usage is
  // outside this view — confirm.
  initialProfileName?: 'default' | 'research'
  // Session summaries served from '/api/hermes/sessions'; an empty list is served when omitted.
  sessions?: unknown[]
}
const sampleModelGroup = {
@@ -102,7 +103,7 @@ export async function mockHermesApi(page: Page, options: MockHermesApiOptions =
}
if (pathname === '/api/hermes/sessions') {
  // Serve the caller-supplied session summaries, falling back to an empty list.
  // A route can only be fulfilled once, so this branch makes exactly one fulfill
  // call; an earlier unconditional empty-list response would mask options.sessions.
  await route.fulfill(jsonResponse({ sessions: options.sessions ?? [] }, tokenValidationStatus))
  return
}
@@ -249,6 +250,14 @@ function makeSocket(url, options) {
},
emit(event, payload) {
state.emitted.push({ event, payload })
if (event === 'resume') {
const sessionId = payload && payload.session_id
const resumes = window.__PW_CHAT_SOCKET_RESUMES__ || {}
const response = sessionId ? resumes[sessionId] : null
if (response) {
setTimeout(() => this.__trigger('resumed', response), 0)
}
}
return this
},
removeAllListeners() {