From a43ead594ea8c6ff66855f16f3eb710a2ab88d35 Mon Sep 17 00:00:00 2001 From: ekko <152005280+EKKOLearnAI@users.noreply.github.com> Date: Wed, 27 May 2026 11:44:04 +0800 Subject: [PATCH] fix bridge surrogate json encoding (#1059) --- .../hermes/agent-bridge/hermes_bridge.py | 26 ++++++- tests/server/agent-bridge-profile-env.test.ts | 74 +++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/packages/server/src/services/hermes/agent-bridge/hermes_bridge.py b/packages/server/src/services/hermes/agent-bridge/hermes_bridge.py index 95b0cf7..c0deaab 100755 --- a/packages/server/src/services/hermes/agent-bridge/hermes_bridge.py +++ b/packages/server/src/services/hermes/agent-bridge/hermes_bridge.py @@ -19,6 +19,7 @@ import json import locale import os import queue +import re import signal import shutil import socket @@ -47,6 +48,7 @@ OPENROUTER_ATTRIBUTION_ENV = { "title": "HERMES_OPENROUTER_APP_TITLE", "categories": "HERMES_OPENROUTER_APP_CATEGORIES", } +_SURROGATE_RE = re.compile("[\ud800-\udfff]") def _bridge_platform() -> str: @@ -265,6 +267,25 @@ def _jsonable(value: Any) -> Any: return str(value) +def _sanitize_surrogates(value: Any) -> Any: + if isinstance(value, str): + return _SURROGATE_RE.sub("\ufffd", value) + if isinstance(value, dict): + return {_sanitize_surrogates(k): _sanitize_surrogates(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_sanitize_surrogates(v) for v in value] + return value + + +def _json_default(value: Any) -> str: + return _sanitize_surrogates(str(value)) + + +def _json_line_bytes(value: Any) -> bytes: + payload = json.dumps(_sanitize_surrogates(value), ensure_ascii=False, default=_json_default) + "\n" + return payload.encode("utf-8") + + def _agent_root() -> Path | None: return _find_agent_root(os.environ.get("HERMES_AGENT_ROOT")) @@ -2419,7 +2440,7 @@ def _connect_bridge_socket(endpoint: str, timeout: float) -> socket.socket: def _send_bridge_request(endpoint: str, req: dict[str, Any], timeout: float) -> dict[str, Any]: sock = _connect_bridge_socket(endpoint, timeout) try: - sock.sendall((json.dumps(req, ensure_ascii=False, default=str) + "\n").encode("utf-8")) + sock.sendall(_json_line_bytes(req)) chunks: list[bytes] = [] while True: chunk = sock.recv(65536) @@ -2574,8 +2595,7 @@ def _read_json_request(conn: socket.socket) -> dict[str, Any]: def _write_json_response(conn: socket.socket, resp: dict[str, Any]) -> None: - payload = json.dumps(resp, ensure_ascii=False, default=str) + "\n" - conn.sendall(payload.encode("utf-8")) + conn.sendall(_json_line_bytes(resp)) class BridgeBroker: diff --git a/tests/server/agent-bridge-profile-env.test.ts b/tests/server/agent-bridge-profile-env.test.ts index c1536bf..bfafd74 100644 --- a/tests/server/agent-bridge-profile-env.test.ts +++ b/tests/server/agent-bridge-profile-env.test.ts @@ -32,6 +32,80 @@ async function runBridgeProbe(script: string): Promise { return JSON.parse(stdout) } +describe('agent bridge JSON encoding', () => { + it('replaces lone surrogate characters before bridge socket writes', async () => { + const result = await runBridgeProbe(String.raw` +import importlib.util +import json +import os +import sys + +spec = importlib.util.spec_from_file_location("hermes_bridge", os.environ["BRIDGE_PATH"]) +bridge = importlib.util.module_from_spec(spec) +sys.modules["hermes_bridge"] = bridge +spec.loader.exec_module(bridge) + +class FakeSocket: + def __init__(self): + self.sent = [] + self.closed = False + self._read = False + + def sendall(self, payload): + self.sent.append(payload) + + def recv(self, size): + if self._read: + return b"" + self._read = True + return b'{"ok":true}\n' + + def close(self): + self.closed = True + +class FakeConn: + def __init__(self): + self.sent = b"" + + def sendall(self, payload): + self.sent += payload + +fake_socket = FakeSocket() +bridge._connect_bridge_socket = lambda endpoint, timeout: fake_socket +bridge._send_bridge_request("tcp://127.0.0.1:1", { + "message": "request-\ud800", + "items": ["nested-\udfff"], +}, 1) + +fake_conn = FakeConn() +bridge._write_json_response(fake_conn, { + "ok": True, + "message": "response-\udc00", + "nested": {"key-\ud800": "value-\udfff"}, +}) + +print(json.dumps({ + "request": json.loads(fake_socket.sent[0].decode("utf-8")), + "response": json.loads(fake_conn.sent.decode("utf-8")), + "closed": fake_socket.closed, +})) +`) + + expect(result).toEqual({ + request: { + message: 'request-\uFFFD', + items: ['nested-\uFFFD'], + }, + response: { + ok: true, + message: 'response-\uFFFD', + nested: { 'key-\uFFFD': 'value-\uFFFD' }, + }, + closed: true, + }) + }) +}) + describe('agent bridge profile environment', () => { it('runs agent calls with the requested profile HERMES_HOME and restores the bridge home', async () => { const profileHome = join(tempDir, 'profiles', 'work')