fix bridge surrogate json encoding (#1059)

This commit is contained in:
ekko
2026-05-27 11:44:04 +08:00
committed by GitHub
parent 1ec9568502
commit a43ead594e
2 changed files with 97 additions and 3 deletions
@@ -19,6 +19,7 @@ import json
import locale
import os
import queue
import re
import signal
import shutil
import socket
@@ -47,6 +48,7 @@ OPENROUTER_ATTRIBUTION_ENV = {
"title": "HERMES_OPENROUTER_APP_TITLE",
"categories": "HERMES_OPENROUTER_APP_CATEGORIES",
}
# Matches one surrogate code point (U+D800 through U+DFFF). _sanitize_surrogates
# substitutes each match with U+FFFD before the text is encoded as UTF-8.
_SURROGATE_RE = re.compile("[\ud800-\udfff]")
def _bridge_platform() -> str:
@@ -265,6 +267,25 @@ def _jsonable(value: Any) -> Any:
return str(value)
def _sanitize_surrogates(value: Any) -> Any:
    """Return a copy of *value* with surrogate code points replaced by U+FFFD.

    Strings have every match of ``_SURROGATE_RE`` replaced with ``"\\ufffd"``.
    Dicts, lists and tuples are walked recursively; lists and tuples both come
    back as lists. Any other value is returned unchanged.

    Args:
        value: Arbitrary payload, typically about to be JSON-encoded.

    Returns:
        The sanitized string or rebuilt container, or *value* itself when it
        is not a str, dict, list or tuple.
    """
    if isinstance(value, str):
        return _SURROGATE_RE.sub("\ufffd", value)
    if isinstance(value, dict):
        cleaned: dict[Any, Any] = {}
        for key, item in value.items():
            # Only string keys can contain surrogates. Recursing into other
            # key types would turn a tuple key into a list, which is
            # unhashable and would raise TypeError while building the dict.
            if isinstance(key, str):
                key = _SURROGATE_RE.sub("\ufffd", key)
            cleaned[key] = _sanitize_surrogates(item)
        return cleaned
    if isinstance(value, (list, tuple)):
        return [_sanitize_surrogates(item) for item in value]
    return value
def _json_default(value: Any) -> str:
    """Stringify *value* and scrub surrogates from the resulting text.

    Passed as the ``default=`` hook to ``json.dumps`` by ``_json_line_bytes``.
    """
    text = str(value)
    return _sanitize_surrogates(text)
def _json_line_bytes(value: Any) -> bytes:
    """Encode *value* as a single newline-terminated UTF-8 JSON line.

    The payload is passed through ``_sanitize_surrogates`` first so that the
    final UTF-8 encode does not fail on surrogate code points. Objects that
    ``json.dumps`` cannot serialize are handled by ``_json_default``.
    """
    cleaned = _sanitize_surrogates(value)
    line = json.dumps(cleaned, ensure_ascii=False, default=_json_default)
    return f"{line}\n".encode("utf-8")
def _agent_root() -> Path | None:
    """Look up the agent root using the ``HERMES_AGENT_ROOT`` environment variable.

    The variable's value (``None`` when unset) is handed to
    ``_find_agent_root`` and that function's result is returned as-is.
    """
    configured_root = os.environ.get("HERMES_AGENT_ROOT")
    return _find_agent_root(configured_root)
@@ -2419,7 +2440,7 @@ def _connect_bridge_socket(endpoint: str, timeout: float) -> socket.socket:
def _send_bridge_request(endpoint: str, req: dict[str, Any], timeout: float) -> dict[str, Any]:
sock = _connect_bridge_socket(endpoint, timeout)
try:
sock.sendall((json.dumps(req, ensure_ascii=False, default=str) + "\n").encode("utf-8"))
sock.sendall(_json_line_bytes(req))
chunks: list[bytes] = []
while True:
chunk = sock.recv(65536)
@@ -2574,8 +2595,7 @@ def _read_json_request(conn: socket.socket) -> dict[str, Any]:
def _write_json_response(conn: socket.socket, resp: dict[str, Any]) -> None:
    """Send *resp* over *conn* as one newline-terminated UTF-8 JSON line.

    Encoding is delegated to ``_json_line_bytes`` so that surrogate code
    points in the response are replaced before the UTF-8 encode. The response
    is written exactly once; a second, unsanitized ``json.dumps`` write would
    duplicate the line and could raise ``UnicodeEncodeError`` on such input.

    Args:
        conn: Connected socket to write the response to.
        resp: Response payload to serialize.
    """
    conn.sendall(_json_line_bytes(resp))
class BridgeBroker:
@@ -32,6 +32,80 @@ async function runBridgeProbe(script: string): Promise<any> {
return JSON.parse(stdout)
}
// Regression suite for surrogate handling in the bridge's JSON line writers.
describe('agent bridge JSON encoding', () => {
// The embedded Python script loads the bridge module from the BRIDGE_PATH
// environment variable, swaps in fake sockets that record what is sent, and
// calls _send_bridge_request and _write_json_response with strings that
// contain surrogate code points. It prints the decoded payloads as JSON.
// NOTE(review): presumably runBridgeProbe executes this script with Python and
// returns its parsed stdout — confirm against the helper's definition.
it('replaces lone surrogate characters before bridge socket writes', async () => {
const result = await runBridgeProbe(String.raw`
import importlib.util
import json
import os
import sys
spec = importlib.util.spec_from_file_location("hermes_bridge", os.environ["BRIDGE_PATH"])
bridge = importlib.util.module_from_spec(spec)
sys.modules["hermes_bridge"] = bridge
spec.loader.exec_module(bridge)
class FakeSocket:
def __init__(self):
self.sent = []
self.closed = False
self._read = False
def sendall(self, payload):
self.sent.append(payload)
def recv(self, size):
if self._read:
return b""
self._read = True
return b'{"ok":true}\n'
def close(self):
self.closed = True
class FakeConn:
def __init__(self):
self.sent = b""
def sendall(self, payload):
self.sent += payload
fake_socket = FakeSocket()
bridge._connect_bridge_socket = lambda endpoint, timeout: fake_socket
bridge._send_bridge_request("tcp://127.0.0.1:1", {
"message": "request-\ud800",
"items": ["nested-\udfff"],
}, 1)
fake_conn = FakeConn()
bridge._write_json_response(fake_conn, {
"ok": True,
"message": "response-\udc00",
"nested": {"key-\ud800": "value-\udfff"},
})
print(json.dumps({
"request": json.loads(fake_socket.sent[0].decode("utf-8")),
"response": json.loads(fake_conn.sent.decode("utf-8")),
"closed": fake_socket.closed,
}))
`)
// Every surrogate, in values, nested list items and dict keys alike, must
// arrive as U+FFFD, and the request socket must have been closed.
expect(result).toEqual({
request: {
message: 'request-\uFFFD',
items: ['nested-\uFFFD'],
},
response: {
ok: true,
message: 'response-\uFFFD',
nested: { 'key-\uFFFD': 'value-\uFFFD' },
},
closed: true,
})
})
})
describe('agent bridge profile environment', () => {
it('runs agent calls with the requested profile HERMES_HOME and restores the bridge home', async () => {
const profileHome = join(tempDir, 'profiles', 'work')