fix: harden windows gateway liveness (#658)

This commit is contained in:
luSkyl
2026-05-12 20:44:34 +08:00
committed by GitHub
parent b9b99042a0
commit c987448f01
3 changed files with 201 additions and 17 deletions
+83 -9
View File
@@ -2,7 +2,7 @@
import { spawn, execSync } from 'child_process'
import { resolve, dirname, join } from 'path'
import { fileURLToPath } from 'url'
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync } from 'fs'
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync, existsSync } from 'fs'
import { randomBytes } from 'crypto'
import { homedir } from 'os'
@@ -61,7 +61,18 @@ function getCliBin() {
}
function getWindowsShell() {
return process.env.ComSpec || 'cmd.exe'
const systemRoot = process.env.SystemRoot || 'C:\\Windows'
const candidates = [
process.env.ComSpec,
join(systemRoot, 'System32', 'WindowsPowerShell', 'v1.0', 'powershell.exe'),
join(systemRoot, 'System32', 'cmd.exe'),
].filter(Boolean)
for (const candidate of candidates) {
if (existsSync(candidate)) return candidate
}
return 'cmd.exe'
}
function quoteForWindowsCommand(value) {
@@ -70,6 +81,11 @@ function quoteForWindowsCommand(value) {
function spawnCli(command, args, options) {
if (process.platform === 'win32') {
const lowerCommand = String(command).toLowerCase()
if (!lowerCommand.endsWith('.cmd') && !lowerCommand.endsWith('.bat')) {
return spawn(command, args, options)
}
const commandLine = `${quoteForWindowsCommand(command)} ${args.map(arg => String(arg)).join(' ')}`
return spawn(getWindowsShell(), ['/d', '/s', '/c', commandLine], options)
}
@@ -123,20 +139,62 @@ function getPort() {
return argPort ?? DEFAULT_PORT
}
function getPid() {
function getListeningPids(port) {
if (!port || isNaN(port)) return []
try {
return parseInt(readFileSync(PID_FILE, 'utf-8').trim())
if (process.platform === 'win32') {
const out = execSync('netstat -aon -p tcp', { encoding: 'utf-8' })
return [...new Set(out.split('\n')
.map(line => line.trim())
.filter(line => line.includes('LISTENING'))
.map(line => line.split(/\s+/))
.filter(parts => {
const address = parts[1] || ''
const listenPort = parseInt(address.split(':').pop(), 10)
return listenPort === port
})
.map(parts => parseInt(parts[parts.length - 1], 10))
.filter(pid => Number.isFinite(pid)))]
}
const out = execSync(`lsof -tiTCP:${port} -sTCP:LISTEN`, { encoding: 'utf-8' }).trim()
return [...new Set(out.split('\n').map(pid => parseInt(pid, 10)).filter(pid => Number.isFinite(pid)))]
} catch {
return null
return []
}
}
/**
 * Looks for a live process listening on the configured port and, when one is
 * found, persists its PID through writePid so later lookups can use the PID file.
 *
 * The port comes from getPortFromArgs(), falling back to DEFAULT_PORT.
 *
 * @returns the first listening PID that isRunning() accepts, or null when none does.
 */
function recoverPidFromPort() {
  const port = getPortFromArgs() ?? DEFAULT_PORT
  const livePid = getListeningPids(port).find(candidate => isRunning(candidate))
  if (livePid === undefined) return null

  // Make sure the PID directory exists before the PID is written back.
  mkdirSync(PID_DIR, { recursive: true })
  writePid(livePid)
  return livePid
}
/**
 * Resolves the PID of the running server.
 *
 * Resolution order:
 *   1. a live process listening on the configured port (recoverPidFromPort);
 *   2. the PID stored in PID_FILE, provided that process is still running.
 *
 * @returns the PID, or null when neither source yields a running process.
 */
function getPid() {
  const recovered = recoverPidFromPort()
  if (recovered) return recovered
  try {
    // Explicit radix 10, matching the other parseInt calls in this file, so
    // content such as "0x1A" is never read as a hexadecimal PID.
    const pid = parseInt(readFileSync(PID_FILE, 'utf-8').trim(), 10)
    if (pid && isRunning(pid)) return pid
  } catch {
    // Best effort: a missing or unreadable PID file just means "no known PID".
  }
  return null
}
function isRunning(pid) {
try {
process.kill(pid, 0)
return true
} catch {
return false
} catch (err) {
return err?.code === 'EPERM'
}
}
@@ -202,10 +260,16 @@ function startDaemon(port) {
} catch { }
const logStream = openSync(LOG_FILE, 'a')
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
const serverEnv = { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token }
if (windowsShell) {
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
}
const child = spawn(process.execPath, [serverEntry], {
detached: true,
stdio: ['ignore', logStream, logStream],
env: { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token },
env: serverEnv,
windowsHide: true,
})
@@ -391,9 +455,19 @@ switch (command) {
default:
ensureNativeModules()
const port = !isNaN(command) ? parseInt(command) : DEFAULT_PORT
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
const serverEnv = {
...process.env,
NODE_ENV: 'production',
PORT: String(port),
}
if (windowsShell) {
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
}
const child = spawn(process.execPath, [serverEntry], {
stdio: 'inherit',
env: { ...process.env, NODE_ENV: 'production', PORT: String(port) },
env: serverEnv,
windowsHide: true,
})
child.on('exit', (code) => process.exit(code ?? 1))
@@ -200,15 +200,28 @@ export class GatewayManager {
}
}
/** profile gateway.pid 文件读取 PIDJSON 格式 { "pid": 12345 } */
/** Read a profile gateway PID, falling back to runtime state when gateway.pid is missing. */
private readPidFile(name: string): number | null {
const pidPath = join(this.profileDir(name), 'gateway.pid')
if (!existsSync(pidPath)) return null
const profilePath = this.profileDir(name)
const pidPath = join(profilePath, 'gateway.pid')
try {
const content = readFileSync(pidPath, 'utf-8').trim()
if (existsSync(pidPath)) {
const content = readFileSync(pidPath, 'utf-8').trim()
const data = JSON.parse(content)
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
}
} catch {}
const statePath = join(profilePath, 'gateway_state.json')
if (!existsSync(statePath)) return null
try {
const content = readFileSync(statePath, 'utf-8').trim()
const data = JSON.parse(content)
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
const pid = typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
const state = data?.gateway_state
return pid && (state === 'running' || state === 'starting') ? pid : null
} catch {
return null
}
@@ -218,13 +231,13 @@ export class GatewayManager {
// Process & port detection utilities
// ============================
/** 检查进程是否存活(发送信号 0,不实际杀死进程) */
/** Check process liveness without sending a terminating signal. */
private isProcessAlive(pid: number): boolean {
try {
process.kill(pid, 0)
return true
} catch {
return false
} catch (err: any) {
return err?.code === 'EPERM'
}
}
+97
View File
@@ -0,0 +1,97 @@
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import { afterEach, describe, expect, it, vi } from 'vitest'
const originalHermesHome = process.env.HERMES_HOME
const tempHomes: string[] = []
/**
 * Creates a fresh temporary directory to serve as a Hermes home and records
 * it in `tempHomes`.
 *
 * @returns the absolute path of the new directory.
 */
function createHermesHome(): string {
  const prefix = join(tmpdir(), 'hermes-web-ui-gateway-')
  const dir = mkdtempSync(prefix)
  tempHomes.push(dir)
  return dir
}
/**
 * Builds a GatewayManager for the 'default' profile with HERMES_HOME set to `home`.
 *
 * HERMES_HOME is assigned and the vitest module registry is reset before the
 * dynamic import, so the module is loaded fresh for each call (presumably so it
 * re-reads HERMES_HOME; confirm against gateway-manager).
 *
 * @param home directory to expose through process.env.HERMES_HOME.
 * @returns the manager cast to `any` so tests can reach its private members.
 */
async function createManager(home: string): Promise<any> {
  process.env.HERMES_HOME = home
  vi.resetModules()
  const gatewayModule = await import('../../packages/server/src/services/hermes/gateway-manager')
  return new gatewayModule.GatewayManager('default') as any
}
// Per-test teardown: restore mocks, reset the module registry, put HERMES_HOME
// back to its original value, and delete every temp home created by the test.
afterEach(() => {
  vi.restoreAllMocks()
  vi.resetModules()

  if (originalHermesHome !== undefined) {
    process.env.HERMES_HOME = originalHermesHome
  } else {
    delete process.env.HERMES_HOME
  }

  // splice(0) empties the shared list while handing back its contents.
  tempHomes.splice(0).forEach((home) => {
    rmSync(home, { recursive: true, force: true })
  })
})
// Exercises GatewayManager's liveness probe (isProcessAlive) and its PID lookup
// (readPidFile) across gateway.pid and gateway_state.json.
describe('GatewayManager Windows process recovery', () => {
  // Makes every process.kill call throw an errno-style error carrying `code`.
  const failKillWith = (code: string, message: string): void => {
    const killSpy = vi.spyOn(process, 'kill') as any
    killSpy.mockImplementation(() => {
      const failure = new Error(message) as NodeJS.ErrnoException
      failure.code = code
      throw failure
    })
  }

  // Writes `payload` as JSON to `fileName` inside `dir`.
  const writeJson = (dir: string, fileName: string, payload: unknown): void => {
    writeFileSync(join(dir, fileName), JSON.stringify(payload))
  }

  it('treats EPERM from process.kill(pid, 0) as an alive process', async () => {
    const manager = await createManager(createHermesHome())
    failKillWith('EPERM', 'permission denied')

    expect(manager.isProcessAlive(12345)).toBe(true)
  })

  it('returns false for missing processes', async () => {
    const manager = await createManager(createHermesHome())
    failKillWith('ESRCH', 'missing process')

    expect(manager.isProcessAlive(12345)).toBe(false)
  })

  it('prefers gateway.pid when PID metadata exists', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway.pid', { pid: 11111 })
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'running' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBe(11111)
  })

  it('falls back to gateway_state.json when gateway.pid is missing', async () => {
    const home = createHermesHome()
    // The PID is deliberately a string here; the expectation is the number 22222.
    writeJson(home, 'gateway_state.json', { pid: '22222', gateway_state: 'running' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBe(22222)
  })

  it('does not use gateway_state.json for stopped gateways', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'stopped' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBeNull()
  })

  it('uses profile-scoped gateway_state.json fallback', async () => {
    const home = createHermesHome()
    const profileHome = join(home, 'profiles', 'work')
    mkdirSync(profileHome, { recursive: true })
    writeJson(profileHome, 'gateway_state.json', { pid: 33333, gateway_state: 'starting' })

    const manager = await createManager(home)

    expect(manager.readPidFile('work')).toBe(33333)
  })
})