fix: harden windows gateway liveness (#658)
This commit is contained in:
+83
-9
@@ -2,7 +2,7 @@
|
||||
import { spawn, execSync } from 'child_process'
|
||||
import { resolve, dirname, join } from 'path'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync } from 'fs'
|
||||
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync, existsSync } from 'fs'
|
||||
import { randomBytes } from 'crypto'
|
||||
import { homedir } from 'os'
|
||||
|
||||
@@ -61,7 +61,18 @@ function getCliBin() {
|
||||
}
|
||||
|
||||
/**
 * Resolve the executable to use as the Windows command shell.
 *
 * Candidates are probed in this order and the first one that exists on disk
 * is returned:
 *   1. the path in the ComSpec environment variable (when set),
 *   2. Windows PowerShell under <SystemRoot>\System32\WindowsPowerShell\v1.0,
 *   3. cmd.exe under <SystemRoot>\System32.
 * SystemRoot falls back to 'C:\Windows' when the variable is unset.
 *
 * NOTE(review): spawnCli hands the result the switches /d /s /c, which are
 * presumably only meaningful to cmd.exe — confirm that the PowerShell
 * candidate is safe for that caller.
 *
 * @returns {string} Path of the first existing candidate, or the bare name
 *   'cmd.exe' when none of them exists.
 */
function getWindowsShell() {
  const systemRoot = process.env.SystemRoot || 'C:\\Windows'
  const candidates = [
    process.env.ComSpec,
    join(systemRoot, 'System32', 'WindowsPowerShell', 'v1.0', 'powershell.exe'),
    join(systemRoot, 'System32', 'cmd.exe'),
  ]

  for (const candidate of candidates) {
    // Skip an unset/empty ComSpec instead of handing undefined to existsSync.
    if (candidate && existsSync(candidate)) return candidate
  }

  return 'cmd.exe'
}
|
||||
|
||||
function quoteForWindowsCommand(value) {
|
||||
@@ -70,6 +81,11 @@ function quoteForWindowsCommand(value) {
|
||||
|
||||
function spawnCli(command, args, options) {
|
||||
if (process.platform === 'win32') {
|
||||
const lowerCommand = String(command).toLowerCase()
|
||||
if (!lowerCommand.endsWith('.cmd') && !lowerCommand.endsWith('.bat')) {
|
||||
return spawn(command, args, options)
|
||||
}
|
||||
|
||||
const commandLine = `${quoteForWindowsCommand(command)} ${args.map(arg => String(arg)).join(' ')}`
|
||||
return spawn(getWindowsShell(), ['/d', '/s', '/c', commandLine], options)
|
||||
}
|
||||
@@ -123,20 +139,62 @@ function getPort() {
|
||||
return argPort ?? DEFAULT_PORT
|
||||
}
|
||||
|
||||
/**
 * List the PIDs of processes that are listening on a TCP port.
 *
 * On Windows the output of `netstat -aon -p tcp` is scanned for LISTENING
 * rows whose local address ends in the requested port; elsewhere
 * `lsof -tiTCP:<port> -sTCP:LISTEN` is used. The lookup is best-effort: any
 * failure of the external command (missing tool, non-zero exit status)
 * yields an empty list.
 *
 * @param port Port to look up. Numeric strings are accepted; anything that is
 *   not an integer in 1..65535 yields an empty list without running a command.
 * @returns {number[]} De-duplicated PIDs, possibly empty.
 */
function getListeningPids(port) {
  // Normalise once so the comparison below is numeric and only a validated
  // integer is ever interpolated into the shell command.
  const wantedPort = Number(port)
  if (!Number.isInteger(wantedPort) || wantedPort < 1 || wantedPort > 65535) return []

  const pids = new Set()
  const addPid = (text) => {
    const pid = parseInt(text, 10)
    if (Number.isInteger(pid) && pid > 0) pids.add(pid)
  }

  try {
    if (process.platform === 'win32') {
      // stderr is discarded so a failing probe does not leak into the terminal.
      const out = execSync('netstat -aon -p tcp', {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'ignore'],
      })
      for (const rawLine of out.split(/\r?\n/)) {
        const line = rawLine.trim()
        if (!line.includes('LISTENING')) continue

        // The second column is read as the local address and the last as the PID.
        const columns = line.split(/\s+/)
        const localAddress = columns[1] || ''
        const listenPort = parseInt(localAddress.split(':').pop(), 10)
        if (listenPort === wantedPort) addPid(columns[columns.length - 1])
      }
    } else {
      const out = execSync(`lsof -tiTCP:${wantedPort} -sTCP:LISTEN`, {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'ignore'],
      })
      // `-t` makes lsof print bare PIDs, one per line.
      for (const line of out.trim().split('\n')) addPid(line)
    }
  } catch {
    return []
  }

  return [...pids]
}
|
||||
|
||||
/**
 * Find a live process listening on the configured port and record its PID.
 *
 * The port comes from getPortFromArgs(), falling back to DEFAULT_PORT. The
 * first listener that isRunning() accepts is passed to writePid() (after
 * making sure PID_DIR exists) and returned.
 *
 * @returns The recovered PID, or null when no live listener was found.
 */
function recoverPidFromPort() {
  const listeners = getListeningPids(getPortFromArgs() ?? DEFAULT_PORT)
  const alivePid = listeners.find(candidate => isRunning(candidate))
  if (alivePid === undefined) return null

  mkdirSync(PID_DIR, { recursive: true })
  writePid(alivePid)
  return alivePid
}
|
||||
|
||||
/**
 * Resolve the PID of the running server.
 *
 * A PID recovered from the listening port takes precedence; otherwise the PID
 * stored in PID_FILE is used, but only if that process is still running.
 *
 * @returns The PID, or null when neither source yields a live process.
 */
function getPid() {
  const fromPort = recoverPidFromPort()
  if (fromPort) return fromPort

  let fromFile = null
  try {
    const stored = parseInt(readFileSync(PID_FILE, 'utf-8').trim())
    if (stored && isRunning(stored)) fromFile = stored
  } catch {
    // An unreadable PID file is treated the same as "no PID recorded".
  }
  return fromFile
}
|
||||
|
||||
function isRunning(pid) {
|
||||
try {
|
||||
process.kill(pid, 0)
|
||||
return true
|
||||
} catch {
|
||||
return false
|
||||
} catch (err) {
|
||||
return err?.code === 'EPERM'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,10 +260,16 @@ function startDaemon(port) {
|
||||
} catch { }
|
||||
|
||||
const logStream = openSync(LOG_FILE, 'a')
|
||||
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
|
||||
const serverEnv = { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token }
|
||||
if (windowsShell) {
|
||||
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
|
||||
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
|
||||
}
|
||||
const child = spawn(process.execPath, [serverEntry], {
|
||||
detached: true,
|
||||
stdio: ['ignore', logStream, logStream],
|
||||
env: { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token },
|
||||
env: serverEnv,
|
||||
windowsHide: true,
|
||||
})
|
||||
|
||||
@@ -391,9 +455,19 @@ switch (command) {
|
||||
default:
|
||||
ensureNativeModules()
|
||||
const port = !isNaN(command) ? parseInt(command) : DEFAULT_PORT
|
||||
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
|
||||
const serverEnv = {
|
||||
...process.env,
|
||||
NODE_ENV: 'production',
|
||||
PORT: String(port),
|
||||
}
|
||||
if (windowsShell) {
|
||||
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
|
||||
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
|
||||
}
|
||||
const child = spawn(process.execPath, [serverEntry], {
|
||||
stdio: 'inherit',
|
||||
env: { ...process.env, NODE_ENV: 'production', PORT: String(port) },
|
||||
env: serverEnv,
|
||||
windowsHide: true,
|
||||
})
|
||||
child.on('exit', (code) => process.exit(code ?? 1))
|
||||
|
||||
@@ -200,15 +200,28 @@ export class GatewayManager {
|
||||
}
|
||||
}
|
||||
|
||||
/** 从 profile 的 gateway.pid 文件读取 PID(JSON 格式 { "pid": 12345 }) */
|
||||
/** Read a profile gateway PID, falling back to runtime state when gateway.pid is missing. */
|
||||
private readPidFile(name: string): number | null {
|
||||
const pidPath = join(this.profileDir(name), 'gateway.pid')
|
||||
if (!existsSync(pidPath)) return null
|
||||
const profilePath = this.profileDir(name)
|
||||
const pidPath = join(profilePath, 'gateway.pid')
|
||||
|
||||
try {
|
||||
const content = readFileSync(pidPath, 'utf-8').trim()
|
||||
if (existsSync(pidPath)) {
|
||||
const content = readFileSync(pidPath, 'utf-8').trim()
|
||||
const data = JSON.parse(content)
|
||||
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
||||
}
|
||||
} catch {}
|
||||
|
||||
const statePath = join(profilePath, 'gateway_state.json')
|
||||
if (!existsSync(statePath)) return null
|
||||
|
||||
try {
|
||||
const content = readFileSync(statePath, 'utf-8').trim()
|
||||
const data = JSON.parse(content)
|
||||
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
||||
const pid = typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
||||
const state = data?.gateway_state
|
||||
return pid && (state === 'running' || state === 'starting') ? pid : null
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
@@ -218,13 +231,13 @@ export class GatewayManager {
|
||||
// 进程 & 端口检测工具
|
||||
// ============================
|
||||
|
||||
/**
 * Check process liveness without sending a terminating signal.
 *
 * The process is probed with signal 0. EPERM means the process exists but
 * this user may not signal it, so it is reported as alive; any other failure
 * is reported as not alive.
 *
 * @param pid Process id to probe. Values that are not positive integers
 *   return false without probing, because process.kill treats 0 and negative
 *   ids as process-group targets rather than a single process.
 * @returns true when the process exists.
 */
private isProcessAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false

  try {
    process.kill(pid, 0)
    return true
  } catch (err: unknown) {
    return typeof err === 'object' && err !== null && 'code' in err && err.code === 'EPERM'
  }
}
|
||||
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
// HERMES_HOME as it was when this module was loaded; the afterEach hook puts it back.
const originalHermesHome = process.env.HERMES_HOME
|
||||
// Temporary directories handed out by createHermesHome(); the afterEach hook deletes them.
const tempHomes: string[] = []
|
||||
|
||||
/**
 * Create a fresh temporary directory to act as HERMES_HOME for one test and
 * register it in tempHomes so it can be cleaned up later.
 *
 * @returns Absolute path of the new directory.
 */
function createHermesHome(): string {
  const prefix = join(tmpdir(), 'hermes-web-ui-gateway-')
  const dir = mkdtempSync(prefix)
  tempHomes.push(dir)
  return dir
}
|
||||
|
||||
/**
 * Build a GatewayManager for the 'default' profile with HERMES_HOME pointing
 * at `home`.
 *
 * The module registry is reset before the import so the gateway-manager
 * module is evaluated again after HERMES_HOME has been changed.
 *
 * @param home Directory to expose as HERMES_HOME.
 * @returns The manager, typed loosely so tests can reach private members.
 */
async function createManager(home: string): Promise<any> {
  process.env.HERMES_HOME = home
  vi.resetModules()

  const gatewayModule = await import('../../packages/server/src/services/hermes/gateway-manager')
  return new gatewayModule.GatewayManager('default') as any
}
|
||||
|
||||
// Undo per-test state: spies, the module registry, HERMES_HOME and temp directories.
afterEach(() => {
  vi.restoreAllMocks()
  vi.resetModules()

  if (originalHermesHome !== undefined) {
    process.env.HERMES_HOME = originalHermesHome
  } else {
    delete process.env.HERMES_HOME
  }

  // splice(0) empties the registry and hands back everything that was in it.
  const staleHomes = tempHomes.splice(0)
  staleHomes.forEach((dir) => {
    rmSync(dir, { recursive: true, force: true })
  })
})
|
||||
|
||||
describe('GatewayManager Windows process recovery', () => {
  /** Make process.kill throw an errno-style error carrying the given code. */
  const failKillWith = (code: string, message: string): void => {
    const killSpy = vi.spyOn(process, 'kill') as any
    killSpy.mockImplementation(() => {
      const failure = new Error(message) as NodeJS.ErrnoException
      failure.code = code
      throw failure
    })
  }

  /** Write `payload` as JSON to `fileName` inside `dir`. */
  const writeJson = (dir: string, fileName: string, payload: unknown): void => {
    writeFileSync(join(dir, fileName), JSON.stringify(payload))
  }

  it('treats EPERM from process.kill(pid, 0) as an alive process', async () => {
    const manager = await createManager(createHermesHome())
    failKillWith('EPERM', 'permission denied')

    const alive = manager.isProcessAlive(12345)

    expect(alive).toBe(true)
  })

  it('returns false for missing processes', async () => {
    const manager = await createManager(createHermesHome())
    failKillWith('ESRCH', 'missing process')

    const alive = manager.isProcessAlive(12345)

    expect(alive).toBe(false)
  })

  it('prefers gateway.pid when PID metadata exists', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway.pid', { pid: 11111 })
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'running' })

    const manager = await createManager(home)
    const pid = manager.readPidFile('default')

    expect(pid).toBe(11111)
  })

  it('falls back to gateway_state.json when gateway.pid is missing', async () => {
    const home = createHermesHome()
    // The PID is deliberately a string here to cover the parseInt path.
    writeJson(home, 'gateway_state.json', { pid: '22222', gateway_state: 'running' })

    const manager = await createManager(home)
    const pid = manager.readPidFile('default')

    expect(pid).toBe(22222)
  })

  it('does not use gateway_state.json for stopped gateways', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'stopped' })

    const manager = await createManager(home)
    const pid = manager.readPidFile('default')

    expect(pid).toBeNull()
  })

  it('uses profile-scoped gateway_state.json fallback', async () => {
    const home = createHermesHome()
    const profileHome = join(home, 'profiles', 'work')
    mkdirSync(profileHome, { recursive: true })
    writeJson(profileHome, 'gateway_state.json', { pid: 33333, gateway_state: 'starting' })

    const manager = await createManager(home)
    const pid = manager.readPidFile('work')

    expect(pid).toBe(33333)
  })
})
|
||||
Reference in New Issue
Block a user