fix: harden windows gateway liveness (#658)
This commit is contained in:
+83
-9
@@ -2,7 +2,7 @@
|
|||||||
import { spawn, execSync } from 'child_process'
|
import { spawn, execSync } from 'child_process'
|
||||||
import { resolve, dirname, join } from 'path'
|
import { resolve, dirname, join } from 'path'
|
||||||
import { fileURLToPath } from 'url'
|
import { fileURLToPath } from 'url'
|
||||||
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync } from 'fs'
|
import { readFileSync, writeFileSync, unlinkSync, mkdirSync, openSync, chmodSync, statSync, existsSync } from 'fs'
|
||||||
import { randomBytes } from 'crypto'
|
import { randomBytes } from 'crypto'
|
||||||
import { homedir } from 'os'
|
import { homedir } from 'os'
|
||||||
|
|
||||||
@@ -61,7 +61,18 @@ function getCliBin() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Resolve the command interpreter used on Windows.
 *
 * Candidates are probed in order and the first one that exists on disk wins:
 *   1. `%ComSpec%` (trimmed; unset or blank values are ignored)
 *   2. `%SystemRoot%\System32\cmd.exe`
 *   3. Windows PowerShell 1.0 path under System32, as a last resort
 *
 * cmd.exe is probed before PowerShell on purpose: this file launches the
 * returned shell with the cmd.exe switches `/d /s /c` and also exports it to
 * the server process as `ComSpec`, neither of which PowerShell understands.
 *
 * @returns {string} Path of the first existing candidate, or the bare name
 *   `'cmd.exe'` (resolved through PATH by the caller) when none exists.
 */
function getWindowsShell() {
  const systemRoot = process.env.SystemRoot?.trim() || 'C:\\Windows'
  const system32 = join(systemRoot, 'System32')

  const candidates = [
    process.env.ComSpec?.trim(),
    join(system32, 'cmd.exe'),
    join(system32, 'WindowsPowerShell', 'v1.0', 'powershell.exe'),
  ].filter(Boolean)

  return candidates.find((candidate) => existsSync(candidate)) ?? 'cmd.exe'
}
|
||||||
|
|
||||||
function quoteForWindowsCommand(value) {
|
function quoteForWindowsCommand(value) {
|
||||||
@@ -70,6 +81,11 @@ function quoteForWindowsCommand(value) {
|
|||||||
|
|
||||||
function spawnCli(command, args, options) {
|
function spawnCli(command, args, options) {
|
||||||
if (process.platform === 'win32') {
|
if (process.platform === 'win32') {
|
||||||
|
const lowerCommand = String(command).toLowerCase()
|
||||||
|
if (!lowerCommand.endsWith('.cmd') && !lowerCommand.endsWith('.bat')) {
|
||||||
|
return spawn(command, args, options)
|
||||||
|
}
|
||||||
|
|
||||||
const commandLine = `${quoteForWindowsCommand(command)} ${args.map(arg => String(arg)).join(' ')}`
|
const commandLine = `${quoteForWindowsCommand(command)} ${args.map(arg => String(arg)).join(' ')}`
|
||||||
return spawn(getWindowsShell(), ['/d', '/s', '/c', commandLine], options)
|
return spawn(getWindowsShell(), ['/d', '/s', '/c', commandLine], options)
|
||||||
}
|
}
|
||||||
@@ -123,20 +139,62 @@ function getPort() {
|
|||||||
return argPort ?? DEFAULT_PORT
|
return argPort ?? DEFAULT_PORT
|
||||||
}
|
}
|
||||||
|
|
||||||
function getPid() {
|
/**
 * List the PIDs that own a TCP socket in the LISTEN state on `port`.
 *
 * On Windows the output of `netstat -aon -p tcp` is parsed; on every other
 * platform `lsof -tiTCP:<port> -sTCP:LISTEN` is used. Any error thrown by
 * `execSync` (tool missing, non-zero exit status, ...) is treated as
 * "no listeners" rather than propagated.
 *
 * @param {number|string} port TCP port; numeric strings are accepted.
 * @returns {number[]} De-duplicated PIDs, empty when the port is invalid,
 *   nothing is listening, or the lookup failed.
 */
function getListeningPids(port) {
  // Normalise once so the strict comparison in the netstat parser also works
  // for a port that arrives as a string, and so only a plain integer is ever
  // interpolated into the lsof command line.
  const targetPort = Number(port)
  if (!Number.isInteger(targetPort) || targetPort < 1 || targetPort > 65535) return []

  // Keep stdout, drop the tool's stderr: without this execSync forwards the
  // child's stderr to our own, so a missing tool would print on every lookup.
  const execOptions = { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] }

  try {
    if (process.platform === 'win32') {
      return parseNetstatListeners(execSync('netstat -aon -p tcp', execOptions), targetPort)
    }

    const out = execSync(`lsof -tiTCP:${targetPort} -sTCP:LISTEN`, execOptions)
    return uniquePids(out.split('\n'))
  } catch {
    return []
  }
}

/**
 * Extract the PIDs of LISTENING rows for `port` from `netstat -aon` output.
 * The second column is read as the local address and the last column as the PID.
 * NOTE(review): rows are matched on the literal text `LISTENING`; confirm this
 * holds on localized Windows installations.
 */
function parseNetstatListeners(output, port) {
  const pidColumns = output
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.includes('LISTENING'))
    .map((line) => line.split(/\s+/))
    // The port is whatever follows the last ':' (works for `[::]:8080` too).
    .filter((columns) => Number.parseInt((columns[1] || '').split(':').pop(), 10) === port)
    .map((columns) => columns[columns.length - 1])

  return uniquePids(pidColumns)
}

/** Parse decimal PID strings, dropping anything that is not a positive integer. */
function uniquePids(values) {
  const pids = values
    .map((value) => Number.parseInt(value, 10))
    .filter((pid) => Number.isInteger(pid) && pid > 0)

  return [...new Set(pids)]
}
|
||||||
|
|
||||||
|
/**
 * Find a live process listening on the configured port and record it as the
 * current PID.
 *
 * The port is taken from `getPortFromArgs()`, falling back to `DEFAULT_PORT`.
 * The first listener for which `isRunning` is true is written via `writePid`
 * (after ensuring `PID_DIR` exists) and returned.
 *
 * NOTE(review): any live listener on that port is accepted; nothing here
 * verifies that the process is actually this gateway — confirm that is intended.
 *
 * @returns {number|null} The recovered PID, or null when no live listener exists.
 */
function recoverPidFromPort() {
  const port = getPortFromArgs() ?? DEFAULT_PORT
  const pid = getListeningPids(port).find((candidate) => isRunning(candidate)) ?? null
  if (pid === null) return null

  try {
    mkdirSync(PID_DIR, { recursive: true })
    writePid(pid)
  } catch {
    // Persisting the PID is only a cache refresh. A read-only or otherwise
    // unwritable PID directory must not turn a successful lookup into a throw.
  }

  return pid
}
|
||||||
|
|
||||||
|
/**
 * Resolve the PID of the running server.
 *
 * The port-based lookup (`recoverPidFromPort`) is tried first; only when it
 * finds nothing is the PID file consulted. A PID read from the file is
 * returned only if it is a positive integer and `isRunning` confirms it.
 *
 * @returns {number|null} A live PID, or null when none can be determined.
 */
function getPid() {
  const recovered = recoverPidFromPort()
  if (recovered) return recovered

  let raw
  try {
    raw = readFileSync(PID_FILE, 'utf-8')
  } catch {
    // A missing or unreadable PID file simply means "no recorded PID".
    return null
  }

  // Explicit radix, and reject zero/negative values: those do not name a
  // single process, so they must never be handed on as "the server's PID".
  const pid = Number.parseInt(raw.trim(), 10)
  if (!Number.isInteger(pid) || pid <= 0) return null

  return isRunning(pid) ? pid : null
}
|
||||||
|
|
||||||
/**
 * Report whether a process with the given PID exists.
 *
 * Signal 0 is used as an existence probe: nothing is delivered to the target.
 * An `EPERM` failure still counts as running, because it means the process
 * exists but this user is not allowed to signal it.
 *
 * @param {number|string} pid Process id; numeric strings are accepted.
 * @returns {boolean} True when the process exists, false otherwise (including
 *   for values that are not a positive integer).
 */
function isRunning(pid) {
  // PID 0 and negative PIDs address process groups rather than one process,
  // so probing them would report "running" for something that is not a PID.
  const target = Number(pid)
  if (!Number.isInteger(target) || target <= 0) return false

  try {
    process.kill(target, 0)
    return true
  } catch (err) {
    return err?.code === 'EPERM'
  }
}
|
||||||
|
|
||||||
@@ -202,10 +260,16 @@ function startDaemon(port) {
|
|||||||
} catch { }
|
} catch { }
|
||||||
|
|
||||||
const logStream = openSync(LOG_FILE, 'a')
|
const logStream = openSync(LOG_FILE, 'a')
|
||||||
|
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
|
||||||
|
const serverEnv = { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token }
|
||||||
|
if (windowsShell) {
|
||||||
|
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
|
||||||
|
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
|
||||||
|
}
|
||||||
const child = spawn(process.execPath, [serverEntry], {
|
const child = spawn(process.execPath, [serverEntry], {
|
||||||
detached: true,
|
detached: true,
|
||||||
stdio: ['ignore', logStream, logStream],
|
stdio: ['ignore', logStream, logStream],
|
||||||
env: { ...process.env, NODE_ENV: 'production', PORT: String(port), AUTH_TOKEN: token },
|
env: serverEnv,
|
||||||
windowsHide: true,
|
windowsHide: true,
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -391,9 +455,19 @@ switch (command) {
|
|||||||
default:
|
default:
|
||||||
ensureNativeModules()
|
ensureNativeModules()
|
||||||
const port = !isNaN(command) ? parseInt(command) : DEFAULT_PORT
|
const port = !isNaN(command) ? parseInt(command) : DEFAULT_PORT
|
||||||
|
const windowsShell = process.platform === 'win32' ? getWindowsShell() : null
|
||||||
|
const serverEnv = {
|
||||||
|
...process.env,
|
||||||
|
NODE_ENV: 'production',
|
||||||
|
PORT: String(port),
|
||||||
|
}
|
||||||
|
if (windowsShell) {
|
||||||
|
serverEnv.SHELL = serverEnv.SHELL?.trim() || windowsShell
|
||||||
|
serverEnv.ComSpec = serverEnv.ComSpec?.trim() || windowsShell
|
||||||
|
}
|
||||||
const child = spawn(process.execPath, [serverEntry], {
|
const child = spawn(process.execPath, [serverEntry], {
|
||||||
stdio: 'inherit',
|
stdio: 'inherit',
|
||||||
env: { ...process.env, NODE_ENV: 'production', PORT: String(port) },
|
env: serverEnv,
|
||||||
windowsHide: true,
|
windowsHide: true,
|
||||||
})
|
})
|
||||||
child.on('exit', (code) => process.exit(code ?? 1))
|
child.on('exit', (code) => process.exit(code ?? 1))
|
||||||
|
|||||||
@@ -200,15 +200,28 @@ export class GatewayManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 从 profile 的 gateway.pid 文件读取 PID(JSON 格式 { "pid": 12345 }) */
|
/** Read a profile gateway PID, falling back to runtime state when gateway.pid is missing. */
|
||||||
private readPidFile(name: string): number | null {
|
private readPidFile(name: string): number | null {
|
||||||
const pidPath = join(this.profileDir(name), 'gateway.pid')
|
const profilePath = this.profileDir(name)
|
||||||
if (!existsSync(pidPath)) return null
|
const pidPath = join(profilePath, 'gateway.pid')
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const content = readFileSync(pidPath, 'utf-8').trim()
|
if (existsSync(pidPath)) {
|
||||||
|
const content = readFileSync(pidPath, 'utf-8').trim()
|
||||||
|
const data = JSON.parse(content)
|
||||||
|
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
const statePath = join(profilePath, 'gateway_state.json')
|
||||||
|
if (!existsSync(statePath)) return null
|
||||||
|
|
||||||
|
try {
|
||||||
|
const content = readFileSync(statePath, 'utf-8').trim()
|
||||||
const data = JSON.parse(content)
|
const data = JSON.parse(content)
|
||||||
return typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
const pid = typeof data.pid === 'number' ? data.pid : parseInt(data.pid, 10) || null
|
||||||
|
const state = data?.gateway_state
|
||||||
|
return pid && (state === 'running' || state === 'starting') ? pid : null
|
||||||
} catch {
|
} catch {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
@@ -218,13 +231,13 @@ export class GatewayManager {
|
|||||||
// 进程 & 端口检测工具
|
// 进程 & 端口检测工具
|
||||||
// ============================
|
// ============================
|
||||||
|
|
||||||
/**
 * Check process liveness without sending a terminating signal.
 *
 * Signal 0 only probes for existence. `EPERM` is treated as alive because it
 * means the process exists but the current user may not signal it.
 *
 * @param pid Process id to probe.
 * @returns True when the process exists; false otherwise, including for
 *   values that are not a positive integer.
 */
private isProcessAlive(pid: number): boolean {
  // PID 0 and negative PIDs address process groups rather than one process,
  // so they must not be reported as a live gateway.
  if (!Number.isInteger(pid) || pid <= 0) return false

  try {
    process.kill(pid, 0)
    return true
  } catch (err: any) {
    return err?.code === 'EPERM'
  }
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'
|
||||||
|
import { tmpdir } from 'os'
|
||||||
|
import { join } from 'path'
|
||||||
|
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
|
const originalHermesHome = process.env.HERMES_HOME
|
||||||
|
const tempHomes: string[] = []
|
||||||
|
|
||||||
|
/** Create a fresh temporary directory and register it in `tempHomes` for later cleanup. */
function createHermesHome(): string {
  const prefix = join(tmpdir(), 'hermes-web-ui-gateway-')
  const created = mkdtempSync(prefix)
  tempHomes.push(created)
  return created
}
|
||||||
|
|
||||||
|
/**
 * Build a GatewayManager for the 'default' profile with HERMES_HOME pointing at `home`.
 *
 * The module registry is reset and the module imported afresh on every call —
 * presumably so it re-reads HERMES_HOME at load time; confirm against the module.
 */
async function createManager(home: string): Promise<any> {
  process.env.HERMES_HOME = home
  vi.resetModules()

  const gatewayModule = await import('../../packages/server/src/services/hermes/gateway-manager')
  const manager = new gatewayModule.GatewayManager('default')
  return manager as any
}
|
||||||
|
|
||||||
|
// Undo everything a test may have touched: spies, the module cache,
// the HERMES_HOME variable and the temporary directories created so far.
afterEach(() => {
  vi.restoreAllMocks()
  vi.resetModules()

  if (originalHermesHome !== undefined) {
    process.env.HERMES_HOME = originalHermesHome
  } else {
    delete process.env.HERMES_HOME
  }

  // splice(0) empties the shared list while handing back what was in it.
  const homes = tempHomes.splice(0)
  homes.forEach((home) => {
    rmSync(home, { recursive: true, force: true })
  })
})
|
||||||
|
|
||||||
|
describe('GatewayManager Windows process recovery', () => {
  // Make every process.kill call throw an errno-style error carrying `code`.
  const makeKillThrow = (code: string, message: string): void => {
    const killSpy = vi.spyOn(process, 'kill') as any
    killSpy.mockImplementation(() => {
      const failure = new Error(message) as NodeJS.ErrnoException
      failure.code = code
      throw failure
    })
  }

  // Serialize `payload` as JSON into `dir/fileName`.
  const writeJson = (dir: string, fileName: string, payload: unknown): void => {
    writeFileSync(join(dir, fileName), JSON.stringify(payload))
  }

  it('treats EPERM from process.kill(pid, 0) as an alive process', async () => {
    const manager = await createManager(createHermesHome())
    makeKillThrow('EPERM', 'permission denied')

    expect(manager.isProcessAlive(12345)).toBe(true)
  })

  it('returns false for missing processes', async () => {
    const manager = await createManager(createHermesHome())
    makeKillThrow('ESRCH', 'missing process')

    expect(manager.isProcessAlive(12345)).toBe(false)
  })

  it('prefers gateway.pid when PID metadata exists', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway.pid', { pid: 11111 })
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'running' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBe(11111)
  })

  it('falls back to gateway_state.json when gateway.pid is missing', async () => {
    const home = createHermesHome()
    // The PID is deliberately a string here; the expectation below is numeric.
    writeJson(home, 'gateway_state.json', { pid: '22222', gateway_state: 'running' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBe(22222)
  })

  it('does not use gateway_state.json for stopped gateways', async () => {
    const home = createHermesHome()
    writeJson(home, 'gateway_state.json', { pid: 22222, gateway_state: 'stopped' })

    const manager = await createManager(home)

    expect(manager.readPidFile('default')).toBeNull()
  })

  it('uses profile-scoped gateway_state.json fallback', async () => {
    const home = createHermesHome()
    const profileHome = join(home, 'profiles', 'work')
    mkdirSync(profileHome, { recursive: true })
    writeJson(profileHome, 'gateway_state.json', { pid: 33333, gateway_state: 'starting' })

    const manager = await createManager(home)

    expect(manager.readPidFile('work')).toBe(33333)
  })
})
|
||||||
Reference in New Issue
Block a user