diff --git a/packages/server/src/controllers/hermes/media.ts b/packages/server/src/controllers/hermes/media.ts index 501c5df..bf31c6e 100644 --- a/packages/server/src/controllers/hermes/media.ts +++ b/packages/server/src/controllers/hermes/media.ts @@ -3,10 +3,14 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs' import { dirname, extname, isAbsolute, join, resolve } from 'path' import { getActiveAuthPath } from '../../services/hermes/hermes-profile' import { config } from '../../config' +import { readConfigYaml } from '../../services/config-helpers' const XAI_VIDEO_GENERATIONS_URL = 'https://api.x.ai/v1/videos/generations' const XAI_VIDEO_STATUS_URL = 'https://api.x.ai/v1/videos' const XAI_VIDEO_MODEL = 'grok-imagine-video' +const APIKEY_IMAGE_PROVIDER = 'fun-codex' +const APIKEY_IMAGE_MODEL = 'gpt-image-2' +const APIKEY_IMAGE_TO_IMAGE_MODEL = 'gpt-5.4-mini' const MAX_IMAGE_BYTES = 25 * 1024 * 1024 const DEFAULT_POLL_INTERVAL_MS = 5000 const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000 @@ -16,6 +20,14 @@ type AuthJson = { credential_pool?: Record } +type ApiKeyImageMode = 'text' | 'image' | 'edit' + +type FunCodexProvider = { + apiKey: string + baseUrl: string + model: string +} + function readJsonFile(path: string): any { try { return JSON.parse(readFileSync(path, 'utf-8')) @@ -24,6 +36,29 @@ function readJsonFile(path: string): any { } } +function buildApiUrl(baseUrl: string, pathWithV1: string): string { + const base = (baseUrl || 'https://api.apikey.fun/v1').replace(/\/+$/, '') + const apiPath = pathWithV1.startsWith('/') ? pathWithV1 : `/${pathWithV1}` + if (base.endsWith('/v1') && apiPath.startsWith('/v1/')) return `${base}${apiPath.slice(3)}` + return `${base}${apiPath}` +} + +async function resolveFunCodexProvider(): Promise { + const hermesConfig = await readConfigYaml() + const customProviders = Array.isArray(hermesConfig.custom_providers) + ? 
hermesConfig.custom_providers as any[] + : [] + const provider = customProviders.find(entry => String(entry?.name || '').trim() === APIKEY_IMAGE_PROVIDER) + const apiKey = String(provider?.api_key || '').trim() + const baseUrl = String(provider?.base_url || '').trim() + if (!provider || !apiKey || !baseUrl) return null + return { + apiKey, + baseUrl, + model: String(provider?.model || '').trim(), + } +} + function resolveXaiToken(): { token: string; source: string } | null { const envToken = String(process.env.XAI_API_KEY || '').trim() if (envToken) return { token: envToken, source: 'XAI_API_KEY' } @@ -103,6 +138,88 @@ function normalizeImageInput(body: any): string { return imagePathToDataUri(imagePath) } +function imageDataUriToBytes(dataUri: string): { buffer: Buffer; mime: string; name: string } { + const match = dataUri.match(/^data:([^;,]+);base64,(.+)$/) + if (!match) { + const err: any = new Error('image_base64 must be a valid image data URI for edit mode') + err.status = 400 + throw err + } + const mime = match[1] + if (!mime.startsWith('image/')) { + const err: any = new Error('image data URI must use an image mime type') + err.status = 400 + throw err + } + return { + buffer: Buffer.from(match[2], 'base64'), + mime, + name: `source.${mime === 'image/jpeg' ? 
'jpg' : mime.split('/')[1] || 'png'}`,
+  }
+}
+
+/**
+ * Downloads the image behind `image_url` for edit mode.
+ *
+ * @param url Caller-supplied image URL.
+ * @returns The downloaded bytes, the response mime type (`image/png` when the Content-Type
+ *   header is empty), and a file name taken from the last URL path segment.
+ * @throws Error with `status` 400 when the URL is invalid, the request fails, or the response
+ *   is not an image; `status` 413 when the image exceeds `MAX_IMAGE_BYTES`.
+ */
+async function fetchImageBytes(url: string): Promise<{ buffer: Buffer; mime: string; name: string }> {
+  const fail = (message: string, status: number): never => {
+    const err: any = new Error(message)
+    err.status = status
+    throw err
+  }
+
+  // Parse first so a malformed URL is reported as a 400 instead of an unhandled TypeError from fetch.
+  let target: URL
+  try {
+    target = new URL(url)
+  } catch {
+    return fail('image_url must be a valid URL', 400)
+  }
+
+  // NOTE(review): the URL is caller-supplied and fetched from the server process; confirm this
+  // endpoint is only reachable by trusted callers, or restrict the allowed hosts.
+  let res: Response
+  try {
+    // Bound the download so a stalled remote host cannot hang the request forever.
+    res = await fetch(target, { signal: AbortSignal.timeout(DEFAULT_TIMEOUT_MS) })
+  } catch (cause: any) {
+    return fail(`image_url fetch failed: ${cause?.message || String(cause)}`, 400)
+  }
+  if (!res.ok) fail(`image_url fetch failed: ${res.status} ${res.statusText}`, 400)
+
+  // Header values may carry padding or upper-case letters ("Image/PNG; charset=...").
+  const mime = String(res.headers.get('content-type') || '').split(';')[0].trim().toLowerCase() || 'image/png'
+  if (!mime.startsWith('image/')) fail('image_url did not return an image', 400)
+
+  const tooLarge = `image is too large (max ${MAX_IMAGE_BYTES} bytes)`
+  // Reject on the declared size first so an oversized body is never buffered into memory.
+  if (Number(res.headers.get('content-length') || 0) > MAX_IMAGE_BYTES) {
+    await res.body?.cancel().catch(() => undefined)
+    fail(tooLarge, 413)
+  }
+  const buffer = Buffer.from(await res.arrayBuffer())
+  // Content-Length may be absent or wrong, so re-check the actual byte count.
+  if (buffer.length > MAX_IMAGE_BYTES) fail(tooLarge, 413)
+
+  const name = target.pathname.split('/').pop() || 'source.png'
+  return { buffer, mime, name }
+}
+
+async function normalizeImageFile(body: any): Promise<{ buffer: Buffer; mime: string; name: string }> {
+  const imageUrl = typeof body.image_url === 'string' ? body.image_url.trim() : ''
+  if (imageUrl) return fetchImageBytes(imageUrl)
+
+  const imageBase64 = typeof body.image_base64 === 'string' ? body.image_base64.trim() : ''
+  if (imageBase64) {
+    const dataUri = imageBase64.startsWith('data:image/')
+      ? imageBase64
+      : `data:${String(body.mime_type || '').trim()};base64,${imageBase64}`
+    return imageDataUriToBytes(dataUri)
+  }
+
+  const imagePath = typeof body.image_path === 'string' ? body.image_path.trim() : ''
+  if (!imagePath) {
+    const err: any = new Error('image_path, image_url, or image_base64 is required')
+    err.status = 400
+    throw err
+  }
+  const resolvedPath = isAbsolute(imagePath) ? 
imagePath : resolve(process.cwd(), imagePath) + if (!existsSync(resolvedPath)) { + const err: any = new Error('image_path does not exist') + err.status = 404 + throw err + } + const buffer = readFileSync(resolvedPath) + if (buffer.length > MAX_IMAGE_BYTES) { + const err: any = new Error(`image is too large (max ${MAX_IMAGE_BYTES} bytes)`) + err.status = 413 + throw err + } + const mime = mimeFromMagic(buffer) || mimeFromPath(resolvedPath) + if (!mime) { + const err: any = new Error('unsupported image type; use png, jpeg, or webp') + err.status = 400 + throw err + } + return { buffer, mime, name: resolvedPath.split(/[\\/]/).pop() || 'source.png' } +} + function normalizeDuration(value: unknown): number { const duration = Number(value || 8) if (!Number.isFinite(duration) || duration < 1 || duration > 15) { @@ -118,6 +235,224 @@ export function defaultMediaOutputPath(requestId: string, now = new Date()): str return join(config.appHome, 'media', `${safeRequestId}.mp4`) } +export function defaultImageOutputPath(requestId: string, index = 0): string { + const safeRequestId = requestId.replace(/[^A-Za-z0-9_-]/g, '_') || `image_${Date.now()}` + const suffix = index > 0 ? 
`-${index + 1}` : ''
+  return join(config.appHome, 'media', `${safeRequestId}${suffix}.png`)
+}
+
+/**
+ * Validates the request `mode`, defaulting to `text` when it is missing or falsy.
+ * @throws Error with `status` 400 for any value other than text, image, or edit.
+ */
+function normalizeImageMode(value: unknown): ApiKeyImageMode {
+  const mode = String(value || 'text').trim().toLowerCase()
+  if (mode === 'text' || mode === 'image' || mode === 'edit') return mode
+  const err: any = new Error('mode must be one of text, image, or edit')
+  err.status = 400
+  throw err
+}
+
+/**
+ * Parses a positive integer request option.
+ *
+ * `undefined`, `null`, and `''` select `fallback`; every other value is converted with `Number`
+ * and floored.
+ * @throws Error with `status` 400 when the value is not a finite number >= 1. An explicit `0`
+ *   is rejected rather than silently replaced by the fallback.
+ */
+function normalizePositiveInt(value: unknown, fallback: number, key: string): number {
+  const provided = value !== undefined && value !== null && value !== ''
+  const parsed = Number(provided ? value : fallback)
+  if (!Number.isFinite(parsed) || parsed < 1) {
+    const err: any = new Error(`${key} must be a positive number`)
+    err.status = 400
+    throw err
+  }
+  return Math.floor(parsed)
+}
+
+/**
+ * Recursively gathers base64 image payloads from a stream event into `images`.
+ *
+ * Looks at the known payload keys on the event, at `event.data[]`, at `event.response.output[]`
+ * (including each item's `result`), and at `event.item.result`.
+ * @returns The same `images` array that was passed in (or a new one), for chaining.
+ */
+function collectImageBase64(event: any, images: string[] = []): string[] {
+  if (!event || typeof event !== 'object') return images
+  // The same image can be reported by several events (for example once as `item.result` and
+  // again inside `response.output`), so only unseen payloads are stored.
+  const add = (value: unknown): void => {
+    if (typeof value === 'string' && value && !images.includes(value)) images.push(value)
+  }
+  for (const key of ['b64_json', 'base64', 'image_base64', 'partial_image_b64']) add(event[key])
+  // Guard with Array.isArray: a non-array `data`/`output` would make `for...of` throw.
+  if (Array.isArray(event.data)) {
+    for (const item of event.data) collectImageBase64(item, images)
+  }
+  if (Array.isArray(event.response?.output)) {
+    for (const item of event.response.output) {
+      add(item?.result)
+      collectImageBase64(item, images)
+    }
+  }
+  add(event.item?.result)
+  return images
+}
+
+/** Returns true for the two partial-image event types, which carry previews rather than final images. */
+function isPartialImageEvent(event: any): boolean {
+  return event?.type === 'image_generation.partial_image' ||
+    event?.type === 'response.image_generation_call.partial_image'
+}
+
+/**
+ * Throws a `status` 502 error when the event type is `error` or `response.failed`, using the
+ * message from the event when one is present.
+ */
+function throwIfImageStreamError(event: any): void {
+  if (event?.type !== 'error' && event?.type !== 'response.failed') return
+  const err: any = new Error(event?.response?.error?.message || event?.error?.message || 'image generation failed')
+  err.status = 502
+  throw err
+}
+
+async function readSseImageResults(res: Response, limit: number): Promise<string[]> {
+  if (!res.body) throw new Error('image generation response is not readable')
+  
const reader = res.body.getReader()
+  const decoder = new TextDecoder()
+  const images: string[] = []
+
+  // Parses one SSE frame (its `data:` lines joined with newlines) and folds final image payloads
+  // into `images`. `tolerant` is used for the unterminated tail of the stream, which may be a
+  // truncated fragment rather than a complete event.
+  const consumeFrame = (frame: string, tolerant = false): void => {
+    const data = frame
+      .split(/\r?\n/)
+      .filter(line => line.startsWith('data:'))
+      .map(line => line.slice(5).trimStart())
+      .join('\n')
+      .trim()
+    if (!data || data === '[DONE]') return
+    let event: any
+    try {
+      event = JSON.parse(data)
+    } catch {
+      if (tolerant) return
+      // Report a broken upstream payload as a gateway error instead of a raw SyntaxError.
+      const err: any = new Error('image generation stream returned malformed JSON')
+      err.status = 502
+      throw err
+    }
+    throwIfImageStreamError(event)
+    if (isPartialImageEvent(event)) return
+    collectImageBase64(event, images)
+  }
+
+  let buffer = ''
+  try {
+    while (images.length < limit) {
+      const { value, done } = await reader.read()
+      if (done) {
+        // Flush the decoder and process a last frame that was not followed by a blank line;
+        // without this the final event of such a stream would be dropped.
+        buffer += decoder.decode()
+        consumeFrame(buffer, true)
+        break
+      }
+      buffer += decoder.decode(value, { stream: true })
+      const frames = buffer.split(/\r?\n\r?\n/)
+      buffer = frames.pop() || ''
+      for (const frame of frames) {
+        consumeFrame(frame)
+        if (images.length >= limit) break
+      }
+    }
+  } finally {
+    // Stop reading once enough images arrived (or on error) so the upstream connection is released.
+    await reader.cancel().catch(() => undefined)
+  }
+  return images.slice(0, limit)
+}
+
+/**
+ * Sends the image request for `mode` to the provider and returns the base64 images read from
+ * the streamed response.
+ * @throws Error with `status` 400 when `prompt` is missing, or 502 when the provider request
+ *   fails or the stream ends without image data.
+ */
+async function requestApiKeyImage(provider: FunCodexProvider, mode: ApiKeyImageMode, body: any): Promise<string[]> {
+  const prompt = typeof body.prompt === 'string' ? body.prompt.trim() : ''
+  if (!prompt) {
+    const err: any = new Error('prompt is required')
+    err.status = 400
+    throw err
+  }
+
+  const n = normalizePositiveInt(body.n, 1, 'n')
+  const timeoutMs = normalizePositiveInt(body.timeout_ms, DEFAULT_TIMEOUT_MS, 'timeout_ms')
+  const headers = {
+    Accept: 'text/event-stream',
+    Authorization: `Bearer ${provider.apiKey}`,
+  }
+
+  let res: Response
+  if (mode === 'text') {
+    res = await fetch(buildApiUrl(provider.baseUrl, '/v1/images/generations'), {
+      method: 'POST',
+      headers: { ...headers, 'Content-Type': 'application/json' },
+      signal: AbortSignal.timeout(timeoutMs),
+      body: JSON.stringify({
+        model: body.model || APIKEY_IMAGE_MODEL,
+        prompt,
+        n,
+        size: body.size || '1024x1024',
+        quality: body.quality || 'auto',
+        stream: true,
+        response_format: 'b64_json',
+      }),
+    })
+  } else if (mode === 'image') {
+    res = await fetch(buildApiUrl(provider.baseUrl, '/v1/responses'), {
+      method: 'POST',
+      headers: { ...headers, 'Content-Type': 'application/json' },
+      signal: 
AbortSignal.timeout(timeoutMs),
+      body: JSON.stringify({
+        model: body.model || provider.model || APIKEY_IMAGE_TO_IMAGE_MODEL,
+        stream: true,
+        input: [{
+          role: 'user',
+          content: [
+            { type: 'input_text', text: prompt },
+            { type: 'input_image', image_url: normalizeImageInput(body) },
+          ],
+        }],
+        tools: [{
+          type: 'image_generation',
+          model: body.image_model || APIKEY_IMAGE_MODEL,
+          size: body.size || '1024x1024',
+          quality: body.quality || 'auto',
+          output_format: body.output_format || 'png',
+        }],
+        tool_choice: { type: 'image_generation' },
+      }),
+    })
+  } else {
+    const image = await normalizeImageFile(body)
+    const imageBytes = new Uint8Array(image.buffer.byteLength)
+    imageBytes.set(image.buffer)
+    const form = new FormData()
+    form.append('image', new Blob([imageBytes.buffer], { type: image.mime }), image.name)
+    form.append('prompt', prompt)
+    form.append('model', body.model || APIKEY_IMAGE_MODEL)
+    form.append('n', String(n))
+    form.append('quality', body.quality || 'auto')
+    form.append('size', body.size || '1024x1024')
+    form.append('stream', 'true')
+    form.append('response_format', 'b64_json')
+    res = await fetch(buildApiUrl(provider.baseUrl, '/v1/images/edits'), {
+      method: 'POST',
+      headers,
+      signal: AbortSignal.timeout(timeoutMs),
+      body: form,
+    })
+  }
+
+  if (!res.ok) {
+    const detail = await res.text().catch(() => '')
+    const err: any = new Error(`image generation request failed: ${res.status} ${detail || res.statusText}`)
+    // Every provider failure, including 401/403, is reported to the client as 502; the provider
+    // status stays visible in the message. (The previous ternary had two identical branches.)
+    err.status = 502
+    throw err
+  }
+  const images = await readSseImageResults(res, n)
+  if (images.length === 0) {
+    const err: any = new Error('image generation stream ended without image data')
+    err.status = 502
+    throw err
+  }
+  return images
+}
+
+function saveGeneratedImages(images: string[], requestedOutputPath?: string): string[] {
+  return images.map((image, index) => {
+    const outputPath = requestedOutputPath && images.length === 1
+      ? 
requestedOutputPath
+      : requestedOutputPath
+        ? requestedOutputPath.replace(/(\.[^.\\/]+)?$/, `${index > 0 ? `-${index + 1}` : ''}$1`)
+        : defaultImageOutputPath(`image_${Date.now()}`, index)
+    mkdirSync(dirname(outputPath), { recursive: true })
+    writeFileSync(outputPath, Buffer.from(image, 'base64'))
+    return outputPath
+  })
+}
+
+/**
+ * Koa handler for image generation and editing through the `fun-codex` provider.
+ *
+ * Responds 401 with code `missing_fun_codex_provider` when no provider is resolved. On success
+ * the body lists the saved `output_paths`. On failure the status comes from the thrown error's
+ * `status`, falling back to 504 for timeouts/aborts and 500 otherwise.
+ */
+export async function apiKeyImageGenerate(ctx: Context) {
+  const provider = await resolveFunCodexProvider()
+  if (!provider) {
+    ctx.status = 401
+    ctx.body = {
+      error: 'Missing fun-codex provider in active profile config.yaml.',
+      code: 'missing_fun_codex_provider',
+    }
+    return
+  }
+
+  // A request without a parsed body would otherwise crash on `body.mode` and surface as a 500;
+  // with an empty object it fails validation with a 400 instead.
+  const body = (ctx.request.body ?? {}) as any
+  try {
+    const mode = normalizeImageMode(body.mode)
+    const images = await requestApiKeyImage(provider, mode, body)
+    // NOTE(review): output_path is caller-supplied and written as-is; confirm callers are trusted.
+    const requestedOutputPath = typeof body.output_path === 'string' ? body.output_path.trim() : ''
+    const outputPaths = saveGeneratedImages(images, requestedOutputPath || undefined)
+    ctx.body = {
+      ok: true,
+      mode,
+      output_paths: outputPaths,
+      provider: APIKEY_IMAGE_PROVIDER,
+      base_url: provider.baseUrl,
+    }
+  } catch (err: any) {
+    // AbortSignal.timeout rejects with a DOMException that has no `status` and a numeric legacy
+    // `code`, so map it to 504 and only pass string codes through to the response.
+    const timedOut = err?.name === 'TimeoutError' || err?.name === 'AbortError'
+    ctx.status = err?.status || (timedOut ? 504 : 500)
+    ctx.body = {
+      error: err?.message || String(err),
+      code: typeof err?.code === 'string' && err.code ? err.code : 'image_generation_failed',
+    }
+  }
+}
+
 async function requestXaiJson(url: string, token: string, init: RequestInit = {}): Promise { const res = await fetch(url, { ...init, diff --git a/packages/server/src/routes/hermes/media.ts b/packages/server/src/routes/hermes/media.ts index 1b56a09..f217e59 100644 --- a/packages/server/src/routes/hermes/media.ts +++ b/packages/server/src/routes/hermes/media.ts @@ -4,3 +4,4 @@ import * as ctrl from '../../controllers/hermes/media' export const mediaRoutes = new Router() mediaRoutes.post('/api/hermes/media/grok-image-to-video', ctrl.grokImageToVideo) +mediaRoutes.post('/api/hermes/media/apikey-image-generate', ctrl.apiKeyImageGenerate) diff --git a/packages/skills/apikey-image-gen/SKILL.md 
b/packages/skills/apikey-image-gen/SKILL.md new file mode 100644 index 0000000..8d53a84 --- /dev/null +++ b/packages/skills/apikey-image-gen/SKILL.md @@ -0,0 +1,166 @@ +--- +name: apikey-image-gen +description: "Generate or edit images through Hermes Web UI using the active profile's fun-codex provider from config.yaml." +version: 1.0.0 +author: Ekko +license: MIT +platforms: [linux, macos, windows, termux] +metadata: + hermes: + tags: [api.apikey.fun, image-generation, image-editing, media] +prerequisites: + commands: [curl] +--- + +# APIKEY Image Generation + +Use this skill when the user wants to generate an image, generate an image from a reference image, or edit an existing image. + +Always call Hermes Web UI's media endpoint. Do not call `api.apikey.fun` directly, and do not ask the user for an API key. The server reads the active profile's `config.yaml` and uses the `custom_providers` entry named `fun-codex`: + +```yaml +custom_providers: + - name: fun-codex + base_url: https://api.apikey.fun/v1 + api_key: ... + model: gpt-5.5 + api_mode: codex_responses +``` + +Endpoint: + +```bash +POST /api/hermes/media/apikey-image-generate +``` + +Resolve the Hermes Web UI base URL in this order: + +1. `HERMES_WEB_UI_URL` environment variable, if set. +2. `http://127.0.0.1:${PORT}`, if `PORT` is set. +3. `http://127.0.0.1:8648` for local development. + +When Hermes Web UI is running from Docker Compose, the default external URL is `http://127.0.0.1:6060`. + +Authentication: + +Send the Hermes Web UI bearer token. + +Resolve the token in this order: + +1. `AUTH_TOKEN` environment variable, if set. +2. `${HERMES_WEB_UI_HOME}/.token`, if `HERMES_WEB_UI_HOME` is set. +3. `${HERMES_WEBUI_STATE_DIR}/.token`, if `HERMES_WEBUI_STATE_DIR` is set. +4. `~/.hermes-web-ui/.token`. + +## Modes + +### Text To Image + +Use when there is no input image. 
+ +```json +{ + "mode": "text", + "prompt": "A high quality product image of a matte black mechanical keyboard on a clean desk", + "size": "1024x1024", + "output_path": "/absolute/path/to/output.png" +} +``` + +The server calls `POST /v1/images/generations` against the `fun-codex` base URL. + +### Image To Image + +Use when the user provides a reference image and wants a new image based on it. + +```json +{ + "mode": "image", + "prompt": "Use this reference composition and generate a refined technology brand poster", + "image_path": "/absolute/path/to/reference.png", + "size": "1024x1024", + "output_path": "/absolute/path/to/output.png" +} +``` + +The server calls `POST /v1/responses` against the `fun-codex` base URL. + +### Image Edit + +Use when the user wants to modify an existing image while preserving parts of it. + +```json +{ + "mode": "edit", + "prompt": "Change the background to blue and keep the subject unchanged", + "image_path": "/absolute/path/to/source.png", + "size": "1024x1024", + "output_path": "/absolute/path/to/edited.png" +} +``` + +The server calls `POST /v1/images/edits` against the `fun-codex` base URL. + +## Request Fields + +- `mode`: `text`, `image`, or `edit`. +- `prompt`: required. +- `image_path`: local png, jpeg, or webp path. Required for `image` and `edit` unless using `image_url` or `image_base64`. +- `image_url`: optional alternative image input. +- `image_base64`: optional alternative image input. If it is not a data URI, include `mime_type`. +- `n`: number of images. Defaults to `1`. +- `size`: defaults to `1024x1024`. Common values: `1024x1024`, `1536x1024`, `1024x1536`, `2048x2048`, `3840x2160`, `2160x3840`, `auto`. +- `quality`: defaults to `auto`. +- `model`: optional override. Text/edit default to `gpt-image-2`; image mode defaults to the `fun-codex` model in `config.yaml`. +- `image_model`: optional image tool model for image mode. Defaults to `gpt-image-2`. +- `output_path`: optional absolute output file path. 
If omitted, the server saves to `${HERMES_WEB_UI_HOME:-~/.hermes-web-ui}/media/*.png`. +- `timeout_ms`: defaults to `600000`. + +## Curl Template + +```bash +TOKEN="${AUTH_TOKEN:-}" +if [ -z "$TOKEN" ] && [ -n "${HERMES_WEB_UI_HOME:-}" ] && [ -f "$HERMES_WEB_UI_HOME/.token" ]; then + TOKEN="$(cat "$HERMES_WEB_UI_HOME/.token")" +fi +if [ -z "$TOKEN" ] && [ -n "${HERMES_WEBUI_STATE_DIR:-}" ] && [ -f "$HERMES_WEBUI_STATE_DIR/.token" ]; then + TOKEN="$(cat "$HERMES_WEBUI_STATE_DIR/.token")" +fi +if [ -z "$TOKEN" ] && [ -f "$HOME/.hermes-web-ui/.token" ]; then + TOKEN="$(cat "$HOME/.hermes-web-ui/.token")" +fi +if [ -z "$TOKEN" ]; then + echo "Missing Hermes Web UI token. Check AUTH_TOKEN, HERMES_WEB_UI_HOME, HERMES_WEBUI_STATE_DIR, or ~/.hermes-web-ui/.token." >&2 + exit 1 +fi + +BASE_URL="${HERMES_WEB_UI_URL:-}" +if [ -z "$BASE_URL" ]; then + BASE_URL="http://127.0.0.1:${PORT:-8648}" +fi +BASE_URL="${BASE_URL%/}" + +curl -sS -X POST "$BASE_URL/api/hermes/media/apikey-image-generate" \ + -H "Authorization: Bearer $TOKEN" \ + -H 'Content-Type: application/json' \ + -d '{ + "mode": "text", + "prompt": "A cinematic 4K photo of a silver robot hand holding a small glowing cube", + "size": "3840x2160", + "output_path": "/absolute/path/to/output.png" + }' +``` + +Successful responses include: + +```json +{ + "ok": true, + "mode": "text", + "output_paths": ["/absolute/path/to/output.png"], + "provider": "fun-codex", + "base_url": "https://api.apikey.fun/v1" +} +``` + +If the response code is `missing_fun_codex_provider`, tell the user to configure `fun-codex` in the active profile's `config.yaml`. 
diff --git a/tests/server/media-controller.test.ts b/tests/server/media-controller.test.ts index dc20083..fbd81e0 100644 --- a/tests/server/media-controller.test.ts +++ b/tests/server/media-controller.test.ts @@ -15,9 +15,11 @@ afterEach(() => { describe('media controller', () => { it('uses Hermes Web UI media directory as the default generated video output path', async () => { process.env.HERMES_WEB_UI_HOME = '/tmp/hermes-web-ui-test-home' - const { defaultMediaOutputPath } = await import('../../packages/server/src/controllers/hermes/media') + const { defaultImageOutputPath, defaultMediaOutputPath } = await import('../../packages/server/src/controllers/hermes/media') expect(defaultMediaOutputPath('req_123')).toBe(join('/tmp/hermes-web-ui-test-home', 'media', 'req_123.mp4')) expect(defaultMediaOutputPath('bad/request:id')).toBe(join('/tmp/hermes-web-ui-test-home', 'media', 'bad_request_id.mp4')) + expect(defaultImageOutputPath('img_123')).toBe(join('/tmp/hermes-web-ui-test-home', 'media', 'img_123.png')) + expect(defaultImageOutputPath('bad/request:id', 1)).toBe(join('/tmp/hermes-web-ui-test-home', 'media', 'bad_request_id-2.png')) }) })