From 00c6b9532cc62579e86b859dfc301d4f0c1525a1 Mon Sep 17 00:00:00 2001
From: Zhicheng Han <43314240+hanzckernel@users.noreply.github.com>
Date: Sat, 25 Apr 2026 12:57:22 +0200
Subject: [PATCH] fix: make context length lookup provider-aware (#207)

When a provider is configured, resolve the context length from
models_dev_cache.json only under that provider or one of its known cache
aliases (for example gemini -> google). A model that merely shares its
name with an entry under a different provider (for example an
openai-codex "gpt-5.5" versus the openai "gpt-5.5" entry) no longer
borrows that entry's context limit.

Configurations without a provider keep the legacy behavior: exact, then
case-insensitive, model-name lookup across all cached providers.
---
 .../src/services/hermes/model-context.ts      | 46 ++++++++--
 tests/server/model-context.test.ts            | 88 +++++++++++++++++++
 2 files changed, 128 insertions(+), 6 deletions(-)
 create mode 100644 tests/server/model-context.test.ts

diff --git a/packages/server/src/services/hermes/model-context.ts b/packages/server/src/services/hermes/model-context.ts
index 32cbd3e..afa493b 100644
--- a/packages/server/src/services/hermes/model-context.ts
+++ b/packages/server/src/services/hermes/model-context.ts
@@ -125,18 +125,52 @@ function lookupCustomProviderContextLength(config: any, modelName: string, provi
 
 // --- Context lookup ---
 
-function lookupContextFromCache(modelName: string): number | null {
+const CACHE_PROVIDER_ALIASES: Record<string, string[]> = {
+  gemini: ['google'],
+  moonshot: ['moonshotai'],
+  kilocode: ['kilo'],
+  'ai-gateway': ['vercel'],
+  'opencode-zen': ['opencode'],
+  'opencode-go': ['opencode'],
+  'glm-coding-plan': ['zai-coding-plan'],
+  'kimi-coding': ['kimi-for-coding'],
+  'kimi-coding-cn': ['kimi-for-coding'],
+}
+
+function getContextFromProvider(prov: ProviderEntry | undefined, modelName: string): number | null {
+  const models = prov?.models || {}
+  const exact = models[modelName]
+  if (exact?.limit?.context) return exact.limit.context
+
+  const lower = modelName.toLowerCase()
+  for (const [name, entry] of Object.entries(models)) {
+    if (name.toLowerCase() === lower && entry?.limit?.context) {
+      return entry.limit.context
+    }
+  }
+  return null
+}
+
+function lookupContextFromCache(modelName: string, provider: string | null): number | null {
   const data = loadModelsDevCache()
   if (!data) return null
 
-  // Exact match first
+  if (provider) {
+    const providers = [provider, ...(CACHE_PROVIDER_ALIASES[provider] || [])]
+    for (const providerName of providers) {
+      const ctx = getContextFromProvider(data[providerName], modelName)
+      if (ctx) return ctx
+    }
+    return null
+  }
+
+  // Legacy providerless lookup: exact model-name match across all providers.
   for (const prov of Object.values(data)) {
-    const models = prov.models || {}
-    const entry = models[modelName]
+    const entry = prov.models?.[modelName]
     if (entry?.limit?.context) return entry.limit.context
   }
 
-  // Case-insensitive fallback
+  // Legacy providerless case-insensitive fallback across all providers.
   const lower = modelName.toLowerCase()
   for (const prov of Object.values(data)) {
     const models = prov.models || {}
@@ -175,7 +209,7 @@ export function getModelContextLength(profile?: string): number {
   if (customCtx && customCtx > 0) return customCtx
 
   // 3. models_dev_cache.json
-  const cached = lookupContextFromCache(model)
+  const cached = lookupContextFromCache(model, provider)
   if (cached) return cached
 
   // 4. Fallback
diff --git a/tests/server/model-context.test.ts b/tests/server/model-context.test.ts
new file mode 100644
index 0000000..d0b763e
--- /dev/null
+++ b/tests/server/model-context.test.ts
@@ -0,0 +1,88 @@
+import { mkdirSync, writeFileSync } from 'fs'
+import { join } from 'path'
+import { tmpdir } from 'os'
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+
+function makeHome() {
+  const root = join(tmpdir(), `wui-model-context-${Date.now()}-${Math.random().toString(36).slice(2)}`)
+  const hermes = join(root, '.hermes')
+  mkdirSync(hermes, { recursive: true })
+  return { root, hermes }
+}
+
+function writeConfig(hermes: string, yaml: string) {
+  writeFileSync(join(hermes, 'config.yaml'), yaml)
+}
+
+function writeModelsCache(hermes: string) {
+  writeFileSync(join(hermes, 'models_dev_cache.json'), JSON.stringify({
+    openai: {
+      models: {
+        'gpt-5.5': { limit: { context: 1_050_000 } },
+        'gpt-5.4': { limit: { context: 1_050_000 } },
+      },
+    },
+    google: {
+      models: {
+        'gemini-3.1-pro-preview': { limit: { context: 1_000_000 } },
+      },
+    },
+  }))
+}
+
+async function importContextService(home: string) {
+  vi.resetModules()
+  vi.stubEnv('HOME', home)
+  return await import('../../packages/server/src/services/hermes/model-context')
+}
+
+describe('model context length resolution', () => {
+  beforeEach(() => {
+    vi.unstubAllEnvs()
+  })
+
+  afterEach(() => {
+    vi.unstubAllEnvs()
+    vi.resetModules()
+  })
+
+  it('does not borrow OpenAI context metadata for an openai-codex model with the same name', async () => {
+    const { root, hermes } = makeHome()
+    writeConfig(hermes, 'model:\n provider: openai-codex\n default: gpt-5.5\n')
+    writeModelsCache(hermes)
+
+    const { getModelContextLength } = await importContextService(root)
+
+    expect(getModelContextLength()).toBe(200_000)
+  })
+
+  it('still honors explicit model.context_length before provider-aware cache lookup', async () => {
+    const { root, hermes } = makeHome()
+    writeConfig(hermes, 'model:\n provider: openai-codex\n default: gpt-5.5\n context_length: 272000\n')
+    writeModelsCache(hermes)
+
+    const { getModelContextLength } = await importContextService(root)
+
+    expect(getModelContextLength()).toBe(272_000)
+  })
+
+  it('preserves providerless legacy lookup by model name', async () => {
+    const { root, hermes } = makeHome()
+    writeConfig(hermes, 'model:\n default: gpt-5.5\n')
+    writeModelsCache(hermes)
+
+    const { getModelContextLength } = await importContextService(root)
+
+    expect(getModelContextLength()).toBe(1_050_000)
+  })
+
+  it('uses intentional cache provider aliases without conflating openai-codex with openai', async () => {
+    const { root, hermes } = makeHome()
+    writeConfig(hermes, 'model:\n provider: gemini\n default: gemini-3.1-pro-preview\n')
+    writeModelsCache(hermes)
+
+    const { getModelContextLength } = await importContextService(root)
+
+    expect(getModelContextLength()).toBe(1_000_000)
+  })
+})