fix: make context length lookup provider-aware (#207)
This commit is contained in:
@@ -125,18 +125,52 @@ function lookupCustomProviderContextLength(config: any, modelName: string, provi
|
|||||||
|
|
||||||
// --- Context lookup ---
|
// --- Context lookup ---
|
||||||
|
|
||||||
function lookupContextFromCache(modelName: string): number | null {
|
/**
 * Additional cache provider keys to try for a configured provider name.
 *
 * The provider-aware lookup tries the configured provider name itself first
 * and then each alias listed here, in order. Providers without an entry are
 * looked up under their own name only; there is intentionally no
 * `openai-codex` -> `openai` alias, so Codex models do not inherit OpenAI
 * context metadata for a model that merely shares the same name.
 *
 * The table is built on a null-prototype object so that indexing it with an
 * arbitrary, user-configured provider string (e.g. `constructor`, `toString`,
 * `__proto__`) yields `undefined` instead of an inherited `Object.prototype`
 * member, which would otherwise be spread as if it were an alias list and throw.
 */
const CACHE_PROVIDER_ALIASES: Record<string, string[]> = Object.assign(Object.create(null), {
  gemini: ['google'],
  moonshot: ['moonshotai'],
  kilocode: ['kilo'],
  'ai-gateway': ['vercel'],
  'opencode-zen': ['opencode'],
  'opencode-go': ['opencode'],
  'glm-coding-plan': ['zai-coding-plan'],
  'kimi-coding': ['kimi-for-coding'],
  'kimi-coding-cn': ['kimi-for-coding'],
})
|
||||||
|
|
||||||
|
/**
 * Resolve a model's context length from a single provider entry.
 *
 * Lookup order:
 *   1. exact match on `modelName`;
 *   2. case-insensitive match over all model names of this provider.
 *
 * @param prov - Provider entry to search; `undefined` (unknown provider) or an
 *   entry without `models` is treated as having no models.
 * @param modelName - Model identifier as configured.
 * @returns The model's `limit.context` when it is a finite positive number,
 *   otherwise `null`.
 */
function getContextFromProvider(prov: ProviderEntry | undefined, modelName: string): number | null {
  const models = prov?.models || {}

  // The entries presumably originate from a JSON cache file on disk, so the
  // value is validated rather than trusted: only finite positive numbers count
  // as a usable context length (mirrors the `> 0` check applied to custom
  // context lengths by the caller).
  const usableContext = (value: unknown): number | null =>
    typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : null

  const exact = usableContext(models[modelName]?.limit?.context)
  if (exact !== null) return exact

  // Fall back to a case-insensitive scan; entries lacking a usable context are
  // skipped so a differently-cased sibling entry can still satisfy the lookup.
  const wanted = modelName.toLowerCase()
  for (const [name, entry] of Object.entries(models)) {
    if (name.toLowerCase() !== wanted) continue
    const ctx = usableContext(entry?.limit?.context)
    if (ctx !== null) return ctx
  }

  return null
}
|
||||||
|
|
||||||
|
function lookupContextFromCache(modelName: string, provider: string | null): number | null {
|
||||||
const data = loadModelsDevCache()
|
const data = loadModelsDevCache()
|
||||||
if (!data) return null
|
if (!data) return null
|
||||||
|
|
||||||
// Exact match first
|
if (provider) {
|
||||||
|
const providers = [provider, ...(CACHE_PROVIDER_ALIASES[provider] || [])]
|
||||||
|
for (const providerName of providers) {
|
||||||
|
const ctx = getContextFromProvider(data[providerName], modelName)
|
||||||
|
if (ctx) return ctx
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
// Legacy providerless lookup: exact model-name match across all providers.
|
||||||
for (const prov of Object.values(data)) {
|
for (const prov of Object.values(data)) {
|
||||||
const models = prov.models || {}
|
const entry = prov.models?.[modelName]
|
||||||
const entry = models[modelName]
|
|
||||||
if (entry?.limit?.context) return entry.limit.context
|
if (entry?.limit?.context) return entry.limit.context
|
||||||
}
|
}
|
||||||
|
|
||||||
// Case-insensitive fallback
|
// Legacy providerless case-insensitive fallback across all providers.
|
||||||
const lower = modelName.toLowerCase()
|
const lower = modelName.toLowerCase()
|
||||||
for (const prov of Object.values(data)) {
|
for (const prov of Object.values(data)) {
|
||||||
const models = prov.models || {}
|
const models = prov.models || {}
|
||||||
@@ -175,7 +209,7 @@ export function getModelContextLength(profile?: string): number {
|
|||||||
if (customCtx && customCtx > 0) return customCtx
|
if (customCtx && customCtx > 0) return customCtx
|
||||||
|
|
||||||
// 3. models_dev_cache.json
|
// 3. models_dev_cache.json
|
||||||
const cached = lookupContextFromCache(model)
|
const cached = lookupContextFromCache(model, provider)
|
||||||
if (cached) return cached
|
if (cached) return cached
|
||||||
|
|
||||||
// 4. Fallback
|
// 4. Fallback
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
|
/**
 * Create a fresh fake home directory containing an empty `.hermes` folder.
 *
 * @returns `root` (the fake home, located under the OS temp directory) and
 *   `hermes` (`<root>/.hermes`), both already created on disk.
 */
function makeHome() {
  // mkdtempSync appends a random suffix and creates the directory in one step,
  // so concurrent test workers can never end up sharing (or racing on) a
  // hand-built timestamp/random name.
  const root = mkdtempSync(join(tmpdir(), 'wui-model-context-'))
  const hermes = join(root, '.hermes')
  mkdirSync(hermes, { recursive: true })
  return { root, hermes }
}
|
||||||
|
|
||||||
|
/**
 * Write the given YAML text to `config.yaml` inside the `.hermes` directory.
 *
 * @param hermes - Path of the (already existing) `.hermes` directory.
 * @param yaml - Raw file contents, written verbatim.
 */
function writeConfig(hermes: string, yaml: string) {
  const configPath = join(hermes, 'config.yaml')
  writeFileSync(configPath, yaml)
}
|
||||||
|
|
||||||
|
/**
 * Write a small fixture `models_dev_cache.json` into the `.hermes` directory.
 *
 * The fixture has an `openai` provider with two GPT models and a `google`
 * provider with one Gemini model, each carrying only `limit.context`.
 *
 * @param hermes - Path of the (already existing) `.hermes` directory.
 */
function writeModelsCache(hermes: string) {
  const fixture = {
    openai: {
      models: {
        'gpt-5.5': { limit: { context: 1_050_000 } },
        'gpt-5.4': { limit: { context: 1_050_000 } },
      },
    },
    google: {
      models: {
        'gemini-3.1-pro-preview': { limit: { context: 1_000_000 } },
      },
    },
  }
  const cachePath = join(hermes, 'models_dev_cache.json')
  writeFileSync(cachePath, JSON.stringify(fixture))
}
|
||||||
|
|
||||||
|
/**
 * Load a fresh copy of the model-context service with `HOME` stubbed.
 *
 * The module registry is reset first and `HOME` is stubbed before the dynamic
 * import, so the service module is evaluated again for this call.
 * NOTE(review): presumably the service resolves `~/.hermes` from `HOME` — confirm.
 *
 * @param home - Directory to expose as `HOME` while the module loads.
 * @returns The freshly imported module namespace.
 */
async function importContextService(home: string) {
  vi.resetModules()
  vi.stubEnv('HOME', home)
  const contextService = await import('../../packages/server/src/services/hermes/model-context')
  return contextService
}
|
||||||
|
|
||||||
|
// Verifies how getModelContextLength resolves the context window from
// config.yaml and models_dev_cache.json inside a throwaway fake home.
describe('model context length resolution', () => {
  // Fake homes created by the current test; removed again in afterEach so the
  // suite does not leave directories behind in the OS temp folder.
  const createdRoots: string[] = []

  // Thin wrapper around makeHome that records the root for later cleanup.
  function makeTrackedHome() {
    const home = makeHome()
    createdRoots.push(home.root)
    return home
  }

  beforeEach(() => {
    vi.unstubAllEnvs()
  })

  afterEach(() => {
    vi.unstubAllEnvs()
    vi.resetModules()
    // splice(0) empties the list while iterating over what it contained.
    for (const root of createdRoots.splice(0)) {
      rmSync(root, { recursive: true, force: true })
    }
  })

  it('does not borrow OpenAI context metadata for an openai-codex model with the same name', async () => {
    const { root, hermes } = makeTrackedHome()
    writeConfig(hermes, 'model:\n provider: openai-codex\n default: gpt-5.5\n')
    writeModelsCache(hermes)

    const { getModelContextLength } = await importContextService(root)

    // The cache only knows gpt-5.5 under `openai`, so the result must not be 1_050_000.
    expect(getModelContextLength()).toBe(200_000)
  })

  it('still honors explicit model.context_length before provider-aware cache lookup', async () => {
    const { root, hermes } = makeTrackedHome()
    writeConfig(hermes, 'model:\n provider: openai-codex\n default: gpt-5.5\n context_length: 272000\n')
    writeModelsCache(hermes)

    const { getModelContextLength } = await importContextService(root)

    expect(getModelContextLength()).toBe(272_000)
  })

  it('preserves providerless legacy lookup by model name', async () => {
    const { root, hermes } = makeTrackedHome()
    writeConfig(hermes, 'model:\n default: gpt-5.5\n')
    writeModelsCache(hermes)

    const { getModelContextLength } = await importContextService(root)

    expect(getModelContextLength()).toBe(1_050_000)
  })

  it('uses intentional cache provider aliases without conflating openai-codex with openai', async () => {
    const { root, hermes } = makeTrackedHome()
    writeConfig(hermes, 'model:\n provider: gemini\n default: gemini-3.1-pro-preview\n')
    writeModelsCache(hermes)

    const { getModelContextLength } = await importContextService(root)

    // `gemini` is looked up under the `google` cache key.
    expect(getModelContextLength()).toBe(1_000_000)
  })
})
|
||||||
Reference in New Issue
Block a user