Update 2026-05-13 16:43:53
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
"""LLM provider abstraction module."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse
|
||||
|
||||
__all__ = [
|
||||
"LLMProvider",
|
||||
"LLMResponse",
|
||||
"AnthropicProvider",
|
||||
"OpenAICompatProvider",
|
||||
"OpenAICodexProvider",
|
||||
"AzureOpenAIProvider",
|
||||
]
|
||||
|
||||
_LAZY_IMPORTS = {
|
||||
"AnthropicProvider": ".anthropic_provider",
|
||||
"OpenAICompatProvider": ".openai_compat_provider",
|
||||
"OpenAICodexProvider": ".openai_codex_provider",
|
||||
"AzureOpenAIProvider": ".azure_openai_provider",
|
||||
}
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.providers.anthropic_provider import AnthropicProvider
|
||||
from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
|
||||
from nanobot.providers.openai_compat_provider import OpenAICompatProvider
|
||||
from nanobot.providers.openai_codex_provider import OpenAICodexProvider
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
"""Lazily expose provider implementations without importing all backends up front."""
|
||||
module_name = _LAZY_IMPORTS.get(name)
|
||||
if module_name is None:
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
module = import_module(module_name, __name__)
|
||||
return getattr(module, name)
|
||||
@@ -0,0 +1,441 @@
|
||||
"""Anthropic provider — direct SDK integration for Claude models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import secrets
|
||||
import string
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
import json_repair
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
|
||||
_ALNUM = string.ascii_letters + string.digits
|
||||
|
||||
|
||||
def _gen_tool_id() -> str:
|
||||
return "toolu_" + "".join(secrets.choice(_ALNUM) for _ in range(22))
|
||||
|
||||
|
||||
class AnthropicProvider(LLMProvider):
|
||||
"""LLM provider using the native Anthropic SDK for Claude models.
|
||||
|
||||
Handles message format conversion (OpenAI → Anthropic Messages API),
|
||||
prompt caching, extended thinking, tool calls, and streaming.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
api_base: str | None = None,
|
||||
default_model: str = "claude-sonnet-4-20250514",
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
):
|
||||
super().__init__(api_key, api_base)
|
||||
self.default_model = default_model
|
||||
self.extra_headers = extra_headers or {}
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
client_kw: dict[str, Any] = {}
|
||||
if api_key:
|
||||
client_kw["api_key"] = api_key
|
||||
if api_base:
|
||||
client_kw["base_url"] = api_base
|
||||
if extra_headers:
|
||||
client_kw["default_headers"] = extra_headers
|
||||
self._client = AsyncAnthropic(**client_kw)
|
||||
|
||||
@staticmethod
|
||||
def _strip_prefix(model: str) -> str:
|
||||
if model.startswith("anthropic/"):
|
||||
return model[len("anthropic/"):]
|
||||
return model
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Message conversion: OpenAI chat format → Anthropic Messages API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _convert_messages(
|
||||
self, messages: list[dict[str, Any]],
|
||||
) -> tuple[str | list[dict[str, Any]], list[dict[str, Any]]]:
|
||||
"""Return ``(system, anthropic_messages)``."""
|
||||
system: str | list[dict[str, Any]] = ""
|
||||
raw: list[dict[str, Any]] = []
|
||||
|
||||
for msg in messages:
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content")
|
||||
|
||||
if role == "system":
|
||||
system = content if isinstance(content, (str, list)) else str(content or "")
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
block = self._tool_result_block(msg)
|
||||
if raw and raw[-1]["role"] == "user":
|
||||
prev_c = raw[-1]["content"]
|
||||
if isinstance(prev_c, list):
|
||||
prev_c.append(block)
|
||||
else:
|
||||
raw[-1]["content"] = [
|
||||
{"type": "text", "text": prev_c or ""}, block,
|
||||
]
|
||||
else:
|
||||
raw.append({"role": "user", "content": [block]})
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
raw.append({"role": "assistant", "content": self._assistant_blocks(msg)})
|
||||
continue
|
||||
|
||||
if role == "user":
|
||||
raw.append({
|
||||
"role": "user",
|
||||
"content": self._convert_user_content(content),
|
||||
})
|
||||
continue
|
||||
|
||||
return system, self._merge_consecutive(raw)
|
||||
|
||||
@staticmethod
|
||||
def _tool_result_block(msg: dict[str, Any]) -> dict[str, Any]:
|
||||
content = msg.get("content")
|
||||
block: dict[str, Any] = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": msg.get("tool_call_id", ""),
|
||||
}
|
||||
if isinstance(content, (str, list)):
|
||||
block["content"] = content
|
||||
else:
|
||||
block["content"] = str(content) if content else ""
|
||||
return block
|
||||
|
||||
@staticmethod
|
||||
def _assistant_blocks(msg: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
blocks: list[dict[str, Any]] = []
|
||||
content = msg.get("content")
|
||||
|
||||
for tb in msg.get("thinking_blocks") or []:
|
||||
if isinstance(tb, dict) and tb.get("type") == "thinking":
|
||||
blocks.append({
|
||||
"type": "thinking",
|
||||
"thinking": tb.get("thinking", ""),
|
||||
"signature": tb.get("signature", ""),
|
||||
})
|
||||
|
||||
if isinstance(content, str) and content:
|
||||
blocks.append({"type": "text", "text": content})
|
||||
elif isinstance(content, list):
|
||||
for item in content:
|
||||
blocks.append(item if isinstance(item, dict) else {"type": "text", "text": str(item)})
|
||||
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if not isinstance(tc, dict):
|
||||
continue
|
||||
func = tc.get("function", {})
|
||||
args = func.get("arguments", "{}")
|
||||
if isinstance(args, str):
|
||||
args = json_repair.loads(args)
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": tc.get("id") or _gen_tool_id(),
|
||||
"name": func.get("name", ""),
|
||||
"input": args,
|
||||
})
|
||||
|
||||
return blocks or [{"type": "text", "text": ""}]
|
||||
|
||||
def _convert_user_content(self, content: Any) -> Any:
|
||||
"""Convert user message content, translating image_url blocks."""
|
||||
if isinstance(content, str) or content is None:
|
||||
return content or "(empty)"
|
||||
if not isinstance(content, list):
|
||||
return str(content)
|
||||
|
||||
result: list[dict[str, Any]] = []
|
||||
for item in content:
|
||||
if not isinstance(item, dict):
|
||||
result.append({"type": "text", "text": str(item)})
|
||||
continue
|
||||
if item.get("type") == "image_url":
|
||||
converted = self._convert_image_block(item)
|
||||
if converted:
|
||||
result.append(converted)
|
||||
continue
|
||||
result.append(item)
|
||||
return result or "(empty)"
|
||||
|
||||
@staticmethod
|
||||
def _convert_image_block(block: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Convert OpenAI image_url block to Anthropic image block."""
|
||||
url = (block.get("image_url") or {}).get("url", "")
|
||||
if not url:
|
||||
return None
|
||||
m = re.match(r"data:(image/\w+);base64,(.+)", url, re.DOTALL)
|
||||
if m:
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {"type": "base64", "media_type": m.group(1), "data": m.group(2)},
|
||||
}
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {"type": "url", "url": url},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _merge_consecutive(msgs: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Anthropic requires alternating user/assistant roles."""
|
||||
merged: list[dict[str, Any]] = []
|
||||
for msg in msgs:
|
||||
if merged and merged[-1]["role"] == msg["role"]:
|
||||
prev_c = merged[-1]["content"]
|
||||
cur_c = msg["content"]
|
||||
if isinstance(prev_c, str):
|
||||
prev_c = [{"type": "text", "text": prev_c}]
|
||||
if isinstance(cur_c, str):
|
||||
cur_c = [{"type": "text", "text": cur_c}]
|
||||
if isinstance(cur_c, list):
|
||||
prev_c.extend(cur_c)
|
||||
merged[-1]["content"] = prev_c
|
||||
else:
|
||||
merged.append(msg)
|
||||
return merged
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool definition conversion
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _convert_tools(tools: list[dict[str, Any]] | None) -> list[dict[str, Any]] | None:
|
||||
if not tools:
|
||||
return None
|
||||
result = []
|
||||
for tool in tools:
|
||||
func = tool.get("function", tool)
|
||||
entry: dict[str, Any] = {
|
||||
"name": func.get("name", ""),
|
||||
"input_schema": func.get("parameters", {"type": "object", "properties": {}}),
|
||||
}
|
||||
desc = func.get("description")
|
||||
if desc:
|
||||
entry["description"] = desc
|
||||
if "cache_control" in tool:
|
||||
entry["cache_control"] = tool["cache_control"]
|
||||
result.append(entry)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _convert_tool_choice(
|
||||
tool_choice: str | dict[str, Any] | None,
|
||||
thinking_enabled: bool = False,
|
||||
) -> dict[str, Any] | None:
|
||||
if thinking_enabled:
|
||||
return {"type": "auto"}
|
||||
if tool_choice is None or tool_choice == "auto":
|
||||
return {"type": "auto"}
|
||||
if tool_choice == "required":
|
||||
return {"type": "any"}
|
||||
if tool_choice == "none":
|
||||
return None
|
||||
if isinstance(tool_choice, dict):
|
||||
name = tool_choice.get("function", {}).get("name")
|
||||
if name:
|
||||
return {"type": "tool", "name": name}
|
||||
return {"type": "auto"}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Prompt caching
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _apply_cache_control(
|
||||
system: str | list[dict[str, Any]],
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
) -> tuple[str | list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]] | None]:
|
||||
marker = {"type": "ephemeral"}
|
||||
|
||||
if isinstance(system, str) and system:
|
||||
system = [{"type": "text", "text": system, "cache_control": marker}]
|
||||
elif isinstance(system, list) and system:
|
||||
system = list(system)
|
||||
system[-1] = {**system[-1], "cache_control": marker}
|
||||
|
||||
new_msgs = list(messages)
|
||||
if len(new_msgs) >= 3:
|
||||
m = new_msgs[-2]
|
||||
c = m.get("content")
|
||||
if isinstance(c, str):
|
||||
new_msgs[-2] = {**m, "content": [{"type": "text", "text": c, "cache_control": marker}]}
|
||||
elif isinstance(c, list) and c:
|
||||
nc = list(c)
|
||||
nc[-1] = {**nc[-1], "cache_control": marker}
|
||||
new_msgs[-2] = {**m, "content": nc}
|
||||
|
||||
new_tools = tools
|
||||
if tools:
|
||||
new_tools = list(tools)
|
||||
new_tools[-1] = {**new_tools[-1], "cache_control": marker}
|
||||
|
||||
return system, new_msgs, new_tools
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Build API kwargs
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_kwargs(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
model: str | None,
|
||||
max_tokens: int,
|
||||
temperature: float,
|
||||
reasoning_effort: str | None,
|
||||
tool_choice: str | dict[str, Any] | None,
|
||||
supports_caching: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
model_name = self._strip_prefix(model or self.default_model)
|
||||
system, anthropic_msgs = self._convert_messages(self._sanitize_empty_content(messages))
|
||||
anthropic_tools = self._convert_tools(tools)
|
||||
|
||||
if supports_caching:
|
||||
system, anthropic_msgs, anthropic_tools = self._apply_cache_control(
|
||||
system, anthropic_msgs, anthropic_tools,
|
||||
)
|
||||
|
||||
max_tokens = max(1, max_tokens)
|
||||
thinking_enabled = bool(reasoning_effort)
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model_name,
|
||||
"messages": anthropic_msgs,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
|
||||
if system:
|
||||
kwargs["system"] = system
|
||||
|
||||
if thinking_enabled:
|
||||
budget_map = {"low": 1024, "medium": 4096, "high": max(8192, max_tokens)}
|
||||
budget = budget_map.get(reasoning_effort.lower(), 4096) # type: ignore[union-attr]
|
||||
kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
|
||||
kwargs["max_tokens"] = max(max_tokens, budget + 4096)
|
||||
kwargs["temperature"] = 1.0
|
||||
else:
|
||||
kwargs["temperature"] = temperature
|
||||
|
||||
if anthropic_tools:
|
||||
kwargs["tools"] = anthropic_tools
|
||||
tc = self._convert_tool_choice(tool_choice, thinking_enabled)
|
||||
if tc:
|
||||
kwargs["tool_choice"] = tc
|
||||
|
||||
if self.extra_headers:
|
||||
kwargs["extra_headers"] = self.extra_headers
|
||||
|
||||
return kwargs
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Response parsing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _parse_response(response: Any) -> LLMResponse:
|
||||
content_parts: list[str] = []
|
||||
tool_calls: list[ToolCallRequest] = []
|
||||
thinking_blocks: list[dict[str, Any]] = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
content_parts.append(block.text)
|
||||
elif block.type == "tool_use":
|
||||
tool_calls.append(ToolCallRequest(
|
||||
id=block.id,
|
||||
name=block.name,
|
||||
arguments=block.input if isinstance(block.input, dict) else {},
|
||||
))
|
||||
elif block.type == "thinking":
|
||||
thinking_blocks.append({
|
||||
"type": "thinking",
|
||||
"thinking": block.thinking,
|
||||
"signature": getattr(block, "signature", ""),
|
||||
})
|
||||
|
||||
stop_map = {"tool_use": "tool_calls", "end_turn": "stop", "max_tokens": "length"}
|
||||
finish_reason = stop_map.get(response.stop_reason or "", response.stop_reason or "stop")
|
||||
|
||||
usage: dict[str, int] = {}
|
||||
if response.usage:
|
||||
usage = {
|
||||
"prompt_tokens": response.usage.input_tokens,
|
||||
"completion_tokens": response.usage.output_tokens,
|
||||
"total_tokens": response.usage.input_tokens + response.usage.output_tokens,
|
||||
}
|
||||
for attr in ("cache_creation_input_tokens", "cache_read_input_tokens"):
|
||||
val = getattr(response.usage, attr, 0)
|
||||
if val:
|
||||
usage[attr] = val
|
||||
|
||||
return LLMResponse(
|
||||
content="".join(content_parts) or None,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
thinking_blocks=thinking_blocks or None,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
kwargs = self._build_kwargs(
|
||||
messages, tools, model, max_tokens, temperature,
|
||||
reasoning_effort, tool_choice,
|
||||
)
|
||||
try:
|
||||
response = await self._client.messages.create(**kwargs)
|
||||
return self._parse_response(response)
|
||||
except Exception as e:
|
||||
return LLMResponse(content=f"Error calling LLM: {e}", finish_reason="error")
|
||||
|
||||
async def chat_stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
kwargs = self._build_kwargs(
|
||||
messages, tools, model, max_tokens, temperature,
|
||||
reasoning_effort, tool_choice,
|
||||
)
|
||||
try:
|
||||
async with self._client.messages.stream(**kwargs) as stream:
|
||||
if on_content_delta:
|
||||
async for text in stream.text_stream:
|
||||
await on_content_delta(text)
|
||||
response = await stream.get_final_message()
|
||||
return self._parse_response(response)
|
||||
except Exception as e:
|
||||
return LLMResponse(content=f"Error calling LLM: {e}", finish_reason="error")
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return self.default_model
|
||||
@@ -0,0 +1,309 @@
|
||||
"""Azure OpenAI provider implementation with API version 2024-10-21."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import httpx
|
||||
import json_repair
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
|
||||
_AZURE_MSG_KEYS = frozenset({"role", "content", "tool_calls", "tool_call_id", "name"})
|
||||
|
||||
|
||||
class AzureOpenAIProvider(LLMProvider):
|
||||
"""
|
||||
Azure OpenAI provider with API version 2024-10-21 compliance.
|
||||
|
||||
Features:
|
||||
- Hardcoded API version 2024-10-21
|
||||
- Uses model field as Azure deployment name in URL path
|
||||
- Uses api-key header instead of Authorization Bearer
|
||||
- Uses max_completion_tokens instead of max_tokens
|
||||
- Direct HTTP calls, bypasses LiteLLM
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str = "",
|
||||
api_base: str = "",
|
||||
default_model: str = "gpt-5.2-chat",
|
||||
):
|
||||
super().__init__(api_key, api_base)
|
||||
self.default_model = default_model
|
||||
self.api_version = "2024-10-21"
|
||||
|
||||
# Validate required parameters
|
||||
if not api_key:
|
||||
raise ValueError("Azure OpenAI api_key is required")
|
||||
if not api_base:
|
||||
raise ValueError("Azure OpenAI api_base is required")
|
||||
|
||||
# Ensure api_base ends with /
|
||||
if not api_base.endswith('/'):
|
||||
api_base += '/'
|
||||
self.api_base = api_base
|
||||
|
||||
def _build_chat_url(self, deployment_name: str) -> str:
|
||||
"""Build the Azure OpenAI chat completions URL."""
|
||||
# Azure OpenAI URL format:
|
||||
# https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions?api-version={version}
|
||||
base_url = self.api_base
|
||||
if not base_url.endswith('/'):
|
||||
base_url += '/'
|
||||
|
||||
url = urljoin(
|
||||
base_url,
|
||||
f"openai/deployments/{deployment_name}/chat/completions"
|
||||
)
|
||||
return f"{url}?api-version={self.api_version}"
|
||||
|
||||
def _build_headers(self) -> dict[str, str]:
|
||||
"""Build headers for Azure OpenAI API with api-key header."""
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"api-key": self.api_key, # Azure OpenAI uses api-key header, not Authorization
|
||||
"x-session-affinity": uuid.uuid4().hex, # For cache locality
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _supports_temperature(
|
||||
deployment_name: str,
|
||||
reasoning_effort: str | None = None,
|
||||
) -> bool:
|
||||
"""Return True when temperature is likely supported for this deployment."""
|
||||
if reasoning_effort:
|
||||
return False
|
||||
name = deployment_name.lower()
|
||||
return not any(token in name for token in ("gpt-5", "o1", "o3", "o4"))
|
||||
|
||||
def _prepare_request_payload(
|
||||
self,
|
||||
deployment_name: str,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Prepare the request payload with Azure OpenAI 2024-10-21 compliance."""
|
||||
payload: dict[str, Any] = {
|
||||
"messages": self._sanitize_request_messages(
|
||||
self._sanitize_empty_content(messages),
|
||||
_AZURE_MSG_KEYS,
|
||||
),
|
||||
"max_completion_tokens": max(1, max_tokens), # Azure API 2024-10-21 uses max_completion_tokens
|
||||
}
|
||||
|
||||
if self._supports_temperature(deployment_name, reasoning_effort):
|
||||
payload["temperature"] = temperature
|
||||
|
||||
if reasoning_effort:
|
||||
payload["reasoning_effort"] = reasoning_effort
|
||||
|
||||
if tools:
|
||||
payload["tools"] = tools
|
||||
payload["tool_choice"] = tool_choice or "auto"
|
||||
|
||||
return payload
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Send a chat completion request to Azure OpenAI.
|
||||
|
||||
Args:
|
||||
messages: List of message dicts with 'role' and 'content'.
|
||||
tools: Optional list of tool definitions in OpenAI format.
|
||||
model: Model identifier (used as deployment name).
|
||||
max_tokens: Maximum tokens in response (mapped to max_completion_tokens).
|
||||
temperature: Sampling temperature.
|
||||
reasoning_effort: Optional reasoning effort parameter.
|
||||
|
||||
Returns:
|
||||
LLMResponse with content and/or tool calls.
|
||||
"""
|
||||
deployment_name = model or self.default_model
|
||||
url = self._build_chat_url(deployment_name)
|
||||
headers = self._build_headers()
|
||||
payload = self._prepare_request_payload(
|
||||
deployment_name, messages, tools, max_tokens, temperature, reasoning_effort,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0, verify=True) as client:
|
||||
response = await client.post(url, headers=headers, json=payload)
|
||||
if response.status_code != 200:
|
||||
return LLMResponse(
|
||||
content=f"Azure OpenAI API Error {response.status_code}: {response.text}",
|
||||
finish_reason="error",
|
||||
)
|
||||
|
||||
response_data = response.json()
|
||||
return self._parse_response(response_data)
|
||||
|
||||
except Exception as e:
|
||||
return LLMResponse(
|
||||
content=f"Error calling Azure OpenAI: {repr(e)}",
|
||||
finish_reason="error",
|
||||
)
|
||||
|
||||
def _parse_response(self, response: dict[str, Any]) -> LLMResponse:
|
||||
"""Parse Azure OpenAI response into our standard format."""
|
||||
try:
|
||||
choice = response["choices"][0]
|
||||
message = choice["message"]
|
||||
|
||||
tool_calls = []
|
||||
if message.get("tool_calls"):
|
||||
for tc in message["tool_calls"]:
|
||||
# Parse arguments from JSON string if needed
|
||||
args = tc["function"]["arguments"]
|
||||
if isinstance(args, str):
|
||||
args = json_repair.loads(args)
|
||||
|
||||
tool_calls.append(
|
||||
ToolCallRequest(
|
||||
id=tc["id"],
|
||||
name=tc["function"]["name"],
|
||||
arguments=args,
|
||||
)
|
||||
)
|
||||
|
||||
usage = {}
|
||||
if response.get("usage"):
|
||||
usage_data = response["usage"]
|
||||
usage = {
|
||||
"prompt_tokens": usage_data.get("prompt_tokens", 0),
|
||||
"completion_tokens": usage_data.get("completion_tokens", 0),
|
||||
"total_tokens": usage_data.get("total_tokens", 0),
|
||||
}
|
||||
|
||||
reasoning_content = message.get("reasoning_content") or None
|
||||
|
||||
return LLMResponse(
|
||||
content=message.get("content"),
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=choice.get("finish_reason", "stop"),
|
||||
usage=usage,
|
||||
reasoning_content=reasoning_content,
|
||||
)
|
||||
|
||||
except (KeyError, IndexError) as e:
|
||||
return LLMResponse(
|
||||
content=f"Error parsing Azure OpenAI response: {str(e)}",
|
||||
finish_reason="error",
|
||||
)
|
||||
|
||||
async def chat_stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Stream a chat completion via Azure OpenAI SSE."""
|
||||
deployment_name = model or self.default_model
|
||||
url = self._build_chat_url(deployment_name)
|
||||
headers = self._build_headers()
|
||||
payload = self._prepare_request_payload(
|
||||
deployment_name, messages, tools, max_tokens, temperature,
|
||||
reasoning_effort, tool_choice=tool_choice,
|
||||
)
|
||||
payload["stream"] = True
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0, verify=True) as client:
|
||||
async with client.stream("POST", url, headers=headers, json=payload) as response:
|
||||
if response.status_code != 200:
|
||||
text = await response.aread()
|
||||
return LLMResponse(
|
||||
content=f"Azure OpenAI API Error {response.status_code}: {text.decode('utf-8', 'ignore')}",
|
||||
finish_reason="error",
|
||||
)
|
||||
return await self._consume_stream(response, on_content_delta)
|
||||
except Exception as e:
|
||||
return LLMResponse(content=f"Error calling Azure OpenAI: {repr(e)}", finish_reason="error")
|
||||
|
||||
async def _consume_stream(
|
||||
self,
|
||||
response: httpx.Response,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None,
|
||||
) -> LLMResponse:
|
||||
"""Parse Azure OpenAI SSE stream into an LLMResponse."""
|
||||
content_parts: list[str] = []
|
||||
tool_call_buffers: dict[int, dict[str, str]] = {}
|
||||
finish_reason = "stop"
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:].strip()
|
||||
if data == "[DONE]":
|
||||
break
|
||||
try:
|
||||
chunk = json.loads(data)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
choices = chunk.get("choices") or []
|
||||
if not choices:
|
||||
continue
|
||||
choice = choices[0]
|
||||
if choice.get("finish_reason"):
|
||||
finish_reason = choice["finish_reason"]
|
||||
delta = choice.get("delta") or {}
|
||||
|
||||
text = delta.get("content")
|
||||
if text:
|
||||
content_parts.append(text)
|
||||
if on_content_delta:
|
||||
await on_content_delta(text)
|
||||
|
||||
for tc in delta.get("tool_calls") or []:
|
||||
idx = tc.get("index", 0)
|
||||
buf = tool_call_buffers.setdefault(idx, {"id": "", "name": "", "arguments": ""})
|
||||
if tc.get("id"):
|
||||
buf["id"] = tc["id"]
|
||||
fn = tc.get("function") or {}
|
||||
if fn.get("name"):
|
||||
buf["name"] = fn["name"]
|
||||
if fn.get("arguments"):
|
||||
buf["arguments"] += fn["arguments"]
|
||||
|
||||
tool_calls = [
|
||||
ToolCallRequest(
|
||||
id=buf["id"], name=buf["name"],
|
||||
arguments=json_repair.loads(buf["arguments"]) if buf["arguments"] else {},
|
||||
)
|
||||
for buf in tool_call_buffers.values()
|
||||
]
|
||||
|
||||
return LLMResponse(
|
||||
content="".join(content_parts) or None,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
"""Get the default model (also used as default deployment name)."""
|
||||
return self.default_model
|
||||
@@ -0,0 +1,368 @@
|
||||
"""Base LLM provider interface."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolCallRequest:
|
||||
"""A tool call request from the LLM."""
|
||||
id: str
|
||||
name: str
|
||||
arguments: dict[str, Any]
|
||||
extra_content: dict[str, Any] | None = None
|
||||
provider_specific_fields: dict[str, Any] | None = None
|
||||
function_provider_specific_fields: dict[str, Any] | None = None
|
||||
|
||||
def to_openai_tool_call(self) -> dict[str, Any]:
|
||||
"""Serialize to an OpenAI-style tool_call payload."""
|
||||
tool_call = {
|
||||
"id": self.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": self.name,
|
||||
"arguments": json.dumps(self.arguments, ensure_ascii=False),
|
||||
},
|
||||
}
|
||||
if self.extra_content:
|
||||
tool_call["extra_content"] = self.extra_content
|
||||
if self.provider_specific_fields:
|
||||
tool_call["provider_specific_fields"] = self.provider_specific_fields
|
||||
if self.function_provider_specific_fields:
|
||||
tool_call["function"]["provider_specific_fields"] = self.function_provider_specific_fields
|
||||
return tool_call
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMResponse:
|
||||
"""Response from an LLM provider."""
|
||||
content: str | None
|
||||
tool_calls: list[ToolCallRequest] = field(default_factory=list)
|
||||
finish_reason: str = "stop"
|
||||
usage: dict[str, int] = field(default_factory=dict)
|
||||
reasoning_content: str | None = None # Kimi, DeepSeek-R1 etc.
|
||||
thinking_blocks: list[dict] | None = None # Anthropic extended thinking
|
||||
|
||||
@property
|
||||
def has_tool_calls(self) -> bool:
|
||||
"""Check if response contains tool calls."""
|
||||
return len(self.tool_calls) > 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GenerationSettings:
|
||||
"""Default generation parameters for LLM calls.
|
||||
|
||||
Stored on the provider so every call site inherits the same defaults
|
||||
without having to pass temperature / max_tokens / reasoning_effort
|
||||
through every layer. Individual call sites can still override by
|
||||
passing explicit keyword arguments to chat() / chat_with_retry().
|
||||
"""
|
||||
|
||||
temperature: float = 0.7
|
||||
max_tokens: int = 4096
|
||||
reasoning_effort: str | None = None
|
||||
|
||||
|
||||
class LLMProvider(ABC):
|
||||
"""
|
||||
Abstract base class for LLM providers.
|
||||
|
||||
Implementations should handle the specifics of each provider's API
|
||||
while maintaining a consistent interface.
|
||||
"""
|
||||
|
||||
_CHAT_RETRY_DELAYS = (1, 2, 4)
|
||||
_TRANSIENT_ERROR_MARKERS = (
|
||||
"429",
|
||||
"rate limit",
|
||||
"500",
|
||||
"502",
|
||||
"503",
|
||||
"504",
|
||||
"overloaded",
|
||||
"timeout",
|
||||
"timed out",
|
||||
"connection",
|
||||
"server error",
|
||||
"temporarily unavailable",
|
||||
)
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
def __init__(self, api_key: str | None = None, api_base: str | None = None):
|
||||
self.api_key = api_key
|
||||
self.api_base = api_base
|
||||
self.generation: GenerationSettings = GenerationSettings()
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_empty_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Sanitize message content: fix empty blocks, strip internal _meta fields."""
|
||||
result: list[dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
content = msg.get("content")
|
||||
|
||||
if isinstance(content, str) and not content:
|
||||
clean = dict(msg)
|
||||
clean["content"] = None if (msg.get("role") == "assistant" and msg.get("tool_calls")) else "(empty)"
|
||||
result.append(clean)
|
||||
continue
|
||||
|
||||
if isinstance(content, list):
|
||||
new_items: list[Any] = []
|
||||
changed = False
|
||||
for item in content:
|
||||
if (
|
||||
isinstance(item, dict)
|
||||
and item.get("type") in ("text", "input_text", "output_text")
|
||||
and not item.get("text")
|
||||
):
|
||||
changed = True
|
||||
continue
|
||||
if isinstance(item, dict) and "_meta" in item:
|
||||
new_items.append({k: v for k, v in item.items() if k != "_meta"})
|
||||
changed = True
|
||||
else:
|
||||
new_items.append(item)
|
||||
if changed:
|
||||
clean = dict(msg)
|
||||
if new_items:
|
||||
clean["content"] = new_items
|
||||
elif msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
clean["content"] = None
|
||||
else:
|
||||
clean["content"] = "(empty)"
|
||||
result.append(clean)
|
||||
continue
|
||||
|
||||
if isinstance(content, dict):
|
||||
clean = dict(msg)
|
||||
clean["content"] = [content]
|
||||
result.append(clean)
|
||||
continue
|
||||
|
||||
result.append(msg)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_request_messages(
|
||||
messages: list[dict[str, Any]],
|
||||
allowed_keys: frozenset[str],
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Keep only provider-safe message keys and normalize assistant content."""
|
||||
sanitized = []
|
||||
for msg in messages:
|
||||
clean = {k: v for k, v in msg.items() if k in allowed_keys}
|
||||
if clean.get("role") == "assistant" and "content" not in clean:
|
||||
clean["content"] = None
|
||||
sanitized.append(clean)
|
||||
return sanitized
|
||||
|
||||
@abstractmethod
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""
|
||||
Send a chat completion request.
|
||||
|
||||
Args:
|
||||
messages: List of message dicts with 'role' and 'content'.
|
||||
tools: Optional list of tool definitions.
|
||||
model: Model identifier (provider-specific).
|
||||
max_tokens: Maximum tokens in response.
|
||||
temperature: Sampling temperature.
|
||||
tool_choice: Tool selection strategy ("auto", "required", or specific tool dict).
|
||||
|
||||
Returns:
|
||||
LLMResponse with content and/or tool calls.
|
||||
"""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def _is_transient_error(cls, content: str | None) -> bool:
|
||||
err = (content or "").lower()
|
||||
return any(marker in err for marker in cls._TRANSIENT_ERROR_MARKERS)
|
||||
|
||||
@staticmethod
|
||||
def _strip_image_content(messages: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
|
||||
"""Replace image_url blocks with text placeholder. Returns None if no images found."""
|
||||
found = False
|
||||
result = []
|
||||
for msg in messages:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
new_content = []
|
||||
for b in content:
|
||||
if isinstance(b, dict) and b.get("type") == "image_url":
|
||||
path = (b.get("_meta") or {}).get("path", "")
|
||||
placeholder = f"[image: {path}]" if path else "[image omitted]"
|
||||
new_content.append({"type": "text", "text": placeholder})
|
||||
found = True
|
||||
else:
|
||||
new_content.append(b)
|
||||
result.append({**msg, "content": new_content})
|
||||
else:
|
||||
result.append(msg)
|
||||
return result if found else None
|
||||
|
||||
async def _safe_chat(self, **kwargs: Any) -> LLMResponse:
|
||||
"""Call chat() and convert unexpected exceptions to error responses."""
|
||||
try:
|
||||
return await self.chat(**kwargs)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
return LLMResponse(content=f"Error calling LLM: {exc}", finish_reason="error")
|
||||
|
||||
async def chat_stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Stream a chat completion, calling *on_content_delta* for each text chunk.
|
||||
|
||||
Returns the same ``LLMResponse`` as :meth:`chat`. The default
|
||||
implementation falls back to a non-streaming call and delivers the
|
||||
full content as a single delta. Providers that support native
|
||||
streaming should override this method.
|
||||
"""
|
||||
response = await self.chat(
|
||||
messages=messages, tools=tools, model=model,
|
||||
max_tokens=max_tokens, temperature=temperature,
|
||||
reasoning_effort=reasoning_effort, tool_choice=tool_choice,
|
||||
)
|
||||
if on_content_delta and response.content:
|
||||
await on_content_delta(response.content)
|
||||
return response
|
||||
|
||||
async def _safe_chat_stream(self, **kwargs: Any) -> LLMResponse:
|
||||
"""Call chat_stream() and convert unexpected exceptions to error responses."""
|
||||
try:
|
||||
return await self.chat_stream(**kwargs)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
return LLMResponse(content=f"Error calling LLM: {exc}", finish_reason="error")
|
||||
|
||||
async def chat_stream_with_retry(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: object = _SENTINEL,
|
||||
temperature: object = _SENTINEL,
|
||||
reasoning_effort: object = _SENTINEL,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Call chat_stream() with retry on transient provider failures."""
|
||||
if max_tokens is self._SENTINEL:
|
||||
max_tokens = self.generation.max_tokens
|
||||
if temperature is self._SENTINEL:
|
||||
temperature = self.generation.temperature
|
||||
if reasoning_effort is self._SENTINEL:
|
||||
reasoning_effort = self.generation.reasoning_effort
|
||||
|
||||
kw: dict[str, Any] = dict(
|
||||
messages=messages, tools=tools, model=model,
|
||||
max_tokens=max_tokens, temperature=temperature,
|
||||
reasoning_effort=reasoning_effort, tool_choice=tool_choice,
|
||||
on_content_delta=on_content_delta,
|
||||
)
|
||||
|
||||
for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
|
||||
response = await self._safe_chat_stream(**kw)
|
||||
|
||||
if response.finish_reason != "error":
|
||||
return response
|
||||
|
||||
if not self._is_transient_error(response.content):
|
||||
stripped = self._strip_image_content(messages)
|
||||
if stripped is not None:
|
||||
logger.warning("Non-transient LLM error with image content, retrying without images")
|
||||
return await self._safe_chat_stream(**{**kw, "messages": stripped})
|
||||
return response
|
||||
|
||||
logger.warning(
|
||||
"LLM transient error (attempt {}/{}), retrying in {}s: {}",
|
||||
attempt, len(self._CHAT_RETRY_DELAYS), delay,
|
||||
(response.content or "")[:120].lower(),
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
return await self._safe_chat_stream(**kw)
|
||||
|
||||
async def chat_with_retry(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: object = _SENTINEL,
|
||||
temperature: object = _SENTINEL,
|
||||
reasoning_effort: object = _SENTINEL,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Call chat() with retry on transient provider failures.
|
||||
|
||||
Parameters default to ``self.generation`` when not explicitly passed,
|
||||
so callers no longer need to thread temperature / max_tokens /
|
||||
reasoning_effort through every layer.
|
||||
"""
|
||||
if max_tokens is self._SENTINEL:
|
||||
max_tokens = self.generation.max_tokens
|
||||
if temperature is self._SENTINEL:
|
||||
temperature = self.generation.temperature
|
||||
if reasoning_effort is self._SENTINEL:
|
||||
reasoning_effort = self.generation.reasoning_effort
|
||||
|
||||
kw: dict[str, Any] = dict(
|
||||
messages=messages, tools=tools, model=model,
|
||||
max_tokens=max_tokens, temperature=temperature,
|
||||
reasoning_effort=reasoning_effort, tool_choice=tool_choice,
|
||||
)
|
||||
|
||||
for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):
|
||||
response = await self._safe_chat(**kw)
|
||||
|
||||
if response.finish_reason != "error":
|
||||
return response
|
||||
|
||||
if not self._is_transient_error(response.content):
|
||||
stripped = self._strip_image_content(messages)
|
||||
if stripped is not None:
|
||||
logger.warning("Non-transient LLM error with image content, retrying without images")
|
||||
return await self._safe_chat(**{**kw, "messages": stripped})
|
||||
return response
|
||||
|
||||
logger.warning(
|
||||
"LLM transient error (attempt {}/{}), retrying in {}s: {}",
|
||||
attempt, len(self._CHAT_RETRY_DELAYS), delay,
|
||||
(response.content or "")[:120].lower(),
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
return await self._safe_chat(**kw)
|
||||
|
||||
@abstractmethod
|
||||
def get_default_model(self) -> str:
|
||||
"""Get the default model for this provider."""
|
||||
pass
|
||||
@@ -0,0 +1,320 @@
|
||||
"""OpenAI Codex Responses Provider."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, AsyncGenerator
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
from oauth_cli_kit import get_token as get_codex_token
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
|
||||
DEFAULT_CODEX_URL = "https://chatgpt.com/backend-api/codex/responses"
|
||||
DEFAULT_ORIGINATOR = "nanobot"
|
||||
|
||||
|
||||
class OpenAICodexProvider(LLMProvider):
|
||||
"""Use Codex OAuth to call the Responses API."""
|
||||
|
||||
def __init__(self, default_model: str = "openai-codex/gpt-5.1-codex"):
|
||||
super().__init__(api_key=None, api_base=None)
|
||||
self.default_model = default_model
|
||||
|
||||
async def _call_codex(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
model: str | None,
|
||||
reasoning_effort: str | None,
|
||||
tool_choice: str | dict[str, Any] | None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Shared request logic for both chat() and chat_stream()."""
|
||||
model = model or self.default_model
|
||||
system_prompt, input_items = _convert_messages(messages)
|
||||
|
||||
token = await asyncio.to_thread(get_codex_token)
|
||||
headers = _build_headers(token.account_id, token.access)
|
||||
|
||||
body: dict[str, Any] = {
|
||||
"model": _strip_model_prefix(model),
|
||||
"store": False,
|
||||
"stream": True,
|
||||
"instructions": system_prompt,
|
||||
"input": input_items,
|
||||
"text": {"verbosity": "medium"},
|
||||
"include": ["reasoning.encrypted_content"],
|
||||
"prompt_cache_key": _prompt_cache_key(messages),
|
||||
"tool_choice": tool_choice or "auto",
|
||||
"parallel_tool_calls": True,
|
||||
}
|
||||
if reasoning_effort:
|
||||
body["reasoning"] = {"effort": reasoning_effort}
|
||||
if tools:
|
||||
body["tools"] = _convert_tools(tools)
|
||||
|
||||
try:
|
||||
try:
|
||||
content, tool_calls, finish_reason = await _request_codex(
|
||||
DEFAULT_CODEX_URL, headers, body, verify=True,
|
||||
on_content_delta=on_content_delta,
|
||||
)
|
||||
except Exception as e:
|
||||
if "CERTIFICATE_VERIFY_FAILED" not in str(e):
|
||||
raise
|
||||
logger.warning("SSL verification failed for Codex API; retrying with verify=False")
|
||||
content, tool_calls, finish_reason = await _request_codex(
|
||||
DEFAULT_CODEX_URL, headers, body, verify=False,
|
||||
on_content_delta=on_content_delta,
|
||||
)
|
||||
return LLMResponse(content=content, tool_calls=tool_calls, finish_reason=finish_reason)
|
||||
except Exception as e:
|
||||
return LLMResponse(content=f"Error calling Codex: {e}", finish_reason="error")
|
||||
|
||||
async def chat(
|
||||
self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
return await self._call_codex(messages, tools, model, reasoning_effort, tool_choice)
|
||||
|
||||
async def chat_stream(
|
||||
self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
return await self._call_codex(messages, tools, model, reasoning_effort, tool_choice, on_content_delta)
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return self.default_model
|
||||
|
||||
|
||||
def _strip_model_prefix(model: str) -> str:
|
||||
if model.startswith("openai-codex/") or model.startswith("openai_codex/"):
|
||||
return model.split("/", 1)[1]
|
||||
return model
|
||||
|
||||
|
||||
def _build_headers(account_id: str, token: str) -> dict[str, str]:
|
||||
return {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"chatgpt-account-id": account_id,
|
||||
"OpenAI-Beta": "responses=experimental",
|
||||
"originator": DEFAULT_ORIGINATOR,
|
||||
"User-Agent": "nanobot (python)",
|
||||
"accept": "text/event-stream",
|
||||
"content-type": "application/json",
|
||||
}
|
||||
|
||||
|
||||
async def _request_codex(
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
body: dict[str, Any],
|
||||
verify: bool,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> tuple[str, list[ToolCallRequest], str]:
|
||||
async with httpx.AsyncClient(timeout=60.0, verify=verify) as client:
|
||||
async with client.stream("POST", url, headers=headers, json=body) as response:
|
||||
if response.status_code != 200:
|
||||
text = await response.aread()
|
||||
raise RuntimeError(_friendly_error(response.status_code, text.decode("utf-8", "ignore")))
|
||||
return await _consume_sse(response, on_content_delta)
|
||||
|
||||
|
||||
def _convert_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Convert OpenAI function-calling schema to Codex flat format."""
|
||||
converted: list[dict[str, Any]] = []
|
||||
for tool in tools:
|
||||
fn = (tool.get("function") or {}) if tool.get("type") == "function" else tool
|
||||
name = fn.get("name")
|
||||
if not name:
|
||||
continue
|
||||
params = fn.get("parameters") or {}
|
||||
converted.append({
|
||||
"type": "function",
|
||||
"name": name,
|
||||
"description": fn.get("description") or "",
|
||||
"parameters": params if isinstance(params, dict) else {},
|
||||
})
|
||||
return converted
|
||||
|
||||
|
||||
def _convert_messages(messages: list[dict[str, Any]]) -> tuple[str, list[dict[str, Any]]]:
|
||||
system_prompt = ""
|
||||
input_items: list[dict[str, Any]] = []
|
||||
|
||||
for idx, msg in enumerate(messages):
|
||||
role = msg.get("role")
|
||||
content = msg.get("content")
|
||||
|
||||
if role == "system":
|
||||
system_prompt = content if isinstance(content, str) else ""
|
||||
continue
|
||||
|
||||
if role == "user":
|
||||
input_items.append(_convert_user_message(content))
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
if isinstance(content, str) and content:
|
||||
input_items.append({
|
||||
"type": "message", "role": "assistant",
|
||||
"content": [{"type": "output_text", "text": content}],
|
||||
"status": "completed", "id": f"msg_{idx}",
|
||||
})
|
||||
for tool_call in msg.get("tool_calls", []) or []:
|
||||
fn = tool_call.get("function") or {}
|
||||
call_id, item_id = _split_tool_call_id(tool_call.get("id"))
|
||||
input_items.append({
|
||||
"type": "function_call",
|
||||
"id": item_id or f"fc_{idx}",
|
||||
"call_id": call_id or f"call_{idx}",
|
||||
"name": fn.get("name"),
|
||||
"arguments": fn.get("arguments") or "{}",
|
||||
})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
call_id, _ = _split_tool_call_id(msg.get("tool_call_id"))
|
||||
output_text = content if isinstance(content, str) else json.dumps(content, ensure_ascii=False)
|
||||
input_items.append({"type": "function_call_output", "call_id": call_id, "output": output_text})
|
||||
|
||||
return system_prompt, input_items
|
||||
|
||||
|
||||
def _convert_user_message(content: Any) -> dict[str, Any]:
|
||||
if isinstance(content, str):
|
||||
return {"role": "user", "content": [{"type": "input_text", "text": content}]}
|
||||
if isinstance(content, list):
|
||||
converted: list[dict[str, Any]] = []
|
||||
for item in content:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if item.get("type") == "text":
|
||||
converted.append({"type": "input_text", "text": item.get("text", "")})
|
||||
elif item.get("type") == "image_url":
|
||||
url = (item.get("image_url") or {}).get("url")
|
||||
if url:
|
||||
converted.append({"type": "input_image", "image_url": url, "detail": "auto"})
|
||||
if converted:
|
||||
return {"role": "user", "content": converted}
|
||||
return {"role": "user", "content": [{"type": "input_text", "text": ""}]}
|
||||
|
||||
|
||||
def _split_tool_call_id(tool_call_id: Any) -> tuple[str, str | None]:
|
||||
if isinstance(tool_call_id, str) and tool_call_id:
|
||||
if "|" in tool_call_id:
|
||||
call_id, item_id = tool_call_id.split("|", 1)
|
||||
return call_id, item_id or None
|
||||
return tool_call_id, None
|
||||
return "call_0", None
|
||||
|
||||
|
||||
def _prompt_cache_key(messages: list[dict[str, Any]]) -> str:
|
||||
raw = json.dumps(messages, ensure_ascii=True, sort_keys=True)
|
||||
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
async def _iter_sse(response: httpx.Response) -> AsyncGenerator[dict[str, Any], None]:
|
||||
buffer: list[str] = []
|
||||
async for line in response.aiter_lines():
|
||||
if line == "":
|
||||
if buffer:
|
||||
data_lines = [l[5:].strip() for l in buffer if l.startswith("data:")]
|
||||
buffer = []
|
||||
if not data_lines:
|
||||
continue
|
||||
data = "\n".join(data_lines).strip()
|
||||
if not data or data == "[DONE]":
|
||||
continue
|
||||
try:
|
||||
yield json.loads(data)
|
||||
except Exception:
|
||||
continue
|
||||
continue
|
||||
buffer.append(line)
|
||||
|
||||
|
||||
async def _consume_sse(
|
||||
response: httpx.Response,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> tuple[str, list[ToolCallRequest], str]:
|
||||
content = ""
|
||||
tool_calls: list[ToolCallRequest] = []
|
||||
tool_call_buffers: dict[str, dict[str, Any]] = {}
|
||||
finish_reason = "stop"
|
||||
|
||||
async for event in _iter_sse(response):
|
||||
event_type = event.get("type")
|
||||
if event_type == "response.output_item.added":
|
||||
item = event.get("item") or {}
|
||||
if item.get("type") == "function_call":
|
||||
call_id = item.get("call_id")
|
||||
if not call_id:
|
||||
continue
|
||||
tool_call_buffers[call_id] = {
|
||||
"id": item.get("id") or "fc_0",
|
||||
"name": item.get("name"),
|
||||
"arguments": item.get("arguments") or "",
|
||||
}
|
||||
elif event_type == "response.output_text.delta":
|
||||
delta_text = event.get("delta") or ""
|
||||
content += delta_text
|
||||
if on_content_delta and delta_text:
|
||||
await on_content_delta(delta_text)
|
||||
elif event_type == "response.function_call_arguments.delta":
|
||||
call_id = event.get("call_id")
|
||||
if call_id and call_id in tool_call_buffers:
|
||||
tool_call_buffers[call_id]["arguments"] += event.get("delta") or ""
|
||||
elif event_type == "response.function_call_arguments.done":
|
||||
call_id = event.get("call_id")
|
||||
if call_id and call_id in tool_call_buffers:
|
||||
tool_call_buffers[call_id]["arguments"] = event.get("arguments") or ""
|
||||
elif event_type == "response.output_item.done":
|
||||
item = event.get("item") or {}
|
||||
if item.get("type") == "function_call":
|
||||
call_id = item.get("call_id")
|
||||
if not call_id:
|
||||
continue
|
||||
buf = tool_call_buffers.get(call_id) or {}
|
||||
args_raw = buf.get("arguments") or item.get("arguments") or "{}"
|
||||
try:
|
||||
args = json.loads(args_raw)
|
||||
except Exception:
|
||||
args = {"raw": args_raw}
|
||||
tool_calls.append(
|
||||
ToolCallRequest(
|
||||
id=f"{call_id}|{buf.get('id') or item.get('id') or 'fc_0'}",
|
||||
name=buf.get("name") or item.get("name"),
|
||||
arguments=args,
|
||||
)
|
||||
)
|
||||
elif event_type == "response.completed":
|
||||
status = (event.get("response") or {}).get("status")
|
||||
finish_reason = _map_finish_reason(status)
|
||||
elif event_type in {"error", "response.failed"}:
|
||||
raise RuntimeError("Codex response failed")
|
||||
|
||||
return content, tool_calls, finish_reason
|
||||
|
||||
|
||||
_FINISH_REASON_MAP = {"completed": "stop", "incomplete": "length", "failed": "error", "cancelled": "error"}
|
||||
|
||||
|
||||
def _map_finish_reason(status: str | None) -> str:
|
||||
return _FINISH_REASON_MAP.get(status or "completed", "stop")
|
||||
|
||||
|
||||
def _friendly_error(status_code: int, raw: str) -> str:
|
||||
if status_code == 429:
|
||||
return "ChatGPT usage quota exceeded or rate limit triggered. Please try again later."
|
||||
return f"HTTP {status_code}: {raw}"
|
||||
@@ -0,0 +1,589 @@
|
||||
"""OpenAI-compatible provider for all non-Anthropic LLM APIs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import secrets
|
||||
import string
|
||||
import uuid
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import json_repair
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.providers.registry import ProviderSpec
|
||||
|
||||
_ALLOWED_MSG_KEYS = frozenset({
|
||||
"role", "content", "tool_calls", "tool_call_id", "name",
|
||||
"reasoning_content", "extra_content",
|
||||
})
|
||||
_ALNUM = string.ascii_letters + string.digits
|
||||
|
||||
_STANDARD_TC_KEYS = frozenset({"id", "type", "index", "function"})
|
||||
_STANDARD_FN_KEYS = frozenset({"name", "arguments"})
|
||||
_DEFAULT_OPENROUTER_HEADERS = {
|
||||
"HTTP-Referer": "https://github.com/HKUDS/nanobot",
|
||||
"X-OpenRouter-Title": "nanobot",
|
||||
"X-OpenRouter-Categories": "cli-agent,personal-agent",
|
||||
}
|
||||
|
||||
|
||||
def _short_tool_id() -> str:
|
||||
"""9-char alphanumeric ID compatible with all providers (incl. Mistral)."""
|
||||
return "".join(secrets.choice(_ALNUM) for _ in range(9))
|
||||
|
||||
|
||||
def _get(obj: Any, key: str) -> Any:
|
||||
"""Get a value from dict or object attribute, returning None if absent."""
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(key)
|
||||
return getattr(obj, key, None)
|
||||
|
||||
|
||||
def _coerce_dict(value: Any) -> dict[str, Any] | None:
|
||||
"""Try to coerce *value* to a dict; return None if not possible or empty."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, dict):
|
||||
return value if value else None
|
||||
model_dump = getattr(value, "model_dump", None)
|
||||
if callable(model_dump):
|
||||
dumped = model_dump()
|
||||
if isinstance(dumped, dict) and dumped:
|
||||
return dumped
|
||||
return None
|
||||
|
||||
|
||||
def _extract_tc_extras(tc: Any) -> tuple[
|
||||
dict[str, Any] | None,
|
||||
dict[str, Any] | None,
|
||||
dict[str, Any] | None,
|
||||
]:
|
||||
"""Extract (extra_content, provider_specific_fields, fn_provider_specific_fields).
|
||||
|
||||
Works for both SDK objects and dicts. Captures Gemini ``extra_content``
|
||||
verbatim and any non-standard keys on the tool-call / function.
|
||||
"""
|
||||
extra_content = _coerce_dict(_get(tc, "extra_content"))
|
||||
|
||||
tc_dict = _coerce_dict(tc)
|
||||
prov = None
|
||||
fn_prov = None
|
||||
if tc_dict is not None:
|
||||
leftover = {k: v for k, v in tc_dict.items()
|
||||
if k not in _STANDARD_TC_KEYS and k != "extra_content" and v is not None}
|
||||
if leftover:
|
||||
prov = leftover
|
||||
fn = _coerce_dict(tc_dict.get("function"))
|
||||
if fn is not None:
|
||||
fn_leftover = {k: v for k, v in fn.items()
|
||||
if k not in _STANDARD_FN_KEYS and v is not None}
|
||||
if fn_leftover:
|
||||
fn_prov = fn_leftover
|
||||
else:
|
||||
prov = _coerce_dict(_get(tc, "provider_specific_fields"))
|
||||
fn_obj = _get(tc, "function")
|
||||
if fn_obj is not None:
|
||||
fn_prov = _coerce_dict(_get(fn_obj, "provider_specific_fields"))
|
||||
|
||||
return extra_content, prov, fn_prov
|
||||
|
||||
|
||||
def _uses_openrouter_attribution(spec: "ProviderSpec | None", api_base: str | None) -> bool:
|
||||
"""Apply Nanobot attribution headers to OpenRouter requests by default."""
|
||||
if spec and spec.name == "openrouter":
|
||||
return True
|
||||
return bool(api_base and "openrouter" in api_base.lower())
|
||||
|
||||
|
||||
class OpenAICompatProvider(LLMProvider):
|
||||
"""Unified provider for all OpenAI-compatible APIs.
|
||||
|
||||
Receives a resolved ``ProviderSpec`` from the caller — no internal
|
||||
registry lookups needed.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
api_base: str | None = None,
|
||||
default_model: str = "gpt-4o",
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
spec: ProviderSpec | None = None,
|
||||
):
|
||||
super().__init__(api_key, api_base)
|
||||
self.default_model = default_model
|
||||
self.extra_headers = extra_headers or {}
|
||||
self._spec = spec
|
||||
|
||||
if api_key and spec and spec.env_key:
|
||||
self._setup_env(api_key, api_base)
|
||||
|
||||
effective_base = api_base or (spec.default_api_base if spec else None) or None
|
||||
default_headers = {"x-session-affinity": uuid.uuid4().hex}
|
||||
if _uses_openrouter_attribution(spec, effective_base):
|
||||
default_headers.update(_DEFAULT_OPENROUTER_HEADERS)
|
||||
if extra_headers:
|
||||
default_headers.update(extra_headers)
|
||||
|
||||
self._client = AsyncOpenAI(
|
||||
api_key=api_key or "no-key",
|
||||
base_url=effective_base,
|
||||
default_headers=default_headers,
|
||||
)
|
||||
|
||||
def _setup_env(self, api_key: str, api_base: str | None) -> None:
|
||||
"""Set environment variables based on provider spec."""
|
||||
spec = self._spec
|
||||
if not spec or not spec.env_key:
|
||||
return
|
||||
if spec.is_gateway:
|
||||
os.environ[spec.env_key] = api_key
|
||||
else:
|
||||
os.environ.setdefault(spec.env_key, api_key)
|
||||
effective_base = api_base or spec.default_api_base
|
||||
for env_name, env_val in spec.env_extras:
|
||||
resolved = env_val.replace("{api_key}", api_key).replace("{api_base}", effective_base)
|
||||
os.environ.setdefault(env_name, resolved)
|
||||
|
||||
@staticmethod
|
||||
def _apply_cache_control(
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
|
||||
"""Inject cache_control markers for prompt caching."""
|
||||
cache_marker = {"type": "ephemeral"}
|
||||
new_messages = list(messages)
|
||||
|
||||
def _mark(msg: dict[str, Any]) -> dict[str, Any]:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str):
|
||||
return {**msg, "content": [
|
||||
{"type": "text", "text": content, "cache_control": cache_marker},
|
||||
]}
|
||||
if isinstance(content, list) and content:
|
||||
nc = list(content)
|
||||
nc[-1] = {**nc[-1], "cache_control": cache_marker}
|
||||
return {**msg, "content": nc}
|
||||
return msg
|
||||
|
||||
if new_messages and new_messages[0].get("role") == "system":
|
||||
new_messages[0] = _mark(new_messages[0])
|
||||
if len(new_messages) >= 3:
|
||||
new_messages[-2] = _mark(new_messages[-2])
|
||||
|
||||
new_tools = tools
|
||||
if tools:
|
||||
new_tools = list(tools)
|
||||
new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}
|
||||
return new_messages, new_tools
|
||||
|
||||
@staticmethod
|
||||
def _normalize_tool_call_id(tool_call_id: Any) -> Any:
|
||||
"""Normalize to a provider-safe 9-char alphanumeric form."""
|
||||
if not isinstance(tool_call_id, str):
|
||||
return tool_call_id
|
||||
if len(tool_call_id) == 9 and tool_call_id.isalnum():
|
||||
return tool_call_id
|
||||
return hashlib.sha1(tool_call_id.encode()).hexdigest()[:9]
|
||||
|
||||
def _sanitize_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Strip non-standard keys, normalize tool_call IDs."""
|
||||
sanitized = LLMProvider._sanitize_request_messages(messages, _ALLOWED_MSG_KEYS)
|
||||
id_map: dict[str, str] = {}
|
||||
|
||||
def map_id(value: Any) -> Any:
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
return id_map.setdefault(value, self._normalize_tool_call_id(value))
|
||||
|
||||
for clean in sanitized:
|
||||
if isinstance(clean.get("tool_calls"), list):
|
||||
normalized = []
|
||||
for tc in clean["tool_calls"]:
|
||||
if not isinstance(tc, dict):
|
||||
normalized.append(tc)
|
||||
continue
|
||||
tc_clean = dict(tc)
|
||||
tc_clean["id"] = map_id(tc_clean.get("id"))
|
||||
normalized.append(tc_clean)
|
||||
clean["tool_calls"] = normalized
|
||||
if "tool_call_id" in clean and clean["tool_call_id"]:
|
||||
clean["tool_call_id"] = map_id(clean["tool_call_id"])
|
||||
return sanitized
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Build kwargs
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_kwargs(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None,
|
||||
model: str | None,
|
||||
max_tokens: int,
|
||||
temperature: float,
|
||||
reasoning_effort: str | None,
|
||||
tool_choice: str | dict[str, Any] | None,
|
||||
) -> dict[str, Any]:
|
||||
model_name = model or self.default_model
|
||||
spec = self._spec
|
||||
|
||||
if spec and spec.supports_prompt_caching:
|
||||
messages, tools = self._apply_cache_control(messages, tools)
|
||||
|
||||
if spec and spec.strip_model_prefix:
|
||||
model_name = model_name.split("/")[-1]
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model_name,
|
||||
"messages": self._sanitize_messages(self._sanitize_empty_content(messages)),
|
||||
"temperature": temperature,
|
||||
}
|
||||
|
||||
if spec and getattr(spec, "supports_max_completion_tokens", False):
|
||||
kwargs["max_completion_tokens"] = max(1, max_tokens)
|
||||
else:
|
||||
kwargs["max_tokens"] = max(1, max_tokens)
|
||||
|
||||
if spec:
|
||||
model_lower = model_name.lower()
|
||||
for pattern, overrides in spec.model_overrides:
|
||||
if pattern in model_lower:
|
||||
kwargs.update(overrides)
|
||||
break
|
||||
|
||||
if reasoning_effort:
|
||||
kwargs["reasoning_effort"] = reasoning_effort
|
||||
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
kwargs["tool_choice"] = tool_choice or "auto"
|
||||
|
||||
return kwargs
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Response parsing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _maybe_mapping(value: Any) -> dict[str, Any] | None:
|
||||
if isinstance(value, dict):
|
||||
return value
|
||||
model_dump = getattr(value, "model_dump", None)
|
||||
if callable(model_dump):
|
||||
dumped = model_dump()
|
||||
if isinstance(dumped, dict):
|
||||
return dumped
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _extract_text_content(cls, value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, list):
|
||||
parts: list[str] = []
|
||||
for item in value:
|
||||
item_map = cls._maybe_mapping(item)
|
||||
if item_map:
|
||||
text = item_map.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
continue
|
||||
text = getattr(item, "text", None)
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
continue
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
return "".join(parts) or None
|
||||
return str(value)
|
||||
|
||||
@classmethod
|
||||
def _extract_usage(cls, response: Any) -> dict[str, int]:
|
||||
usage_obj = None
|
||||
response_map = cls._maybe_mapping(response)
|
||||
if response_map is not None:
|
||||
usage_obj = response_map.get("usage")
|
||||
elif hasattr(response, "usage") and response.usage:
|
||||
usage_obj = response.usage
|
||||
|
||||
usage_map = cls._maybe_mapping(usage_obj)
|
||||
if usage_map is not None:
|
||||
return {
|
||||
"prompt_tokens": int(usage_map.get("prompt_tokens") or 0),
|
||||
"completion_tokens": int(usage_map.get("completion_tokens") or 0),
|
||||
"total_tokens": int(usage_map.get("total_tokens") or 0),
|
||||
}
|
||||
|
||||
if usage_obj:
|
||||
return {
|
||||
"prompt_tokens": getattr(usage_obj, "prompt_tokens", 0) or 0,
|
||||
"completion_tokens": getattr(usage_obj, "completion_tokens", 0) or 0,
|
||||
"total_tokens": getattr(usage_obj, "total_tokens", 0) or 0,
|
||||
}
|
||||
return {}
|
||||
|
||||
def _parse(self, response: Any) -> LLMResponse:
|
||||
if isinstance(response, str):
|
||||
return LLMResponse(content=response, finish_reason="stop")
|
||||
|
||||
response_map = self._maybe_mapping(response)
|
||||
if response_map is not None:
|
||||
choices = response_map.get("choices") or []
|
||||
if not choices:
|
||||
content = self._extract_text_content(
|
||||
response_map.get("content") or response_map.get("output_text")
|
||||
)
|
||||
if content is not None:
|
||||
return LLMResponse(
|
||||
content=content,
|
||||
finish_reason=str(response_map.get("finish_reason") or "stop"),
|
||||
usage=self._extract_usage(response_map),
|
||||
)
|
||||
return LLMResponse(content="Error: API returned empty choices.", finish_reason="error")
|
||||
|
||||
choice0 = self._maybe_mapping(choices[0]) or {}
|
||||
msg0 = self._maybe_mapping(choice0.get("message")) or {}
|
||||
content = self._extract_text_content(msg0.get("content"))
|
||||
finish_reason = str(choice0.get("finish_reason") or "stop")
|
||||
|
||||
raw_tool_calls: list[Any] = []
|
||||
reasoning_content = msg0.get("reasoning_content")
|
||||
for ch in choices:
|
||||
ch_map = self._maybe_mapping(ch) or {}
|
||||
m = self._maybe_mapping(ch_map.get("message")) or {}
|
||||
tool_calls = m.get("tool_calls")
|
||||
if isinstance(tool_calls, list) and tool_calls:
|
||||
raw_tool_calls.extend(tool_calls)
|
||||
if ch_map.get("finish_reason") in ("tool_calls", "stop"):
|
||||
finish_reason = str(ch_map["finish_reason"])
|
||||
if not content:
|
||||
content = self._extract_text_content(m.get("content"))
|
||||
if not reasoning_content:
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
|
||||
parsed_tool_calls = []
|
||||
for tc in raw_tool_calls:
|
||||
tc_map = self._maybe_mapping(tc) or {}
|
||||
fn = self._maybe_mapping(tc_map.get("function")) or {}
|
||||
args = fn.get("arguments", {})
|
||||
if isinstance(args, str):
|
||||
args = json_repair.loads(args)
|
||||
ec, prov, fn_prov = _extract_tc_extras(tc)
|
||||
parsed_tool_calls.append(ToolCallRequest(
|
||||
id=_short_tool_id(),
|
||||
name=str(fn.get("name") or ""),
|
||||
arguments=args if isinstance(args, dict) else {},
|
||||
extra_content=ec,
|
||||
provider_specific_fields=prov,
|
||||
function_provider_specific_fields=fn_prov,
|
||||
))
|
||||
|
||||
return LLMResponse(
|
||||
content=content,
|
||||
tool_calls=parsed_tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
usage=self._extract_usage(response_map),
|
||||
reasoning_content=reasoning_content if isinstance(reasoning_content, str) else None,
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
return LLMResponse(content="Error: API returned empty choices.", finish_reason="error")
|
||||
|
||||
choice = response.choices[0]
|
||||
msg = choice.message
|
||||
content = msg.content
|
||||
finish_reason = choice.finish_reason
|
||||
|
||||
raw_tool_calls: list[Any] = []
|
||||
for ch in response.choices:
|
||||
m = ch.message
|
||||
if hasattr(m, "tool_calls") and m.tool_calls:
|
||||
raw_tool_calls.extend(m.tool_calls)
|
||||
if ch.finish_reason in ("tool_calls", "stop"):
|
||||
finish_reason = ch.finish_reason
|
||||
if not content and m.content:
|
||||
content = m.content
|
||||
|
||||
tool_calls = []
|
||||
for tc in raw_tool_calls:
|
||||
args = tc.function.arguments
|
||||
if isinstance(args, str):
|
||||
args = json_repair.loads(args)
|
||||
ec, prov, fn_prov = _extract_tc_extras(tc)
|
||||
tool_calls.append(ToolCallRequest(
|
||||
id=_short_tool_id(),
|
||||
name=tc.function.name,
|
||||
arguments=args,
|
||||
extra_content=ec,
|
||||
provider_specific_fields=prov,
|
||||
function_provider_specific_fields=fn_prov,
|
||||
))
|
||||
|
||||
return LLMResponse(
|
||||
content=content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason or "stop",
|
||||
usage=self._extract_usage(response),
|
||||
reasoning_content=getattr(msg, "reasoning_content", None) or None,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _parse_chunks(cls, chunks: list[Any]) -> LLMResponse:
|
||||
content_parts: list[str] = []
|
||||
tc_bufs: dict[int, dict[str, Any]] = {}
|
||||
finish_reason = "stop"
|
||||
usage: dict[str, int] = {}
|
||||
|
||||
def _accum_tc(tc: Any, idx_hint: int) -> None:
|
||||
"""Accumulate one streaming tool-call delta into *tc_bufs*."""
|
||||
tc_index: int = _get(tc, "index") if _get(tc, "index") is not None else idx_hint
|
||||
buf = tc_bufs.setdefault(tc_index, {
|
||||
"id": "", "name": "", "arguments": "",
|
||||
"extra_content": None, "prov": None, "fn_prov": None,
|
||||
})
|
||||
tc_id = _get(tc, "id")
|
||||
if tc_id:
|
||||
buf["id"] = str(tc_id)
|
||||
fn = _get(tc, "function")
|
||||
if fn is not None:
|
||||
fn_name = _get(fn, "name")
|
||||
if fn_name:
|
||||
buf["name"] = str(fn_name)
|
||||
fn_args = _get(fn, "arguments")
|
||||
if fn_args:
|
||||
buf["arguments"] += str(fn_args)
|
||||
ec, prov, fn_prov = _extract_tc_extras(tc)
|
||||
if ec:
|
||||
buf["extra_content"] = ec
|
||||
if prov:
|
||||
buf["prov"] = prov
|
||||
if fn_prov:
|
||||
buf["fn_prov"] = fn_prov
|
||||
|
||||
for chunk in chunks:
|
||||
if isinstance(chunk, str):
|
||||
content_parts.append(chunk)
|
||||
continue
|
||||
|
||||
chunk_map = cls._maybe_mapping(chunk)
|
||||
if chunk_map is not None:
|
||||
choices = chunk_map.get("choices") or []
|
||||
if not choices:
|
||||
usage = cls._extract_usage(chunk_map) or usage
|
||||
text = cls._extract_text_content(
|
||||
chunk_map.get("content") or chunk_map.get("output_text")
|
||||
)
|
||||
if text:
|
||||
content_parts.append(text)
|
||||
continue
|
||||
choice = cls._maybe_mapping(choices[0]) or {}
|
||||
if choice.get("finish_reason"):
|
||||
finish_reason = str(choice["finish_reason"])
|
||||
delta = cls._maybe_mapping(choice.get("delta")) or {}
|
||||
text = cls._extract_text_content(delta.get("content"))
|
||||
if text:
|
||||
content_parts.append(text)
|
||||
for idx, tc in enumerate(delta.get("tool_calls") or []):
|
||||
_accum_tc(tc, idx)
|
||||
usage = cls._extract_usage(chunk_map) or usage
|
||||
continue
|
||||
|
||||
if not chunk.choices:
|
||||
usage = cls._extract_usage(chunk) or usage
|
||||
continue
|
||||
choice = chunk.choices[0]
|
||||
if choice.finish_reason:
|
||||
finish_reason = choice.finish_reason
|
||||
delta = choice.delta
|
||||
if delta and delta.content:
|
||||
content_parts.append(delta.content)
|
||||
for tc in (delta.tool_calls or []) if delta else []:
|
||||
_accum_tc(tc, getattr(tc, "index", 0))
|
||||
|
||||
return LLMResponse(
|
||||
content="".join(content_parts) or None,
|
||||
tool_calls=[
|
||||
ToolCallRequest(
|
||||
id=b["id"] or _short_tool_id(),
|
||||
name=b["name"],
|
||||
arguments=json_repair.loads(b["arguments"]) if b["arguments"] else {},
|
||||
extra_content=b.get("extra_content"),
|
||||
provider_specific_fields=b.get("prov"),
|
||||
function_provider_specific_fields=b.get("fn_prov"),
|
||||
)
|
||||
for b in tc_bufs.values()
|
||||
],
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(e: Exception) -> LLMResponse:
|
||||
body = getattr(e, "doc", None) or getattr(getattr(e, "response", None), "text", None)
|
||||
msg = f"Error: {body.strip()[:500]}" if body and body.strip() else f"Error calling LLM: {e}"
|
||||
return LLMResponse(content=msg, finish_reason="error")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
kwargs = self._build_kwargs(
|
||||
messages, tools, model, max_tokens, temperature,
|
||||
reasoning_effort, tool_choice,
|
||||
)
|
||||
try:
|
||||
return self._parse(await self._client.chat.completions.create(**kwargs))
|
||||
except Exception as e:
|
||||
return self._handle_error(e)
|
||||
|
||||
async def chat_stream(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
model: str | None = None,
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
reasoning_effort: str | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
on_content_delta: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> LLMResponse:
|
||||
kwargs = self._build_kwargs(
|
||||
messages, tools, model, max_tokens, temperature,
|
||||
reasoning_effort, tool_choice,
|
||||
)
|
||||
kwargs["stream"] = True
|
||||
kwargs["stream_options"] = {"include_usage": True}
|
||||
try:
|
||||
stream = await self._client.chat.completions.create(**kwargs)
|
||||
chunks: list[Any] = []
|
||||
async for chunk in stream:
|
||||
chunks.append(chunk)
|
||||
if on_content_delta and chunk.choices:
|
||||
text = getattr(chunk.choices[0].delta, "content", None)
|
||||
if text:
|
||||
await on_content_delta(text)
|
||||
return self._parse_chunks(chunks)
|
||||
except Exception as e:
|
||||
return self._handle_error(e)
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
return self.default_model
|
||||
@@ -0,0 +1,355 @@
|
||||
"""
|
||||
Provider Registry — single source of truth for LLM provider metadata.
|
||||
|
||||
Adding a new provider:
|
||||
1. Add a ProviderSpec to PROVIDERS below.
|
||||
2. Add a field to ProvidersConfig in config/schema.py.
|
||||
Done. Env vars, config matching, status display all derive from here.
|
||||
|
||||
Order matters — it controls match priority and fallback. Gateways first.
|
||||
Every entry writes out all fields so you can copy-paste as a template.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from pydantic.alias_generators import to_snake
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProviderSpec:
|
||||
"""One LLM provider's metadata. See PROVIDERS below for real examples.
|
||||
|
||||
Placeholders in env_extras values:
|
||||
{api_key} — the user's API key
|
||||
{api_base} — api_base from config, or this spec's default_api_base
|
||||
"""
|
||||
|
||||
# identity
|
||||
name: str # config field name, e.g. "dashscope"
|
||||
keywords: tuple[str, ...] # model-name keywords for matching (lowercase)
|
||||
env_key: str # env var for API key, e.g. "DASHSCOPE_API_KEY"
|
||||
display_name: str = "" # shown in `nanobot status`
|
||||
|
||||
# which provider implementation to use
|
||||
# "openai_compat" | "anthropic" | "azure_openai" | "openai_codex"
|
||||
backend: str = "openai_compat"
|
||||
|
||||
# extra env vars, e.g. (("ZHIPUAI_API_KEY", "{api_key}"),)
|
||||
env_extras: tuple[tuple[str, str], ...] = ()
|
||||
|
||||
# gateway / local detection
|
||||
is_gateway: bool = False # routes any model (OpenRouter, AiHubMix)
|
||||
is_local: bool = False # local deployment (vLLM, Ollama)
|
||||
detect_by_key_prefix: str = "" # match api_key prefix, e.g. "sk-or-"
|
||||
detect_by_base_keyword: str = "" # match substring in api_base URL
|
||||
default_api_base: str = "" # OpenAI-compatible base URL for this provider
|
||||
|
||||
# gateway behavior
|
||||
strip_model_prefix: bool = False # strip "provider/" before sending to gateway
|
||||
supports_max_completion_tokens: bool = False
|
||||
|
||||
# per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),)
|
||||
model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()
|
||||
|
||||
# OAuth-based providers (e.g., OpenAI Codex) don't use API keys
|
||||
is_oauth: bool = False
|
||||
|
||||
# Direct providers skip API-key validation (user supplies everything)
|
||||
is_direct: bool = False
|
||||
|
||||
# Provider supports cache_control on content blocks (e.g. Anthropic prompt caching)
|
||||
supports_prompt_caching: bool = False
|
||||
|
||||
@property
|
||||
def label(self) -> str:
|
||||
return self.display_name or self.name.title()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PROVIDERS — the registry. Order = priority. Copy any entry as template.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PROVIDERS: tuple[ProviderSpec, ...] = (
|
||||
# === Custom (direct OpenAI-compatible endpoint) ========================
|
||||
ProviderSpec(
|
||||
name="custom",
|
||||
keywords=(),
|
||||
env_key="",
|
||||
display_name="Custom",
|
||||
backend="openai_compat",
|
||||
is_direct=True,
|
||||
),
|
||||
|
||||
# === Azure OpenAI (direct API calls with API version 2024-10-21) =====
|
||||
ProviderSpec(
|
||||
name="azure_openai",
|
||||
keywords=("azure", "azure-openai"),
|
||||
env_key="",
|
||||
display_name="Azure OpenAI",
|
||||
backend="azure_openai",
|
||||
is_direct=True,
|
||||
),
|
||||
# === Gateways (detected by api_key / api_base, not model name) =========
|
||||
# Gateways can route any model, so they win in fallback.
|
||||
# OpenRouter: global gateway, keys start with "sk-or-"
|
||||
ProviderSpec(
|
||||
name="openrouter",
|
||||
keywords=("openrouter",),
|
||||
env_key="OPENROUTER_API_KEY",
|
||||
display_name="OpenRouter",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
detect_by_key_prefix="sk-or-",
|
||||
detect_by_base_keyword="openrouter",
|
||||
default_api_base="https://openrouter.ai/api/v1",
|
||||
supports_prompt_caching=True,
|
||||
),
|
||||
# AiHubMix: global gateway, OpenAI-compatible interface.
|
||||
# strip_model_prefix=True: doesn't understand "anthropic/claude-3",
|
||||
# strips to bare "claude-3".
|
||||
ProviderSpec(
|
||||
name="aihubmix",
|
||||
keywords=("aihubmix",),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="AiHubMix",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
detect_by_base_keyword="aihubmix",
|
||||
default_api_base="https://aihubmix.com/v1",
|
||||
strip_model_prefix=True,
|
||||
),
|
||||
# SiliconFlow (硅基流动): OpenAI-compatible gateway, model names keep org prefix
|
||||
ProviderSpec(
|
||||
name="siliconflow",
|
||||
keywords=("siliconflow",),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="SiliconFlow",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
detect_by_base_keyword="siliconflow",
|
||||
default_api_base="https://api.siliconflow.cn/v1",
|
||||
),
|
||||
|
||||
# VolcEngine (火山引擎): OpenAI-compatible gateway, pay-per-use models
|
||||
ProviderSpec(
|
||||
name="volcengine",
|
||||
keywords=("volcengine", "volces", "ark"),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="VolcEngine",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
detect_by_base_keyword="volces",
|
||||
default_api_base="https://ark.cn-beijing.volces.com/api/v3",
|
||||
),
|
||||
|
||||
# VolcEngine Coding Plan (火山引擎 Coding Plan): same key as volcengine
|
||||
ProviderSpec(
|
||||
name="volcengine_coding_plan",
|
||||
keywords=("volcengine-plan",),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="VolcEngine Coding Plan",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
default_api_base="https://ark.cn-beijing.volces.com/api/coding/v3",
|
||||
strip_model_prefix=True,
|
||||
),
|
||||
|
||||
# BytePlus: VolcEngine international, pay-per-use models
|
||||
ProviderSpec(
|
||||
name="byteplus",
|
||||
keywords=("byteplus",),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="BytePlus",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
detect_by_base_keyword="bytepluses",
|
||||
default_api_base="https://ark.ap-southeast.bytepluses.com/api/v3",
|
||||
strip_model_prefix=True,
|
||||
),
|
||||
|
||||
# BytePlus Coding Plan: same key as byteplus
|
||||
ProviderSpec(
|
||||
name="byteplus_coding_plan",
|
||||
keywords=("byteplus-plan",),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="BytePlus Coding Plan",
|
||||
backend="openai_compat",
|
||||
is_gateway=True,
|
||||
default_api_base="https://ark.ap-southeast.bytepluses.com/api/coding/v3",
|
||||
strip_model_prefix=True,
|
||||
),
|
||||
|
||||
|
||||
# === Standard providers (matched by model-name keywords) ===============
|
||||
# Anthropic: native Anthropic SDK
|
||||
ProviderSpec(
|
||||
name="anthropic",
|
||||
keywords=("anthropic", "claude"),
|
||||
env_key="ANTHROPIC_API_KEY",
|
||||
display_name="Anthropic",
|
||||
backend="anthropic",
|
||||
supports_prompt_caching=True,
|
||||
),
|
||||
# OpenAI: SDK default base URL (no override needed)
|
||||
ProviderSpec(
|
||||
name="openai",
|
||||
keywords=("openai", "gpt"),
|
||||
env_key="OPENAI_API_KEY",
|
||||
display_name="OpenAI",
|
||||
backend="openai_compat",
|
||||
),
|
||||
# OpenAI Codex: OAuth-based, dedicated provider
|
||||
ProviderSpec(
|
||||
name="openai_codex",
|
||||
keywords=("openai-codex",),
|
||||
env_key="",
|
||||
display_name="OpenAI Codex",
|
||||
backend="openai_codex",
|
||||
detect_by_base_keyword="codex",
|
||||
default_api_base="https://chatgpt.com/backend-api",
|
||||
is_oauth=True,
|
||||
),
|
||||
# GitHub Copilot: OAuth-based
|
||||
ProviderSpec(
|
||||
name="github_copilot",
|
||||
keywords=("github_copilot", "copilot"),
|
||||
env_key="",
|
||||
display_name="Github Copilot",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.githubcopilot.com",
|
||||
is_oauth=True,
|
||||
),
|
||||
# DeepSeek: OpenAI-compatible at api.deepseek.com
|
||||
ProviderSpec(
|
||||
name="deepseek",
|
||||
keywords=("deepseek",),
|
||||
env_key="DEEPSEEK_API_KEY",
|
||||
display_name="DeepSeek",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.deepseek.com",
|
||||
),
|
||||
# Gemini: Google's OpenAI-compatible endpoint
|
||||
ProviderSpec(
|
||||
name="gemini",
|
||||
keywords=("gemini",),
|
||||
env_key="GEMINI_API_KEY",
|
||||
display_name="Gemini",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
|
||||
),
|
||||
# Zhipu (智谱): OpenAI-compatible at open.bigmodel.cn
|
||||
ProviderSpec(
|
||||
name="zhipu",
|
||||
keywords=("zhipu", "glm", "zai"),
|
||||
env_key="ZAI_API_KEY",
|
||||
display_name="Zhipu AI",
|
||||
backend="openai_compat",
|
||||
env_extras=(("ZHIPUAI_API_KEY", "{api_key}"),),
|
||||
default_api_base="https://open.bigmodel.cn/api/paas/v4",
|
||||
),
|
||||
# DashScope (通义): Qwen models, OpenAI-compatible endpoint
|
||||
ProviderSpec(
|
||||
name="dashscope",
|
||||
keywords=("qwen", "dashscope"),
|
||||
env_key="DASHSCOPE_API_KEY",
|
||||
display_name="DashScope",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
),
|
||||
# Moonshot (月之暗面): Kimi models. K2.5 enforces temperature >= 1.0.
|
||||
ProviderSpec(
|
||||
name="moonshot",
|
||||
keywords=("moonshot", "kimi"),
|
||||
env_key="MOONSHOT_API_KEY",
|
||||
display_name="Moonshot",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.moonshot.ai/v1",
|
||||
model_overrides=(("kimi-k2.5", {"temperature": 1.0}),),
|
||||
),
|
||||
# MiniMax: OpenAI-compatible API
|
||||
ProviderSpec(
|
||||
name="minimax",
|
||||
keywords=("minimax",),
|
||||
env_key="MINIMAX_API_KEY",
|
||||
display_name="MiniMax",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.minimax.io/v1",
|
||||
),
|
||||
# Mistral AI: OpenAI-compatible API
|
||||
ProviderSpec(
|
||||
name="mistral",
|
||||
keywords=("mistral",),
|
||||
env_key="MISTRAL_API_KEY",
|
||||
display_name="Mistral",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.mistral.ai/v1",
|
||||
),
|
||||
# Step Fun (阶跃星辰): OpenAI-compatible API
|
||||
ProviderSpec(
|
||||
name="stepfun",
|
||||
keywords=("stepfun", "step"),
|
||||
env_key="STEPFUN_API_KEY",
|
||||
display_name="Step Fun",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.stepfun.com/v1",
|
||||
),
|
||||
# === Local deployment (matched by config key, NOT by api_base) =========
|
||||
# vLLM / any OpenAI-compatible local server
|
||||
ProviderSpec(
|
||||
name="vllm",
|
||||
keywords=("vllm",),
|
||||
env_key="HOSTED_VLLM_API_KEY",
|
||||
display_name="vLLM/Local",
|
||||
backend="openai_compat",
|
||||
is_local=True,
|
||||
),
|
||||
# Ollama (local, OpenAI-compatible)
|
||||
ProviderSpec(
|
||||
name="ollama",
|
||||
keywords=("ollama", "nemotron"),
|
||||
env_key="OLLAMA_API_KEY",
|
||||
display_name="Ollama",
|
||||
backend="openai_compat",
|
||||
is_local=True,
|
||||
detect_by_base_keyword="11434",
|
||||
default_api_base="http://localhost:11434/v1",
|
||||
),
|
||||
# === OpenVINO Model Server (direct, local, OpenAI-compatible at /v3) ===
|
||||
ProviderSpec(
|
||||
name="ovms",
|
||||
keywords=("openvino", "ovms"),
|
||||
env_key="",
|
||||
display_name="OpenVINO Model Server",
|
||||
backend="openai_compat",
|
||||
is_direct=True,
|
||||
is_local=True,
|
||||
default_api_base="http://localhost:8000/v3",
|
||||
),
|
||||
# === Auxiliary (not a primary LLM provider) ============================
|
||||
# Groq: mainly used for Whisper voice transcription, also usable for LLM
|
||||
ProviderSpec(
|
||||
name="groq",
|
||||
keywords=("groq",),
|
||||
env_key="GROQ_API_KEY",
|
||||
display_name="Groq",
|
||||
backend="openai_compat",
|
||||
default_api_base="https://api.groq.com/openai/v1",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lookup helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_by_name(name: str) -> ProviderSpec | None:
|
||||
"""Find a provider spec by config field name, e.g. "dashscope"."""
|
||||
normalized = to_snake(name.replace("-", "_"))
|
||||
for spec in PROVIDERS:
|
||||
if spec.name == normalized:
|
||||
return spec
|
||||
return None
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Voice transcription provider using Groq."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class GroqTranscriptionProvider:
|
||||
"""
|
||||
Voice transcription provider using Groq's Whisper API.
|
||||
|
||||
Groq offers extremely fast transcription with a generous free tier.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: str | None = None):
|
||||
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
|
||||
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
|
||||
|
||||
async def transcribe(self, file_path: str | Path) -> str:
|
||||
"""
|
||||
Transcribe an audio file using Groq.
|
||||
|
||||
Args:
|
||||
file_path: Path to the audio file.
|
||||
|
||||
Returns:
|
||||
Transcribed text.
|
||||
"""
|
||||
if not self.api_key:
|
||||
logger.warning("Groq API key not configured for transcription")
|
||||
return ""
|
||||
|
||||
path = Path(file_path)
|
||||
if not path.exists():
|
||||
logger.error("Audio file not found: {}", file_path)
|
||||
return ""
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
with open(path, "rb") as f:
|
||||
files = {
|
||||
"file": (path.name, f),
|
||||
"model": (None, "whisper-large-v3"),
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
self.api_url,
|
||||
headers=headers,
|
||||
files=files,
|
||||
timeout=60.0
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data.get("text", "")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Groq transcription error: {}", e)
|
||||
return ""
|
||||
Reference in New Issue
Block a user