feat: add streaming output

This commit is contained in:
qixinbo
2026-03-20 16:54:21 +08:00
parent e3f67d38f8
commit 50352a3653
5 changed files with 258 additions and 148 deletions
+4 -2
View File
@@ -34,6 +34,8 @@ from nanobot.config.schema import Config
from app.api.skills import load_skills
from app.services.llm_cache import get_llm_configs
from app.core.streaming_provider import StreamingLiteLLMProvider
class NanobotIntegration:
def __init__(self):
self.agent: AgentLoop | None = None
@@ -156,7 +158,7 @@ class NanobotIntegration:
spec = find_by_name(provider_name)
# Skip API key check for now to allow initialization without full config
return LiteLLMProvider(
return StreamingLiteLLMProvider(
api_key=p.api_key if p else None,
api_base=config.get_api_base(model),
default_model=model,
@@ -211,7 +213,7 @@ class NanobotIntegration:
cached = self._model_agent_cache.get(model_id)
if cached:
return cached
provider = LiteLLMProvider(
provider = StreamingLiteLLMProvider(
api_key=target_config.get("api_key"),
api_base=target_config.get("api_base"),
default_model=target_config.get("model"),
+76
View File
@@ -0,0 +1,76 @@
import contextvars
import json
from typing import Any, Dict, List, Optional
from loguru import logger
from nanobot.providers.litellm_provider import LiteLLMProvider
from nanobot.providers.base import LLMResponse
from litellm import acompletion, stream_chunk_builder
# Context-local slot holding the queue that receives streamed deltas.
# Defaults to None; StreamingLiteLLMProvider.chat reads it once per call and
# only awaits `queue.put(...)` with event dicts when a queue has been set.
# NOTE(review): presumably the caller sets an asyncio.Queue here before
# invoking the agent -- confirm against the code that calls `.set()`.
streaming_queue_var = contextvars.ContextVar("streaming_queue", default=None)
class StreamingLiteLLMProvider(LiteLLMProvider):
    """LiteLLM provider whose ``chat`` always requests a streamed completion.

    While the stream is consumed, text and reasoning deltas are pushed to the
    queue found in ``streaming_queue_var`` (if one is set in the current
    context). All chunks are also collected and rebuilt into one complete
    response, which is handed to ``self._parse_response`` so the caller still
    receives a single ``LLMResponse``.
    """

    async def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 4000,
        reasoning_effort: Optional[str] = None,
        request_timeout: Optional[int] = None,
        num_retries: Optional[int] = None,
    ) -> LLMResponse:
        """Run a streamed completion and return the assembled response.

        Args:
            messages: Chat messages sent to the model; also passed to
                ``stream_chunk_builder`` when the chunks are reassembled.
            tools: Tool definitions; forwarded only when non-empty.
            model: Model name; falls back to ``self.default_model``.
            temperature: Sampling temperature forwarded as-is.
            max_tokens: Token limit forwarded as-is.
            reasoning_effort: Forwarded only when
                ``self._supports_reasoning_effort`` accepts the resolved model.
            request_timeout: Forwarded as ``timeout`` when not ``None``.
            num_retries: Forwarded as a non-negative int when not ``None``.

        Returns:
            The result of ``self._parse_response`` on the rebuilt response.

        Raises:
            RuntimeError: If the stream produced nothing that could be
                rebuilt into a response.
            Exception: Any error raised while requesting, streaming or
                parsing is logged and re-raised unchanged.
        """
        request = self._streaming_request_kwargs(
            messages=messages,
            tools=tools,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            reasoning_effort=reasoning_effort,
            request_timeout=request_timeout,
            num_retries=num_retries,
        )

        try:
            response_stream = await acompletion(**request)

            chunks = []
            # Read the context variable once; it does not change mid-call.
            queue = streaming_queue_var.get()

            async for chunk in response_stream:
                chunks.append(chunk)
                if queue is not None:
                    await self._forward_delta_to_queue(queue, chunk)

            # Rebuild a complete response object for downstream processing.
            full_response = stream_chunk_builder(chunks, messages=messages)
            if full_response is None:
                # stream_chunk_builder yields None when there were no chunks;
                # fail with a clear message instead of an opaque error from
                # parsing a None response.
                raise RuntimeError(
                    "LLM stream ended without yielding a response"
                )
            return self._parse_response(full_response)

        except Exception as e:
            logger.error("StreamingLiteLLMProvider failed: {}", e)
            raise

    def _streaming_request_kwargs(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]],
        model: Optional[str],
        temperature: float,
        max_tokens: int,
        reasoning_effort: Optional[str],
        request_timeout: Optional[int],
        num_retries: Optional[int],
    ) -> Dict[str, Any]:
        """Build the keyword arguments for the streamed ``acompletion`` call."""
        model_name = self._resolve_model(model or self.default_model)

        kwargs: Dict[str, Any] = {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": True,  # streaming is always forced on
        }

        # "no-key" is treated as a placeholder and is not sent as a credential.
        if self.api_key and self.api_key != "no-key":
            kwargs["api_key"] = self.api_key
        if self.api_base:
            kwargs["api_base"] = self.api_base
        if self.extra_headers:
            kwargs["extra_headers"] = self.extra_headers
        if tools:
            kwargs["tools"] = tools
        if request_timeout is not None:
            kwargs["timeout"] = request_timeout
        if num_retries is not None:
            kwargs["num_retries"] = max(0, int(num_retries))
        if reasoning_effort and self._supports_reasoning_effort(model_name):
            kwargs["reasoning_effort"] = reasoning_effort

        return kwargs

    @staticmethod
    async def _forward_delta_to_queue(queue: Any, chunk: Any) -> None:
        """Push the chunk's text and reasoning deltas, if any, onto ``queue``."""
        if not chunk.choices:
            return
        delta = chunk.choices[0].delta
        if not delta:
            return

        # Either attribute may be missing on a given delta.
        content = getattr(delta, "content", None)
        reasoning_content = getattr(delta, "reasoning_content", None)

        if content:
            await queue.put({"type": "delta", "content": content})
        if reasoning_content:
            await queue.put(
                {
                    "type": "progress",
                    "content": reasoning_content,
                    "is_reasoning": True,
                }
            )