diff --git a/backend/app/agent/chart.py b/backend/app/agent/chart.py index d9d348a..2292bc7 100644 --- a/backend/app/agent/chart.py +++ b/backend/app/agent/chart.py @@ -7,8 +7,9 @@ from pathlib import Path # Add project root to sys.path PROJECT_ROOT = Path(__file__).resolve().parents[3] -if str(PROJECT_ROOT) not in sys.path: - sys.path.append(str(PROJECT_ROOT)) +NANOBOT_ROOT = PROJECT_ROOT / "nanobot" +if str(NANOBOT_ROOT) not in sys.path: + sys.path.append(str(NANOBOT_ROOT)) from nanobot.providers.litellm_provider import LiteLLMProvider from app.schemas.chart import ChartGenerationResponse diff --git a/backend/app/agent/nl2sql.py b/backend/app/agent/nl2sql.py index 2c6425c..a8b0daa 100644 --- a/backend/app/agent/nl2sql.py +++ b/backend/app/agent/nl2sql.py @@ -15,8 +15,9 @@ logger = logging.getLogger(__name__) # Add project root to sys.path to allow importing nanobot PROJECT_ROOT = Path(__file__).resolve().parents[3] -if str(PROJECT_ROOT) not in sys.path: - sys.path.append(str(PROJECT_ROOT)) +NANOBOT_ROOT = PROJECT_ROOT / "nanobot" +if str(NANOBOT_ROOT) not in sys.path: + sys.path.append(str(NANOBOT_ROOT)) from nanobot.providers.litellm_provider import LiteLLMProvider from app.connectors.postgres import postgres_connector diff --git a/backend/app/core/artifacts.py b/backend/app/core/artifacts.py new file mode 100644 index 0000000..fc071fb --- /dev/null +++ b/backend/app/core/artifacts.py @@ -0,0 +1,202 @@ +import mimetypes +import re +from pathlib import Path +from typing import Any, Iterable +from urllib.parse import quote + +from pydantic import BaseModel + + +LOCAL_URI_PATTERN = re.compile(r"local://[^\s<>'\"\]\)\}]+") +PATH_PATTERN = re.compile( + r"(?:[A-Za-z]:[\\/][^\s<>'\"]+\.[A-Za-z0-9]{1,12}|/[^\s<>'\"]+\.[A-Za-z0-9]{1,12}|(?:\.\./|\.?/)?(?:[\w\-.]+[\\/])+[\w\-.]+\.[A-Za-z0-9]{1,12})" +) +REPORT_PATH_PATTERN = re.compile(r"data[\\/]data[\\/][\w\-.]+\.[A-Za-z0-9]{1,12}", re.IGNORECASE) +PREVIEWABLE_EXTENSIONS = { + ".html", + ".htm", + ".pdf", + ".pptx", + 
def extract_artifacts(content: str, session_messages: list[dict[str, Any]] | None = None) -> list[dict[str, Any]]:
    """Collect downloadable artifacts referenced by an assistant reply.

    Locators (``local://`` URIs and file paths) are pulled from ``content``
    and from the texts gathered by ``_collect_candidate_texts``. Each locator
    is resolved to a file on disk and described with an ``ArtifactPayload``.

    Args:
        content: Final assistant text.
        session_messages: Session history; ``None`` is treated as empty.

    Returns:
        One ``model_dump(exclude_none=True)`` dict per distinct existing file,
        in first-mention order.
    """
    candidates = _collect_candidate_texts(content, session_messages or [])
    # dict.fromkeys keeps first-seen order while dropping repeated locators.
    ordered_locators = list(
        dict.fromkeys(
            locator
            for text in candidates
            for locator in _extract_locators(text)
        )
    )

    # Only files below these roots can be served by the artifact endpoints, so
    # anything else (e.g. an absolute system path quoted in the text) must not
    # be advertised: its name and size would leak and its URLs would 403.
    data_root = Path(__file__).resolve().parents[2] / "data"
    allowed_roots = tuple(
        (data_root / name).resolve() for name in ("workspace", "uploads", "data")
    )

    artifacts: list[dict[str, Any]] = []
    seen_paths: set[Path] = set()
    for locator in ordered_locators:
        path = _resolve_locator(locator)
        if path is None:
            continue
        try:
            resolved = path.resolve(strict=True)
        except (OSError, RuntimeError):
            # Missing file or unresolvable symlink chain: nothing to offer.
            continue
        if resolved in seen_paths or not resolved.is_file():
            continue
        if not any(resolved.is_relative_to(root) for root in allowed_roots):
            continue
        seen_paths.add(resolved)
        try:
            artifact = _build_artifact_payload(locator, resolved)
        except OSError:
            # The file disappeared between the checks above and stat().
            continue
        artifacts.append(artifact.model_dump(exclude_none=True))
    return artifacts
return True + return mime_type in { + "application/pdf", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + } + + +def _collect_candidate_texts(content: str, session_messages: list[dict[str, Any]]) -> list[str]: + texts = [content or ""] + if not session_messages: + return texts + last_user_idx = -1 + for idx, message in enumerate(session_messages): + if message.get("role") == "user": + last_user_idx = idx + if last_user_idx == -1: + segment = session_messages + else: + segment = session_messages[last_user_idx + 1 :] + for message in segment: + raw = message.get("content") + flattened = _flatten_content(raw) + if flattened: + texts.append(flattened) + return texts + + +def _extract_locators(text: str) -> Iterable[str]: + if not text: + return [] + ordered: list[str] = [] + seen: set[str] = set() + patterns = (LOCAL_URI_PATTERN, REPORT_PATH_PATTERN, PATH_PATTERN) + for pattern in patterns: + for match in pattern.findall(text): + normalized = _normalize_locator(match) + if not normalized or normalized in seen: + continue + seen.add(normalized) + ordered.append(normalized) + return ordered + + +def _normalize_locator(raw_locator: str) -> str: + locator = raw_locator.strip().strip("`'\"") + locator = locator.rstrip(".,;:!?)]}") + return locator + + +def _resolve_locator(locator: str) -> Path | None: + backend_root = Path(__file__).resolve().parents[2] + data_root = backend_root / "data" + workspace_root = data_root / "workspace" + uploads_root = data_root / "uploads" + reports_root = data_root / "data" + if locator.startswith("local://"): + raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\") + if not raw_local: + return None + candidate = Path(raw_local) + if candidate.is_absolute(): + return candidate + checks = [workspace_root / candidate, reports_root / candidate, uploads_root / candidate, uploads_root / candidate.name] + for path in checks: + if path.exists(): + return path + return uploads_root / candidate.name + 
normalized = locator.replace("\\", "/") + path = Path(locator) + if path.is_absolute(): + return path + if normalized.startswith("data/data/"): + return backend_root / normalized + checks = [ + workspace_root / normalized, + data_root / normalized, + backend_root / normalized, + ] + for candidate in checks: + if candidate.exists(): + return candidate + return None + + +def _flatten_content(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, list): + fragments: list[str] = [] + for item in value: + flattened = _flatten_content(item) + if flattened: + fragments.append(flattened) + return "\n".join(fragments) + if isinstance(value, dict): + fragments: list[str] = [] + text = value.get("text") + if isinstance(text, str): + fragments.append(text) + content = value.get("content") + if content is not None: + nested = _flatten_content(content) + if nested: + fragments.append(nested) + for field in ("path", "file", "file_path", "url"): + data = value.get(field) + if isinstance(data, str): + fragments.append(data) + return "\n".join(fragments) + return str(value) diff --git a/backend/app/core/files.py b/backend/app/core/files.py index 9cffdaa..e228f29 100644 --- a/backend/app/core/files.py +++ b/backend/app/core/files.py @@ -2,17 +2,76 @@ import os from pathlib import Path from typing import Optional + +backend_root = Path(__file__).resolve().parents[2] +data_root = backend_root / "data" +workspace_root = data_root / "workspace" +uploads_root = data_root / "uploads" +reports_root = data_root / "data" +allowed_artifact_roots = (workspace_root, uploads_root, reports_root) + + def resolve_upload_file_path(file_url: Optional[str]) -> Path: if not file_url: raise ValueError("File URL is empty") - + if file_url.startswith("local://"): raw_name = file_url.replace("local://", "", 1) safe_name = os.path.basename(raw_name) - # Assuming we are in backend/app/core, go up to backend/data/uploads - upload_dir = 
Path(__file__).resolve().parents[2] / "data" / "uploads" - file_path = upload_dir / safe_name + file_path = uploads_root / safe_name return file_path - - # If it's already an absolute path (or relative path not starting with local://) + return Path(file_url) + + +def resolve_artifact_target(target: str) -> Path | None: + locator = (target or "").strip().strip("'\"") + if not locator: + return None + if locator.startswith("local://"): + raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\") + if not raw_local: + return None + candidate = Path(raw_local) + if candidate.is_absolute(): + return candidate + checks = ( + workspace_root / candidate, + reports_root / candidate, + uploads_root / candidate, + uploads_root / candidate.name, + ) + for path in checks: + if path.exists(): + return path + return uploads_root / candidate.name + normalized = locator.replace("\\", "/") + path = Path(locator) + if path.is_absolute(): + return path + if normalized.startswith("data/data/"): + return backend_root / normalized + checks = ( + workspace_root / normalized, + data_root / normalized, + backend_root / normalized, + ) + for candidate in checks: + if candidate.exists(): + return candidate + return None + + +def ensure_artifact_access(path: Path, *, require_file: bool = True) -> Path: + try: + resolved = path.resolve(strict=True) + except FileNotFoundError as exc: + raise FileNotFoundError("目标文件不存在") from exc + if require_file and not resolved.is_file(): + raise FileNotFoundError("目标文件不存在") + if not require_file and not resolved.is_dir(): + raise FileNotFoundError("目标目录不存在") + for root in allowed_artifact_roots: + if resolved.is_relative_to(root.resolve()): + return resolved + raise PermissionError("非法路径访问") diff --git a/backend/main.py b/backend/main.py index 7b1dddd..3112485 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,8 +1,13 @@ import asyncio +import base64 +import binascii from typing import Any, Dict, List, Optional, Literal, Tuple -from fastapi 
import FastAPI, HTTPException +import mimetypes +from pathlib import Path + +from fastapi import FastAPI, HTTPException, Query from fastapi.encoders import jsonable_encoder -from fastapi.responses import StreamingResponse +from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from pydantic import BaseModel @@ -14,6 +19,8 @@ from datetime import datetime from app.api import upload, llm, skills, users, datasources, projects, semantic from app.connectors.postgres import postgres_connector from app.connectors.clickhouse import clickhouse_connector +from app.core.artifacts import extract_artifacts +from app.core.files import ensure_artifact_access, resolve_artifact_target from app.core.nanobot import nanobot_service from app.core.session_alias_store import session_alias_store from app.context import current_session_id, current_progress_callback, current_viz_data, current_data_source, current_file_url @@ -50,6 +57,17 @@ app.include_router(datasources.router, prefix="/api/v1") app.include_router(semantic.router, prefix="/api/v1") STREAM_DELTA_CHUNK_SIZE = 48 +PREVIEWABLE_TEXT_EXTENSIONS = { + ".txt", + ".md", + ".json", + ".csv", + ".tsv", + ".yaml", + ".yml", + ".xml", + ".log", +} @app.on_event("startup") async def startup_event(): @@ -85,6 +103,100 @@ def nanobot_status(): return {"status": "running", "model": nanobot_service.agent.model} return {"status": "stopped"} + +def _guess_mime_type(path: os.PathLike[str] | str) -> str: + mime_type, _ = mimetypes.guess_type(str(path)) + return mime_type or "application/octet-stream" + + +def _resolve_checked_target(target: str) -> os.PathLike[str]: + path = resolve_artifact_target(target) + if path is None: + raise HTTPException(status_code=404, detail="目标文件不存在") + try: + return ensure_artifact_access(path, require_file=True) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="目标文件不存在") 
@app.get("/nanobot/artifacts/preview")
def preview_artifact(target: str = Query(...)):
    """Serve an artifact inline, or redirect HTML to the directory-aware route.

    Args:
        target: Artifact locator (``local://`` URI or path) from the query string.

    Returns:
        A 307 redirect to ``/nanobot/artifacts/web/{root_token}/{entry}`` for
        ``.html`` / ``.htm`` files, so relative resources next to the page
        resolve; otherwise an inline ``FileResponse`` with the guessed type.

    Raises:
        HTTPException: 404 / 403 from ``_resolve_checked_target``; 415 when the
            file type is not previewable.
    """
    from urllib.parse import quote

    resolved = _resolve_checked_target(target)
    mime_type = _guess_mime_type(resolved)
    if not _is_previewable(resolved, mime_type):
        raise HTTPException(status_code=415, detail="当前文件类型不支持预览,请使用下载")
    suffix = os.path.splitext(str(resolved))[1].lower()
    if suffix in {".html", ".htm"}:
        root_token = _encode_web_root(Path(resolved).parent)
        # Percent-encode the file name: characters such as space, '#', '?' or
        # '%' would otherwise truncate or corrupt the redirect target.
        entry = quote(Path(resolved).name)
        return RedirectResponse(url=f"/nanobot/artifacts/web/{root_token}/{entry}", status_code=307)
    return FileResponse(
        path=str(resolved),
        media_type=mime_type,
        filename=os.path.basename(str(resolved)),
        content_disposition_type="inline",
    )
_decode_web_root(root_token) + try: + safe_root = ensure_artifact_access(root_dir, require_file=False) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="Web 预览目录不存在") + except PermissionError: + raise HTTPException(status_code=403, detail="非法路径访问") + candidate = os.path.join(str(safe_root), resource_path) + try: + resolved = ensure_artifact_access(Path(candidate), require_file=True) + except FileNotFoundError: + raise HTTPException(status_code=404, detail="Web 资源不存在") + except PermissionError: + raise HTTPException(status_code=403, detail="非法路径访问") + if not Path(resolved).is_relative_to(Path(safe_root)): + raise HTTPException(status_code=403, detail="非法路径访问") + return FileResponse( + path=str(resolved), + media_type=_guess_mime_type(resolved), + filename=os.path.basename(str(resolved)), + content_disposition_type="inline", + ) + class ChatRequest(BaseModel): message: str session_id: str = "api:default" @@ -127,6 +239,27 @@ class SessionFileContextUpdateRequest(BaseModel): active_data_file: Optional[Dict[str, Any]] = None selected_data_source: Optional[str] = None + +def _persist_assistant_enrichment( + session_id: str, + viz_payload: Optional[Dict[str, Any]] = None, + artifacts: Optional[List[Dict[str, Any]]] = None, +) -> None: + if not nanobot_service.agent: + return + session = nanobot_service.agent.sessions.get_or_create(session_id) + if not session.messages or session.messages[-1].get("role") != "assistant": + return + changed = False + if viz_payload: + session.messages[-1]["viz"] = viz_payload + changed = True + if artifacts: + session.messages[-1]["artifacts"] = artifacts + changed = True + if changed: + nanobot_service.agent.sessions.save(session) + @app.post("/nanobot/chat") async def nanobot_chat(request: ChatRequest): try: @@ -154,20 +287,28 @@ async def nanobot_chat(request: ChatRequest): skill_ids=request.skill_ids, model_id=request.model_id, ) + text = response or "" + session_messages = [] + if nanobot_service.agent: + 
session = nanobot_service.agent.sessions.get_or_create(request.session_id) + session_messages = session.messages + artifacts = extract_artifacts(text, session_messages) viz_payload = current_viz_data.get() - if viz_payload and nanobot_service.agent: - # Update the last assistant message with viz data - session = nanobot_service.agent.sessions.get_or_create(request.session_id) - if session.messages and session.messages[-1].get("role") == "assistant": - session.messages[-1]["viz"] = viz_payload - nanobot_service.agent.sessions.save(session) + _persist_assistant_enrichment( + session_id=request.session_id, + viz_payload=viz_payload if isinstance(viz_payload, dict) else None, + artifacts=artifacts, + ) - return { - "response": response, + payload = { + "response": text, "viz": viz_payload, "routing": {"selected": "agent", "reason": "auto_routed_by_agent"}, } + if artifacts: + payload["artifacts"] = artifacts + return payload except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -252,6 +393,11 @@ async def nanobot_chat_stream(request: ChatRequest): response = await current_task text = response or "" + session_messages = [] + if nanobot_service.agent: + session = nanobot_service.agent.sessions.get_or_create(request.session_id) + session_messages = session.messages + artifacts = extract_artifacts(text, session_messages) # Check again for viz payload after task completes if not sent yet viz_payload = current_viz_data.get() @@ -268,17 +414,19 @@ async def nanobot_chat_stream(request: ChatRequest): except Exception as e: pass - # Persist viz payload to session - if viz_payload and nanobot_service.agent: - session = nanobot_service.agent.sessions.get_or_create(request.session_id) - if session.messages and session.messages[-1].get("role") == "assistant": - session.messages[-1]["viz"] = viz_payload - nanobot_service.agent.sessions.save(session) + _persist_assistant_enrichment( + session_id=request.session_id, + viz_payload=viz_payload if 
def test_download_artifact_rejects_outside_paths(tmp_path: Path) -> None:
    """A file that exists outside the artifact roots must be refused with 403.

    The target is created under pytest's temporary directory rather than
    relying on ``/etc/hosts``, which is absent on non-Unix hosts and would
    turn the expected 403 into a 404.
    """
    outside = tmp_path / "outside.txt"
    outside.write_text("secret", encoding="utf-8")

    client = TestClient(app)
    response = client.get("/nanobot/artifacts/download", params={"target": str(outside)})

    assert response.status_code == 403
    assert response.json()["detail"] == "非法路径访问"
test_preview_artifact_returns_unsupported_for_binary() -> None: + uploads_dir = _backend_data_root() / "uploads" + uploads_dir.mkdir(parents=True, exist_ok=True) + sample = uploads_dir / "task2-unsupported.bin" + sample.write_bytes(b"\x00\x01\x02") + + client = TestClient(app) + response = client.get("/nanobot/artifacts/preview", params={"target": f"local://{sample.name}"}) + + assert response.status_code == 415 + assert response.json()["detail"] == "当前文件类型不支持预览,请使用下载" + download = client.get("/nanobot/artifacts/download", params={"target": f"local://{sample.name}"}) + assert download.status_code == 200 + assert download.content == sample.read_bytes() + + +def test_preview_html_supports_directory_resources() -> None: + web_dir = _backend_data_root() / "workspace" / "task2-web" + web_dir.mkdir(parents=True, exist_ok=True) + html_file = web_dir / "index.html" + css_file = web_dir / "styles.css" + html_file.write_text("ok", encoding="utf-8") + css_file.write_text("body{color:#333;}", encoding="utf-8") + + client = TestClient(app) + preview = client.get( + "/nanobot/artifacts/preview", + params={"target": str(html_file)}, + follow_redirects=False, + ) + + assert preview.status_code == 307 + location = preview.headers["location"] + assert location.startswith("/nanobot/artifacts/web/") + + html_response = client.get(location) + assert html_response.status_code == 200 + assert "text/html" in html_response.headers["content-type"] + assert "styles.css" in html_response.text + + css_response = client.get(location.replace("index.html", "styles.css")) + assert css_response.status_code == 200 + assert "text/css" in css_response.headers["content-type"] + assert "color:#333" in css_response.text + + +@pytest.mark.parametrize( + ("filename", "payload", "expected_mime"), + [ + ("task4-image.png", b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR", "image/png"), + ("task4-preview.pdf", b"%PDF-1.4\n1 0 obj\n<<>>\nendobj\n", "application/pdf"), + ( + "task4-preview.pptx", + 
b"PK\x03\x04\x14\x00\x00\x00\x08\x00\x00\x00!\x00", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ), + ], +) +def test_preview_and_download_supported_types(filename: str, payload: bytes, expected_mime: str) -> None: + uploads_dir = _backend_data_root() / "uploads" + uploads_dir.mkdir(parents=True, exist_ok=True) + sample = uploads_dir / filename + sample.write_bytes(payload) + + client = TestClient(app) + preview = client.get("/nanobot/artifacts/preview", params={"target": f"local://{filename}"}) + assert preview.status_code == 200 + assert preview.headers["content-type"].startswith(expected_mime) + + download = client.get("/nanobot/artifacts/download", params={"target": f"local://{filename}"}) + assert download.status_code == 200 + assert download.content == sample.read_bytes() + + +def test_web_preview_missing_resource_returns_error_and_download_still_works() -> None: + web_dir = _backend_data_root() / "workspace" / "task4-web-missing" + web_dir.mkdir(parents=True, exist_ok=True) + html_file = web_dir / "index.html" + html_file.write_text("ok", encoding="utf-8") + + client = TestClient(app) + preview = client.get( + "/nanobot/artifacts/preview", + params={"target": str(html_file)}, + follow_redirects=False, + ) + assert preview.status_code == 307 + location = preview.headers["location"] + + missing = client.get(location.replace("index.html", "missing.js")) + assert missing.status_code == 404 + assert missing.json()["detail"] == "Web 资源不存在" + + download = client.get("/nanobot/artifacts/download", params={"target": str(html_file)}) + assert download.status_code == 200 + assert download.content == html_file.read_bytes() diff --git a/backend/tests/test_artifacts.py b/backend/tests/test_artifacts.py new file mode 100644 index 0000000..29e792e --- /dev/null +++ b/backend/tests/test_artifacts.py @@ -0,0 +1,55 @@ +from pathlib import Path + +from app.core.artifacts import extract_artifacts + + +def _backend_data_root() -> Path: + return 
Path(__file__).resolve().parents[1] / "data" + + +def test_extract_artifacts_from_local_and_tool_paths() -> None: + data_root = _backend_data_root() + uploads_dir = data_root / "uploads" + workspace_dir = data_root / "workspace" / "reports" + uploads_dir.mkdir(parents=True, exist_ok=True) + workspace_dir.mkdir(parents=True, exist_ok=True) + + upload_file = uploads_dir / "task1-sample.csv" + upload_file.write_text("a,b\n1,2\n", encoding="utf-8") + report_file = workspace_dir / "task1-report.html" + report_file.write_text("ok", encoding="utf-8") + + content = "请下载 local://task1-sample.csv" + session_messages = [ + {"role": "user", "content": "生成报告"}, + {"role": "tool", "content": f"输出文件:{report_file}"}, + ] + + artifacts = extract_artifacts(content, session_messages) + + by_name = {item["name"]: item for item in artifacts} + assert "task1-sample.csv" in by_name + assert "task1-report.html" in by_name + assert by_name["task1-sample.csv"]["download_url"].startswith("/nanobot/artifacts/download?target=") + assert by_name["task1-sample.csv"]["previewable"] is True + assert by_name["task1-report.html"]["previewable"] is True + assert by_name["task1-report.html"]["preview_url"].startswith("/nanobot/artifacts/preview?target=") + + +def test_extract_artifacts_deduplicate_and_skip_missing() -> None: + data_root = _backend_data_root() + workspace_dir = data_root / "workspace" + workspace_dir.mkdir(parents=True, exist_ok=True) + + pdf_file = workspace_dir / "task1-dedup.pdf" + pdf_file.write_bytes(b"%PDF-1.4 test") + missing_file = workspace_dir / "task1-missing.pdf" + + content = f"{pdf_file} and {pdf_file} and {missing_file}" + artifacts = extract_artifacts(content, []) + + assert len(artifacts) == 1 + item = artifacts[0] + assert item["name"] == "task1-dedup.pdf" + assert item["mime_type"] == "application/pdf" + assert item["previewable"] is True diff --git a/frontend/src/components/ChatInterface.tsx b/frontend/src/components/ChatInterface.tsx index 23eb468..f5e409a 100644 
--- a/frontend/src/components/ChatInterface.tsx +++ b/frontend/src/components/ChatInterface.tsx @@ -1,6 +1,6 @@ import { useState, useRef, useEffect } from "react"; import { ScrollArea } from "@/components/ui/scroll-area"; -import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink } from "lucide-react"; +import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink, FileText, Download, Eye } from "lucide-react"; import { api } from "@/lib/api"; import { type ChartSpec } from "@/store/visualizationStore"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; @@ -14,6 +14,7 @@ import { useTranslation } from "react-i18next"; import { InlineVisualizationCard } from "./InlineVisualizationCard"; import { useProjectStore } from "@/store/projectStore"; import { SlashCommandMenu } from "./SlashCommandMenu"; +import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"; interface Message { id: string; @@ -24,6 +25,7 @@ interface Message { progressLogs?: string[]; routeInfo?: string; reasoningContent?: string; + artifacts?: MessageArtifact[]; } interface MessageViz { @@ -35,6 +37,21 @@ interface MessageViz { error?: string | null; } +interface MessageArtifact { + name: string; + mime_type: string; + size: number; + download_url: string; + previewable: boolean; + preview_url?: string; +} + +interface ArtifactPreviewTarget { + name: string; + mimeType: string; + previewUrl: string; +} + const REPORT_HTML_BLOCK_REGEX = /([\s\S]*?)/i; const splitReportHtml = (content: string): { markdown: string; reportHtml: string | null } => { @@ -97,6 +114,56 @@ interface SessionData { }>; } +const formatArtifactSize = (size: number): string => { + if (!Number.isFinite(size) || size < 0) return "0 B"; + const units = ["B", "KB", "MB", "GB", "TB"]; + let value = size; + let 
unitIndex = 0; + while (value >= 1024 && unitIndex < units.length - 1) { + value /= 1024; + unitIndex += 1; + } + const fixed = value >= 10 || unitIndex === 0 ? 0 : 1; + return `${value.toFixed(fixed)} ${units[unitIndex]}`; +}; + +const normalizeArtifacts = (raw: unknown): MessageArtifact[] => { + if (!Array.isArray(raw)) return []; + return raw.reduce((acc, item) => { + if (!item || typeof item !== "object") return acc; + const source = item as Record; + const name = typeof source.name === "string" ? source.name : ""; + const mimeType = typeof source.mime_type === "string" + ? source.mime_type + : typeof source.mimeType === "string" + ? source.mimeType + : "application/octet-stream"; + const size = typeof source.size === "number" ? source.size : 0; + const downloadUrl = typeof source.download_url === "string" + ? source.download_url + : typeof source.downloadUrl === "string" + ? source.downloadUrl + : ""; + const previewable = Boolean(source.previewable); + const previewUrl = typeof source.preview_url === "string" + ? source.preview_url + : typeof source.previewUrl === "string" + ? 
source.previewUrl + : undefined; + if (!name || !downloadUrl) return acc; + const normalized: MessageArtifact = { + name, + mime_type: mimeType, + size, + download_url: downloadUrl, + previewable, + preview_url: previewUrl, + }; + acc.push(normalized); + return acc; + }, []); +}; + export function ChatInterface() { const { t } = useTranslation(); const [messagesBySession, setMessagesBySession] = useState>({}); @@ -105,6 +172,7 @@ export function ChatInterface() { const [availableSkills, setAvailableSkills] = useState([]); const [selectedSkillIds, setSelectedSkillIds] = useState([]); const [isMenuOpen, setIsMenuOpen] = useState(false); + const [artifactPreview, setArtifactPreview] = useState(null); const scrollRef = useRef(null); const location = useLocation(); const { currentProject } = useProjectStore(); @@ -294,6 +362,7 @@ export function ChatInterface() { role: m.role as 'user' | 'assistant', content: cleanContent, viz: m.viz ? buildMessageViz(m.viz) : undefined, + artifacts: normalizeArtifacts(m.artifacts), }; }); setMessagesForSession(activeSessionKey, formattedMessages); @@ -643,6 +712,7 @@ export function ChatInterface() { selected?: string; reason?: string; chart?: { chart_spec?: ChartSpec | null; reasoning?: string; can_visualize?: boolean; chart_type?: string } | null; + artifacts?: unknown; }; if (payload.type === "delta" && payload.content) { @@ -667,14 +737,17 @@ export function ChatInterface() { pushProgressLog(payload.content, payload.is_reasoning || false); } - if (payload.type === "final" && payload.content) { + if (payload.type === "final") { hasFinalPayload = true; - streamedText = payload.content; + if (typeof payload.content === "string") { + streamedText = payload.content; + } flushAssistant(true); pushProgressLog(t('answerGenerationCompleted')); + const messageArtifacts = normalizeArtifacts(payload.artifacts); setMessagesForSession(targetSessionKey, (prev) => prev.map((msg) => - msg.id === assistantId ? 
{ ...msg, content: payload.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz } : msg + msg.id === assistantId ? { ...msg, content: typeof payload.content === "string" ? payload.content : msg.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz, artifacts: messageArtifacts.length > 0 ? messageArtifacts : msg.artifacts } : msg ) ); } @@ -1059,6 +1132,43 @@ export function ChatInterface() { ) : null} + {msg.artifacts && msg.artifacts.length > 0 ? ( +
+ {msg.artifacts.map((artifact, artifactIndex) => ( +
+
+
+ +
+
+
{artifact.name}
+
{formatArtifactSize(artifact.size)}
+
+
+
+ {artifact.previewable && artifact.preview_url ? ( + + ) : null} + + + {t('download')} + +
+
+ ))} +
+ ) : null} {msg.viz ? (
@@ -1243,6 +1353,30 @@ export function ChatInterface() {
)} + { + if (!open) setArtifactPreview(null); + }}> + + + {artifactPreview?.name || t('artifactPreview')} + +
+ {artifactPreview?.mimeType.startsWith("image/") ? ( + {artifactPreview.name} + ) : artifactPreview ? ( +