feat: add artifact

2026-03-27 15:10:33 +08:00
parent ec585ae730
commit 37070d7896
10 changed files with 768 additions and 31 deletions
@@ -7,8 +7,9 @@ from pathlib import Path

-# Add project root to sys.path
+# Add the nanobot directory under the project root to sys.path
 PROJECT_ROOT = Path(__file__).resolve().parents[3]
-if str(PROJECT_ROOT) not in sys.path:
-    sys.path.append(str(PROJECT_ROOT))
+NANOBOT_ROOT = PROJECT_ROOT / "nanobot"
+if str(NANOBOT_ROOT) not in sys.path:
+    sys.path.append(str(NANOBOT_ROOT))

 from nanobot.providers.litellm_provider import LiteLLMProvider
 from app.schemas.chart import ChartGenerationResponse
@@ -15,8 +15,9 @@ logger = logging.getLogger(__name__)

-# Add project root to sys.path to allow importing nanobot
+# Add the nanobot directory under the project root to sys.path to allow importing nanobot
 PROJECT_ROOT = Path(__file__).resolve().parents[3]
-if str(PROJECT_ROOT) not in sys.path:
-    sys.path.append(str(PROJECT_ROOT))
+NANOBOT_ROOT = PROJECT_ROOT / "nanobot"
+if str(NANOBOT_ROOT) not in sys.path:
+    sys.path.append(str(NANOBOT_ROOT))

 from nanobot.providers.litellm_provider import LiteLLMProvider
 from app.connectors.postgres import postgres_connector
@@ -0,0 +1,202 @@
+import mimetypes
+import re
+from pathlib import Path
+from typing import Any, Iterable
+from urllib.parse import quote
+
+from pydantic import BaseModel
+
+
+# Matches a local:// URI up to the first whitespace, angle bracket, quote, or closing bracket.
+LOCAL_URI_PATTERN = re.compile(r"local://[^\s<>'\"\]\)\}]+")
+# Matches file-like paths that end in a 1-12 character alphanumeric extension:
+# a Windows drive path, a POSIX absolute path, or a relative path with at
+# least one directory segment.
+PATH_PATTERN = re.compile(
+    r"(?:[A-Za-z]:[\\/][^\s<>'\"]+\.[A-Za-z0-9]{1,12}|/[^\s<>'\"]+\.[A-Za-z0-9]{1,12}|(?:\.\./|\.?/)?(?:[\w\-.]+[\\/])+[\w\-.]+\.[A-Za-z0-9]{1,12})"
+)
+# Matches data/data/<name>.<ext> (either slash style, case-insensitive).
+REPORT_PATH_PATTERN = re.compile(r"data[\\/]data[\\/][\w\-.]+\.[A-Za-z0-9]{1,12}", re.IGNORECASE)
+# File suffixes that _is_previewable accepts regardless of the guessed MIME type.
+PREVIEWABLE_EXTENSIONS = {
+    ".html",
+    ".htm",
+    ".pdf",
+    ".pptx",
+    ".txt",
+    ".md",
+    ".json",
+    ".csv",
+    ".tsv",
+    ".yaml",
+    ".yml",
+    ".xml",
+    ".log",
+}
+
+
+class ArtifactPayload(BaseModel):
+    """Description of one file artifact as returned by extract_artifacts."""
+
+    # File name without directory components.
+    name: str
+    # Guessed MIME type; "application/octet-stream" when the guess fails.
+    mime_type: str
+    # File size in bytes as reported by stat().
+    size: int
+    # Relative URL of the download endpoint for this artifact.
+    download_url: str
+    # Whether a preview URL is provided.
+    previewable: bool
+    # Relative URL of the preview endpoint; None when not previewable.
+    preview_url: str | None = None
+
+
+def extract_artifacts(content: str, session_messages: list[dict[str, Any]] | None = None) -> list[dict[str, Any]]:
+    """Find files referenced in a reply and describe them as artifact payloads.
+
+    Locators are collected from *content* and from the session messages that
+    follow the last user message, in first-seen order. A locator becomes an
+    artifact only when it resolves to an existing regular file located under
+    the workspace, uploads, or reports directory; anything else is skipped so
+    that no link is emitted for a file outside those directories and no
+    metadata about such files is exposed.
+
+    Args:
+        content: Final assistant text to scan.
+        session_messages: Optional session history to scan as well.
+
+    Returns:
+        One dict per distinct file (ArtifactPayload dumped without None fields).
+    """
+    candidates = _collect_candidate_texts(content, session_messages or [])
+    ordered_locators: list[str] = []
+    seen_locators: set[str] = set()
+    for text in candidates:
+        for locator in _extract_locators(text):
+            if locator not in seen_locators:
+                seen_locators.add(locator)
+                ordered_locators.append(locator)
+
+    data_root = Path(__file__).resolve().parents[2] / "data"
+    allowed_roots = tuple((data_root / name).resolve() for name in ("workspace", "uploads", "data"))
+
+    artifacts: list[dict[str, Any]] = []
+    seen_paths: set[Path] = set()
+    for locator in ordered_locators:
+        try:
+            path = _resolve_locator(locator)
+            if path is None:
+                continue
+            resolved = path.resolve()
+            if resolved in seen_paths or not resolved.is_file():
+                continue
+            if not any(resolved.is_relative_to(root) for root in allowed_roots):
+                continue
+            artifact = _build_artifact_payload(locator, resolved)
+        except (OSError, ValueError):
+            # Locators come from free text; an over-long or malformed match
+            # must not fail the whole chat response.
+            continue
+        seen_paths.add(resolved)
+        artifacts.append(artifact.model_dump(exclude_none=True))
+    return artifacts
+
+
+def _build_artifact_payload(locator: str, path: Path) -> ArtifactPayload:
+    """Build the payload for *path*, using the fully quoted *locator* as the URL target."""
+    target = quote(locator, safe="")
+    mime = _guess_mime_type(path)
+    can_preview = _is_previewable(path, mime)
+    fields = {
+        "name": path.name,
+        "mime_type": mime,
+        "size": path.stat().st_size,
+        "download_url": f"/nanobot/artifacts/download?target={target}",
+        "previewable": can_preview,
+        # Only previewable files get a preview link.
+        "preview_url": f"/nanobot/artifacts/preview?target={target}" if can_preview else None,
+    }
+    return ArtifactPayload(**fields)
+
+
+def _guess_mime_type(path: Path) -> str:
+    """Guess the MIME type from the file name, defaulting to a generic binary type."""
+    guessed = mimetypes.guess_type(path.name)[0]
+    if not guessed:
+        return "application/octet-stream"
+    return guessed
+
+
+def _is_previewable(path: Path, mime_type: str) -> bool:
+    """Return True for images, text, known previewable suffixes, PDF and PPTX."""
+    if mime_type.startswith(("image/", "text/")):
+        return True
+    if path.suffix.lower() in PREVIEWABLE_EXTENSIONS:
+        return True
+    document_types = (
+        "application/pdf",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    )
+    return mime_type in document_types
+
+
+def _collect_candidate_texts(content: str, session_messages: list[dict[str, Any]]) -> list[str]:
+    """Return *content* followed by the flattened text of the latest turn.
+
+    The latest turn is every message after the last one whose role is "user";
+    when no user message exists, all messages are used.
+    """
+    texts = [content or ""]
+    if not session_messages:
+        return texts
+    user_positions = [
+        position
+        for position, entry in enumerate(session_messages)
+        if entry.get("role") == "user"
+    ]
+    tail = session_messages[user_positions[-1] + 1 :] if user_positions else session_messages
+    for entry in tail:
+        flat = _flatten_content(entry.get("content"))
+        if flat:
+            texts.append(flat)
+    return texts
+
+
+def _extract_locators(text: str) -> Iterable[str]:
+    """Return the distinct normalized locators found in *text*, in match order.
+
+    Patterns are applied in the order local:// URIs, report paths, generic paths.
+    """
+    if not text:
+        return []
+    # A dict keeps first-seen order while discarding duplicates.
+    unique: dict[str, None] = {}
+    for pattern in (LOCAL_URI_PATTERN, REPORT_PATH_PATTERN, PATH_PATTERN):
+        for raw in pattern.findall(text):
+            cleaned = _normalize_locator(raw)
+            if cleaned:
+                unique.setdefault(cleaned, None)
+    return list(unique)
+
+
+def _normalize_locator(raw_locator: str) -> str:
+    """Strip surrounding quotes and trailing sentence punctuation from a locator.
+
+    Quotes, backticks and punctuation are removed from the right end as one
+    set, so a closing quote that is followed by punctuation no longer
+    survives. Full-width CJK punctuation is trimmed as well because the
+    surrounding text is frequently Chinese.
+
+    Args:
+        raw_locator: Text matched by one of the locator patterns.
+
+    Returns:
+        The cleaned locator; may be empty.
+    """
+    locator = raw_locator.strip().lstrip("`'\"")
+    return locator.rstrip("`'\".,;:!?)]}。，；：！？、）】》」』”’")
+
+
+def _resolve_locator(locator: str) -> Path | None:
+    """Map a locator to a filesystem path, or None when nothing matches.
+
+    local:// locators are looked up under workspace, reports and uploads (and
+    finally by bare file name under uploads, which is also the fallback).
+    Other locators are returned as-is when absolute, anchored at the backend
+    root when they start with data/data/, and otherwise searched under the
+    workspace, data and backend directories.
+    """
+    backend_root = Path(__file__).resolve().parents[2]
+    data_root = backend_root / "data"
+    workspace_root = data_root / "workspace"
+    uploads_root = data_root / "uploads"
+    reports_root = data_root / "data"
+
+    scheme = "local://"
+    if locator.startswith(scheme):
+        remainder = locator[len(scheme) :].strip().lstrip("/\\")
+        if not remainder:
+            return None
+        relative = Path(remainder)
+        if relative.is_absolute():
+            return relative
+        fallback = uploads_root / relative.name
+        options = (workspace_root / relative, reports_root / relative, uploads_root / relative, fallback)
+        for option in options:
+            if option.exists():
+                return option
+        return fallback
+
+    direct = Path(locator)
+    if direct.is_absolute():
+        return direct
+    posix_style = locator.replace("\\", "/")
+    if posix_style.startswith("data/data/"):
+        return backend_root / posix_style
+    search_bases = (workspace_root, data_root, backend_root)
+    return next((base / posix_style for base in search_bases if (base / posix_style).exists()), None)
+
+
+def _flatten_content(value: Any) -> str:
+    """Flatten message content (str, list, dict, or other) into newline-joined text.
+
+    Lists are flattened item by item, skipping empty results. Dicts contribute
+    their "text" string, their flattened "content", and any string found under
+    "path", "file", "file_path" or "url". Any other value is passed to str().
+    """
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if isinstance(value, list):
+        parts = (_flatten_content(item) for item in value)
+        return "\n".join(part for part in parts if part)
+    if isinstance(value, dict):
+        pieces: list[str] = []
+        text = value.get("text")
+        if isinstance(text, str):
+            pieces.append(text)
+        inner = value.get("content")
+        if inner is not None:
+            flat_inner = _flatten_content(inner)
+            if flat_inner:
+                pieces.append(flat_inner)
+        pieces.extend(
+            value[key]
+            for key in ("path", "file", "file_path", "url")
+            if isinstance(value.get(key), str)
+        )
+        return "\n".join(pieces)
+    return str(value)
@@ -2,6 +2,15 @@ import os
 from pathlib import Path
 from typing import Optional

+
+# Directory two levels above this module's directory.
+backend_root = Path(__file__).resolve().parents[2]
+data_root = backend_root / "data"
+workspace_root = data_root / "workspace"
+uploads_root = data_root / "uploads"
+reports_root = data_root / "data"
+# ensure_artifact_access only accepts paths located under one of these directories.
+allowed_artifact_roots = (workspace_root, uploads_root, reports_root)
+
+
 def resolve_upload_file_path(file_url: Optional[str]) -> Path:
    if not file_url:
        raise ValueError("File URL is empty")
@@ -9,10 +18,60 @@ def resolve_upload_file_path(file_url: Optional[str]) -> Path:
    if file_url.startswith("local://"):
        raw_name = file_url.replace("local://", "", 1)
        safe_name = os.path.basename(raw_name)
-        # Assuming we are in backend/app/core, go up to backend/data/uploads
-        upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
-        file_path = upload_dir / safe_name
+        file_path = uploads_root / safe_name
        return file_path

-    # If it's already an absolute path (or relative path not starting with local://)
    return Path(file_url)
+
+
+def resolve_artifact_target(target: str) -> Path | None:
+    """Translate a download/preview target into a candidate path.
+
+    No access check is performed here. Returns None for an empty target or
+    when a relative target matches nothing under the searched directories.
+    """
+    locator = (target or "").strip().strip("'\"")
+    if not locator:
+        return None
+
+    scheme = "local://"
+    if locator.startswith(scheme):
+        remainder = locator[len(scheme) :].strip().lstrip("/\\")
+        if not remainder:
+            return None
+        relative = Path(remainder)
+        if relative.is_absolute():
+            return relative
+        # Bare file name under uploads is both the last lookup and the fallback.
+        fallback = uploads_root / relative.name
+        for option in (workspace_root / relative, reports_root / relative, uploads_root / relative, fallback):
+            if option.exists():
+                return option
+        return fallback
+
+    direct = Path(locator)
+    if direct.is_absolute():
+        return direct
+    posix_style = locator.replace("\\", "/")
+    if posix_style.startswith("data/data/"):
+        return backend_root / posix_style
+    for base in (workspace_root, data_root, backend_root):
+        option = base / posix_style
+        if option.exists():
+            return option
+    return None
+
+
+def ensure_artifact_access(path: Path, *, require_file: bool = True) -> Path:
+    """Resolve *path* and confirm it lies inside an allowed artifact root.
+
+    Containment is checked before existence, so a caller cannot use the
+    difference between "forbidden" and "not found" to learn whether a path
+    outside the allowed roots exists.
+
+    Args:
+        path: Candidate path; may be relative, contain "..", or be a symlink.
+        require_file: Expect a regular file when True, a directory when False.
+
+    Returns:
+        The resolved absolute path.
+
+    Raises:
+        PermissionError: The resolved path is outside every allowed root.
+        FileNotFoundError: The path cannot be resolved, does not exist, or is
+            not of the requested kind.
+    """
+    try:
+        # Non-strict resolution still follows symlinks on existing components,
+        # so a link that points outside the roots is rejected below.
+        resolved = path.resolve()
+    except (OSError, ValueError, RuntimeError) as exc:
+        # For example an embedded NUL byte, an over-long name, or a symlink loop.
+        raise FileNotFoundError("目标文件不存在") from exc
+    if not any(resolved.is_relative_to(root.resolve()) for root in allowed_artifact_roots):
+        raise PermissionError("非法路径访问")
+    if not resolved.exists():
+        raise FileNotFoundError("目标文件不存在")
+    if require_file and not resolved.is_file():
+        raise FileNotFoundError("目标文件不存在")
+    if not require_file and not resolved.is_dir():
+        raise FileNotFoundError("目标目录不存在")
+    return resolved
@@ -1,8 +1,13 @@
 import asyncio
+import base64
+import binascii
 from typing import Any, Dict, List, Optional, Literal, Tuple
-from fastapi import FastAPI, HTTPException
+import mimetypes
+from pathlib import Path
+
+from fastapi import FastAPI, HTTPException, Query
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import StreamingResponse
+from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
@@ -14,6 +19,8 @@ from datetime import datetime
 from app.api import upload, llm, skills, users, datasources, projects, semantic
 from app.connectors.postgres import postgres_connector
 from app.connectors.clickhouse import clickhouse_connector
+from app.core.artifacts import extract_artifacts
+from app.core.files import ensure_artifact_access, resolve_artifact_target
 from app.core.nanobot import nanobot_service
 from app.core.session_alias_store import session_alias_store
 from app.context import current_session_id, current_progress_callback, current_viz_data, current_data_source, current_file_url
@@ -50,6 +57,17 @@ app.include_router(datasources.router, prefix="/api/v1")
 app.include_router(semantic.router, prefix="/api/v1")

 STREAM_DELTA_CHUNK_SIZE = 48
+# Text-like suffixes that _is_previewable accepts whatever MIME type was guessed.
+PREVIEWABLE_TEXT_EXTENSIONS = {
+    ".txt",
+    ".md",
+    ".json",
+    ".csv",
+    ".tsv",
+    ".yaml",
+    ".yml",
+    ".xml",
+    ".log",
+}

@app.on_event("startup")
 async def startup_event():
@@ -85,6 +103,100 @@ def nanobot_status():
        return {"status": "running", "model": nanobot_service.agent.model}
    return {"status": "stopped"}

+
+def _guess_mime_type(path: os.PathLike[str] | str) -> str:
+    """Guess the MIME type from the path text, defaulting to a generic binary type."""
+    guessed = mimetypes.guess_type(str(path))[0]
+    return guessed if guessed else "application/octet-stream"
+
+
+def _resolve_checked_target(target: str) -> os.PathLike[str]:
+    """Resolve *target* to an accessible file or raise the matching HTTP error.
+
+    Raises HTTPException 404 when the target is unknown or missing and 403
+    when the access check rejects it.
+    """
+    candidate = resolve_artifact_target(target)
+    if candidate is None:
+        raise HTTPException(status_code=404, detail="目标文件不存在")
+    try:
+        checked = ensure_artifact_access(candidate, require_file=True)
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="目标文件不存在")
+    except PermissionError:
+        raise HTTPException(status_code=403, detail="非法路径访问")
+    return checked
+
+
+def _is_previewable(path: os.PathLike[str], mime_type: str) -> bool:
+    """Return True for HTML/PDF/PPTX, known text suffixes, and image or text MIME types."""
+    extension = os.path.splitext(str(path))[1].lower()
+    document_suffixes = {".html", ".htm", ".pdf", ".pptx"}
+    if extension in document_suffixes or extension in PREVIEWABLE_TEXT_EXTENSIONS:
+        return True
+    return mime_type.startswith(("image/", "text/"))
+
+
+def _encode_web_root(path: Path) -> str:
+    """Encode a directory path as unpadded URL-safe base64 text."""
+    raw = str(path).encode("utf-8")
+    token = base64.urlsafe_b64encode(raw).decode("utf-8")
+    # Padding is dropped here and restored by the decoder.
+    return token.rstrip("=")
+
+
+def _decode_web_root(token: str) -> Path:
+    """Decode an unpadded URL-safe base64 token back into a path.
+
+    Raises HTTPException 400 when the token is not valid base64 or does not
+    decode to UTF-8 text.
+    """
+    missing = -len(token) % 4
+    padded = f"{token}{'=' * missing}"
+    try:
+        raw = base64.urlsafe_b64decode(padded.encode("utf-8"))
+        text = raw.decode("utf-8")
+    except (binascii.Error, UnicodeDecodeError):
+        raise HTTPException(status_code=400, detail="非法预览目录标识")
+    return Path(text)
+
+
+@app.get("/nanobot/artifacts/download")
+def download_artifact(target: str = Query(...)):
+    """Send the file addressed by *target* as a generic binary response with its file name."""
+    file_path = str(_resolve_checked_target(target))
+    return FileResponse(
+        path=file_path,
+        media_type="application/octet-stream",
+        filename=os.path.basename(file_path),
+    )
+
+
+@app.get("/nanobot/artifacts/preview")
+def preview_artifact(target: str = Query(...)):
+    """Serve the file addressed by *target* inline, or redirect HTML to the web route.
+
+    HTML files are redirected to /nanobot/artifacts/web/<token>/<name> so that
+    relative resources next to the page resolve against the same directory.
+    Responds with 415 when the file type is not previewable.
+    """
+    # Imported locally so the handler does not rely on the module import block.
+    from urllib.parse import quote
+
+    resolved = _resolve_checked_target(target)
+    mime_type = _guess_mime_type(resolved)
+    if not _is_previewable(resolved, mime_type):
+        raise HTTPException(status_code=415, detail="当前文件类型不支持预览，请使用下载")
+    suffix = os.path.splitext(str(resolved))[1].lower()
+    if suffix in {".html", ".htm"}:
+        root_token = _encode_web_root(Path(resolved).parent)
+        # Percent-encode the name: a raw "#", "?" or "%" in a file name would
+        # otherwise be read as a fragment, a query string or a broken escape.
+        entry = quote(Path(resolved).name)
+        return RedirectResponse(url=f"/nanobot/artifacts/web/{root_token}/{entry}", status_code=307)
+    return FileResponse(
+        path=str(resolved),
+        media_type=mime_type,
+        filename=os.path.basename(str(resolved)),
+        content_disposition_type="inline",
+    )
+
+
+@app.get("/nanobot/artifacts/web/{root_token}/{resource_path:path}")
+def preview_web_artifact_resource(root_token: str, resource_path: str):
+    """Serve one file from the directory encoded in *root_token*.
+
+    Both the directory and the requested file go through the artifact access
+    check, and the file must additionally stay inside the directory.
+    """
+    requested_root = _decode_web_root(root_token)
+    try:
+        safe_root = ensure_artifact_access(requested_root, require_file=False)
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="Web 预览目录不存在")
+    except PermissionError:
+        raise HTTPException(status_code=403, detail="非法路径访问")
+
+    joined = Path(os.path.join(str(safe_root), resource_path))
+    try:
+        resource = ensure_artifact_access(joined, require_file=True)
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="Web 资源不存在")
+    except PermissionError:
+        raise HTTPException(status_code=403, detail="非法路径访问")
+
+    # Reject resources that resolve outside the preview directory itself.
+    if not Path(resource).is_relative_to(Path(safe_root)):
+        raise HTTPException(status_code=403, detail="非法路径访问")
+
+    resource_text = str(resource)
+    return FileResponse(
+        path=resource_text,
+        media_type=_guess_mime_type(resource),
+        filename=os.path.basename(resource_text),
+        content_disposition_type="inline",
+    )
+
 class ChatRequest(BaseModel):
    message: str
    session_id: str = "api:default"
@@ -127,6 +239,27 @@ class SessionFileContextUpdateRequest(BaseModel):
    active_data_file: Optional[Dict[str, Any]] = None
    selected_data_source: Optional[str] = None

+
+def _persist_assistant_enrichment(
+    session_id: str,
+    viz_payload: Optional[Dict[str, Any]] = None,
+    artifacts: Optional[List[Dict[str, Any]]] = None,
+) -> None:
+    """Attach viz and artifact data to the session's last assistant message.
+
+    Does nothing when no agent is available, when the last message is not an
+    assistant message, or when both payloads are empty. The session is saved
+    only if something was attached.
+    """
+    agent = nanobot_service.agent
+    if not agent:
+        return
+    session = agent.sessions.get_or_create(session_id)
+    history = session.messages
+    if not history or history[-1].get("role") != "assistant":
+        return
+    last_message = history[-1]
+    dirty = False
+    for key, value in (("viz", viz_payload), ("artifacts", artifacts)):
+        if value:
+            last_message[key] = value
+            dirty = True
+    if dirty:
+        agent.sessions.save(session)
+
@app.post("/nanobot/chat")
 async def nanobot_chat(request: ChatRequest):
    try:
@@ -154,20 +287,28 @@ async def nanobot_chat(request: ChatRequest):
            skill_ids=request.skill_ids,
            model_id=request.model_id,
        )
+        text = response or ""
+        session_messages = []
+        if nanobot_service.agent:
+            session = nanobot_service.agent.sessions.get_or_create(request.session_id)
+            session_messages = session.messages
+        artifacts = extract_artifacts(text, session_messages)

        viz_payload = current_viz_data.get()
-        if viz_payload and nanobot_service.agent:
-            # Update the last assistant message with viz data
-            session = nanobot_service.agent.sessions.get_or_create(request.session_id)
-            if session.messages and session.messages[-1].get("role") == "assistant":
-                session.messages[-1]["viz"] = viz_payload
-                nanobot_service.agent.sessions.save(session)
+        _persist_assistant_enrichment(
+            session_id=request.session_id,
+            viz_payload=viz_payload if isinstance(viz_payload, dict) else None,
+            artifacts=artifacts,
+        )

-        return {
-            "response": response,
+        payload = {
+            "response": text,
            "viz": viz_payload,
            "routing": {"selected": "agent", "reason": "auto_routed_by_agent"},
        }
+        if artifacts:
+            payload["artifacts"] = artifacts
+        return payload
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@@ -252,6 +393,11 @@ async def nanobot_chat_stream(request: ChatRequest):

            response = await current_task
            text = response or ""
+            session_messages = []
+            if nanobot_service.agent:
+                session = nanobot_service.agent.sessions.get_or_create(request.session_id)
+                session_messages = session.messages
+            artifacts = extract_artifacts(text, session_messages)

            # Check again for viz payload after task completes if not sent yet
            viz_payload = current_viz_data.get()
@@ -268,17 +414,19 @@ async def nanobot_chat_stream(request: ChatRequest):
                except Exception as e:
                    pass

-            # Persist viz payload to session
-            if viz_payload and nanobot_service.agent:
-                session = nanobot_service.agent.sessions.get_or_create(request.session_id)
-                if session.messages and session.messages[-1].get("role") == "assistant":
-                    session.messages[-1]["viz"] = viz_payload
-                    nanobot_service.agent.sessions.save(session)
+            _persist_assistant_enrichment(
+                session_id=request.session_id,
+                viz_payload=viz_payload if isinstance(viz_payload, dict) else None,
+                artifacts=artifacts,
+            )
            
            # Since true streaming is enabled via StreamingLiteLLMProvider, 
            # we no longer need to chunk and yield `text` here.
            # Just yield the final text to signal completion and update final state.
-            yield f"data: {json.dumps({'type': 'final', 'content': text}, ensure_ascii=False)}\n\n"
+            final_payload = {"type": "final", "content": text}
+            if artifacts:
+                final_payload["artifacts"] = artifacts
+            yield f"data: {json.dumps(final_payload, ensure_ascii=False)}\n\n"
            yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n"
        except asyncio.CancelledError:
            raise
@@ -0,0 +1,131 @@
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+from main import app
+
+
+def _backend_data_root() -> Path:
+    """Return the data directory that sits next to this test file's parent directory."""
+    here = Path(__file__).resolve()
+    return here.parent.parent / "data"
+
+
+def test_download_artifact_within_whitelist() -> None:
+    """A file under the uploads directory is downloaded as an attachment, byte for byte."""
+    uploads_dir = _backend_data_root() / "uploads"
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+    sample = uploads_dir / "task2-download.csv"
+    sample.write_text("id,name\n1,a\n", encoding="utf-8")
+    try:
+        client = TestClient(app)
+        response = client.get("/nanobot/artifacts/download", params={"target": "local://task2-download.csv"})
+
+        assert response.status_code == 200
+        assert response.headers["content-type"].startswith("application/octet-stream")
+        assert response.headers["content-disposition"].startswith("attachment;")
+        assert response.content == sample.read_bytes()
+    finally:
+        # Do not leave fixtures behind in the shared data directory.
+        sample.unlink(missing_ok=True)
+
+
+def test_download_artifact_rejects_outside_paths() -> None:
+    """An existing absolute path outside the artifact directories is refused with 403."""
+    # This test file exists on every platform and lives outside the data
+    # directory, unlike a hard-coded POSIX path such as /etc/hosts.
+    outside_target = str(Path(__file__).resolve())
+
+    client = TestClient(app)
+    response = client.get("/nanobot/artifacts/download", params={"target": outside_target})
+
+    assert response.status_code == 403
+    assert response.json()["detail"] == "非法路径访问"
+
+
+def test_preview_artifact_returns_unsupported_for_binary() -> None:
+    """A .bin file cannot be previewed (415) but can still be downloaded."""
+    uploads_dir = _backend_data_root() / "uploads"
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+    sample = uploads_dir / "task2-unsupported.bin"
+    sample.write_bytes(b"\x00\x01\x02")
+    try:
+        client = TestClient(app)
+        response = client.get("/nanobot/artifacts/preview", params={"target": f"local://{sample.name}"})
+
+        assert response.status_code == 415
+        assert response.json()["detail"] == "当前文件类型不支持预览，请使用下载"
+        download = client.get("/nanobot/artifacts/download", params={"target": f"local://{sample.name}"})
+        assert download.status_code == 200
+        assert download.content == sample.read_bytes()
+    finally:
+        # Do not leave fixtures behind in the shared data directory.
+        sample.unlink(missing_ok=True)
+
+
+def test_preview_html_supports_directory_resources() -> None:
+    """Previewing HTML redirects to the web route, which also serves sibling files."""
+    import shutil
+
+    web_dir = _backend_data_root() / "workspace" / "task2-web"
+    web_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        html_file = web_dir / "index.html"
+        css_file = web_dir / "styles.css"
+        html_file.write_text("<html><head><link rel='stylesheet' href='styles.css'></head><body>ok</body></html>", encoding="utf-8")
+        css_file.write_text("body{color:#333;}", encoding="utf-8")
+
+        client = TestClient(app)
+        preview = client.get(
+            "/nanobot/artifacts/preview",
+            params={"target": str(html_file)},
+            follow_redirects=False,
+        )
+
+        assert preview.status_code == 307
+        location = preview.headers["location"]
+        assert location.startswith("/nanobot/artifacts/web/")
+
+        html_response = client.get(location)
+        assert html_response.status_code == 200
+        assert "text/html" in html_response.headers["content-type"]
+        assert "styles.css" in html_response.text
+
+        css_response = client.get(location.replace("index.html", "styles.css"))
+        assert css_response.status_code == 200
+        assert "text/css" in css_response.headers["content-type"]
+        assert "color:#333" in css_response.text
+    finally:
+        # Remove the fixture directory so repeated runs start clean.
+        shutil.rmtree(web_dir, ignore_errors=True)
+
+
+@pytest.mark.parametrize(
+    ("filename", "payload", "expected_mime"),
+    [
+        ("task4-image.png", b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR", "image/png"),
+        ("task4-preview.pdf", b"%PDF-1.4\n1 0 obj\n<<>>\nendobj\n", "application/pdf"),
+        (
+            "task4-preview.pptx",
+            b"PK\x03\x04\x14\x00\x00\x00\x08\x00\x00\x00!\x00",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        ),
+    ],
+)
+def test_preview_and_download_supported_types(filename: str, payload: bytes, expected_mime: str) -> None:
+    """Image, PDF and PPTX files preview with their MIME type and download unchanged."""
+    uploads_dir = _backend_data_root() / "uploads"
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+    sample = uploads_dir / filename
+    sample.write_bytes(payload)
+    try:
+        client = TestClient(app)
+        preview = client.get("/nanobot/artifacts/preview", params={"target": f"local://{filename}"})
+        assert preview.status_code == 200
+        assert preview.headers["content-type"].startswith(expected_mime)
+
+        download = client.get("/nanobot/artifacts/download", params={"target": f"local://{filename}"})
+        assert download.status_code == 200
+        assert download.content == sample.read_bytes()
+    finally:
+        # Do not leave fixtures behind in the shared data directory.
+        sample.unlink(missing_ok=True)
+
+
+def test_web_preview_missing_resource_returns_error_and_download_still_works() -> None:
+    """A missing sibling resource yields 404 while the HTML itself stays downloadable."""
+    import shutil
+
+    web_dir = _backend_data_root() / "workspace" / "task4-web-missing"
+    web_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        html_file = web_dir / "index.html"
+        html_file.write_text("<html><head><script src='missing.js'></script></head><body>ok</body></html>", encoding="utf-8")
+
+        client = TestClient(app)
+        preview = client.get(
+            "/nanobot/artifacts/preview",
+            params={"target": str(html_file)},
+            follow_redirects=False,
+        )
+        assert preview.status_code == 307
+        location = preview.headers["location"]
+
+        missing = client.get(location.replace("index.html", "missing.js"))
+        assert missing.status_code == 404
+        assert missing.json()["detail"] == "Web 资源不存在"
+
+        download = client.get("/nanobot/artifacts/download", params={"target": str(html_file)})
+        assert download.status_code == 200
+        assert download.content == html_file.read_bytes()
+    finally:
+        # Remove the fixture directory so repeated runs start clean.
+        shutil.rmtree(web_dir, ignore_errors=True)
@@ -0,0 +1,55 @@
+from pathlib import Path
+
+from app.core.artifacts import extract_artifacts
+
+
+def _backend_data_root() -> Path:
+    """Return the data directory that sits next to this test file's parent directory."""
+    here = Path(__file__).resolve()
+    return here.parent.parent / "data"
+
+
+def test_extract_artifacts_from_local_and_tool_paths() -> None:
+    """Artifacts are found both in the reply text and in tool messages of the last turn."""
+    data_root = _backend_data_root()
+    uploads_dir = data_root / "uploads"
+    workspace_dir = data_root / "workspace" / "reports"
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+    workspace_dir.mkdir(parents=True, exist_ok=True)
+
+    upload_file = uploads_dir / "task1-sample.csv"
+    report_file = workspace_dir / "task1-report.html"
+    try:
+        upload_file.write_text("a,b\n1,2\n", encoding="utf-8")
+        report_file.write_text("<html><body>ok</body></html>", encoding="utf-8")
+
+        content = "请下载 local://task1-sample.csv"
+        session_messages = [
+            {"role": "user", "content": "生成报告"},
+            {"role": "tool", "content": f"输出文件：{report_file}"},
+        ]
+
+        artifacts = extract_artifacts(content, session_messages)
+
+        by_name = {item["name"]: item for item in artifacts}
+        assert "task1-sample.csv" in by_name
+        assert "task1-report.html" in by_name
+        assert by_name["task1-sample.csv"]["download_url"].startswith("/nanobot/artifacts/download?target=")
+        assert by_name["task1-sample.csv"]["previewable"] is True
+        assert by_name["task1-report.html"]["previewable"] is True
+        assert by_name["task1-report.html"]["preview_url"].startswith("/nanobot/artifacts/preview?target=")
+    finally:
+        # Do not leave fixtures behind in the shared data directory.
+        upload_file.unlink(missing_ok=True)
+        report_file.unlink(missing_ok=True)
+
+
+def test_extract_artifacts_deduplicate_and_skip_missing() -> None:
+    """A path mentioned twice yields one artifact and a missing file yields none."""
+    data_root = _backend_data_root()
+    workspace_dir = data_root / "workspace"
+    workspace_dir.mkdir(parents=True, exist_ok=True)
+
+    pdf_file = workspace_dir / "task1-dedup.pdf"
+    pdf_file.write_bytes(b"%PDF-1.4 test")
+    missing_file = workspace_dir / "task1-missing.pdf"
+    try:
+        content = f"{pdf_file} and {pdf_file} and {missing_file}"
+        artifacts = extract_artifacts(content, [])
+
+        assert len(artifacts) == 1
+        item = artifacts[0]
+        assert item["name"] == "task1-dedup.pdf"
+        assert item["mime_type"] == "application/pdf"
+        assert item["previewable"] is True
+    finally:
+        # Do not leave fixtures behind in the shared data directory.
+        pdf_file.unlink(missing_ok=True)
@@ -1,6 +1,6 @@
 import { useState, useRef, useEffect } from "react";
 import { ScrollArea } from "@/components/ui/scroll-area";
-import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink } from "lucide-react";
+import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink, FileText, Download, Eye } from "lucide-react";
 import { api } from "@/lib/api";
 import { type ChartSpec } from "@/store/visualizationStore";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
@@ -14,6 +14,7 @@ import { useTranslation } from "react-i18next";
 import { InlineVisualizationCard } from "./InlineVisualizationCard";
 import { useProjectStore } from "@/store/projectStore";
 import { SlashCommandMenu } from "./SlashCommandMenu";
+import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";

 interface Message {
  id: string;
@@ -24,6 +25,7 @@ interface Message {
  progressLogs?: string[];
  routeInfo?: string;
  reasoningContent?: string;
+  artifacts?: MessageArtifact[];
 }

 interface MessageViz {
@@ -35,6 +37,21 @@ interface MessageViz {
  error?: string | null;
 }

+/** A file artifact attached to a chat message; field names use the snake_case wire format. */
+interface MessageArtifact {
+  /** Display name of the file. */
+  name: string;
+  /** MIME type of the file. */
+  mime_type: string;
+  /** Size as a number; rendered through formatArtifactSize. */
+  size: number;
+  /** URL used to download the file. */
+  download_url: string;
+  /** Whether the file is flagged as previewable. */
+  previewable: boolean;
+  /** URL used to preview the file, when one was supplied. */
+  preview_url?: string;
+}
+
+/** The artifact held in the artifactPreview state of ChatInterface (null when none). */
+interface ArtifactPreviewTarget {
+  /** Display name of the artifact. */
+  name: string;
+  /** MIME type of the artifact. */
+  mimeType: string;
+  /** URL of the artifact's preview. */
+  previewUrl: string;
+}
+
 const REPORT_HTML_BLOCK_REGEX = /<!--\s*REPORT_HTML_START\s*-->([\s\S]*?)<!--\s*REPORT_HTML_END\s*-->/i;

 const splitReportHtml = (content: string): { markdown: string; reportHtml: string | null } => {
@@ -97,6 +114,56 @@ interface SessionData {
  }>;
 }

+/**
+ * Format a byte count with 1024-based units from B up to TB.
+ * Non-finite or negative input yields "0 B"; one decimal is shown only for
+ * scaled values below 10.
+ */
+const formatArtifactSize = (size: number): string => {
+  if (!Number.isFinite(size) || size < 0) return "0 B";
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  const lastUnit = units.length - 1;
+  let scaled = size;
+  let unit = 0;
+  for (; scaled >= 1024 && unit < lastUnit; unit += 1) {
+    scaled /= 1024;
+  }
+  const decimals = unit === 0 || scaled >= 10 ? 0 : 1;
+  return `${scaled.toFixed(decimals)} ${units[unit]}`;
+};
+
+/**
+ * Convert an untyped artifacts value into MessageArtifact objects.
+ * Accepts snake_case or camelCase keys; entries without a name or a download
+ * URL are dropped, and a non-array input yields an empty list.
+ */
+const normalizeArtifacts = (raw: unknown): MessageArtifact[] => {
+  if (!Array.isArray(raw)) return [];
+  // The snake_case key wins whenever it holds a string, even an empty one.
+  const pickString = (record: Record<string, unknown>, snake: string, camel: string): string | undefined => {
+    const primary = record[snake];
+    if (typeof primary === "string") return primary;
+    const secondary = record[camel];
+    return typeof secondary === "string" ? secondary : undefined;
+  };
+  const result: MessageArtifact[] = [];
+  for (const entry of raw) {
+    if (!entry || typeof entry !== "object") continue;
+    const record = entry as Record<string, unknown>;
+    const name = typeof record.name === "string" ? record.name : "";
+    const downloadUrl = pickString(record, "download_url", "downloadUrl") ?? "";
+    if (!name || !downloadUrl) continue;
+    result.push({
+      name,
+      mime_type: pickString(record, "mime_type", "mimeType") ?? "application/octet-stream",
+      size: typeof record.size === "number" ? record.size : 0,
+      download_url: downloadUrl,
+      previewable: Boolean(record.previewable),
+      preview_url: pickString(record, "preview_url", "previewUrl"),
+    });
+  }
+  return result;
+};
+
 export function ChatInterface() {
  const { t } = useTranslation();
  const [messagesBySession, setMessagesBySession] = useState<Record<string, Message[]>>({});
@@ -105,6 +172,7 @@ export function ChatInterface() {
  const [availableSkills, setAvailableSkills] = useState<Skill[]>([]);
  const [selectedSkillIds, setSelectedSkillIds] = useState<string[]>([]);
  const [isMenuOpen, setIsMenuOpen] = useState(false);
+  const [artifactPreview, setArtifactPreview] = useState<ArtifactPreviewTarget | null>(null);
  const scrollRef = useRef<HTMLDivElement>(null);
  const location = useLocation();
  const { currentProject } = useProjectStore();
@@ -294,6 +362,7 @@ export function ChatInterface() {
                role: m.role as 'user' | 'assistant',
                content: cleanContent,
                viz: m.viz ? buildMessageViz(m.viz) : undefined,
+                artifacts: normalizeArtifacts(m.artifacts),
              };
            });
          setMessagesForSession(activeSessionKey, formattedMessages);
@@ -643,6 +712,7 @@ export function ChatInterface() {
            selected?: string;
            reason?: string;
            chart?: { chart_spec?: ChartSpec | null; reasoning?: string; can_visualize?: boolean; chart_type?: string } | null;
+            artifacts?: unknown;
          };

           if (payload.type === "delta" && payload.content) {
@@ -667,14 +737,17 @@ export function ChatInterface() {
            pushProgressLog(payload.content, payload.is_reasoning || false);
          }

-           if (payload.type === "final" && payload.content) {
+           if (payload.type === "final") {
            hasFinalPayload = true;
-             streamedText = payload.content;
+            if (typeof payload.content === "string") {
+              streamedText = payload.content;
+            }
            flushAssistant(true);
            pushProgressLog(t('answerGenerationCompleted'));
+            const messageArtifacts = normalizeArtifacts(payload.artifacts);
             setMessagesForSession(targetSessionKey, (prev) =>
               prev.map((msg) =>
-                msg.id === assistantId ? { ...msg, content: payload.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz } : msg
+                msg.id === assistantId ? { ...msg, content: typeof payload.content === "string" ? payload.content : msg.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz, artifacts: messageArtifacts.length > 0 ? messageArtifacts : msg.artifacts } : msg
               )
             );
           }
@@ -1059,6 +1132,43 @@ export function ChatInterface() {
                                </a>
                              </div>
                            ) : null}
+                            {msg.artifacts && msg.artifacts.length > 0 ? (
+                              <div className="mt-4 grid gap-2 sm:grid-cols-2">
+                                {msg.artifacts.map((artifact, artifactIndex) => (
+                                  <div key={`${msg.id}-artifact-${artifactIndex}`} className="rounded-xl border border-zinc-200 bg-zinc-50/60 px-3 py-2.5">
+                                    <div className="flex items-center gap-2.5">
+                                      <div className="h-8 w-8 rounded-lg bg-white border border-zinc-200 flex items-center justify-center text-zinc-500 shrink-0">
+                                        <FileText className="h-4 w-4" />
+                                      </div>
+                                      <div className="min-w-0 flex-1">
+                                        <div className="text-sm font-medium text-zinc-800 truncate">{artifact.name}</div>
+                                        <div className="text-[11px] text-zinc-500">{formatArtifactSize(artifact.size)}</div>
+                                      </div>
+                                    </div>
+                                    <div className="mt-2 flex items-center gap-2">
+                                      {artifact.previewable && artifact.preview_url ? (
+                                        <button
+                                          onClick={() => setArtifactPreview({ name: artifact.name, mimeType: artifact.mime_type, previewUrl: artifact.preview_url || "" })}
+                                          className="inline-flex items-center gap-1.5 text-xs px-2.5 py-1.5 rounded-md border border-zinc-300 text-zinc-700 hover:bg-white transition-colors"
+                                        >
+                                          <Eye className="h-3.5 w-3.5" />
+                                          {t('preview')}
+                                        </button>
+                                      ) : null}
+                                      <a
+                                        href={artifact.download_url}
+                                        target="_blank"
+                                        rel="noopener noreferrer"
+                                        className="inline-flex items-center gap-1.5 text-xs px-2.5 py-1.5 rounded-md border border-zinc-300 text-zinc-700 hover:bg-white transition-colors"
+                                      >
+                                        <Download className="h-3.5 w-3.5" />
+                                        {t('download')}
+                                      </a>
+                                    </div>
+                                  </div>
+                                ))}
+                              </div>
+                            ) : null}
                            {msg.viz ? (
                              <div className="mt-3 pt-3 border-t border-zinc-100">
                                <InlineVisualizationCard viz={msg.viz} />
@@ -1243,6 +1353,30 @@ export function ChatInterface() {
          </div>
        </div>
      )}
+      <Dialog open={Boolean(artifactPreview)} onOpenChange={(open) => {
+        if (!open) setArtifactPreview(null);
+      }}>
+        <DialogContent className="sm:max-w-[min(1100px,95vw)] h-[85vh] flex flex-col">
+          <DialogHeader>
+            <DialogTitle>{artifactPreview?.name || t('artifactPreview')}</DialogTitle>
+          </DialogHeader>
+          <div className="flex-1 min-h-0 rounded-lg border border-zinc-200 bg-white overflow-hidden">
+            {artifactPreview?.mimeType.startsWith("image/") ? (
+              <img
+                src={artifactPreview.previewUrl}
+                alt={artifactPreview.name}
+                className="w-full h-full object-contain bg-zinc-50"
+              />
+            ) : artifactPreview ? (
+              <iframe
+                title={artifactPreview.name}
+                src={artifactPreview.previewUrl}
+                className="w-full h-full"
+              />
+            ) : null}
+          </div>
+        </DialogContent>
+      </Dialog>
    </div>
  );
 }
@@ -38,6 +38,9 @@
  "thinkingProcess": "Thinking Process",
  "modelThinking": "Model is thinking, please wait...",
  "openReportInNewTab": "Open report in new tab",
+  "artifactPreview": "File Preview",
+  "preview": "Preview",
+  "download": "Download",
  "outputInterrupted": "Output interrupted",
  "requestSubmittedRouting": "Request submitted, preparing to route...",
  "routingInfo": "Routing: {{selected}} {{reason}}",
@@ -51,6 +51,9 @@
  "thinkingProcess": "思考过程",
  "modelThinking": "模型思考中，请稍候...",
  "openReportInNewTab": "在新标签页中打开分析报告",
+  "artifactPreview": "文件预览",
+  "preview": "预览",
+  "download": "下载",
  "outputInterrupted": "已中断输出",
  "requestSubmittedRouting": "请求已提交，准备路由...",
  "routingInfo": "路由：{{selected}}{{reason}}",