feat: add artifact

This commit is contained in:
qixinbo
2026-03-27 15:10:33 +08:00
parent ec585ae730
commit 37070d7896
10 changed files with 768 additions and 31 deletions
+3 -2
View File
@@ -7,8 +7,9 @@ from pathlib import Path
# Add project root to sys.path
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
sys.path.append(str(PROJECT_ROOT))
NANOBOT_ROOT = PROJECT_ROOT / "nanobot"
if str(NANOBOT_ROOT) not in sys.path:
sys.path.append(str(NANOBOT_ROOT))
from nanobot.providers.litellm_provider import LiteLLMProvider
from app.schemas.chart import ChartGenerationResponse
+3 -2
View File
@@ -15,8 +15,9 @@ logger = logging.getLogger(__name__)
# Add project root to sys.path to allow importing nanobot
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
sys.path.append(str(PROJECT_ROOT))
NANOBOT_ROOT = PROJECT_ROOT / "nanobot"
if str(NANOBOT_ROOT) not in sys.path:
sys.path.append(str(NANOBOT_ROOT))
from nanobot.providers.litellm_provider import LiteLLMProvider
from app.connectors.postgres import postgres_connector
+202
View File
@@ -0,0 +1,202 @@
import mimetypes
import re
from pathlib import Path
from typing import Any, Iterable
from urllib.parse import quote
from pydantic import BaseModel
# Matches ``local://...`` locators, stopping at whitespace, quotes, angle
# brackets, or a closing bracket/paren/brace.
LOCAL_URI_PATTERN = re.compile(r"local://[^\s<>'\"\]\)\}]+")
# Matches file-like paths ending in a 1-12 character alphanumeric extension.
# Three alternatives, in order: a drive-letter path (``C:\...``), a path
# starting with ``/``, and a relative path with at least one directory part.
PATH_PATTERN = re.compile(
    r"(?:[A-Za-z]:[\\/][^\s<>'\"]+\.[A-Za-z0-9]{1,12}|/[^\s<>'\"]+\.[A-Za-z0-9]{1,12}|(?:\.\./|\.?/)?(?:[\w\-.]+[\\/])+[\w\-.]+\.[A-Za-z0-9]{1,12})"
)
# Matches ``data/data/<name>.<ext>`` (either slash style, case-insensitive).
REPORT_PATH_PATTERN = re.compile(r"data[\\/]data[\\/][\w\-.]+\.[A-Za-z0-9]{1,12}", re.IGNORECASE)
# File suffixes that ``_is_previewable`` accepts regardless of the guessed
# MIME type (compared against the lower-cased suffix).
PREVIEWABLE_EXTENSIONS = {
    ".html",
    ".htm",
    ".pdf",
    ".pptx",
    ".txt",
    ".md",
    ".json",
    ".csv",
    ".tsv",
    ".yaml",
    ".yml",
    ".xml",
    ".log",
}
class ArtifactPayload(BaseModel):
    """Metadata for one file artifact, as built by ``_build_artifact_payload``."""

    # File name (``path.name``) of the resolved artifact.
    name: str
    # Guessed MIME type; ``application/octet-stream`` when unknown.
    mime_type: str
    # File size in bytes (``st_size``).
    size: int
    # Relative URL of the download endpoint with the locator as ``target``.
    download_url: str
    # Whether a preview URL is offered for this file.
    previewable: bool
    # Relative URL of the preview endpoint; ``None`` when not previewable.
    preview_url: str | None = None
def extract_artifacts(content: str, session_messages: list[dict[str, Any]] | None = None) -> list[dict[str, Any]]:
    """Find files referenced in a reply and describe them as artifact dicts.

    Locators are collected from ``content`` and from the candidate texts
    derived from ``session_messages``, de-duplicated in first-seen order,
    resolved to paths, and kept only when they point at an existing regular
    file. Each distinct resolved file appears once.

    Returns:
        A list of ``ArtifactPayload`` dumps with ``None`` fields omitted.
    """
    # A dict keeps insertion order, giving an ordered de-duplication.
    unique_locators: dict[str, None] = {}
    for text in _collect_candidate_texts(content, session_messages or []):
        for locator in _extract_locators(text):
            unique_locators.setdefault(locator, None)

    results: list[dict[str, Any]] = []
    emitted: set[Path] = set()
    for locator in unique_locators:
        target = _resolve_locator(locator)
        if target and target.exists() and target.is_file():
            real_path = target.resolve()
            # Different locators may point at the same file on disk.
            if real_path not in emitted:
                emitted.add(real_path)
                payload = _build_artifact_payload(locator, real_path)
                results.append(payload.model_dump(exclude_none=True))
    return results
def _build_artifact_payload(locator: str, path: Path) -> ArtifactPayload:
    """Build the payload for ``path``, addressing it by its original ``locator``.

    The locator is fully percent-encoded and passed as the ``target`` query
    parameter of the download (and, when previewable, preview) URL.
    """
    mime = _guess_mime_type(path)
    can_preview = _is_previewable(path, mime)
    target_param = quote(locator, safe="")
    fields = {
        "name": path.name,
        "mime_type": mime,
        "size": path.stat().st_size,
        "download_url": f"/nanobot/artifacts/download?target={target_param}",
        "previewable": can_preview,
        "preview_url": f"/nanobot/artifacts/preview?target={target_param}" if can_preview else None,
    }
    return ArtifactPayload(**fields)
def _guess_mime_type(path: Path) -> str:
mime_type, _ = mimetypes.guess_type(path.name)
return mime_type or "application/octet-stream"
def _is_previewable(path: Path, mime_type: str) -> bool:
if mime_type.startswith("image/") or mime_type.startswith("text/"):
return True
extension = path.suffix.lower()
if extension in PREVIEWABLE_EXTENSIONS:
return True
return mime_type in {
"application/pdf",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
def _collect_candidate_texts(content: str, session_messages: list[dict[str, Any]]) -> list[str]:
texts = [content or ""]
if not session_messages:
return texts
last_user_idx = -1
for idx, message in enumerate(session_messages):
if message.get("role") == "user":
last_user_idx = idx
if last_user_idx == -1:
segment = session_messages
else:
segment = session_messages[last_user_idx + 1 :]
for message in segment:
raw = message.get("content")
flattened = _flatten_content(raw)
if flattened:
texts.append(flattened)
return texts
def _extract_locators(text: str) -> Iterable[str]:
if not text:
return []
ordered: list[str] = []
seen: set[str] = set()
patterns = (LOCAL_URI_PATTERN, REPORT_PATH_PATTERN, PATH_PATTERN)
for pattern in patterns:
for match in pattern.findall(text):
normalized = _normalize_locator(match)
if not normalized or normalized in seen:
continue
seen.add(normalized)
ordered.append(normalized)
return ordered
def _normalize_locator(raw_locator: str) -> str:
locator = raw_locator.strip().strip("`'\"")
locator = locator.rstrip(".,;:!?)]}")
return locator
def _resolve_locator(locator: str) -> Path | None:
backend_root = Path(__file__).resolve().parents[2]
data_root = backend_root / "data"
workspace_root = data_root / "workspace"
uploads_root = data_root / "uploads"
reports_root = data_root / "data"
if locator.startswith("local://"):
raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\")
if not raw_local:
return None
candidate = Path(raw_local)
if candidate.is_absolute():
return candidate
checks = [workspace_root / candidate, reports_root / candidate, uploads_root / candidate, uploads_root / candidate.name]
for path in checks:
if path.exists():
return path
return uploads_root / candidate.name
normalized = locator.replace("\\", "/")
path = Path(locator)
if path.is_absolute():
return path
if normalized.startswith("data/data/"):
return backend_root / normalized
checks = [
workspace_root / normalized,
data_root / normalized,
backend_root / normalized,
]
for candidate in checks:
if candidate.exists():
return candidate
return None
def _flatten_content(value: Any) -> str:
if value is None:
return ""
if isinstance(value, str):
return value
if isinstance(value, list):
fragments: list[str] = []
for item in value:
flattened = _flatten_content(item)
if flattened:
fragments.append(flattened)
return "\n".join(fragments)
if isinstance(value, dict):
fragments: list[str] = []
text = value.get("text")
if isinstance(text, str):
fragments.append(text)
content = value.get("content")
if content is not None:
nested = _flatten_content(content)
if nested:
fragments.append(nested)
for field in ("path", "file", "file_path", "url"):
data = value.get(field)
if isinstance(data, str):
fragments.append(data)
return "\n".join(fragments)
return str(value)
+63 -4
View File
@@ -2,6 +2,15 @@ import os
from pathlib import Path
from typing import Optional
# Directory two levels above this module (``parents[2]``); the data
# directories below are all derived from it.
backend_root = Path(__file__).resolve().parents[2]
data_root = backend_root / "data"
workspace_root = data_root / "workspace"
uploads_root = data_root / "uploads"
# Note the nested ``data/data`` directory.
reports_root = data_root / "data"
# Only paths under one of these roots pass ``ensure_artifact_access``.
allowed_artifact_roots = (workspace_root, uploads_root, reports_root)
def resolve_upload_file_path(file_url: Optional[str]) -> Path:
if not file_url:
raise ValueError("File URL is empty")
@@ -9,10 +18,60 @@ def resolve_upload_file_path(file_url: Optional[str]) -> Path:
if file_url.startswith("local://"):
raw_name = file_url.replace("local://", "", 1)
safe_name = os.path.basename(raw_name)
# Assuming we are in backend/app/core, go up to backend/data/uploads
upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
file_path = upload_dir / safe_name
file_path = uploads_root / safe_name
return file_path
# If it's already an absolute path (or relative path not starting with local://)
return Path(file_url)
def resolve_artifact_target(target: str) -> Path | None:
locator = (target or "").strip().strip("'\"")
if not locator:
return None
if locator.startswith("local://"):
raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\")
if not raw_local:
return None
candidate = Path(raw_local)
if candidate.is_absolute():
return candidate
checks = (
workspace_root / candidate,
reports_root / candidate,
uploads_root / candidate,
uploads_root / candidate.name,
)
for path in checks:
if path.exists():
return path
return uploads_root / candidate.name
normalized = locator.replace("\\", "/")
path = Path(locator)
if path.is_absolute():
return path
if normalized.startswith("data/data/"):
return backend_root / normalized
checks = (
workspace_root / normalized,
data_root / normalized,
backend_root / normalized,
)
for candidate in checks:
if candidate.exists():
return candidate
return None
def ensure_artifact_access(path: Path, *, require_file: bool = True) -> Path:
    """Resolve ``path`` and verify it is an existing entry inside an allowed root.

    Args:
        path: Candidate path, possibly containing symlinks or ``..`` parts.
        require_file: When true the target must be a regular file; when false
            it must be a directory.

    Returns:
        The fully resolved path.

    Raises:
        FileNotFoundError: The path cannot be resolved (missing, a parent is
            not a directory, malformed name, ...) or has the wrong kind.
        PermissionError: The resolved path lies outside every allowed root,
            or the filesystem denied access while resolving.
    """
    try:
        resolved = path.resolve(strict=True)
    except PermissionError:
        # Callers map PermissionError to "forbidden"; keep that distinction.
        raise
    except (OSError, ValueError, RuntimeError) as exc:
        # Besides a missing file, resolution can fail with NotADirectoryError
        # (e.g. "index.html/x"), a name that is too long, a symlink loop, or
        # an embedded NUL byte. All of these mean "no such artifact".
        raise FileNotFoundError("目标文件不存在") from exc
    if require_file and not resolved.is_file():
        raise FileNotFoundError("目标文件不存在")
    if not require_file and not resolved.is_dir():
        raise FileNotFoundError("目标目录不存在")
    if any(resolved.is_relative_to(root.resolve()) for root in allowed_artifact_roots):
        return resolved
    raise PermissionError("非法路径访问")
+165 -17
View File
@@ -1,8 +1,13 @@
import asyncio
import base64
import binascii
from typing import Any, Dict, List, Optional, Literal, Tuple
from fastapi import FastAPI, HTTPException
import mimetypes
from pathlib import Path
from fastapi import FastAPI, HTTPException, Query
from fastapi.encoders import jsonable_encoder
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
@@ -14,6 +19,8 @@ from datetime import datetime
from app.api import upload, llm, skills, users, datasources, projects, semantic
from app.connectors.postgres import postgres_connector
from app.connectors.clickhouse import clickhouse_connector
from app.core.artifacts import extract_artifacts
from app.core.files import ensure_artifact_access, resolve_artifact_target
from app.core.nanobot import nanobot_service
from app.core.session_alias_store import session_alias_store
from app.context import current_session_id, current_progress_callback, current_viz_data, current_data_source, current_file_url
@@ -50,6 +57,17 @@ app.include_router(datasources.router, prefix="/api/v1")
app.include_router(semantic.router, prefix="/api/v1")
STREAM_DELTA_CHUNK_SIZE = 48
# File suffixes that ``_is_previewable`` accepts for inline preview in
# addition to HTML/PDF/PPTX and any image/* or text/* MIME type.
PREVIEWABLE_TEXT_EXTENSIONS = {
    ".txt",
    ".md",
    ".json",
    ".csv",
    ".tsv",
    ".yaml",
    ".yml",
    ".xml",
    ".log",
}
@app.on_event("startup")
async def startup_event():
@@ -85,6 +103,100 @@ def nanobot_status():
return {"status": "running", "model": nanobot_service.agent.model}
return {"status": "stopped"}
def _guess_mime_type(path: os.PathLike[str] | str) -> str:
mime_type, _ = mimetypes.guess_type(str(path))
return mime_type or "application/octet-stream"
def _resolve_checked_target(target: str) -> os.PathLike[str]:
    """Resolve ``target`` to an accessible file or raise an HTTP error.

    Raises:
        HTTPException: 404 when the target cannot be resolved or is not an
            existing file, 403 when it lies outside the allowed roots.
    """
    located = resolve_artifact_target(target)
    if located is not None:
        try:
            checked = ensure_artifact_access(located, require_file=True)
        except FileNotFoundError:
            raise HTTPException(status_code=404, detail="目标文件不存在")
        except PermissionError:
            raise HTTPException(status_code=403, detail="非法路径访问")
        return checked
    raise HTTPException(status_code=404, detail="目标文件不存在")
def _is_previewable(path: os.PathLike[str], mime_type: str) -> bool:
suffix = os.path.splitext(str(path))[1].lower()
if suffix in {".html", ".htm", ".pdf", ".pptx"}:
return True
if suffix in PREVIEWABLE_TEXT_EXTENSIONS:
return True
return mime_type.startswith("image/") or mime_type.startswith("text/")
def _encode_web_root(path: Path) -> str:
return base64.urlsafe_b64encode(str(path).encode("utf-8")).decode("utf-8").rstrip("=")
def _decode_web_root(token: str) -> Path:
padding = "=" * (-len(token) % 4)
try:
decoded = base64.urlsafe_b64decode((token + padding).encode("utf-8")).decode("utf-8")
except (binascii.Error, UnicodeDecodeError):
raise HTTPException(status_code=400, detail="非法预览目录标识")
return Path(decoded)
@app.get("/nanobot/artifacts/download")
def download_artifact(target: str = Query(...)):
    """Send the artifact addressed by ``target`` as a generic binary download."""
    checked_path = str(_resolve_checked_target(target))
    download_name = os.path.basename(checked_path)
    return FileResponse(
        path=checked_path,
        media_type="application/octet-stream",
        filename=download_name,
    )
@app.get("/nanobot/artifacts/preview")
def preview_artifact(target: str = Query(...)):
    """Serve the artifact addressed by ``target`` for inline preview.

    HTML files are redirected (307) to the web-preview route so that relative
    resources next to the file resolve against its directory. Other
    previewable files are returned inline with their guessed MIME type.

    Raises:
        HTTPException: 404/403 from target resolution, 415 when the file type
            cannot be previewed.
    """
    # Imported locally so this handler does not depend on a module-level import.
    from urllib.parse import quote

    resolved = _resolve_checked_target(target)
    mime_type = _guess_mime_type(resolved)
    if not _is_previewable(resolved, mime_type):
        raise HTTPException(status_code=415, detail="当前文件类型不支持预览,请使用下载")
    suffix = os.path.splitext(str(resolved))[1].lower()
    if suffix in {".html", ".htm"}:
        root_token = _encode_web_root(Path(resolved).parent)
        # Percent-encode the file name: characters such as '#', '?' or '%'
        # would otherwise be read as URL syntax and the redirect would point
        # at the wrong resource.
        entry = quote(Path(resolved).name)
        return RedirectResponse(url=f"/nanobot/artifacts/web/{root_token}/{entry}", status_code=307)
    return FileResponse(
        path=str(resolved),
        media_type=mime_type,
        filename=os.path.basename(str(resolved)),
        content_disposition_type="inline",
    )
@app.get("/nanobot/artifacts/web/{root_token}/{resource_path:path}")
def preview_web_artifact_resource(root_token: str, resource_path: str):
    """Serve a file relative to the directory encoded in ``root_token``.

    Both the decoded directory and the requested resource must pass
    ``ensure_artifact_access``, and the resource must stay inside that
    directory.

    Raises:
        HTTPException: 400 for a malformed token, 404 when the directory or
            resource is missing, 403 when either escapes the allowed area.
    """

    def _checked(candidate: Path, *, require_file: bool, missing_detail: str):
        # Translate access-check failures into the matching HTTP status.
        try:
            return ensure_artifact_access(candidate, require_file=require_file)
        except FileNotFoundError:
            raise HTTPException(status_code=404, detail=missing_detail)
        except PermissionError:
            raise HTTPException(status_code=403, detail="非法路径访问")

    decoded_root = _decode_web_root(root_token)
    web_root = _checked(decoded_root, require_file=False, missing_detail="Web 预览目录不存在")

    requested = Path(os.path.join(str(web_root), resource_path))
    resource = _checked(requested, require_file=True, missing_detail="Web 资源不存在")

    # The resource may sit in an allowed root yet outside this preview directory.
    if not Path(resource).is_relative_to(Path(web_root)):
        raise HTTPException(status_code=403, detail="非法路径访问")

    served_path = str(resource)
    return FileResponse(
        path=served_path,
        media_type=_guess_mime_type(resource),
        filename=os.path.basename(served_path),
        content_disposition_type="inline",
    )
class ChatRequest(BaseModel):
message: str
session_id: str = "api:default"
@@ -127,6 +239,27 @@ class SessionFileContextUpdateRequest(BaseModel):
active_data_file: Optional[Dict[str, Any]] = None
selected_data_source: Optional[str] = None
def _persist_assistant_enrichment(
    session_id: str,
    viz_payload: Optional[Dict[str, Any]] = None,
    artifacts: Optional[List[Dict[str, Any]]] = None,
) -> None:
    """Attach viz data and/or artifacts to the session's last assistant message.

    Does nothing when no agent is running, when the last message is not from
    the assistant, or when both payloads are empty. The session is saved only
    if something was written.
    """
    agent = nanobot_service.agent
    if not agent:
        return
    session = agent.sessions.get_or_create(session_id)
    if not session.messages:
        return
    last_message = session.messages[-1]
    if last_message.get("role") != "assistant":
        return

    updates: Dict[str, Any] = {}
    if viz_payload:
        updates["viz"] = viz_payload
    if artifacts:
        updates["artifacts"] = artifacts
    if not updates:
        return

    for key, enrichment in updates.items():
        last_message[key] = enrichment
    agent.sessions.save(session)
@app.post("/nanobot/chat")
async def nanobot_chat(request: ChatRequest):
try:
@@ -154,20 +287,28 @@ async def nanobot_chat(request: ChatRequest):
skill_ids=request.skill_ids,
model_id=request.model_id,
)
text = response or ""
session_messages = []
if nanobot_service.agent:
session = nanobot_service.agent.sessions.get_or_create(request.session_id)
session_messages = session.messages
artifacts = extract_artifacts(text, session_messages)
viz_payload = current_viz_data.get()
if viz_payload and nanobot_service.agent:
# Update the last assistant message with viz data
session = nanobot_service.agent.sessions.get_or_create(request.session_id)
if session.messages and session.messages[-1].get("role") == "assistant":
session.messages[-1]["viz"] = viz_payload
nanobot_service.agent.sessions.save(session)
_persist_assistant_enrichment(
session_id=request.session_id,
viz_payload=viz_payload if isinstance(viz_payload, dict) else None,
artifacts=artifacts,
)
return {
"response": response,
payload = {
"response": text,
"viz": viz_payload,
"routing": {"selected": "agent", "reason": "auto_routed_by_agent"},
}
if artifacts:
payload["artifacts"] = artifacts
return payload
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -252,6 +393,11 @@ async def nanobot_chat_stream(request: ChatRequest):
response = await current_task
text = response or ""
session_messages = []
if nanobot_service.agent:
session = nanobot_service.agent.sessions.get_or_create(request.session_id)
session_messages = session.messages
artifacts = extract_artifacts(text, session_messages)
# Check again for viz payload after task completes if not sent yet
viz_payload = current_viz_data.get()
@@ -268,17 +414,19 @@ async def nanobot_chat_stream(request: ChatRequest):
except Exception as e:
pass
# Persist viz payload to session
if viz_payload and nanobot_service.agent:
session = nanobot_service.agent.sessions.get_or_create(request.session_id)
if session.messages and session.messages[-1].get("role") == "assistant":
session.messages[-1]["viz"] = viz_payload
nanobot_service.agent.sessions.save(session)
_persist_assistant_enrichment(
session_id=request.session_id,
viz_payload=viz_payload if isinstance(viz_payload, dict) else None,
artifacts=artifacts,
)
# Since true streaming is enabled via StreamingLiteLLMProvider,
# we no longer need to chunk and yield `text` here.
# Just yield the final text to signal completion and update final state.
yield f"data: {json.dumps({'type': 'final', 'content': text}, ensure_ascii=False)}\n\n"
final_payload = {"type": "final", "content": text}
if artifacts:
final_payload["artifacts"] = artifacts
yield f"data: {json.dumps(final_payload, ensure_ascii=False)}\n\n"
yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n"
except asyncio.CancelledError:
raise
+131
View File
@@ -0,0 +1,131 @@
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from main import app
def _backend_data_root() -> Path:
return Path(__file__).resolve().parents[1] / "data"
def test_download_artifact_within_whitelist() -> None:
    """A file in the uploads directory downloads as an attachment."""
    target_dir = _backend_data_root() / "uploads"
    target_dir.mkdir(parents=True, exist_ok=True)
    csv_path = target_dir / "task2-download.csv"
    csv_path.write_text("id,name\n1,a\n", encoding="utf-8")

    http = TestClient(app)
    result = http.get("/nanobot/artifacts/download", params={"target": "local://task2-download.csv"})

    assert result.status_code == 200
    headers = result.headers
    assert headers["content-type"].startswith("application/octet-stream")
    assert headers["content-disposition"].startswith("attachment;")
    assert result.content == csv_path.read_bytes()
def test_download_artifact_rejects_outside_paths() -> None:
    """An existing file outside the allowed roots is refused with 403."""
    http = TestClient(app)
    result = http.get("/nanobot/artifacts/download", params={"target": "/etc/hosts"})

    body = result.json()
    assert result.status_code == 403
    assert body["detail"] == "非法路径访问"
def test_preview_artifact_returns_unsupported_for_binary() -> None:
    """A binary file cannot be previewed (415) but can still be downloaded."""
    target_dir = _backend_data_root() / "uploads"
    target_dir.mkdir(parents=True, exist_ok=True)
    blob_path = target_dir / "task2-unsupported.bin"
    blob_path.write_bytes(b"\x00\x01\x02")
    query = {"target": f"local://{blob_path.name}"}

    http = TestClient(app)
    preview_result = http.get("/nanobot/artifacts/preview", params=query)
    assert preview_result.status_code == 415
    assert preview_result.json()["detail"] == "当前文件类型不支持预览,请使用下载"

    download_result = http.get("/nanobot/artifacts/download", params=query)
    assert download_result.status_code == 200
    assert download_result.content == blob_path.read_bytes()
def test_preview_html_supports_directory_resources() -> None:
    """An HTML preview redirects to the web route, which also serves sibling files."""
    site_dir = _backend_data_root() / "workspace" / "task2-web"
    site_dir.mkdir(parents=True, exist_ok=True)
    page_path = site_dir / "index.html"
    page_path.write_text(
        "<html><head><link rel='stylesheet' href='styles.css'></head><body>ok</body></html>",
        encoding="utf-8",
    )
    stylesheet_path = site_dir / "styles.css"
    stylesheet_path.write_text("body{color:#333;}", encoding="utf-8")

    http = TestClient(app)
    redirect = http.get(
        "/nanobot/artifacts/preview",
        params={"target": str(page_path)},
        follow_redirects=False,
    )
    assert redirect.status_code == 307
    page_url = redirect.headers["location"]
    assert page_url.startswith("/nanobot/artifacts/web/")

    page_result = http.get(page_url)
    assert page_result.status_code == 200
    assert "text/html" in page_result.headers["content-type"]
    assert "styles.css" in page_result.text

    # The stylesheet lives next to the page, so only the file name differs.
    stylesheet_url = page_url.replace("index.html", "styles.css")
    stylesheet_result = http.get(stylesheet_url)
    assert stylesheet_result.status_code == 200
    assert "text/css" in stylesheet_result.headers["content-type"]
    assert "color:#333" in stylesheet_result.text
@pytest.mark.parametrize(
    ("filename", "payload", "expected_mime"),
    [
        ("task4-image.png", b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR", "image/png"),
        ("task4-preview.pdf", b"%PDF-1.4\n1 0 obj\n<<>>\nendobj\n", "application/pdf"),
        (
            "task4-preview.pptx",
            b"PK\x03\x04\x14\x00\x00\x00\x08\x00\x00\x00!\x00",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ),
    ],
)
def test_preview_and_download_supported_types(filename: str, payload: bytes, expected_mime: str) -> None:
    """Image, PDF and PPTX uploads preview with their MIME type and download intact."""
    target_dir = _backend_data_root() / "uploads"
    target_dir.mkdir(parents=True, exist_ok=True)
    stored = target_dir / filename
    stored.write_bytes(payload)
    query = {"target": f"local://{filename}"}

    http = TestClient(app)
    preview_result = http.get("/nanobot/artifacts/preview", params=query)
    assert preview_result.status_code == 200
    assert preview_result.headers["content-type"].startswith(expected_mime)

    download_result = http.get("/nanobot/artifacts/download", params=query)
    assert download_result.status_code == 200
    assert download_result.content == stored.read_bytes()
def test_web_preview_missing_resource_returns_error_and_download_still_works() -> None:
    """A missing sibling resource yields 404 while the page itself still downloads."""
    site_dir = _backend_data_root() / "workspace" / "task4-web-missing"
    site_dir.mkdir(parents=True, exist_ok=True)
    page_path = site_dir / "index.html"
    page_path.write_text(
        "<html><head><script src='missing.js'></script></head><body>ok</body></html>",
        encoding="utf-8",
    )
    page_target = {"target": str(page_path)}

    http = TestClient(app)
    redirect = http.get("/nanobot/artifacts/preview", params=page_target, follow_redirects=False)
    assert redirect.status_code == 307

    script_url = redirect.headers["location"].replace("index.html", "missing.js")
    script_result = http.get(script_url)
    assert script_result.status_code == 404
    assert script_result.json()["detail"] == "Web 资源不存在"

    download_result = http.get("/nanobot/artifacts/download", params=page_target)
    assert download_result.status_code == 200
    assert download_result.content == page_path.read_bytes()
+55
View File
@@ -0,0 +1,55 @@
from pathlib import Path
from app.core.artifacts import extract_artifacts
def _backend_data_root() -> Path:
return Path(__file__).resolve().parents[1] / "data"
def test_extract_artifacts_from_local_and_tool_paths() -> None:
    """Artifacts are found both in the reply text and in tool messages."""
    base_dir = _backend_data_root()
    uploads = base_dir / "uploads"
    reports = base_dir / "workspace" / "reports"
    for directory in (uploads, reports):
        directory.mkdir(parents=True, exist_ok=True)

    csv_path = uploads / "task1-sample.csv"
    csv_path.write_text("a,b\n1,2\n", encoding="utf-8")
    html_path = reports / "task1-report.html"
    html_path.write_text("<html><body>ok</body></html>", encoding="utf-8")

    reply = "请下载 local://task1-sample.csv"
    history = [
        {"role": "user", "content": "生成报告"},
        {"role": "tool", "content": f"输出文件:{html_path}"},
    ]

    found = {entry["name"]: entry for entry in extract_artifacts(reply, history)}

    assert "task1-sample.csv" in found
    assert "task1-report.html" in found
    csv_entry = found["task1-sample.csv"]
    html_entry = found["task1-report.html"]
    assert csv_entry["download_url"].startswith("/nanobot/artifacts/download?target=")
    assert csv_entry["previewable"] is True
    assert html_entry["previewable"] is True
    assert html_entry["preview_url"].startswith("/nanobot/artifacts/preview?target=")
def test_extract_artifacts_deduplicate_and_skip_missing() -> None:
    """A file mentioned twice is listed once; a missing file is not listed."""
    workspace = _backend_data_root() / "workspace"
    workspace.mkdir(parents=True, exist_ok=True)
    existing_pdf = workspace / "task1-dedup.pdf"
    existing_pdf.write_bytes(b"%PDF-1.4 test")
    absent_pdf = workspace / "task1-missing.pdf"

    reply = f"{existing_pdf} and {existing_pdf} and {absent_pdf}"
    found = extract_artifacts(reply, [])

    assert len(found) == 1
    only = found[0]
    assert only["name"] == "task1-dedup.pdf"
    assert only["mime_type"] == "application/pdf"
    assert only["previewable"] is True
+138 -4
View File
@@ -1,6 +1,6 @@
import { useState, useRef, useEffect } from "react";
import { ScrollArea } from "@/components/ui/scroll-area";
import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink } from "lucide-react";
import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink, FileText, Download, Eye } from "lucide-react";
import { api } from "@/lib/api";
import { type ChartSpec } from "@/store/visualizationStore";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
@@ -14,6 +14,7 @@ import { useTranslation } from "react-i18next";
import { InlineVisualizationCard } from "./InlineVisualizationCard";
import { useProjectStore } from "@/store/projectStore";
import { SlashCommandMenu } from "./SlashCommandMenu";
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
interface Message {
id: string;
@@ -24,6 +25,7 @@ interface Message {
progressLogs?: string[];
routeInfo?: string;
reasoningContent?: string;
artifacts?: MessageArtifact[];
}
interface MessageViz {
@@ -35,6 +37,21 @@ interface MessageViz {
error?: string | null;
}
/**
 * A file attached to an assistant message, as produced by `normalizeArtifacts`.
 * Field names are snake_case to match the artifact payload sent by the backend.
 */
interface MessageArtifact {
  // File name shown on the artifact card.
  name: string;
  // MIME type; `normalizeArtifacts` falls back to "application/octet-stream".
  mime_type: string;
  // Size passed to `formatArtifactSize` for display.
  size: number;
  // Link target of the download button.
  download_url: string;
  // The preview button is shown only when this is true and `preview_url` is set.
  previewable: boolean;
  preview_url?: string;
}
/** The artifact currently opened in the preview dialog. */
interface ArtifactPreviewTarget {
  // Used as the dialog title.
  name: string;
  // An "image/" prefix selects the <img> renderer; anything else uses an <iframe>.
  mimeType: string;
  // Source URL loaded by the <img> or <iframe>.
  previewUrl: string;
}
const REPORT_HTML_BLOCK_REGEX = /<!--\s*REPORT_HTML_START\s*-->([\s\S]*?)<!--\s*REPORT_HTML_END\s*-->/i;
const splitReportHtml = (content: string): { markdown: string; reportHtml: string | null } => {
@@ -97,6 +114,56 @@ interface SessionData {
}>;
}
/**
 * Format a byte count with a B/KB/MB/GB/TB unit (1024-based).
 * Non-finite or negative input yields "0 B". Plain bytes and values of 10 or
 * more in their unit are shown without decimals; smaller values get one.
 */
const formatArtifactSize = (size: number): string => {
  if (!Number.isFinite(size) || size < 0) return "0 B";

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  let unitIndex = 0;
  let scaled = size;
  for (; scaled >= 1024 && unitIndex < lastUnit; unitIndex += 1) {
    scaled /= 1024;
  }

  const decimals = unitIndex === 0 || scaled >= 10 ? 0 : 1;
  return `${scaled.toFixed(decimals)} ${units[unitIndex]}`;
};
/**
 * Convert an untyped artifact list into `MessageArtifact[]`.
 * Accepts snake_case or camelCase keys (snake_case wins), fills defaults for
 * the MIME type and size, and drops entries without a name or download URL.
 * Anything that is not an array yields an empty list.
 */
const normalizeArtifacts = (raw: unknown): MessageArtifact[] => {
  if (!Array.isArray(raw)) return [];

  // First key whose value is a string (an empty string counts), else undefined.
  const firstString = (record: Record<string, unknown>, keys: string[]): string | undefined => {
    for (const key of keys) {
      const candidate = record[key];
      if (typeof candidate === "string") return candidate;
    }
    return undefined;
  };

  const result: MessageArtifact[] = [];
  for (const entry of raw) {
    if (!entry || typeof entry !== "object") continue;
    const record = entry as Record<string, unknown>;

    const name = firstString(record, ["name"]) ?? "";
    const downloadUrl = firstString(record, ["download_url", "downloadUrl"]) ?? "";
    if (!name || !downloadUrl) continue;

    result.push({
      name,
      mime_type: firstString(record, ["mime_type", "mimeType"]) ?? "application/octet-stream",
      size: typeof record.size === "number" ? record.size : 0,
      download_url: downloadUrl,
      previewable: Boolean(record.previewable),
      preview_url: firstString(record, ["preview_url", "previewUrl"]),
    });
  }
  return result;
};
export function ChatInterface() {
const { t } = useTranslation();
const [messagesBySession, setMessagesBySession] = useState<Record<string, Message[]>>({});
@@ -105,6 +172,7 @@ export function ChatInterface() {
const [availableSkills, setAvailableSkills] = useState<Skill[]>([]);
const [selectedSkillIds, setSelectedSkillIds] = useState<string[]>([]);
const [isMenuOpen, setIsMenuOpen] = useState(false);
const [artifactPreview, setArtifactPreview] = useState<ArtifactPreviewTarget | null>(null);
const scrollRef = useRef<HTMLDivElement>(null);
const location = useLocation();
const { currentProject } = useProjectStore();
@@ -294,6 +362,7 @@ export function ChatInterface() {
role: m.role as 'user' | 'assistant',
content: cleanContent,
viz: m.viz ? buildMessageViz(m.viz) : undefined,
artifacts: normalizeArtifacts(m.artifacts),
};
});
setMessagesForSession(activeSessionKey, formattedMessages);
@@ -643,6 +712,7 @@ export function ChatInterface() {
selected?: string;
reason?: string;
chart?: { chart_spec?: ChartSpec | null; reasoning?: string; can_visualize?: boolean; chart_type?: string } | null;
artifacts?: unknown;
};
if (payload.type === "delta" && payload.content) {
@@ -667,14 +737,17 @@ export function ChatInterface() {
pushProgressLog(payload.content, payload.is_reasoning || false);
}
if (payload.type === "final" && payload.content) {
if (payload.type === "final") {
hasFinalPayload = true;
streamedText = payload.content;
if (typeof payload.content === "string") {
streamedText = payload.content;
}
flushAssistant(true);
pushProgressLog(t('answerGenerationCompleted'));
const messageArtifacts = normalizeArtifacts(payload.artifacts);
setMessagesForSession(targetSessionKey, (prev) =>
prev.map((msg) =>
msg.id === assistantId ? { ...msg, content: payload.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz } : msg
msg.id === assistantId ? { ...msg, content: typeof payload.content === "string" ? payload.content : msg.content || "", awaitingFirstToken: false, viz: streamedViz ?? msg.viz, artifacts: messageArtifacts.length > 0 ? messageArtifacts : msg.artifacts } : msg
)
);
}
@@ -1059,6 +1132,43 @@ export function ChatInterface() {
</a>
</div>
) : null}
{msg.artifacts && msg.artifacts.length > 0 ? (
<div className="mt-4 grid gap-2 sm:grid-cols-2">
{msg.artifacts.map((artifact, artifactIndex) => (
<div key={`${msg.id}-artifact-${artifactIndex}`} className="rounded-xl border border-zinc-200 bg-zinc-50/60 px-3 py-2.5">
<div className="flex items-center gap-2.5">
<div className="h-8 w-8 rounded-lg bg-white border border-zinc-200 flex items-center justify-center text-zinc-500 shrink-0">
<FileText className="h-4 w-4" />
</div>
<div className="min-w-0 flex-1">
<div className="text-sm font-medium text-zinc-800 truncate">{artifact.name}</div>
<div className="text-[11px] text-zinc-500">{formatArtifactSize(artifact.size)}</div>
</div>
</div>
<div className="mt-2 flex items-center gap-2">
{artifact.previewable && artifact.preview_url ? (
<button
onClick={() => setArtifactPreview({ name: artifact.name, mimeType: artifact.mime_type, previewUrl: artifact.preview_url || "" })}
className="inline-flex items-center gap-1.5 text-xs px-2.5 py-1.5 rounded-md border border-zinc-300 text-zinc-700 hover:bg-white transition-colors"
>
<Eye className="h-3.5 w-3.5" />
{t('preview')}
</button>
) : null}
<a
href={artifact.download_url}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 text-xs px-2.5 py-1.5 rounded-md border border-zinc-300 text-zinc-700 hover:bg-white transition-colors"
>
<Download className="h-3.5 w-3.5" />
{t('download')}
</a>
</div>
</div>
))}
</div>
) : null}
{msg.viz ? (
<div className="mt-3 pt-3 border-t border-zinc-100">
<InlineVisualizationCard viz={msg.viz} />
@@ -1243,6 +1353,30 @@ export function ChatInterface() {
</div>
</div>
)}
<Dialog open={Boolean(artifactPreview)} onOpenChange={(open) => {
if (!open) setArtifactPreview(null);
}}>
<DialogContent className="sm:max-w-[min(1100px,95vw)] h-[85vh] flex flex-col">
<DialogHeader>
<DialogTitle>{artifactPreview?.name || t('artifactPreview')}</DialogTitle>
</DialogHeader>
<div className="flex-1 min-h-0 rounded-lg border border-zinc-200 bg-white overflow-hidden">
{artifactPreview?.mimeType.startsWith("image/") ? (
<img
src={artifactPreview.previewUrl}
alt={artifactPreview.name}
className="w-full h-full object-contain bg-zinc-50"
/>
) : artifactPreview ? (
<iframe
title={artifactPreview.name}
src={artifactPreview.previewUrl}
className="w-full h-full"
/>
) : null}
</div>
</DialogContent>
</Dialog>
</div>
);
}
+3
View File
@@ -38,6 +38,9 @@
"thinkingProcess": "Thinking Process",
"modelThinking": "Model is thinking, please wait...",
"openReportInNewTab": "Open report in new tab",
"artifactPreview": "File Preview",
"preview": "Preview",
"download": "Download",
"outputInterrupted": "Output interrupted",
"requestSubmittedRouting": "Request submitted, preparing to route...",
"routingInfo": "Routing: {{selected}} {{reason}}",
+3
View File
@@ -51,6 +51,9 @@
"thinkingProcess": "思考过程",
"modelThinking": "模型思考中,请稍候...",
"openReportInNewTab": "在新标签页中打开分析报告",
"artifactPreview": "文件预览",
"preview": "预览",
"download": "下载",
"outputInterrupted": "已中断输出",
"requestSubmittedRouting": "请求已提交,准备路由...",
"routingInfo": "路由:{{selected}}{{reason}}",