Files
boocode/apps/server/src/services/artifacts.ts
indifferentketchup ad45b28250 v1.13.19-html-artifact-panes: pane-based artifact viewer with on-request HTML
Every assistant message gets an "Open in pane" affordance that opens the
message in the workspace splitter — Markdown pane (Copy + Download .md) by
default; HTML pane (Download .html only) when the model emits a self-contained
<!DOCTYPE html> or fenced ```html artifact. BOOCHAT.md rule keeps Markdown
default at every length; HTML opt-in on explicit user request.

Backend: services/artifacts.ts (slug derivation + write helpers with
symlink-escape guard via realpath-after-mkdir), routes/artifacts.ts (POST
download + GET stream with nosniff + CSP sandbox defense-in-depth), HTML
detection in finalizeCompletion writing a new message_parts.kind='html_artifact'
row (schema CHECK extended via v1.13.13 pattern), graceful 1MB cap via the
pure decideHtmlArtifactWrite helper. PartKind union extended.

Frontend: MarkdownRenderer.tsx extracted from MessageBubble's inline
MarkdownBody for reuse; MarkdownArtifactPane.tsx + HtmlArtifactPane.tsx with
loading/error states; pane state is reference-only ({chat_id, message_id,
title}) — content fetched on mount to keep workspace_panes jsonb small and
avoid 1MB blobs riding session_workspace_updated frames. iframe sandbox
locked to allow-scripts allow-clipboard-write allow-downloads with no
allow-same-origin, srcDoc not src. openInPane discriminates 404 (expected
fallback) from real errors (toast + bail). PanelRightOpen icon button with
mobile 44px tap-target.

31 new server unit tests including a real-symlink filesystem case; 332/332
server tests passing, tsc clean both sides, pnpm -C apps/web build green.
Smoke deferred to first deploy.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 12:43:13 +00:00

256 lines
9.0 KiB
TypeScript

// v1.14.x-html-artifact-panes: artifact writer + slug derivation.
//
// Writes Markdown and HTML artifacts to `<projectRoot>/.boocode/artifacts/`
// as plain files. Returns `{path, url}` where:
// - path is the absolute on-disk path
// - url is a project-scoped REST URL pointing at the GET download route
// registered in routes/artifacts.ts. The route streams the file with
// Content-Disposition: attachment.
//
// Path safety: we do NOT use path_guard.ts (it realpaths and throws ENOENT
// for files that don't exist yet, which artifact creation requires).
// Instead we mirror the v1.13.18 codecontext_client.ts pattern: resolve
// the candidate path against the realpath'd projectRoot and verify
// lexically that it stays under projectRoot + sep. After mkdir, the
// artifacts directory itself is realpath'd and re-checked so a symlinked
// `.boocode/artifacts` cannot redirect the write outside the project.
import { mkdir, realpath, writeFile } from 'node:fs/promises';
import { resolve, sep } from 'node:path';
import { PathScopeError } from './path_guard.js';
import type { Message } from '../types/api.js';
// Input accepted by writeHtmlArtifact and deriveHtmlSlug.
export interface HtmlArtifactPayload {
// The HTML text; writeHtmlArtifact writes it to disk unchanged.
html_content: string;
// Not read anywhere in this module. Presumably the character length of
// html_content — TODO confirm against the code that builds the payload.
char_count: number;
// Optional display title. When non-blank, deriveHtmlSlug prefers it over
// any <title>/<h1> found inside html_content.
title: string | null;
}
// What the write helpers in this module hand back after a successful write.
export interface ArtifactWriteResult {
// Absolute on-disk path of the file that was written.
path: string;
// Project-scoped URL of the form /api/projects/<projectId>/artifacts/<filename>.
url: string;
}
// Directory, relative to the realpath'd project root, that artifact files
// are written into.
const ARTIFACT_SUBDIR = '.boocode/artifacts';
// ---- slug helpers ----
// Turn arbitrary text into a filename-safe slug made of [a-z0-9-].
//
// Pipeline:
//   1. NFKD-normalise and drop combining marks so accented / compatibility
//      characters fold to their ASCII base ("Café" -> "cafe", not "caf").
//   2. Lowercase.
//   3. Replace every run of non-alphanumerics with a single '-'. Because
//      each run is maximal, no "--" can survive this step, so no separate
//      collapse pass is needed.
//   4. Trim '-' from both ends, cap at 60 chars, and trim again in case the
//      cut landed right after a separator.
//
// Returns 'artifact' when nothing usable remains (empty input, punctuation
// only, or text with no Latin letters/digits).
function slugify(input: string): string {
  const cleaned = input
    .normalize('NFKD')
    .replace(/\p{M}+/gu, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 60)
    .replace(/^-+|-+$/g, '');
  return cleaned || 'artifact';
}
// Find the first level-1 ATX heading ("# Title") that begins a line,
// allowing leading spaces/tabs. Deeper headings ("## ...") and a '#' not
// followed by whitespace do not count. Returns the trimmed heading text,
// or null when there is no such heading or its text is blank.
function firstHeading(md: string): string | null {
  const match = /^[ \t]*#[ \t]+(.+?)\s*$/m.exec(md);
  if (match === null) return null;
  const headingText = (match[1] ?? '').trim();
  return headingText === '' ? null : headingText;
}
// Return at most the first `n` whitespace-separated words of `s`, joined
// by single spaces. Blank input yields ''.
function firstNWords(s: string, n: number): string {
  const tokens = s
    .trim()
    .split(/\s+/)
    .filter((word) => Boolean(word));
  return tokens.slice(0, n).join(' ');
}
// Slug for a Markdown message: based on its first "# " heading when one
// exists, otherwise on the first six words of the message.
export function deriveMarkdownSlug(messageContent: string): string {
  const basis = firstHeading(messageContent) || firstNWords(messageContent, 6);
  return slugify(basis);
}
// Reduce HTML to rough plain text: drop whole <script> and <style>
// elements, replace every remaining tag with a space, then collapse
// whitespace and trim. Deliberately crude — the output only feeds slug and
// title derivation, never rendering. Entities are left as-is.
function stripTags(html: string): string {
  const withoutScripts = html.replace(
    /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
    ' ',
  );
  const withoutStyles = withoutScripts.replace(
    /<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi,
    ' ',
  );
  const textOnly = withoutStyles.replace(/<[^>]+>/g, ' ');
  return textOnly.replace(/\s+/g, ' ').trim();
}
// Plain-text content of the first <title>...</title> element
// (case-insensitive), or null when there is none or it is blank.
function extractTitleTag(html: string): string | null {
  const match = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (match === null) return null;
  const titleText = stripTags(match[1] ?? '').trim();
  return titleText === '' ? null : titleText;
}
// Plain-text content of the first <h1>...</h1> element (case-insensitive),
// with any nested tags stripped; null when absent or blank.
function extractH1(html: string): string | null {
  const match = /<h1[^>]*>([\s\S]*?)<\/h1>/i.exec(html);
  if (match === null) return null;
  const headingText = stripTags(match[1] ?? '').trim();
  return headingText === '' ? null : headingText;
}
// Slug for an HTML artifact. Sources are tried in order and the first
// usable one wins:
//   1. the payload's explicit title (when non-blank),
//   2. the document's <title>,
//   3. the first <h1>,
//   4. the first six words of the tag-stripped text.
export function deriveHtmlSlug(payload: {
  html_content: string;
  title: string | null;
}): string {
  const explicitTitle = payload.title;
  if (explicitTitle && explicitTitle.trim().length > 0) {
    return slugify(explicitTitle);
  }
  const html = payload.html_content;
  const label =
    extractTitleTag(html) ||
    extractH1(html) ||
    firstNWords(stripTags(html), 6);
  return slugify(label);
}
// Human-readable title for the html_artifact part payload. Preference
// order: <title> text, then first <h1> text, then the first 80 characters
// of the tag-stripped document. Returns null when the document contains no
// text at all.
export function deriveHtmlTitle(html: string): string | null {
  const tagged = extractTitleTag(html) || extractH1(html);
  if (tagged) return tagged;
  const plainText = stripTags(html);
  return plainText.length === 0 ? null : plainText.slice(0, 80);
}
// ---- HTML detection (B4) ----
// Decide whether an assistant message is an HTML artifact and, if so,
// return the HTML to store. Two shapes are recognised (after trimming):
//   - the message begins with <!DOCTYPE html> (any case) -> the whole
//     trimmed message is returned;
//   - the message is exactly one fenced ```html block whose body starts
//     with a doctype or contains an <html> tag -> the trimmed body is
//     returned.
// Anything else, including a fenced block surrounded by prose or a fenced
// HTML fragment without <html>, yields null.
export function detectHtmlArtifact(text: string): string | null {
  const body = text.trim();
  if (body === '') return null;

  if (/^<!doctype\s+html/i.test(body)) return body;

  // The fence must span the entire trimmed message; the newline before the
  // closing fence is optional.
  const fenced = /^```html\s*\n([\s\S]*?)\n?```\s*$/i.exec(body);
  if (fenced === null) return null;

  const inner = fenced[1] ?? '';
  const looksLikeDocument =
    /^\s*<!doctype\s+html/i.test(inner) || /<html[\s>]/i.test(inner);
  return looksLikeDocument ? inner.trim() : null;
}
// ---- path resolution ----
// Compute where `<projectRoot>/.boocode/artifacts/<filename>` lives and
// confirm, purely lexically, that nothing escapes:
//   - projectRoot is realpath'd first, so the containment root is canonical;
//   - the artifacts dir must sit strictly below that root;
//   - the file path must sit strictly below the artifacts dir, which rejects
//     filenames such as '..', '../x' or ''.
// This does not look at symlinks beneath the root; that is checked
// separately once the directory exists. Throws PathScopeError on escape;
// realpath rejects if projectRoot does not exist.
async function resolveArtifactPath(
  projectRoot: string,
  filename: string,
): Promise<{ resolvedRoot: string; artifactsDir: string; absPath: string }> {
  const resolvedRoot = await realpath(projectRoot);
  const artifactsDir = resolve(resolvedRoot, ARTIFACT_SUBDIR);
  const absPath = resolve(artifactsDir, filename);

  const rootPrefix = resolvedRoot + sep;
  const dirPrefix = artifactsDir + sep;

  // ARTIFACT_SUBDIR is non-empty, so artifactsDir can never equal
  // resolvedRoot; only the strict-prefix test is needed.
  const dirInsideRoot = artifactsDir.startsWith(rootPrefix);
  if (!dirInsideRoot) {
    throw new PathScopeError(
      `artifacts dir escapes project root: ${artifactsDir}`,
    );
  }

  const fileInsideDir = absPath.startsWith(dirPrefix);
  if (!fileInsideDir) {
    throw new PathScopeError(
      `artifact filename escapes artifacts dir: ${filename}`,
    );
  }

  return { resolvedRoot, artifactsDir, absPath };
}
// Symlink-escape guard, to be called once the artifacts dir exists.
// The lexical check in resolveArtifactPath cannot see symlinks: if
// `.boocode/artifacts` (or an ancestor below the root) links outside the
// project, the path looks fine but the write would land elsewhere. Here
// the directory is realpath'd and must equal or sit below resolvedRoot;
// otherwise PathScopeError is thrown.
async function assertArtifactsDirSafe(
  artifactsDir: string,
  resolvedRoot: string,
): Promise<void> {
  const realDir = await realpath(artifactsDir);
  const contained =
    realDir === resolvedRoot || realDir.startsWith(resolvedRoot + sep);
  if (contained) return;
  throw new PathScopeError(
    `artifacts dir resolves outside project root: ${realDir}`,
  );
}
// Outcome of the size check below. `byteLen` is always the UTF-8 byte
// length that was measured; `reason` is present only on refusal.
export type HtmlArtifactDecision =
  | { write: true; byteLen: number }
  | { write: false; byteLen: number; reason: 'cap_exceeded' };

// Pure size gate for the `html_artifact` part, exported so the cap-skip
// branch can be unit tested. Measures the UTF-8 byte length of the HTML
// (not its character count) and allows the write when it is at or below
// HTML_ARTIFACT_MAX_BYTES; anything larger is refused with
// reason 'cap_exceeded'.
export function decideHtmlArtifactWrite(
  htmlContent: string,
): HtmlArtifactDecision {
  const byteLen = Buffer.byteLength(htmlContent, 'utf8');
  return byteLen <= HTML_ARTIFACT_MAX_BYTES
    ? { write: true, byteLen }
    : { write: false, byteLen, reason: 'cap_exceeded' };
}
// Build the project-scoped download URL for an artifact file:
//   /api/projects/<projectId>/artifacts/<filename>
// Both path segments are percent-encoded so that neither an unusual
// project id nor a filename can inject extra path segments, a query
// string, or a fragment. Ids made only of unreserved characters (such as
// UUIDs) come out unchanged.
function buildUrl(projectId: string, filename: string): string {
  const projectSegment = encodeURIComponent(projectId);
  const fileSegment = encodeURIComponent(filename);
  return `/api/projects/${projectSegment}/artifacts/${fileSegment}`;
}
// Per-project context required by the artifact write helpers.
export interface WriteContext {
// Project identifier; interpolated into the returned download URL.
projectId: string;
// Project root directory. It is realpath'd and used as the containment
// root for the artifacts directory, so it must already exist.
projectRoot: string;
}
// Persist a message's Markdown as `<slug>-<epoch-ms>.md` inside the
// project's artifacts directory and return its path and download URL.
// Order matters: resolve and lexically validate the target, create the
// directory, re-validate it through realpath (symlink guard), and only
// then write. Rejects with PathScopeError if either check fails, or with
// the underlying fs error if mkdir/writeFile fails.
export async function writeMarkdownArtifact(
  message: Pick<Message, 'content'>,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const { content } = message;
  const filename = `${deriveMarkdownSlug(content)}-${Date.now()}.md`;
  const target = await resolveArtifactPath(ctx.projectRoot, filename);

  await mkdir(target.artifactsDir, { recursive: true });
  await assertArtifactsDirSafe(target.artifactsDir, target.resolvedRoot);
  await writeFile(target.absPath, content, 'utf8');

  return { path: target.absPath, url: buildUrl(ctx.projectId, filename) };
}
// Persist an HTML payload as `<slug>-<epoch-ms>.html` inside the project's
// artifacts directory and return its path and download URL. Follows the
// same sequence as the Markdown writer: resolve and lexically validate,
// mkdir, realpath re-validation (symlink guard), then write. The size cap
// is not enforced here. Rejects with PathScopeError on a scope violation
// or with the underlying fs error.
export async function writeHtmlArtifact(
  payload: HtmlArtifactPayload,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const filename = `${deriveHtmlSlug(payload)}-${Date.now()}.html`;
  const target = await resolveArtifactPath(ctx.projectRoot, filename);

  await mkdir(target.artifactsDir, { recursive: true });
  await assertArtifactsDirSafe(target.artifactsDir, target.resolvedRoot);
  await writeFile(target.absPath, payload.html_content, 'utf8');

  return { path: target.absPath, url: buildUrl(ctx.projectId, filename) };
}
// Size cap for HTML artifacts (proposal S6): 2^20 bytes = 1 MiB, compared
// against the UTF-8 byte length of the HTML. Payloads above the cap are
// reported as 'cap_exceeded' by decideHtmlArtifactWrite; per the original
// design note the caller then skips the `html_artifact` part, keeps the
// assistant text as plain content, and logs a warning. That graceful
// fallback was chosen over aborting the stream because it is simpler and
// loses nothing from the user's point of view.
export const HTML_ARTIFACT_MAX_BYTES = 0x10_0000;