Every assistant message gets an "Open in pane" affordance that opens the
message in the workspace splitter — Markdown pane (Copy + Download .md) by
default; HTML pane (Download .html only) when the model emits a self-contained
<!DOCTYPE html> or fenced ```html artifact. BOOCHAT.md rule keeps Markdown
default at every length; HTML opt-in on explicit user request.
Backend: services/artifacts.ts (slug derivation + write helpers with
symlink-escape guard via realpath-after-mkdir), routes/artifacts.ts (POST
download + GET stream with nosniff + CSP sandbox defense-in-depth), HTML
detection in finalizeCompletion writing a new message_parts.kind='html_artifact'
row (schema CHECK extended via v1.13.13 pattern), graceful 1MB cap via the
pure decideHtmlArtifactWrite helper. PartKind union extended.
Frontend: MarkdownRenderer.tsx extracted from MessageBubble's inline
MarkdownBody for reuse; MarkdownArtifactPane.tsx + HtmlArtifactPane.tsx with
loading/error states; pane state is reference-only ({chat_id, message_id,
title}) — content fetched on mount to keep workspace_panes jsonb small and
avoid 1MB blobs riding session_workspace_updated frames. iframe sandbox
locked to allow-scripts allow-clipboard-write allow-downloads with no
allow-same-origin, srcDoc not src. openInPane discriminates 404 (expected
fallback) from real errors (toast + bail). PanelRightOpen icon button with
mobile 44px tap-target.
31 new server unit tests including a real-symlink filesystem case; 332/332
server tests passing, tsc clean both sides, pnpm -C apps/web build green.
Smoke deferred to first deploy.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
256 lines
9.0 KiB
TypeScript
256 lines
9.0 KiB
TypeScript
// v1.14.x-html-artifact-panes: artifact writer + slug derivation.
|
|
//
|
|
// Writes Markdown and HTML artifacts to `<projectRoot>/.boocode/artifacts/`
|
|
// as plain files. Returns `{path, url}` where:
|
|
// - path is the absolute on-disk path
|
|
// - url is a project-scoped REST URL pointing at the GET download route
|
|
// registered in routes/artifacts.ts. The route streams the file with
|
|
// Content-Disposition: attachment.
|
|
//
|
|
// Path safety: we do NOT use path_guard.ts (it realpaths and throws ENOENT
|
|
// for files that don't exist yet, which artifact creation requires).
|
|
// Instead we mirror the v1.13.18 codecontext_client.ts pattern: resolve
|
|
// the candidate path against the realpath'd projectRoot, then verify the
|
|
// result starts with projectRoot + sep (or equals projectRoot).
|
|
|
|
import { mkdir, realpath, writeFile } from 'node:fs/promises';
|
|
import { resolve, sep } from 'node:path';
|
|
import { PathScopeError } from './path_guard.js';
|
|
import type { Message } from '../types/api.js';
|
|
|
|
// Payload describing one detected HTML artifact.
export interface HtmlArtifactPayload {
  // Full HTML document text; writeHtmlArtifact writes it to disk verbatim.
  html_content: string;

  // Not read anywhere in this module — presumably the character length of
  // `html_content` as computed by the caller (TODO confirm).
  char_count: number;

  // Explicit title, if one is known. When non-blank, deriveHtmlSlug prefers
  // it over anything extracted from the markup.
  title: string | null;
}
|
|
|
|
// What the write helpers hand back after an artifact has been persisted.
export interface ArtifactWriteResult {
  // Absolute on-disk path of the file that was written.
  path: string;

  // Project-scoped REST URL for the same file, of the form
  // `/api/projects/<projectId>/artifacts/<filename>`.
  url: string;
}
|
|
|
|
// Directory (relative to the realpath'd project root) that holds every
// artifact file written by this module.
const ARTIFACT_SUBDIR = '.boocode/artifacts';
|
|
|
|
// ---- slug helpers ----
|
|
|
|
// Turn arbitrary text into a filename-safe slug.
//
// Pipeline: fold accented letters to their base letter ("é" → "e"),
// lowercase, replace every run of characters outside [a-z0-9] with a single
// '-', trim leading/trailing '-', cap at 60 chars, then trim a '-' that the
// cap may have left dangling. Returns 'artifact' when nothing survives
// (empty input, punctuation only, or scripts with no ASCII equivalent).
function slugify(input: string): string {
  const cleaned = input
    // NFKD splits a precomposed letter into base letter + combining mark(s);
    // dropping the marks keeps the base letter instead of losing it to the
    // [^a-z0-9] replacement below.
    .normalize('NFKD')
    .replace(/\p{M}+/gu, '')
    .toLowerCase()
    // One '-' per run, so no separate "collapse repeated '-'" pass is needed.
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 60)
    // Only the tail can end in '-' after the cut; the head was trimmed above.
    .replace(/-+$/, '');
  return cleaned || 'artifact';
}
|
|
|
|
// Return the text of the first level-1 ATX heading (`# Title`) in `md`, or
// null when there is none.
//
// Lines inside fenced code blocks (``` or ~~~) are skipped, so a shell or
// Python comment such as `# install deps` in a code sample is not mistaken
// for a heading. A heading whose text is blank is ignored and the scan
// continues. `##` and deeper headings never match because the single `#`
// must be followed by a space or tab.
function firstHeading(md: string): string | null {
  // Marker of the currently open fence (e.g. "```" or "~~~~"), or null when
  // the scan is outside any fenced block.
  let openFence: string | null = null;

  // Split on the same terminators that `^`/`$` honour in multiline regexes.
  for (const line of md.split(/\r\n|[\n\r\u2028\u2029]/)) {
    const fence = /^[ \t]*(`{3,}|~{3,})(.*)$/.exec(line);
    if (fence) {
      const marker = fence[1] ?? '';
      const rest = fence[2] ?? '';
      if (openFence === null) {
        // A backtick run followed by more backticks on the same line is
        // inline code, not a fence opener.
        const isInlineCode = marker[0] === '`' && rest.includes('`');
        if (!isInlineCode) openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ''
      ) {
        // Same fence character, at least as long, nothing after it: closes.
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) continue;

    const heading = /^[ \t]*#[ \t]+(.+?)\s*$/.exec(line);
    const text = heading?.[1]?.trim() ?? '';
    if (text.length > 0) return text;
  }
  return null;
}
|
|
|
|
// Return the first `n` whitespace-delimited words of `s`, joined by single
// spaces. Yields '' when `s` contains no words.
function firstNWords(s: string, n: number): string {
  // Every maximal run of non-whitespace is one word; no match means no words.
  const tokens = s.match(/\S+/g) ?? [];
  return tokens.slice(0, n).join(' ');
}
|
|
|
|
// Slug for a Markdown artifact: taken from the first `# ` heading when the
// message has one, otherwise from its first six words.
export function deriveMarkdownSlug(messageContent: string): string {
  const slugSource =
    firstHeading(messageContent) || firstNWords(messageContent, 6);
  return slugify(slugSource);
}
|
|
|
|
// Named character references decoded by stripTags. Deliberately tiny: this
// is for readable titles/slugs, not for rendering.
const BASIC_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

// Decode numeric (`&#65;`, `&#x41;`) and the basic named character
// references in one pass, so `&amp;lt;` becomes `&lt;` rather than `<`.
// Unknown names and out-of-range code points are left untouched.
function decodeBasicEntities(text: string): string {
  return text.replace(
    /&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi,
    (whole: string, body: string): string => {
      if (!body.startsWith('#')) {
        return BASIC_HTML_ENTITIES.get(body.toLowerCase()) ?? whole;
      }
      const isHex = body[1] === 'x' || body[1] === 'X';
      const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const isSurrogate = code >= 0xd800 && code <= 0xdfff;
      // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates
      // are not useful text either.
      if (!(code > 0 && code <= 0x10ffff) || isSurrogate) return whole;
      return String.fromCodePoint(code);
    },
  );
}

// Reduce HTML to its readable inner text. Crude but sufficient for slug and
// title derivation — we're not rendering, just finding readable words.
//
// Removes <script>/<style> elements together with their contents, replaces
// every remaining tag with a space, decodes basic character references
// (so "Tom &amp; Jerry" reads "Tom & Jerry"), collapses whitespace runs to a
// single space and trims. Decoding happens after tag removal so an escaped
// `&lt;b&gt;` stays literal text instead of being stripped as a tag.
function stripTags(html: string): string {
  const withoutTags = html
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ' ')
    .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');
  return decodeBasicEntities(withoutTags).replace(/\s+/g, ' ').trim();
}
|
|
|
|
// Readable text of the first <title> element in `html`, or null when there
// is no <title> or it contains no text once tags are stripped.
function extractTitleTag(html: string): string | null {
  const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  if (found === null) return null;
  const inner = stripTags(found[1] ?? '').trim();
  return inner === '' ? null : inner;
}
|
|
|
|
// Readable text of the first <h1> element in `html`, or null when there is
// no <h1> or it contains no text once nested tags are stripped.
function extractH1(html: string): string | null {
  const found = /<h1[^>]*>([\s\S]*?)<\/h1>/i.exec(html);
  if (found === null) return null;
  const inner = stripTags(found[1] ?? '').trim();
  return inner === '' ? null : inner;
}
|
|
|
|
// Slug for an HTML artifact. Sources, in priority order: the payload's
// explicit non-blank title, the document's <title>, its first <h1>, and
// finally the first six words of its tag-stripped text.
export function deriveHtmlSlug(payload: {
  html_content: string;
  title: string | null;
}): string {
  const { html_content: html, title: explicitTitle } = payload;
  if (explicitTitle && explicitTitle.trim().length > 0) {
    return slugify(explicitTitle);
  }
  // `||` keeps the fallbacks lazy: later extractors only run when the
  // earlier ones found nothing.
  const slugSource =
    extractTitleTag(html) ||
    extractH1(html) ||
    firstNWords(stripTags(html), 6);
  return slugify(slugSource);
}
|
|
|
|
// Derive title for the html_artifact part payload: <title> → first <h1> →
// first 80 chars of inner text. Returns null if nothing useful is found.
//
// The 80-char cut is measured in UTF-16 code units. If it would land in the
// middle of a surrogate pair (e.g. an emoji straddling the boundary), the
// orphaned high surrogate is dropped so the title is always well-formed.
export function deriveHtmlTitle(html: string): string | null {
  const explicit = extractTitleTag(html) || extractH1(html);
  if (explicit) return explicit;

  const inner = stripTags(html);
  if (inner.length === 0) return null;
  if (inner.length <= 80) return inner;

  const snippet = inner.slice(0, 80);
  const lastUnit = snippet.charCodeAt(snippet.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate ? snippet.slice(0, -1) : snippet;
}
|
|
|
|
// ---- HTML detection (B4) ----
|
|
|
|
// Returns the HTML content if `text` is a recognised HTML artifact:
//   - starts with <!DOCTYPE html> (case-insensitive, whitespace-trimmed), in
//     which case the whole trimmed message is returned, OR
//   - consists of exactly one fenced ```html ... ``` block whose body starts
//     with a doctype or contains an <html> tag, in which case the trimmed
//     body is returned.
// Returns null if neither matches.
export function detectHtmlArtifact(text: string): string | null {
  const trimmed = text.trim();
  if (trimmed.length === 0) return null;
  if (/^<!doctype\s+html/i.test(trimmed)) return trimmed;

  // Fenced ```html block spanning the entire (trimmed) message. Allow an
  // optional trailing newline before the closing fence.
  const fence = trimmed.match(/^```html\s*\n([\s\S]*?)\n?```\s*$/i);
  if (!fence) return null;
  const inner = fence[1] ?? '';

  // The pattern is anchored to the end of the message, so the lazy capture
  // stretches to the LAST ``` it can find. If the capture itself contains a
  // fence line, the html block actually closed earlier and the message
  // holds more than one block — that is not "wrapped entirely".
  if (/^ {0,3}```/m.test(inner)) return null;

  const looksLikeDocument =
    /^\s*<!doctype\s+html/i.test(inner) || /<html[\s>]/i.test(inner);
  return looksLikeDocument ? inner.trim() : null;
}
|
|
|
|
// ---- path resolution ----
|
|
|
|
// Resolve `<projectRoot>/.boocode/artifacts/<filename>` and verify the
// result stays under projectRoot.
//
// projectRoot is realpath'd first (so it must exist); the artifacts dir and
// file path are then built lexically with path.resolve and prefix-checked.
// Nothing below the root is realpath'd here, so symlinks inside the project
// are NOT detected by this function.
//
// Returns the realpath'd root, the artifacts directory and the absolute file
// path. Throws PathScopeError when the artifacts dir would leave the root or
// when `filename` would leave the artifacts dir (e.g. contains `..` or a
// path separator). Rejects with the realpath error if projectRoot is missing.
async function resolveArtifactPath(
  projectRoot: string,
  filename: string,
): Promise<{ resolvedRoot: string; artifactsDir: string; absPath: string }> {
  const resolvedRoot = await realpath(projectRoot);
  const artifactsDir = resolve(resolvedRoot, ARTIFACT_SUBDIR);
  const absPath = resolve(artifactsDir, filename);

  // A filesystem root ("/" or "C:\") already ends with the separator;
  // appending another would make the prefix "//" and reject every path.
  const rootPrefix = resolvedRoot.endsWith(sep)
    ? resolvedRoot
    : resolvedRoot + sep;

  if (!artifactsDir.startsWith(rootPrefix)) {
    throw new PathScopeError(
      `artifacts dir escapes project root: ${artifactsDir}`,
    );
  }
  // path.resolve never leaves a trailing separator on artifactsDir, so a
  // plain `+ sep` is safe here.
  if (!absPath.startsWith(artifactsDir + sep)) {
    throw new PathScopeError(
      `artifact filename escapes artifacts dir: ${filename}`,
    );
  }
  return { resolvedRoot, artifactsDir, absPath };
}
|
|
|
|
// After mkdir, realpath the artifacts dir and re-verify it stays under
// resolvedRoot. Closes the symlink-escape gap: if `.boocode/artifacts` (or
// any ancestor below resolvedRoot) is a symlink pointing outside the
// project, a purely lexical prefix check passes but the actual write would
// land outside the sandbox.
//
// `artifactsDir` must already exist (realpath rejects otherwise) and
// `resolvedRoot` must already be a realpath. A directory that resolves to
// the root itself is accepted. Throws PathScopeError on escape.
async function assertArtifactsDirSafe(
  artifactsDir: string,
  resolvedRoot: string,
): Promise<void> {
  const realDir = await realpath(artifactsDir);
  if (realDir === resolvedRoot) return;

  // A filesystem root ("/" or "C:\") already ends with the separator;
  // appending another would make the prefix "//" and reject every path.
  const rootPrefix = resolvedRoot.endsWith(sep)
    ? resolvedRoot
    : resolvedRoot + sep;

  if (!realDir.startsWith(rootPrefix)) {
    throw new PathScopeError(
      `artifacts dir resolves outside project root: ${realDir}`,
    );
  }
}
|
|
|
|
// Outcome of decideHtmlArtifactWrite, the pure helper that tells
// finalizeCompletion whether to write the `html_artifact` part.
// Discriminated on `write`:
//   - `{write: true, byteLen}`   — payload is within the cap.
//   - `{write: false, byteLen, reason: 'cap_exceeded'}` — payload is oversize.
// `byteLen` is the UTF-8 byte length of the HTML in both cases.
export type HtmlArtifactDecision =
  | { write: true; byteLen: number }
  | { write: false; byteLen: number; reason: 'cap_exceeded' };
|
|
|
|
// Decide whether an HTML payload may be stored as an `html_artifact` part.
// Measures the UTF-8 byte length (not the character count) and compares it
// with HTML_ARTIFACT_MAX_BYTES; a payload exactly at the cap is accepted.
// Exported so the cap-skip branch can be unit tested.
export function decideHtmlArtifactWrite(
  htmlContent: string,
): HtmlArtifactDecision {
  const byteLen = Buffer.byteLength(htmlContent, 'utf8');
  return byteLen > HTML_ARTIFACT_MAX_BYTES
    ? { write: false, byteLen, reason: 'cap_exceeded' }
    : { write: true, byteLen };
}
|
|
|
|
// Build the project-scoped REST URL for an artifact file:
// `/api/projects/<projectId>/artifacts/<filename>`.
// Both dynamic segments are percent-encoded so neither can inject a path
// separator, query string or fragment into the URL. Ids made only of
// unreserved characters (e.g. UUIDs) come out unchanged.
function buildUrl(projectId: string, filename: string): string {
  const projectSegment = encodeURIComponent(projectId);
  const fileSegment = encodeURIComponent(filename);
  return `/api/projects/${projectSegment}/artifacts/${fileSegment}`;
}
|
|
|
|
// Where an artifact belongs: which project it is served under and where
// that project lives on disk.
export interface WriteContext {
  // Project identifier; becomes the `<projectId>` segment of the returned URL.
  projectId: string;

  // Project directory. It is realpath'd before use, and artifacts are written
  // beneath `<projectRoot>/.boocode/artifacts/`.
  projectRoot: string;
}
|
|
|
|
// Persist a message's Markdown as `<slug>-<epoch ms>.md` in the project's
// artifacts directory and return its on-disk path and download URL.
//
// Order matters: the target is checked lexically, the directory is created,
// and only then is the directory realpath-checked for symlink escape —
// all before any bytes are written. Rejects with PathScopeError if either
// check fails, or with the underlying fs error.
export async function writeMarkdownArtifact(
  message: Pick<Message, 'content'>,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const { content } = message;
  const filename = `${deriveMarkdownSlug(content)}-${Date.now()}.md`;

  const target = await resolveArtifactPath(ctx.projectRoot, filename);
  await mkdir(target.artifactsDir, { recursive: true });
  await assertArtifactsDirSafe(target.artifactsDir, target.resolvedRoot);
  await writeFile(target.absPath, content, 'utf8');

  return {
    path: target.absPath,
    url: buildUrl(ctx.projectId, filename),
  };
}
|
|
|
|
// Persist an HTML payload as `<slug>-<epoch ms>.html` in the project's
// artifacts directory and return its on-disk path and download URL.
//
// Same sequence as the Markdown writer: lexical path check, mkdir, realpath
// re-check of the directory, then the write. This function does not itself
// compare the payload with HTML_ARTIFACT_MAX_BYTES. Rejects with
// PathScopeError if either path check fails, or with the underlying fs error.
export async function writeHtmlArtifact(
  payload: HtmlArtifactPayload,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const filename = `${deriveHtmlSlug(payload)}-${Date.now()}.html`;

  const target = await resolveArtifactPath(ctx.projectRoot, filename);
  await mkdir(target.artifactsDir, { recursive: true });
  await assertArtifactsDirSafe(target.artifactsDir, target.resolvedRoot);
  await writeFile(target.absPath, payload.html_content, 'utf8');

  return {
    path: target.absPath,
    url: buildUrl(ctx.projectId, filename),
  };
}
|
|
|
|
// Size cap for HTML artifacts: 1 MiB (proposal S6). A payload above this is
// not written to the `html_artifact` part — the assistant text lands as
// plain content and a warning is logged. Aborting the stream was considered,
// but the graceful "no artifact, fall back to plain text" path is simpler
// and lossless from the user's perspective.
export const HTML_ARTIFACT_MAX_BYTES = 1024 * 1024;
|