// v1.14.x-html-artifact-panes: artifact writer + slug derivation.
//
// Writes Markdown and HTML artifacts to `<projectRoot>/.boocode/artifacts/`
// as plain files. Returns `{path, url}` where:
//   - path is the absolute on-disk path
//   - url is a project-scoped REST URL pointing at the GET download route
//     registered in routes/artifacts.ts. The route streams the file with
//     Content-Disposition: attachment.
//
// Path safety: we do NOT use path_guard.ts (it realpaths and throws ENOENT
// for files that don't exist yet, which artifact creation requires).
// Instead we mirror the v1.13.18 codecontext_client.ts pattern: resolve
// the candidate path against the realpath'd projectRoot, then verify the
// result starts with projectRoot + sep (or equals projectRoot).

import { mkdir, realpath, writeFile } from 'node:fs/promises';
import { resolve, sep } from 'node:path';
import { PathScopeError } from './path_guard.js';
import type { Message } from '../types/api.js';

// Payload carried by an `html_artifact` part.
export interface HtmlArtifactPayload {
  html_content: string;
  char_count: number;
  title: string | null;
}

// Result of writing an artifact: absolute on-disk path + download URL.
export interface ArtifactWriteResult {
  path: string;
  url: string;
}

// Artifacts directory, relative to the project root.
const ARTIFACT_SUBDIR = '.boocode/artifacts';

// ---- slug helpers ----

// Lowercase, replace non-alnum runs with '-', trim leading/trailing '-',
// collapse repeated '-', cap at 60 chars. Empty → 'artifact'.
function slugify(input: string): string {
  const cleaned = input
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .replace(/-{2,}/g, '-')
    .slice(0, 60)
    // The 60-char cut can land right after a '-', so trim once more.
    .replace(/^-+|-+$/g, '');
  return cleaned || 'artifact';
}

// Returns the text of the first `# ` ATX heading, or null when the document
// has none (or the heading is empty after trimming).
function firstHeading(md: string): string | null {
  // Match the first `# ` ATX heading at the start of a line.
  const m = md.match(/^[ \t]*#[ \t]+(.+?)\s*$/m);
  if (!m) return null;
  const text = m[1]?.trim() ?? '';
  return text.length > 0 ? text : null;
}

// Returns the first `n` whitespace-separated words of `s`, joined by single
// spaces.
function firstNWords(s: string, n: number): string {
  const words = s.trim().split(/\s+/).filter(Boolean).slice(0, n);
  return words.join(' ');
}

// Slug for a Markdown artifact: the first `# ` heading when present,
// otherwise the first six words of the message.
export function deriveMarkdownSlug(messageContent: string): string {
  const heading = firstHeading(messageContent);
  if (heading) return slugify(heading);
  const sixWords = firstNWords(messageContent, 6);
  return slugify(sixWords);
}

// Strip HTML tags for inner-text extraction. Crude but sufficient for slug
// derivation — we're not rendering, just finding readable words.
function stripTags(html: string): string {
  return (
    html
      // Drop <script> and <style> elements together with their contents so
      // code and CSS never leak into the readable text.
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ' ')
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ' ')
      // Replace every remaining tag with a space, then collapse whitespace.
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim()
  );
}

// Returns the tag-stripped text of the first <title> element, or null when
// there is no <title> or it contains no text.
function extractTitleTag(html: string): string | null {
  const m = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  if (!m) return null;
  const text = stripTags(m[1] ?? '').trim();
  return text.length > 0 ? text : null;
}

// Returns the tag-stripped text of the first <h1> element, or null when
// there is no <h1> or it contains no text.
function extractH1(html: string): string | null {
  const m = html.match(/<h1\b[^>]*>([\s\S]*?)<\/h1>/i);
  if (!m) return null;
  const text = stripTags(m[1] ?? '').trim();
  return text.length > 0 ? text : null;
}

// Slug for an HTML artifact. Priority: explicit non-blank payload title →
// <title> → first <h1> → first six words of the inner text.
export function deriveHtmlSlug(payload: {
  html_content: string;
  title: string | null;
}): string {
  if (payload.title && payload.title.trim().length > 0) {
    return slugify(payload.title);
  }
  const title = extractTitleTag(payload.html_content);
  if (title) return slugify(title);
  const h1 = extractH1(payload.html_content);
  if (h1) return slugify(h1);
  const inner = stripTags(payload.html_content);
  return slugify(firstNWords(inner, 6));
}

// Derive title for the html_artifact part payload: <title> → first <h1> →
// first 80 chars of inner text. Returns null if nothing useful is found.
export function deriveHtmlTitle(html: string): string | null {
  // Prefer the document's own <title>, then its first <h1>.
  const t = extractTitleTag(html);
  if (t) return t;
  const h1 = extractH1(html);
  if (h1) return h1;
  // Fall back to the leading 80 characters of the tag-stripped text.
  const inner = stripTags(html);
  if (inner.length === 0) return null;
  return inner.slice(0, 80);
}

// ---- HTML detection (B4) ----

// Returns the inner HTML content if `text` is a recognised HTML artifact:
//   - starts with <!DOCTYPE html> (case-insensitive, whitespace-trimmed), OR
//   - wrapped entirely in a fenced ```html ... ``` block.
// Returns null if neither matches.
export function detectHtmlArtifact(text: string): string | null {
  const trimmed = text.trim();
  if (trimmed.length === 0) return null;

  if (/^<!doctype\s+html/i.test(trimmed)) {
    return trimmed;
  }

  // Fenced ```html block consuming the entire (trimmed) message. Allow an
  // optional trailing newline before the closing fence.
  const fence = trimmed.match(/^```html\s*\n([\s\S]*?)\n?```\s*$/i);
  if (fence) {
    const inner = fence[1] ?? '';
    // Only a full document counts: the fenced body must open with a doctype
    // or contain an <html> tag; bare fragments are not artifacts.
    if (/^\s*<!doctype\s+html/i.test(inner) || /<html[\s>]/i.test(inner)) {
      return inner.trim();
    }
  }

  return null;
}

// ---- path resolution ----

// Resolve `<projectRoot>/.boocode/artifacts/<filename>` and verify the
// result stays under projectRoot. Mirrors the v1.13.18 codecontext_client.ts
// approach: realpath projectRoot first, then prefix-check the candidate.
// Throws on escape.
async function resolveArtifactPath(
  projectRoot: string,
  filename: string,
): Promise<{ resolvedRoot: string; artifactsDir: string; absPath: string }> {
  const resolvedRoot = await realpath(projectRoot);
  const artifactsDir = resolve(resolvedRoot, ARTIFACT_SUBDIR);
  const absPath = resolve(artifactsDir, filename);

  // Lexical prefix check on the resolved candidates. (The `!== resolvedRoot`
  // branch was dead — ARTIFACT_SUBDIR is non-empty so artifactsDir always
  // differs from resolvedRoot.)
  if (!artifactsDir.startsWith(resolvedRoot + sep)) {
    throw new PathScopeError(
      `artifacts dir escapes project root: ${artifactsDir}`,
    );
  }
  if (!absPath.startsWith(artifactsDir + sep)) {
    throw new PathScopeError(
      `artifact filename escapes artifacts dir: ${filename}`,
    );
  }

  return { resolvedRoot, artifactsDir, absPath };
}

// After mkdir, realpath the artifacts dir and re-verify it stays under
// resolvedRoot. Closes the symlink-escape gap: if `.boocode/artifacts` (or
// any ancestor below resolvedRoot) is a symlink pointing outside the
// project, the lexical check in resolveArtifactPath passes but the actual
// write lands outside the sandbox. Throws PathScopeError on escape.
async function assertArtifactsDirSafe(
  artifactsDir: string,
  resolvedRoot: string,
): Promise<void> {
  const realDir = await realpath(artifactsDir);
  if (realDir !== resolvedRoot && !realDir.startsWith(resolvedRoot + sep)) {
    throw new PathScopeError(
      `artifacts dir resolves outside project root: ${realDir}`,
    );
  }
}

// Pure decision helper for whether finalizeCompletion should write the
// `html_artifact` part. Exported for unit testing the cap-skip branch.
// Returns `{write: true, byteLen}` when the payload is under the cap, or
// `{write: false, byteLen, reason: 'cap_exceeded'}` when oversize.
export type HtmlArtifactDecision =
  | { write: true; byteLen: number }
  | { write: false; byteLen: number; reason: 'cap_exceeded' };

export function decideHtmlArtifactWrite(
  htmlContent: string,
): HtmlArtifactDecision {
  // Measure UTF-8 bytes, not UTF-16 code units: the cap is a byte cap.
  const byteLen = Buffer.byteLength(htmlContent, 'utf8');
  if (byteLen > HTML_ARTIFACT_MAX_BYTES) {
    return { write: false, byteLen, reason: 'cap_exceeded' };
  }
  return { write: true, byteLen };
}

// Build the project-scoped download URL for an artifact. Both path segments
// are percent-encoded so neither can inject extra path components.
function buildUrl(projectId: string, filename: string): string {
  return `/api/projects/${encodeURIComponent(projectId)}/artifacts/${encodeURIComponent(filename)}`;
}

// Where an artifact is written: `projectId` goes into the returned URL,
// `projectRoot` is the directory the artifacts dir is resolved under.
export interface WriteContext {
  projectId: string;
  projectRoot: string;
}

// Shared write path for both artifact kinds: resolve the target under the
// project root, create the artifacts dir, re-verify it after creation
// (symlink check), then write the file as UTF-8.
async function writeArtifactFile(
  filename: string,
  content: string,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const { resolvedRoot, artifactsDir, absPath } = await resolveArtifactPath(
    ctx.projectRoot,
    filename,
  );
  await mkdir(artifactsDir, { recursive: true });
  await assertArtifactsDirSafe(artifactsDir, resolvedRoot);
  await writeFile(absPath, content, 'utf8');
  return { path: absPath, url: buildUrl(ctx.projectId, filename) };
}

// Write a message's Markdown content to `<slug>-<epoch ms>.md` in the
// project's artifacts dir. Rejects with PathScopeError if the target would
// land outside the project root.
export async function writeMarkdownArtifact(
  message: Pick<Message, 'content'>,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const slug = deriveMarkdownSlug(message.content);
  const filename = `${slug}-${Date.now()}.md`;
  return writeArtifactFile(filename, message.content, ctx);
}

// Write an HTML payload to `<slug>-<epoch ms>.html` in the project's
// artifacts dir. Rejects with PathScopeError if the target would land
// outside the project root.
export async function writeHtmlArtifact(
  payload: HtmlArtifactPayload,
  ctx: WriteContext,
): Promise<ArtifactWriteResult> {
  const slug = deriveHtmlSlug(payload);
  const filename = `${slug}-${Date.now()}.html`;
  return writeArtifactFile(filename, payload.html_content, ctx);
}

// 1MB cap on HTML artifacts (proposal S6). Larger payloads are not written
// to the `html_artifact` part — the assistant text lands as plain content
// and a warning is logged. Streaming abort was considered but the graceful
// "no artifact, plain text falls back" path is simpler and lossless from
// the user's perspective.
export const HTML_ARTIFACT_MAX_BYTES = 1_048_576;