import { promises as fs } from 'fs';
import { randomBytes } from 'crypto';
import path from 'path';
import type { Sql } from '../db.js';

// v1.13.5: opencode-style truncation storage. When a tool slice would cut
// content the model might still want, we store the full text on tmpfs and
// hand the model an opaque id. view_truncated_output(id) retrieves it.
//
// Tmpfs path means full content vanishes on container restart; chats that
// outlive a restart lose retrieval (acceptable — the user has usually moved
// on or the data is stale). 7-day TTL + orphan reap bound disk growth via
// the periodic sweeper in index.ts.

// Storage directory; overridable through BOOCODE_TRUNCATION_DIR. Read once at
// module load, so later changes to the environment variable have no effect.
export const TRUNCATION_DIR = process.env.BOOCODE_TRUNCATION_DIR ?? '/tmp/boocode-truncations';
export const TRUNCATION_TTL_MS = 7 * 24 * 60 * 60 * 1000;
// Matches view_file's MAX_FILE_BYTES — anything bigger was already refused
// at the source tool's size check, so we never see it here.
export const MAX_TRUNCATION_BYTES = 5 * 1024 * 1024;

// Shape of a valid id: the 'tr_' prefix plus 12 chars of the base32 alphabet
// used by newId (digits 0-9 and letters a-v).
const ID_RE = /^tr_[0-9a-v]{12}$/;

// Set after the first successful mkdir so later calls skip the syscall. It is
// never reset: if the directory is removed while the process is running,
// writes fail until restart.
let dirEnsured = false;

// Creates TRUNCATION_DIR (owner-only, recursive) the first time it is called.
async function ensureDir(): Promise<void> {
  if (dirEnsured) return;
  await fs.mkdir(TRUNCATION_DIR, { recursive: true, mode: 0o700 });
  dirEnsured = true;
}

// 12 base32 chars ≈ 60 bits of entropy. Collision probability across a
// 7-day window with ~thousands of truncations is essentially zero.
//
// Each output char is drawn from its own random byte (low 5 bits kept, top 3
// discarded), so the chars are independent. Deriving two chars from one byte
// would make them share bits and drop the id to 48 bits of entropy.
function newId(): string {
  const alphabet = '0123456789abcdefghijklmnopqrstuv';
  let out = 'tr_';
  for (const byte of randomBytes(12)) {
    out += alphabet.charAt(byte & 0x1f);
  }
  return out;
}

// Maps an id to its file under TRUNCATION_DIR.
// Throws Error when the id does not match ID_RE.
function idToPath(id: string): string {
  // Defense-in-depth: the model never supplies a path component (only ids),
  // but a malformed id from anywhere else shouldn't escape TRUNCATION_DIR.
  if (!ID_RE.test(id)) {
    throw new Error(`Invalid truncation id: ${id}`);
  }
  return path.join(TRUNCATION_DIR, id);
}

// Persists fullContent as a UTF-8 file (mode 0600) under a fresh id and
// returns that id. Throws when the UTF-8 size exceeds MAX_TRUNCATION_BYTES;
// mkdir/writeFile failures propagate to the caller.
export async function storeTruncation(fullContent: string): Promise<string> {
  const bytes = Buffer.byteLength(fullContent, 'utf8');
  if (bytes > MAX_TRUNCATION_BYTES) {
    throw new Error(`Truncation content ${bytes}B exceeds ${MAX_TRUNCATION_BYTES}B cap`);
  }
  await ensureDir();
  const id = newId();
  await fs.writeFile(idToPath(id), fullContent, { encoding: 'utf8', mode: 0o600 });
  return id;
}

// Returns the stored text for id, or null when the id is malformed or the
// file does not exist. Any other read error is rethrown.
export async function readTruncation(id: string): Promise<string | null> {
  if (!ID_RE.test(id)) return null;
  try {
    return await fs.readFile(idToPath(id), { encoding: 'utf8' });
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code === 'ENOENT') return null;
    throw err;
  }
}

// Wrap a tool's output. If wasTruncated, stash the full content on tmpfs
// and return its id alongside the sliced view the tool would have returned.
// Storage failure (disk full, permission denied) is non-fatal — the sliced
// view ships without an outputPath, which is exactly what the tool returned
// before v1.13.5. Same goes for content over MAX_TRUNCATION_BYTES.
//
// Despite the name, outputPath carries the id returned by storeTruncation,
// not a filesystem path.
export async function truncateIfNeeded(args: {
  fullContent: string;
  slicedContent: string;
  wasTruncated: boolean;
}): Promise<{ content: string; truncated: boolean; outputPath?: string }> {
  if (!args.wasTruncated) {
    return { content: args.slicedContent, truncated: false };
  }
  // Checked up front so oversized content skips the store attempt entirely.
  const bytes = Buffer.byteLength(args.fullContent, 'utf8');
  if (bytes > MAX_TRUNCATION_BYTES) {
    return { content: args.slicedContent, truncated: true };
  }
  try {
    const outputPath = await storeTruncation(args.fullContent);
    return { content: args.slicedContent, truncated: true, outputPath };
  } catch {
    // Deliberate best-effort: fall back to the plain sliced view.
    return { content: args.slicedContent, truncated: true };
  }
}

// Periodic cleanup. Called from index.ts's sweep interval (v1.13.3 cadence).
// Pass 1: TTL — anything older than TRUNCATION_TTL_MS is gone.
// Pass 2: orphans — files with no live message_parts.payload->'output'->>'outputPath'
// reference. Catches the case where a part referencing an outputPath got
// hidden by prune (v1.13.4) and the file is now unreachable.
//
// args.sql — tagged-template query function used for the orphan lookup.
// args.log — receives a warn when anything was reaped or a file could not be
//            processed, and an error when the directory cannot be listed.
// Returns the number of files removed by each pass. A readdir failure is
// logged and yields zero counts; a failure of the SQL query is not caught
// here and rejects the returned promise.
//
// NOTE(review): the orphan pass has no grace period, so a file written by
// storeTruncation whose message part is not yet persisted would presumably
// be reaped if the sweep lands in between — confirm against the caller.
export async function cleanupTruncations(args: {
  sql: Sql;
  log: { warn: (obj: object, msg: string) => void; error: (obj: object, msg: string) => void };
}): Promise<{ ttlReaped: number; orphanReaped: number }> {
  await ensureDir();
  const cutoff = Date.now() - TRUNCATION_TTL_MS;
  let ttlReaped = 0;
  let orphanReaped = 0;

  let entries: string[];
  try {
    entries = await fs.readdir(TRUNCATION_DIR);
  } catch (err) {
    args.log.error({ err }, 'cleanupTruncations readdir failed');
    return { ttlReaped, orphanReaped };
  }
  if (entries.length === 0) return { ttlReaped, orphanReaped };

  // Pass 1: unlink expired files, collect the rest for the orphan check.
  // Names that are not well-formed ids are left untouched.
  const survivors: string[] = [];
  for (const name of entries) {
    if (!ID_RE.test(name)) continue;
    const full = path.join(TRUNCATION_DIR, name);
    try {
      const stat = await fs.stat(full);
      if (stat.mtimeMs < cutoff) {
        await fs.unlink(full);
        ttlReaped += 1;
      } else {
        survivors.push(name);
      }
    } catch (err) {
      // File vanished between readdir and stat — fine. Anything else
      // (permissions, I/O) is surfaced so a stuck file doesn't go unnoticed.
      if ((err as NodeJS.ErrnoException).code !== 'ENOENT') {
        args.log.warn({ err, name }, 'cleanupTruncations could not process file');
      }
    }
  }

  if (survivors.length === 0) {
    if (ttlReaped > 0) {
      args.log.warn({ ttlReaped, orphanReaped: 0 }, 'cleanupTruncations reaped files');
    }
    return { ttlReaped, orphanReaped: 0 };
  }

  // outputPath rides inside the tool_result part's payload.output object
  // (see partsFromToolMessage in inference/parts.ts), so the json path is
  // payload->'output'->>'outputPath' rather than top-level.
  const referenced = await args.sql<{ output_path: string }[]>`
    SELECT DISTINCT p.payload->'output'->>'outputPath' AS output_path
    FROM message_parts p
    WHERE p.kind = 'tool_result'
      AND p.payload->'output' ? 'outputPath'
      AND p.payload->'output'->>'outputPath' = ANY(${survivors})
  `;
  const live = new Set(referenced.map((r) => r.output_path));

  // Pass 2: unlink every survivor that no tool_result part references.
  for (const name of survivors) {
    if (live.has(name)) continue;
    try {
      await fs.unlink(path.join(TRUNCATION_DIR, name));
      orphanReaped += 1;
    } catch (err) {
      // Already gone is fine; report other failures instead of hiding them.
      if ((err as NodeJS.ErrnoException).code !== 'ENOENT') {
        args.log.warn({ err, name }, 'cleanupTruncations could not remove orphan');
      }
    }
  }

  if (ttlReaped > 0 || orphanReaped > 0) {
    args.log.warn({ ttlReaped, orphanReaped }, 'cleanupTruncations reaped files');
  }
  return { ttlReaped, orphanReaped };
}