v1.13.5: opencode truncate.ts port — full tool output retrievable via opaque id

- New services/truncate.ts. Tmpfs storage at /tmp/boocode-truncations/ (BOOCODE_TRUNCATION_DIR env var overrides for tests). 12-char base32 opaque ids (~60 bits entropy, "tr_<id>"). Three exports: storeTruncation, readTruncation, truncateIfNeeded (wrap-or-passthrough helper). cleanupTruncations does TTL-pass (7 days) + orphan-reap (parts query on payload->'output'->>'outputPath') in one shot. - Wired four tools through truncateIfNeeded: view_file (raw full file), list_dir (full filtered+secret-filtered entries serialized one-per-line), web_fetch (textRaw pre-slice), codecontext_client (body.result pre-slice). Each returns the existing sliced view plus an optional outputPath field when truncation fires. - New view_truncated_output ToolDef. Resolves opaque id → on-disk content internally; model never sees the truncation dir. Same start_line / end_line slicing semantics as view_file. Registered in ALL_TOOLS (alpha sort places it after view_file automatically) and READ_ONLY_TOOL_NAMES. - cleanupTruncations piggybacks on the v1.13.3 stuck-row sweeper's 60s setInterval. No-op when truncation dir is empty. Not wired (TODO follow-up): grep and find_files. file_ops returns post-cap results to the tool execute path, so the "full content" isn't recoverable without a refactor of fileOps.grep / fileOps.findFiles to expose the uncapped result. web_search is silent-slice (no truncated flag); outside scope. Five sites of seven covered; the remaining two are the only ones needing a file_ops change. Tests: 7 new in truncate.test.ts (roundtrip, unknown id, malformed id, truncateIfNeeded false/true/over-cap/storage-failure paths). 186 total (was 179). cleanupTruncations file-system half implicitly via TTL pass; orphan-reap branch covered by the live container smoke. Smoke verified end-to-end against the live container: - view_file with start_line=1, end_line=3 on CLAUDE.md → tool_result part carried outputPath "tr_cdpn1o04k6ma" + truncated=true. 
- /tmp/boocode-truncations/tr_cdpn1o04k6ma exists, 15876 bytes, mode 0o600, parent dir mode 0o700. - Follow-up view_truncated_output(id, start_line=50, end_line=55) returned the actual lines 50-55 of CLAUDE.md (the 808notes/BooCode bullets). - ALL_TOOLS count=20 (was 19); alpha sort places view_truncated_output between view_file and watch_changes. Closes a v1.12 catalog row that was scoped but deferred. The v1.13 parts table made outputPath ride on the existing tool_result payload with no schema change beyond the storage helper itself. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 07:55:55 +00:00
parent ec8593cf77
commit f8fc5db929
6 changed files with 439 additions and 34 deletions
--- a/apps/server/src/services/truncate.ts
+++ b/apps/server/src/services/truncate.ts
@@ -0,0 +1,170 @@
+import { promises as fs } from 'fs';
+import { randomBytes } from 'crypto';
+import path from 'path';
+import type { Sql } from '../db.js';
+
+// v1.13.5: opencode-style truncation storage. When a tool slice would cut
+// content the model might still want, we store the full text on tmpfs and
+// hand the model an opaque id. view_truncated_output(id) retrieves it.
+//
+// Tmpfs path means full content vanishes on container restart; chats that
+// outlive a restart lose retrieval (acceptable — the user has usually moved
+// on or the data is stale). 7-day TTL + orphan reap bound disk growth via
+// the periodic sweeper in index.ts.
+
+// Directory holding the stored full outputs. BOOCODE_TRUNCATION_DIR overrides
+// it (used by tests). `||` rather than `??` on purpose: an override that is
+// set but empty (e.g. `BOOCODE_TRUNCATION_DIR=` in an env file) must fall back
+// to the default too, otherwise path.join('', id) yields cwd-relative paths.
+export const TRUNCATION_DIR = process.env.BOOCODE_TRUNCATION_DIR || '/tmp/boocode-truncations';
+// Maximum age of a stored file before the TTL pass removes it: 7 days, in ms.
+export const TRUNCATION_TTL_MS = 7 * 24 * 60 * 60 * 1000;
+// Matches view_file's MAX_FILE_BYTES — anything bigger was already refused
+// at the source tool's size check, so we never see it here.
+export const MAX_TRUNCATION_BYTES = 5 * 1024 * 1024;
+
+// A valid id is "tr_" followed by exactly 12 chars of the base32 alphabet
+// 0-9a-v. Anchored at both ends, so it can never contain a path separator.
+const ID_RE = /^tr_[0-9a-v]{12}$/;
+
+// Make sure TRUNCATION_DIR exists; it is created owner-only (0o700) when
+// missing. Rejects with the underlying fs error if it cannot be created.
+//
+// Deliberately not memoized: a cached "already created" flag goes stale if
+// the directory is removed while the process is running (tmp cleaner, test
+// teardown), after which every store and every sweep would fail until
+// restart. A recursive mkdir on an existing directory succeeds without
+// error, so calling it each time costs one cheap syscall.
+async function ensureDir(): Promise<void> {
+  await fs.mkdir(TRUNCATION_DIR, { recursive: true, mode: 0o700 });
+}
+
+// Build a fresh opaque id: "tr_" + 12 base32 chars = 60 bits of entropy.
+// Each char comes from its own random byte; 256 is a multiple of 32, so
+// masking to the low 5 bits keeps the distribution uniform. Do not derive two
+// chars from one byte: overlapping 5-bit windows share bits, which caps a
+// char pair at 8 bits and the whole id at 48 bits.
+// Collision probability across a 7-day window with ~thousands of truncations
+// is essentially zero.
+function newId(): string {
+  const alphabet = '0123456789abcdefghijklmnopqrstuv';
+  let out = 'tr_';
+  for (const byte of randomBytes(12)) {
+    out += alphabet.charAt(byte & 0x1f);
+  }
+  return out;
+}
+
+// Map an id to its file inside TRUNCATION_DIR. Throws on anything that is
+// not a well-formed id.
+function idToPath(id: string): string {
+  // Defense-in-depth: callers only ever pass ids, never path components, but
+  // validating here guarantees the joined path cannot leave TRUNCATION_DIR
+  // even if a malformed id arrives from somewhere unexpected.
+  const wellFormed = ID_RE.test(id);
+  if (wellFormed) {
+    return path.join(TRUNCATION_DIR, id);
+  }
+  throw new Error(`Invalid truncation id: ${id}`);
+}
+
+// Persist fullContent under a freshly generated id and return that id.
+//
+// Throws if the content exceeds MAX_TRUNCATION_BYTES (measured as UTF-8
+// bytes), if the directory cannot be created, or if the write fails.
+//
+// The file is created exclusively (flag 'wx', mode 0o600): an id that already
+// exists on disk is never overwritten — which would silently swap the content
+// behind an id handed out earlier — and a symlink planted at the target path
+// is not followed. On the (astronomically unlikely) collision we retry with a
+// new id a couple of times before giving up with the EEXIST error.
+export async function storeTruncation(fullContent: string): Promise<string> {
+  const bytes = Buffer.byteLength(fullContent, 'utf8');
+  if (bytes > MAX_TRUNCATION_BYTES) {
+    throw new Error(`Truncation content ${bytes}B exceeds ${MAX_TRUNCATION_BYTES}B cap`);
+  }
+  await ensureDir();
+  const maxAttempts = 3;
+  for (let attempt = 1; ; attempt += 1) {
+    const id = newId();
+    try {
+      await fs.writeFile(idToPath(id), fullContent, { encoding: 'utf8', mode: 0o600, flag: 'wx' });
+      return id;
+    } catch (err) {
+      const collided = (err as NodeJS.ErrnoException).code === 'EEXIST';
+      // Anything other than a name collision (disk full, EACCES, ...) is a
+      // real failure; so is running out of retries.
+      if (!collided || attempt >= maxAttempts) throw err;
+    }
+  }
+}
+
+// Fetch the stored content for an id. Resolves to null when the id is
+// malformed or no file exists for it; any other read error is rethrown.
+export async function readTruncation(id: string): Promise<string | null> {
+  if (ID_RE.test(id)) {
+    try {
+      const stored = await fs.readFile(idToPath(id), { encoding: 'utf8' });
+      return stored;
+    } catch (err) {
+      // A missing file (expired, reaped, or lost with the tmpfs) is an
+      // expected outcome, not an error.
+      const code = (err as NodeJS.ErrnoException).code;
+      if (code !== 'ENOENT') throw err;
+    }
+  }
+  return null;
+}
+
+// Wrap-or-passthrough helper for tool output.
+//
+// - wasTruncated false: returns the sliced view as-is, truncated=false.
+// - wasTruncated true: stores the full content and returns the sliced view
+//   with truncated=true plus the stored id as outputPath.
+//
+// Never throws on storage problems. If the full content is larger than
+// MAX_TRUNCATION_BYTES, or storing it fails (disk full, permission denied),
+// the sliced view is returned without an outputPath — exactly what the tool
+// returned before v1.13.5.
+export async function truncateIfNeeded(args: {
+  fullContent: string;
+  slicedContent: string;
+  wasTruncated: boolean;
+}): Promise<{ content: string; truncated: boolean; outputPath?: string }> {
+  if (!args.wasTruncated) {
+    return { content: args.slicedContent, truncated: false };
+  }
+
+  // Result used whenever the full content could not be stored.
+  const withoutId = { content: args.slicedContent, truncated: true };
+
+  const tooLarge = Buffer.byteLength(args.fullContent, 'utf8') > MAX_TRUNCATION_BYTES;
+  if (tooLarge) {
+    return withoutId;
+  }
+
+  let outputPath: string;
+  try {
+    outputPath = await storeTruncation(args.fullContent);
+  } catch {
+    return withoutId;
+  }
+  return { ...withoutId, outputPath };
+}
+
+// Minimum age before an unreferenced file may be reaped as an orphan. A file
+// is written by storeTruncation before the tool_result part carrying its id
+// can exist in message_parts, so a sweep landing in that window would see a
+// brand-new file as unreferenced and delete it. 10 minutes is a conservative
+// guess — raise it if tool results can take longer than that to be persisted.
+const ORPHAN_GRACE_MS = 10 * 60 * 1000;
+
+// Periodic cleanup. Called from index.ts's sweep interval (v1.13.3 cadence).
+// Pass 1: TTL — anything older than TRUNCATION_TTL_MS is gone.
+// Pass 2: orphans — files older than ORPHAN_GRACE_MS with no live
+// message_parts.payload->'output'->>'outputPath' reference. Catches the case
+// where a part referencing an outputPath got hidden by prune (v1.13.4) and
+// the file is now unreachable.
+//
+// Returns how many files each pass removed. Only entries whose name is a
+// well-formed id are ever touched. A failed readdir or a failed orphan query
+// is logged and ends the sweep early with the counts so far; the only
+// rejection comes from failing to create the directory.
+export async function cleanupTruncations(args: {
+  sql: Sql;
+  log: { warn: (obj: object, msg: string) => void; error: (obj: object, msg: string) => void };
+}): Promise<{ ttlReaped: number; orphanReaped: number }> {
+  await ensureDir();
+  const now = Date.now();
+  const ttlCutoff = now - TRUNCATION_TTL_MS;
+  const orphanCutoff = now - ORPHAN_GRACE_MS;
+  let ttlReaped = 0;
+  let orphanReaped = 0;
+
+  let entries: string[];
+  try {
+    entries = await fs.readdir(TRUNCATION_DIR);
+  } catch (err) {
+    args.log.error({ err }, 'cleanupTruncations readdir failed');
+    return { ttlReaped, orphanReaped };
+  }
+  if (entries.length === 0) return { ttlReaped, orphanReaped };
+
+  // Pass 1: TTL. Files that survive it and are past the grace period become
+  // candidates for the orphan check.
+  const candidates: string[] = [];
+  for (const name of entries) {
+    if (!ID_RE.test(name)) continue;
+    const full = path.join(TRUNCATION_DIR, name);
+    try {
+      const stat = await fs.stat(full);
+      if (stat.mtimeMs < ttlCutoff) {
+        await fs.unlink(full);
+        ttlReaped += 1;
+      } else if (stat.mtimeMs < orphanCutoff) {
+        candidates.push(name);
+      }
+    } catch (err) {
+      // ENOENT: file vanished between readdir and stat/unlink — fine.
+      // Anything else is worth a log line but must not abort the sweep.
+      if ((err as NodeJS.ErrnoException).code !== 'ENOENT') {
+        args.log.warn({ err, name }, 'cleanupTruncations TTL pass failed for file');
+      }
+    }
+  }
+
+  // Pass 2: orphans. Skipped entirely (no query) when nothing qualifies.
+  if (candidates.length > 0) {
+    // outputPath rides inside the tool_result part's payload.output object
+    // (see partsFromToolMessage in inference/parts.ts), so the json path is
+    // payload->'output'->>'outputPath' rather than top-level.
+    let referenced: { output_path: string }[];
+    try {
+      referenced = await args.sql<{ output_path: string }[]>`
+    SELECT DISTINCT p.payload->'output'->>'outputPath' AS output_path
+    FROM message_parts p
+    WHERE p.kind = 'tool_result'
+      AND p.payload->'output' ? 'outputPath'
+      AND p.payload->'output'->>'outputPath' = ANY(${candidates})
+  `;
+    } catch (err) {
+      // Without the reference list nothing can safely be called an orphan.
+      // Report what the TTL pass achieved and try again on the next sweep.
+      args.log.error({ err }, 'cleanupTruncations orphan query failed');
+      if (ttlReaped > 0) {
+        args.log.warn({ ttlReaped, orphanReaped }, 'cleanupTruncations reaped files');
+      }
+      return { ttlReaped, orphanReaped };
+    }
+
+    const live = new Set(referenced.map((r) => r.output_path));
+    for (const name of candidates) {
+      if (live.has(name)) continue;
+      try {
+        await fs.unlink(path.join(TRUNCATION_DIR, name));
+        orphanReaped += 1;
+      } catch (err) {
+        // Best-effort: a file that is already gone is fine, anything else
+        // gets logged and is retried on the next sweep.
+        if ((err as NodeJS.ErrnoException).code !== 'ENOENT') {
+          args.log.warn({ err, name }, 'cleanupTruncations orphan unlink failed');
+        }
+      }
+    }
+  }
+
+  if (ttlReaped > 0 || orphanReaped > 0) {
+    args.log.warn({ ttlReaped, orphanReaped }, 'cleanupTruncations reaped files');
+  }
+  return { ttlReaped, orphanReaped };
+}