v1.13.5: opencode truncate.ts port — full tool output retrievable via opaque id
- New services/truncate.ts. Tmpfs storage at /tmp/boocode-truncations/ (BOOCODE_TRUNCATION_DIR env var overrides for tests). 12-char base32 opaque ids (~60 bits entropy, "tr_<id>"). Three exports: storeTruncation, readTruncation, truncateIfNeeded (wrap-or-passthrough helper). cleanupTruncations does TTL-pass (7 days) + orphan-reap (parts query on payload->'output'->>'outputPath') in one shot. - Wired four tools through truncateIfNeeded: view_file (raw full file), list_dir (full filtered+secret-filtered entries serialized one-per-line), web_fetch (textRaw pre-slice), codecontext_client (body.result pre-slice). Each returns the existing sliced view plus an optional outputPath field when truncation fires. - New view_truncated_output ToolDef. Resolves opaque id → on-disk content internally; model never sees the truncation dir. Same start_line / end_line slicing semantics as view_file. Registered in ALL_TOOLS (alpha sort places it after view_file automatically) and READ_ONLY_TOOL_NAMES. - cleanupTruncations piggybacks on the v1.13.3 stuck-row sweeper's 60s setInterval. No-op when truncation dir is empty. Not wired (TODO follow-up): grep and find_files. file_ops returns post-cap results to the tool execute path, so the "full content" isn't recoverable without a refactor of fileOps.grep / fileOps.findFiles to expose the uncapped result. web_search is silent-slice (no truncated flag); outside scope. Five sites of seven covered; the remaining two are the only ones needing a file_ops change. Tests: 7 new in truncate.test.ts (roundtrip, unknown id, malformed id, truncateIfNeeded false/true/over-cap/storage-failure paths). 186 total (was 179). cleanupTruncations file-system half implicitly via TTL pass; orphan-reap branch covered by the live container smoke. Smoke verified end-to-end against the live container: - view_file with start_line=1, end_line=3 on CLAUDE.md → tool_result part carried outputPath "tr_cdpn1o04k6ma" + truncated=true. 
- /tmp/boocode-truncations/tr_cdpn1o04k6ma exists, 15876 bytes, mode 0o600, parent dir mode 0o700. - Follow-up view_truncated_output(id, start_line=50, end_line=55) returned the actual lines 50-55 of CLAUDE.md (the 808notes/BooCode bullets). - ALL_TOOLS count=20 (was 19); alpha sort places view_truncated_output between view_file and watch_changes. Closes a v1.12 catalog row that was scoped but deferred. The v1.13 parts table made outputPath ride on the existing tool_result payload with no schema change beyond the storage helper itself. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
170
apps/server/src/services/truncate.ts
Normal file
170
apps/server/src/services/truncate.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import { promises as fs } from 'fs';
|
||||
import { randomBytes } from 'crypto';
|
||||
import path from 'path';
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
// v1.13.5: opencode-style truncation storage. When a tool slice would cut
|
||||
// content the model might still want, we store the full text on tmpfs and
|
||||
// hand the model an opaque id. view_truncated_output(id) retrieves it.
|
||||
//
|
||||
// Tmpfs path means full content vanishes on container restart; chats that
|
||||
// outlive a restart lose retrieval (acceptable — the user has usually moved
|
||||
// on or the data is stale). 7-day TTL + orphan reap bound disk growth via
|
||||
// the periodic sweeper in index.ts.
|
||||
|
||||
/**
 * Directory holding stored truncation files. BOOCODE_TRUNCATION_DIR overrides
 * the default (used by tests).
 *
 * `||` rather than `??` on purpose: an env var that is set but empty must fall
 * back to the default. An empty directory string would make mkdir fail and
 * would make path.join(dir, id) resolve relative to the process cwd.
 */
export const TRUNCATION_DIR = process.env.BOOCODE_TRUNCATION_DIR || '/tmp/boocode-truncations';

/** Age (7 days, in milliseconds) after which the sweeper deletes a stored truncation. */
export const TRUNCATION_TTL_MS = 7 * 24 * 60 * 60 * 1000;

/**
 * Largest payload (5 MiB) accepted for storage.
 *
 * Matches view_file's MAX_FILE_BYTES — anything bigger was already refused
 * at the source tool's size check, so we never see it here.
 */
export const MAX_TRUNCATION_BYTES = 5 * 1024 * 1024;

/** Shape of a valid truncation id: "tr_" followed by exactly 12 characters from 0-9a-v. */
const ID_RE = /^tr_[0-9a-v]{12}$/;
|
||||
|
||||
/**
 * Creates TRUNCATION_DIR with owner-only permissions (0o700) when it is missing.
 *
 * Deliberately not memoised. A process-lifetime "already created" flag would
 * leave every later write and sweep failing with ENOENT if the directory were
 * removed while the server is running (tmp cleaner, operator action). A
 * recursive mkdir on an existing directory succeeds without changing it, so
 * calling it on every store/sweep is safe.
 */
async function ensureDir(): Promise<void> {
  await fs.mkdir(TRUNCATION_DIR, { recursive: true, mode: 0o700 });
}
|
||||
|
||||
/**
 * Generates a fresh opaque truncation id: "tr_" followed by 12 characters from
 * the 32-symbol alphabet 0-9a-v (the shape ID_RE accepts).
 *
 * Each character is derived from its own random byte, so the 12 characters are
 * independent and the id carries 12 * 5 = 60 bits of entropy. Deriving two
 * characters from one byte would make them share bits and cap the id at
 * 8 bits per byte. Masking with 0x1f is unbiased because 256 is a multiple
 * of 32.
 *
 * @returns A 15-character id such as "tr_cdpn1o04k6ma".
 */
function newId(): string {
  const alphabet = '0123456789abcdefghijklmnopqrstuv';
  let out = 'tr_';
  for (const byte of randomBytes(12)) {
    out += alphabet.charAt(byte & 0x1f);
  }
  return out;
}
|
||||
|
||||
/**
 * Maps a truncation id to its file path inside TRUNCATION_DIR.
 *
 * The id is checked against ID_RE first. The model only ever supplies ids,
 * never path components, but this guard keeps a malformed id from any other
 * caller from resolving outside TRUNCATION_DIR.
 *
 * @param id Truncation id expected to match ID_RE.
 * @returns Path of the file that stores (or would store) this id's content.
 * @throws Error when the id does not match ID_RE.
 */
function idToPath(id: string): string {
  const wellFormed = ID_RE.test(id);
  if (wellFormed) {
    return path.join(TRUNCATION_DIR, id);
  }
  throw new Error(`Invalid truncation id: ${id}`);
}
|
||||
|
||||
/**
 * Persists the full text of a truncated tool output and returns the opaque id
 * under which it can be read back.
 *
 * The file is written with mode 0o600 into TRUNCATION_DIR, which is created
 * first if needed.
 *
 * @param fullContent Complete, unsliced tool output.
 * @returns The newly generated truncation id.
 * @throws Error when the UTF-8 size of fullContent exceeds MAX_TRUNCATION_BYTES.
 *   File-system errors from creating the directory or writing the file propagate.
 */
export async function storeTruncation(fullContent: string): Promise<string> {
  const bytes = Buffer.byteLength(fullContent, 'utf8');
  const overCap = bytes > MAX_TRUNCATION_BYTES;
  if (overCap) {
    throw new Error(`Truncation content ${bytes}B exceeds ${MAX_TRUNCATION_BYTES}B cap`);
  }

  await ensureDir();

  const id = newId();
  const target = idToPath(id);
  await fs.writeFile(target, fullContent, { encoding: 'utf8', mode: 0o600 });
  return id;
}
|
||||
|
||||
/**
 * Loads the stored content for a truncation id.
 *
 * @param id Truncation id as handed out by storeTruncation.
 * @returns The stored text, or null when the id is malformed or no file
 *   exists for it.
 * @throws Any file-system error other than ENOENT.
 */
export async function readTruncation(id: string): Promise<string | null> {
  if (!ID_RE.test(id)) {
    return null;
  }

  try {
    const stored = await fs.readFile(idToPath(id), { encoding: 'utf8' });
    return stored;
  } catch (error) {
    // A missing file is an expected outcome (expired, reaped, or lost on
    // restart); anything else is a real failure and is passed on.
    const missing = (error as NodeJS.ErrnoException).code === 'ENOENT';
    if (!missing) {
      throw error;
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Wraps a tool's output. When the tool sliced its result, the full content is
 * stored and its id is returned alongside the sliced view the tool would have
 * returned anyway.
 *
 * Storage is best-effort: if the full content is larger than
 * MAX_TRUNCATION_BYTES, or storing it fails for any reason (disk full,
 * permission denied), the sliced view ships without an outputPath — exactly
 * what the tool returned before v1.13.5.
 *
 * @param args.fullContent Complete, unsliced output.
 * @param args.slicedContent The view the tool returns to the model.
 * @param args.wasTruncated Whether slicedContent omits part of fullContent.
 * @returns The sliced content, the truncated flag, and — only when the full
 *   content was stored — the id under which it can be retrieved.
 */
export async function truncateIfNeeded(args: {
  fullContent: string;
  slicedContent: string;
  wasTruncated: boolean;
}): Promise<{ content: string; truncated: boolean; outputPath?: string }> {
  const content = args.slicedContent;

  if (!args.wasTruncated) {
    return { content, truncated: false };
  }

  // Too large to store: still report the truncation, just without an id.
  const fitsCap = Buffer.byteLength(args.fullContent, 'utf8') <= MAX_TRUNCATION_BYTES;
  if (!fitsCap) {
    return { content, truncated: true };
  }

  let outputPath: string;
  try {
    outputPath = await storeTruncation(args.fullContent);
  } catch {
    // Non-fatal by design; fall back to the plain sliced result.
    return { content, truncated: true };
  }
  return { content, truncated: true, outputPath };
}
|
||||
|
||||
// Minimum age before an unreferenced file may be treated as an orphan.
// storeTruncation writes the file before the tool_result part that references
// it can exist in the database, so a sweep landing in between would otherwise
// delete content the model is about to be handed an id for.
// NOTE(review): 10 minutes is assumed to exceed the longest gap between a tool
// storing its output and its part being persisted — confirm against the
// inference pipeline.
const ORPHAN_GRACE_MS = 10 * 60 * 1000;

/**
 * Periodic cleanup. Called from index.ts's sweep interval (v1.13.3 cadence).
 *
 * Pass 1 (TTL): every file older than TRUNCATION_TTL_MS is deleted.
 * Pass 2 (orphans): files older than ORPHAN_GRACE_MS with no
 * message_parts.payload->'output'->>'outputPath' reference are deleted.
 * Catches the case where a part referencing an outputPath got hidden by
 * prune (v1.13.4) and the file is now unreachable.
 *
 * Directory entries whose names do not match ID_RE are never touched.
 * Failures to list the directory or to run the reference query are logged and
 * end the sweep early with the counts gathered so far.
 *
 * @param args.sql Tagged-template SQL client used for the reference lookup.
 * @param args.log Logger receiving reap summaries (warn) and failures (warn/error).
 * @returns Number of files removed by each pass.
 * @throws File-system errors from creating TRUNCATION_DIR propagate.
 */
export async function cleanupTruncations(args: {
  sql: Sql;
  log: { warn: (obj: object, msg: string) => void; error: (obj: object, msg: string) => void };
}): Promise<{ ttlReaped: number; orphanReaped: number }> {
  await ensureDir();
  const now = Date.now();
  const ttlCutoff = now - TRUNCATION_TTL_MS;
  const orphanCutoff = now - ORPHAN_GRACE_MS;
  let ttlReaped = 0;
  let orphanReaped = 0;

  // Single exit path so the summary is logged no matter where the sweep stops.
  const finish = (): { ttlReaped: number; orphanReaped: number } => {
    if (ttlReaped > 0 || orphanReaped > 0) {
      args.log.warn({ ttlReaped, orphanReaped }, 'cleanupTruncations reaped files');
    }
    return { ttlReaped, orphanReaped };
  };

  // A file disappearing mid-sweep is expected; every other failure is logged.
  const reportUnlessGone = (err: unknown, name: string): void => {
    if ((err as NodeJS.ErrnoException | null)?.code === 'ENOENT') return;
    args.log.warn({ err, name }, 'cleanupTruncations could not reap file');
  };

  let entries: string[];
  try {
    entries = await fs.readdir(TRUNCATION_DIR);
  } catch (err) {
    args.log.error({ err }, 'cleanupTruncations readdir failed');
    return { ttlReaped, orphanReaped };
  }

  // Pass 1: TTL. Files that survive it and are past the grace window become
  // orphan candidates; younger files are kept without consulting the database.
  const candidates: string[] = [];
  for (const name of entries) {
    if (!ID_RE.test(name)) continue;
    const full = path.join(TRUNCATION_DIR, name);
    try {
      const stat = await fs.stat(full);
      if (stat.mtimeMs < ttlCutoff) {
        await fs.unlink(full);
        ttlReaped += 1;
      } else if (stat.mtimeMs < orphanCutoff) {
        candidates.push(name);
      }
    } catch (err) {
      reportUnlessGone(err, name);
    }
  }

  if (candidates.length === 0) return finish();

  // Pass 2: orphans.
  // outputPath rides inside the tool_result part's payload.output object
  // (see partsFromToolMessage in inference/parts.ts), so the json path is
  // payload->'output'->>'outputPath' rather than top-level.
  let live: Set<string>;
  try {
    const referenced = await args.sql<{ output_path: string }[]>`
      SELECT DISTINCT p.payload->'output'->>'outputPath' AS output_path
      FROM message_parts p
      WHERE p.kind = 'tool_result'
        AND p.payload->'output' ? 'outputPath'
        AND p.payload->'output'->>'outputPath' = ANY(${candidates})
    `;
    live = new Set(referenced.map((r) => r.output_path));
  } catch (err) {
    // Without a trustworthy reference list nothing can be called an orphan;
    // skip the pass rather than reject out of the sweep interval.
    args.log.error({ err }, 'cleanupTruncations orphan query failed');
    return finish();
  }

  for (const name of candidates) {
    if (live.has(name)) continue;
    try {
      await fs.unlink(path.join(TRUNCATION_DIR, name));
      orphanReaped += 1;
    } catch (err) {
      reportUnlessGone(err, name);
    }
  }

  return finish();
}
|
||||
Reference in New Issue
Block a user