import type { Sql } from '../../db.js';

// v1.13.4: two-tier compaction prune. Opencode's prune half (the cheap one);
// summarize half shipped in v1.11.0 as services/compaction.ts.
//
// Algorithm: scan tool_result parts newest-first. Protect the last
// PROTECTED_TOKENS of content (the model recently saw these — pruning them
// kills coherence). Older parts are candidates. Mark them hidden_at only
// if the candidate pool would free at least PRUNE_TRIGGER_TOKENS — pruning
// 3 small tool_results to recover 500 tokens isn't worth the loss of
// fidelity for the model's next turn.
//
// Stops at the last compaction summary boundary (chats.tail_start_id). The
// v1.11.0 summary already encodes everything before that point; pruning
// across the boundary would double-erase.

/** Estimated tokens of the newest tool_result content that are never pruned. */
export const PROTECTED_TOKENS = 40_000;

/** Minimum estimated tokens the candidate pool must free before anything is hidden. */
export const PRUNE_TRIGGER_TOKENS = 20_000;

// Rough char-to-token estimate. Same heuristic compaction's usable() uses
// implicitly via the buffer constant.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

// Token estimate for an arbitrary payload, based on its JSON serialization.
// null/undefined payloads are measured as the empty string ('""' once
// serialized).
function payloadTokens(payload: unknown): number {
  // JSON.stringify returns undefined for values with no JSON representation
  // (functions, symbols); count those as empty instead of throwing on
  // `.length`.
  const serialized: string | undefined = JSON.stringify(payload ?? '');
  return estimateTokens(serialized ?? '');
}

/** Outcome of a prune pass. */
export interface PruneResult {
  /** Number of part ids submitted to be marked hidden. */
  hidden: number;
  /** Estimated tokens covered by those parts. */
  freedTokens: number;
}

/** Minimal view of a message part needed to decide whether to prune it. */
export interface PartForPrune {
  id: string;
  payload: unknown;
  /** Compared against the summary boundary; `prune` fills this from the owning message's created_at. */
  created_at: Date;
}

/**
 * Pure algorithmic core, exported for unit-test access. The caller performs
 * the DB UPDATE.
 *
 * @param partsNewestFirst Parts already ordered newest-first.
 * @param tailStartCreatedAt Optional cutoff (last compaction summary
 *   boundary). Scanning stops at the first part older than it; `null`
 *   disables the cutoff.
 * @returns The part ids to hide and the total token estimate of the
 *   candidates. Both are empty/zero when there are no candidates or the
 *   candidates total fewer than PRUNE_TRIGGER_TOKENS.
 */
export function selectPruneTargets(
  partsNewestFirst: ReadonlyArray<PartForPrune>,
  tailStartCreatedAt: Date | null,
): { ids: string[]; freedTokens: number } {
  let protectedTokens = 0;
  let crossedProtection = false;
  const candidates: { id: string; tokens: number }[] = [];

  for (const part of partsNewestFirst) {
    if (tailStartCreatedAt && part.created_at < tailStartCreatedAt) {
      // Past the last summary boundary; the v1.11.0 anchored summary already
      // covers everything older. Bail rather than double-erase.
      break;
    }

    const tokens = payloadTokens(part.payload);

    if (!crossedProtection) {
      // Still filling the protected window. The part that pushes the running
      // total over the threshold is itself protected.
      protectedTokens += tokens;
      if (protectedTokens >= PROTECTED_TOKENS) {
        crossedProtection = true;
      }
      continue;
    }

    candidates.push({ id: part.id, tokens });
  }

  const candidateTokens = candidates.reduce((sum, c) => sum + c.tokens, 0);
  if (candidates.length === 0 || candidateTokens < PRUNE_TRIGGER_TOKENS) {
    return { ids: [], freedTokens: 0 };
  }

  return { ids: candidates.map((c) => c.id), freedTokens: candidateTokens };
}

/**
 * Hides older tool_result parts of a chat by setting `hidden_at`, when
 * `selectPruneTargets` decides enough tokens would be freed.
 *
 * @param args.sql Tagged-template SQL client used for all queries.
 * @param args.chatId Chat whose visible tool_result parts are scanned.
 * @returns How many part ids were selected for hiding and their estimated
 *   token total; zeros when nothing was pruned.
 */
export async function prune(args: {
  sql: Sql;
  chatId: string;
}): Promise<PruneResult> {
  const { sql, chatId } = args;

  // Newest-first scan of visible tool_result parts in this chat. Pull
  // chats.tail_start_id alongside so we know where the last summary boundary
  // sits (don't prune across it).
  const parts = await sql<{
    id: string;
    payload: unknown;
    created_at: Date;
    tail_start_id: string | null;
  }[]>`
    SELECT p.id, p.payload, m.created_at,
           (SELECT c.tail_start_id FROM chats c WHERE c.id = ${chatId}) AS tail_start_id
    FROM message_parts p
    JOIN messages m ON m.id = p.message_id
    WHERE m.chat_id = ${chatId}
      AND p.kind = 'tool_result'
      AND p.hidden_at IS NULL
    ORDER BY m.created_at DESC, p.sequence DESC
  `;
  if (parts.length === 0) {
    return { hidden: 0, freedTokens: 0 };
  }

  // Read the boundary cutoff timestamp once. Older messages are off-limits.
  // Every row carries the same tail_start_id, so the first row is enough.
  let tailStartCreatedAt: Date | null = null;
  const firstTailId = parts[0]?.tail_start_id ?? null;
  if (firstTailId) {
    const tailRow = await sql<{ created_at: Date }[]>`
      SELECT created_at FROM messages WHERE id = ${firstTailId}
    `;
    tailStartCreatedAt = tailRow[0]?.created_at ?? null;
  }

  const decision = selectPruneTargets(parts, tailStartCreatedAt);
  if (decision.ids.length === 0) {
    return { hidden: 0, freedTokens: 0 };
  }

  await sql`
    UPDATE message_parts
    SET hidden_at = clock_timestamp()
    WHERE id = ANY(${decision.ids})
  `;

  return { hidden: decision.ids.length, freedTokens: decision.freedTokens };
}