- message_parts.hidden_at timestamptz column (NULL by default) with a partial index on (message_id) WHERE hidden_at IS NULL for the common visible-parts filter. - messages_with_parts view changed from COALESCE(parts, legacy) to CASE WHEN EXISTS(any parts of kind) THEN visible-parts ELSE legacy. COALESCE would have leaked hidden parts back via the legacy fallback when every part was pruned (smoke caught it pre-commit). The CASE distinguishes "no parts at all → fall back to legacy column for pre-v1.13.0 history" from "all parts hidden → return null/empty so the row drops out of the model payload" exactly. - prune.ts: scans tool_result parts newest-first, protects the last 40k tokens (PROTECTED_TOKENS), marks older candidates hidden when their combined estimate clears 20k (PRUNE_TRIGGER_TOKENS — equal to COMPACTION_BUFFER from v1.11.0, so a successful prune is exactly the budget the summary path would have freed). Stops at chats.tail_start_id so it doesn't double-erase across the last summary boundary. Pure decision helper selectPruneTargets exported separately for unit tests. - Wired into maybeFlagForCompaction: prune runs synchronously when overflow is detected; if it freed >= PRUNE_TRIGGER_TOKENS, the needs_compaction flag is NOT set and the (expensive) summary inference call is skipped this turn. The next turn's overflow check re-evaluates from scratch. - 6 new unit tests in prune.test.ts cover: empty input, protection-only (no candidates), candidates below trigger, candidates above trigger, candidates straddling a summary boundary, exactly-protection-tokens. 179 tests total (was 173). Smoke verified post-rebuild: - \\d message_parts shows hidden_at + partial index. - View definition shows AND p.hidden_at IS NULL filters on all three subselects. - Synthetic hide-then-restore confirmed the view drops the tool_result jsonb to null when its only part is hidden, and restores when un-hidden. 
- EXPLAIN ANALYZE on the 42-message stress chat: 0.325ms (faster than v1.13.1-B's 1.018ms — EXISTS short-circuits cleanly for the common no-parts case). - Normal turn (plain text prompt) completes unaffected. Closes a v1.11.0 design item that was scoped but never implemented. With v1.13's parts table the prune is dramatically cheaper to write — pre-parts it would have meant editing JSON blobs in-place; now it's a hidden_at flag and a view subselect. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
128 lines
4.2 KiB
TypeScript
128 lines
4.2 KiB
TypeScript
import type { Sql } from '../../db.js';
|
|
|
|
// v1.13.4: two-tier compaction prune. Opencode's prune half (the cheap one);
// the summarize half shipped in v1.11.0 as services/compaction.ts.
//
// Algorithm: scan tool_result parts newest-first. Protect the last
// PROTECTED_TOKENS of content (the model recently saw these — pruning them
// kills coherence). Older parts are candidates. Mark them hidden (set
// hidden_at) only if the candidate pool would free at least
// PRUNE_TRIGGER_TOKENS — pruning 3 small tool_results to recover 500 tokens
// isn't worth the loss of fidelity for the model's next turn.
//
// Stops at the last compaction summary boundary (chats.tail_start_id). The
// v1.11.0 summary already encodes everything before that point; pruning
// across the boundary would double-erase.
|
|
|
|
/**
 * Estimated token budget of the newest tool_result content that is always
 * kept visible; only parts older than this window are prune candidates.
 */
export const PROTECTED_TOKENS = 40_000;

/**
 * Minimum combined token estimate the candidate pool must reach before any
 * part is hidden. Below this, pruning is skipped entirely.
 */
export const PRUNE_TRIGGER_TOKENS = 20_000;
|
|
|
|
/**
 * Rough char-to-token estimate: one token per four UTF-16 code units,
 * rounded up. Same heuristic compaction's usable() uses implicitly via the
 * buffer constant.
 *
 * @param text Text whose token cost is being estimated.
 * @returns Estimated token count; 0 for the empty string.
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
|
|
|
/**
 * Estimate the token cost of a part payload from the length of its JSON
 * serialization. A `null`/`undefined` payload is serialized as the empty
 * string literal (`""`).
 *
 * @param payload Arbitrary part payload.
 * @returns Token estimate for the serialized payload.
 */
function payloadTokens(payload: unknown): number {
  // JSON.stringify is typed as returning string, but at runtime it yields
  // undefined for values it cannot represent (functions, symbols). Treat
  // those as empty rather than throwing on `.length`.
  const serialized: string | undefined = JSON.stringify(payload ?? '');
  return estimateTokens(serialized ?? '');
}
|
|
|
|
/** Outcome of a prune pass. */
export interface PruneResult {
  /** Number of parts selected for hiding by this pass (0 when nothing was pruned). */
  hidden: number;
  /** Combined token estimate of the parts selected for hiding. */
  freedTokens: number;
}
|
|
|
|
// Pure algorithmic core (selectPruneTargets), exported for unit-test access.
// Takes parts already ordered newest-first, plus an optional cutoff (the last
// compaction summary boundary). Returns the part ids to hide and the total
// token estimate of the candidates. The caller does the DB UPDATE.
|
|
/** Minimal view of a message part needed to make a prune decision. */
export interface PartForPrune {
  /** Identifier of the part; returned to the caller when the part is selected. */
  id: string;
  /** Part content; its JSON-serialized length drives the token estimate. */
  payload: unknown;
  /** Timestamp compared against the summary-boundary cutoff. */
  created_at: Date;
}
|
|
|
|
/**
 * Pure decision helper: pick which parts should be hidden.
 *
 * Walks the parts newest-first. Parts are protected until their running
 * token estimate reaches PROTECTED_TOKENS (the part that crosses the
 * threshold is itself still protected); every later part is a candidate.
 * The walk stops at the first part created before `tailStartCreatedAt`.
 * Candidates are returned only when their combined estimate is at least
 * PRUNE_TRIGGER_TOKENS; otherwise nothing is selected.
 *
 * @param partsNewestFirst Parts ordered newest-first; the early stop at the
 *   cutoff relies on this ordering.
 * @param tailStartCreatedAt Creation time of the last summary boundary, or
 *   null when there is no boundary.
 * @returns The ids to hide and their combined token estimate, or an empty
 *   selection with `freedTokens: 0` when pruning is not worthwhile.
 */
export function selectPruneTargets(
  partsNewestFirst: ReadonlyArray<PartForPrune>,
  tailStartCreatedAt: Date | null,
): { ids: string[]; freedTokens: number } {
  const cutoffMs = tailStartCreatedAt ? tailStartCreatedAt.getTime() : null;
  const candidateIds: string[] = [];
  let candidateTokens = 0;
  let protectedTokens = 0;
  let protectionFilled = false;

  for (const part of partsNewestFirst) {
    // Past the last summary boundary: the anchored summary already covers
    // everything older, so stop rather than double-erase.
    if (cutoffMs !== null && part.created_at.getTime() < cutoffMs) {
      break;
    }

    const tokens = payloadTokens(part.payload);

    if (!protectionFilled) {
      // Still inside the protected window. The part that tips the running
      // total over the threshold stays protected too.
      protectedTokens += tokens;
      protectionFilled = protectedTokens >= PROTECTED_TOKENS;
      continue;
    }

    candidateIds.push(part.id);
    candidateTokens += tokens;
  }

  // Hiding a handful of small results is not worth the loss of fidelity.
  if (candidateIds.length === 0 || candidateTokens < PRUNE_TRIGGER_TOKENS) {
    return { ids: [], freedTokens: 0 };
  }
  return { ids: candidateIds, freedTokens: candidateTokens };
}
|
|
|
|
/**
 * Hide older tool_result parts of a chat when doing so frees enough tokens.
 *
 * Loads the chat's visible (hidden_at IS NULL) tool_result parts newest-first,
 * resolves the creation time of the message referenced by
 * chats.tail_start_id (if any) as the cutoff, delegates the decision to
 * selectPruneTargets, and stamps hidden_at on the selected parts.
 *
 * @param args.sql Database handle used for all queries.
 * @param args.chatId Chat whose parts are considered.
 * @returns How many parts were selected for hiding and their combined token
 *   estimate; both are 0 when there was nothing to prune.
 */
export async function prune(args: {
  sql: Sql;
  chatId: string;
}): Promise<PruneResult> {
  const { sql, chatId } = args;

  // Newest-first scan of visible tool_result parts in this chat. Pull
  // chats.tail_start_id alongside so we know where the last summary boundary
  // sits (don't prune across it).
  const parts = await sql<{
    id: string;
    payload: unknown;
    created_at: Date;
    tail_start_id: string | null;
  }[]>`
    SELECT p.id, p.payload, m.created_at,
           (SELECT c.tail_start_id FROM chats c WHERE c.id = ${chatId}) AS tail_start_id
    FROM message_parts p
    JOIN messages m ON m.id = p.message_id
    WHERE m.chat_id = ${chatId}
      AND p.kind = 'tool_result'
      AND p.hidden_at IS NULL
    ORDER BY m.created_at DESC, p.sequence DESC
  `;

  if (parts.length === 0) {
    return { hidden: 0, freedTokens: 0 };
  }

  // Read the boundary cutoff timestamp once. Older messages are off-limits.
  // Every row carries the same tail_start_id, so the first row is enough.
  let tailStartCreatedAt: Date | null = null;
  const firstTailId = parts[0]?.tail_start_id ?? null;
  if (firstTailId) {
    const tailRow = await sql<{ created_at: Date }[]>`
      SELECT created_at FROM messages WHERE id = ${firstTailId}
    `;
    // A boundary id with no matching message row leaves the cutoff unset.
    tailStartCreatedAt = tailRow[0]?.created_at ?? null;
  }

  const decision = selectPruneTargets(parts, tailStartCreatedAt);
  if (decision.ids.length === 0) {
    return { hidden: 0, freedTokens: 0 };
  }

  await sql`
    UPDATE message_parts
    SET hidden_at = clock_timestamp()
    WHERE id = ANY(${decision.ids})
  `;

  return { hidden: decision.ids.length, freedTokens: decision.freedTokens };
}
|