- message_parts.hidden_at timestamptz column (NULL by default) with a partial index on (message_id) WHERE hidden_at IS NULL for the common visible-parts filter.
- messages_with_parts view changed from COALESCE(parts, legacy) to CASE WHEN EXISTS(any parts of kind) THEN visible-parts ELSE legacy. COALESCE would have leaked hidden parts back via the legacy fallback when every part was pruned (smoke caught it pre-commit). The CASE distinguishes "no parts at all → fall back to legacy column for pre-v1.13.0 history" from "all parts hidden → return null/empty so the row drops out of the model payload" exactly.
- prune.ts: scans tool_result parts newest-first, protects the last 40k tokens (PROTECTED_TOKENS), marks older candidates hidden when their combined estimate clears 20k (PRUNE_TRIGGER_TOKENS — equal to COMPACTION_BUFFER from v1.11.0, so a successful prune is exactly the budget the summary path would have freed). Stops at chats.tail_start_id so it doesn't double-erase across the last summary boundary. Pure decision helper selectPruneTargets exported separately for unit tests.
- Wired into maybeFlagForCompaction: prune runs synchronously when overflow is detected; if it freed >= PRUNE_TRIGGER_TOKENS, the needs_compaction flag is NOT set and the (expensive) summary inference call is skipped this turn. The next turn's overflow check re-evaluates from scratch.
- 6 new unit tests in prune.test.ts cover: empty input, protection-only (no candidates), candidates below trigger, candidates above trigger, candidates straddling a summary boundary, exactly-protection-tokens. 179 tests total (was 173).

Smoke verified post-rebuild:
- \d message_parts shows hidden_at + partial index.
- View definition shows AND p.hidden_at IS NULL filters on all three subselects.
- Synthetic hide-then-restore confirmed the view drops the tool_result jsonb to null when its only part is hidden, and restores when un-hidden.
- EXPLAIN ANALYZE on the 42-message stress chat: 0.325ms (faster than v1.13.1-B's 1.018ms — EXISTS short-circuits cleanly for the common no-parts case).
- Normal turn (plain text prompt) completes unaffected.

Closes a v1.11.0 design item that was scoped but never implemented. With v1.13's parts table the prune is dramatically cheaper to write — pre-parts it would have meant editing JSON blobs in-place; now it's a hidden_at flag and a view subselect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
97 lines
3.6 KiB
TypeScript
97 lines
3.6 KiB
TypeScript
import { describe, it, expect, beforeEach } from 'vitest';
|
|
import {
|
|
selectPruneTargets,
|
|
PROTECTED_TOKENS,
|
|
PRUNE_TRIGGER_TOKENS,
|
|
type PartForPrune,
|
|
} from '../inference/prune.js';
|
|
|
|
// Test fixture: build a tool_result part whose payload size yields a known
// token estimate (chars/4). The decision logic only cares about
// JSON.stringify(payload).length, so a payload whose stringified form is
// `4n` chars long produces exactly `n` tokens.
let seq = 0;

/**
 * Builds a fixture part whose stringified payload is exactly `4 * tokens`
 * characters long.
 *
 * JSON.stringify wraps a string in double quotes (+2 chars) and 'x' never
 * needs escaping, so the raw text is `4 * tokens - 2` chars. Assuming the
 * estimator is Math.ceil(stringifiedLength / 4), as the notes in this file
 * indicate, each part then counts for exactly `tokens` tokens.
 *
 * Ids are handed out sequentially (`p1`, `p2`, …) from the module-level `seq`
 * counter, which the suite resets before every test.
 *
 * @param tokens - Desired token estimate for the part; a positive integer.
 * @param createdAt - Timestamp stored as the part's `created_at`.
 * @returns A part carrying a fresh id, the sized string payload and `createdAt`.
 * @throws RangeError if `tokens` is not a positive integer (a string payload
 *   cannot stringify to fewer than 2 chars, so 0 tokens is unrepresentable).
 */
function part(tokens: number, createdAt: Date): PartForPrune {
  // Fail loudly here rather than with String.prototype.repeat's opaque
  // "Invalid count value" when the computed length would go negative.
  if (!Number.isInteger(tokens) || tokens < 1) {
    throw new RangeError(`part(): tokens must be a positive integer, got ${tokens}`);
  }
  seq += 1;
  const text = 'x'.repeat(tokens * 4 - 2);
  return { id: `p${seq}`, payload: text, created_at: createdAt };
}
|
|
|
|
/** Fixed reference instant that every fixture timestamp is measured back from. */
const T_NOW = new Date('2026-05-22T12:00:00Z');

/**
 * Returns the instant `secondsBack` seconds before T_NOW.
 *
 * @param secondsBack - Seconds to subtract from T_NOW; larger values are older.
 * @returns A new Date; T_NOW itself is never mutated or returned.
 */
function ago(secondsBack: number): Date {
  return new Date(T_NOW.getTime() - secondsBack * 1000);
}
|
|
|
|
// Unit tests for the prune decision helper. Each case passes parts ordered
// newest-first plus, as the second argument, either null or the timestamp of
// the last summary boundary, and checks which part ids would be hidden.
describe('selectPruneTargets', () => {
  // Restart fixture ids at p1 so every test can assert on ids independently.
  beforeEach(() => {
    seq = 0;
  });

  it('returns nothing when there are no parts', () => {
    expect(selectPruneTargets([], null)).toEqual({ ids: [], freedTokens: 0 });
  });

  it('returns nothing when total tokens are under the protection window', () => {
    const parts: PartForPrune[] = [
      part(10_000, ago(10)),
      part(10_000, ago(20)),
    ]; // 20k total, all protected
    expect(selectPruneTargets(parts, null)).toEqual({ ids: [], freedTokens: 0 });
  });

  it('returns nothing when candidate total is below the prune trigger', () => {
    // Protection fills with ~40k newest, candidates only ~5k. Below 20k trigger.
    const parts: PartForPrune[] = [
      part(20_000, ago(10)),
      part(20_000, ago(20)),
      // Past protection; total ~5k won't trigger.
      part(5_000, ago(30)),
    ];
    const result = selectPruneTargets(parts, null);
    expect(result.ids).toEqual([]);
    expect(result.freedTokens).toBe(0);
  });

  it('hides candidates past protection when their total clears the trigger', () => {
    // Newest 40k protected; older 30k cleanly above the 20k trigger.
    const parts: PartForPrune[] = [
      part(20_000, ago(10)),
      part(20_000, ago(20)),
      // Past protection, total ~30k freed.
      part(15_000, ago(30)),
      part(15_000, ago(40)),
    ];
    const result = selectPruneTargets(parts, null);
    expect(result.ids).toEqual(['p3', 'p4']);
    expect(result.freedTokens).toBeGreaterThanOrEqual(PRUNE_TRIGGER_TOKENS);
  });

  it('stops at the compaction summary boundary', () => {
    // Newest 30k protected (just under PROTECTED_TOKENS=40k); then 30k of
    // older parts. Boundary sits at ago(35), so the ago(40) part is
    // beyond it and gets skipped.
    const parts: PartForPrune[] = [
      part(15_000, ago(10)),
      part(15_000, ago(20)),
      part(15_000, ago(30)), // crosses protection threshold; candidate
      part(15_000, ago(40)), // beyond summary boundary; skipped
    ];
    const tailStart = ago(35);
    const result = selectPruneTargets(parts, tailStart);
    // ago(30) is the only candidate inside the window; 15k is below the
    // 20k trigger so we expect no hides.
    expect(result.ids).toEqual([]);
  });

  it('does not prune when only protected parts exist (no candidates)', () => {
    // Exactly PROTECTED_TOKENS of newest parts; no older candidates.
    const parts: PartForPrune[] = [part(PROTECTED_TOKENS, ago(10))];
    expect(selectPruneTargets(parts, null)).toEqual({ ids: [], freedTokens: 0 });
  });
});
|