Opencode pattern (session/overflow.ts): fire compaction at 85% of ctx_max, replacing the v1.11.0-era `ctx_max - 20_000` formula.

Old formula: usable = ctx_max - 20_000
- ctx=262144 → trigger at 242144 (92.4%) — only 7.6% headroom
- ctx=100000 → trigger at 80000 (80.0%)
- ctx= 32000 → trigger at 12000 (37.5%) — over-eager
- ctx<=20000 → trigger at 0 — never fires

New formula: usable = floor(0.85 * ctx_max)
- ctx=262144 → trigger at 222822 (85.0%) — 15% headroom for summarizer
- ctx=100000 → trigger at 85000 (85.0%)
- ctx= 32000 → trigger at 27200 (85.0%)
- ctx=  8192 → trigger at 6963 (85.0%)

Ratio gives consistent headroom at any context scale. The qwen3.6 daily driver gets ~19k tokens more breathing room before overflow; small-ctx models no longer degenerate to never-triggering.

usable() is the only consumer of COMPACTION_BUFFER → constant deleted. New EARLY_TRIGGER_RATIO constant takes its place. isOverflow() and the maybeFlagForCompaction() call site at payload.ts:184 are unchanged — formula swap is internal to compaction.ts. payload.ts comment touched only to drop the stale COMPACTION_BUFFER reference (PRUNE_TRIGGER_TOKENS stays at 20k as the prune-freed threshold; independent of the overflow formula).

Tests: 4 new usable() corner cases (262k/100k/8k/zero+negative), plus 5 isOverflow() numbers shifted to match the 85k budget at ctx=100k. 195/195 server tests pass (was 194).

Smoke: ratio math verified by unit tests at all four corners. Live cap-hit verification deferred — requires accumulating >222k tokens in a session under qwen3.6-35b-a3b-mxfp4 (was >242k pre-fix); will surface organically in extended use.
324 lines
13 KiB
TypeScript
324 lines
13 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
||
import {
|
||
usable,
|
||
isOverflow,
|
||
estimate,
|
||
turns,
|
||
select,
|
||
buildPrompt,
|
||
buildHeadPayload,
|
||
type CompactionMessage,
|
||
} from '../compaction.js';
|
||
import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';
|
||
|
||
// ---- fixture ----------------------------------------------------------------
|
||
// Tiny constructor for the message shape `compaction.ts` consumes. Default
|
||
// values match the post-CP1 schema (summary=false, kind='message', complete).
|
||
// Tests that need a summary row pass `summary: true`.
|
||
|
||
// Sequence number shared by every fixture created in this file. It only ever
// increases, so each fixture gets a distinct id and a later timestamp than
// the fixture created before it.
let counter = 0;

/**
 * Builds a `CompactionMessage` fixture for the tests in this file.
 *
 * Every call increments the module-level `counter` and derives two fields
 * from it: `id` becomes `m<counter>` and `created_at` becomes the ISO-8601
 * timestamp `counter` seconds after the Unix epoch. All remaining fields get
 * fixed defaults (a complete, non-summary `'message'` row with no tool or
 * reasoning data).
 *
 * @param role - Value stored in the message's `role` field.
 * @param content - Value stored in the message's `content` field.
 * @param overrides - Fields spread last, so they replace any default —
 *   including the generated `id` and `created_at`.
 * @returns A new message object; nothing is shared between calls.
 */
function mkMsg(
  role: CompactionMessage['role'],
  content: string,
  overrides: Partial<CompactionMessage> = {},
): CompactionMessage {
  counter += 1;
  return {
    id: `m${counter}`,
    role,
    content,
    kind: 'message',
    summary: false,
    status: 'complete',
    tool_calls: null,
    tool_results: null,
    reasoning_parts: null,
    metadata: null,
    // One second per fixture keeps timestamps strictly ordered by creation.
    created_at: new Date(counter * 1000).toISOString(),
    ...overrides,
  };
}
|
||
|
||
// ---- usable -----------------------------------------------------------------
|
||
|
||
// v1.13.9: ratio-only early trigger at 0.85 × contextLimit. Replaces the
|
||
// v1.11.0-era `contextLimit - 20_000` math, which degenerated to 0 for
|
||
// contexts ≤20k and gave only 7-8% headroom at 262k.
|
||
// Pins the floor(0.85 × contextLimit) budget at a large, a mid-sized, a small
// and a degenerate (zero / negative) context limit.
describe('usable() — ratio-only early trigger (v1.13.9)', () => {
  it('returns floor(0.85 * limit) for the qwen3.6 daily-driver context', () => {
    // floor(0.85 * 262144) = floor(222822.4) = 222822 — 15% headroom for
    // the summarizer to do its turn without itself overflowing.
    expect(usable(262144)).toBe(222822);
  });

  it('returns 0.85× for a mid-sized context', () => {
    expect(usable(100_000)).toBe(85_000);
  });

  it('returns 0.85× for a small context (no degenerate 0)', () => {
    // floor(0.85 * 8192) = 6963. Under the old formula this returned 0
    // (8192 - 20_000 clamped to 0), effectively disabling compaction for
    // small-context models. The ratio keeps the trigger active.
    expect(usable(8192)).toBe(6963);
  });

  it('returns 0 for zero or negative contextLimit', () => {
    expect(usable(0)).toBe(0);
    expect(usable(-1)).toBe(0);
  });
});
|
||
|
||
// ---- isOverflow -------------------------------------------------------------
|
||
|
||
// Every case below except the first uses contextLimit = 100_000, whose usable
// budget is 85_000; the compared quantity is prompt_tokens + completion_tokens.
describe('isOverflow', () => {
  it('returns false when usable is 0 (unknown contextLimit)', () => {
    // A zero or negative limit yields a zero budget; even huge token counts
    // must not be reported as overflow in that case.
    expect(isOverflow({ prompt_tokens: 999_999, completion_tokens: 0 }, 0)).toBe(false);
    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, -1)).toBe(false);
  });

  it('returns false at 50% of usable', () => {
    // v1.13.9: usable(100k) = 85k → 50% ≈ 42.5k. 30_000 + 10_000 = 40_000
    // sits just below that halfway mark.
    expect(isOverflow({ prompt_tokens: 30_000, completion_tokens: 10_000 }, 100_000)).toBe(false);
  });

  it('returns false just under usable', () => {
    // v1.13.9: 84_000 + 999 = 84_999 < 85_000 budget.
    expect(isOverflow({ prompt_tokens: 84_000, completion_tokens: 999 }, 100_000)).toBe(false);
  });

  it('returns true exactly at usable (>=, not strict >)', () => {
    // v1.13.9: 85_000 == usable(100_000).
    expect(isOverflow({ prompt_tokens: 85_000, completion_tokens: 0 }, 100_000)).toBe(true);
  });

  it('returns true above usable', () => {
    // 50_000 + 40_000 = 90_000 > 85_000.
    expect(isOverflow({ prompt_tokens: 50_000, completion_tokens: 40_000 }, 100_000)).toBe(true);
  });
});
|
||
|
||
// ---- estimate ---------------------------------------------------------------
|
||
|
||
// Checks the baseline for empty input, growth with content size, and
// repeatability of the estimate.
describe('estimate', () => {
  it('returns a tiny value for an empty array (JSON.stringify([]) is "[]")', () => {
    // Math.ceil('[]'.length / 4) = 1. Documented here so the next reader
    // doesn't think "0" is the expected baseline — char-count/4 will never
    // be exactly 0 for any JSON-serializable input.
    expect(estimate([])).toBe(1);
  });

  it('scales roughly with content length', () => {
    const tiny = estimate([mkMsg('user', 'hi')]);
    const big = estimate([mkMsg('user', 'x'.repeat(4000))]);
    expect(big).toBeGreaterThan(tiny);
    expect(big).toBeGreaterThanOrEqual(1000); // 4000 chars / 4 = 1000 floor
  });

  it('is deterministic across repeated calls', () => {
    // Same array instance both times, so any difference would come from
    // estimate() itself rather than from the fixture counter.
    const msgs = [mkMsg('user', 'one'), mkMsg('assistant', 'two')];
    expect(estimate(msgs)).toBe(estimate(msgs));
  });
});
|
||
|
||
// ---- turns ------------------------------------------------------------------
|
||
|
||
// Each expected turn is { start, end, id }: a half-open index range into the
// input array plus the id of the user message that opens the turn.
describe('turns', () => {
  it('returns [] for an empty message list', () => {
    expect(turns([])).toEqual([]);
  });

  it('returns one turn for a single user message', () => {
    const u = mkMsg('user', 'hi');
    const result = turns([u]);
    expect(result).toHaveLength(1);
    expect(result[0]).toEqual({ start: 0, end: 1, id: u.id });
  });

  it('returns two turns for user/assistant/user/assistant', () => {
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2');
    const a2 = mkMsg('assistant', 'a2');
    const result = turns([u1, a1, u2, a2]);
    expect(result).toEqual([
      { start: 0, end: 2, id: u1.id },
      { start: 2, end: 4, id: u2.id },
    ]);
  });

  it('extends the final turn end to include trailing non-user messages', () => {
    // Spec wording: "user/assistant + trailing system → trailing included
    // in last turn's range". Single-turn variant: [user, assistant, system]
    // should produce one turn with end=3 (covers all three indices).
    const u = mkMsg('user', 'q');
    const a = mkMsg('assistant', 'a');
    const s = mkMsg('system', 'note');
    const result = turns([u, a, s]);
    expect(result).toEqual([{ start: 0, end: 3, id: u.id }]);
  });

  it('skips user rows flagged as summary (anchored-rolling rows)', () => {
    // Defense-in-depth — process() pre-filters summary rows, but turns()
    // also skips them so a misuse from another caller doesn't create a
    // bogus turn boundary on the summary row itself.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const sum = mkMsg('user', 'rolled-up', { summary: true });
    const u2 = mkMsg('user', 'q2');
    const result = turns([u1, a1, sum, u2]);
    // Only ids are compared: the summary row must not open a turn of its own.
    expect(result.map((t) => t.id)).toEqual([u1.id, u2.id]);
  });
});
|
||
|
||
// ---- select -----------------------------------------------------------------
|
||
|
||
// select(messages, contextLimit, tail_turns?) is expected to return the `head`
// to be summarized plus `tail_start_id`, the id of the first preserved
// message. An undefined `tail_start_id` is the "no compaction this round"
// sentinel exercised by several cases below.
describe('select', () => {
  it('returns empty head + undefined tail for an empty message list', () => {
    const result = select([], 100_000);
    expect(result.head).toEqual([]);
    expect(result.tail_start_id).toBeUndefined();
  });

  it('full-preserves when there are fewer turns than tail_turns', () => {
    // 1 turn but tail_turns=2: keep === turn0 → keep.start === 0 →
    // sentinel-return path that signals "no compaction this round".
    const u = mkMsg('user', 'only');
    const a = mkMsg('assistant', 'a');
    const result = select([u, a], 100_000, 2);
    expect(result.head).toEqual([u, a]);
    expect(result.tail_start_id).toBeUndefined();
  });

  it('keeps the last tail_turns turns when they all fit the budget', () => {
    // 3 turns, all small. tail_turns=2 means keep the last 2; head =
    // messages[0..turn2.start] = just turn1's content.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2');
    const a2 = mkMsg('assistant', 'a2');
    const u3 = mkMsg('user', 'q3');
    const a3 = mkMsg('assistant', 'a3');
    const msgs = [u1, a1, u2, a2, u3, a3];
    const result = select(msgs, 100_000, 2);
    // Turn boundaries: [0,2), [2,4), [4,6). slice(-2) = turns at 2 and 4.
    // Walking backward: u3 fits, then u2 fits → keep={start:2, id:u2.id}.
    expect(result.tail_start_id).toBe(u2.id);
    expect(result.head).toEqual([u1, a1]);
  });

  it('splits a turn mid-stream when the whole turn would overflow the budget', () => {
    // tail_turns=1 so we look only at the most recent turn. Stuff it past
    // 8k of content (max preserve budget) and the splitter walks forward
    // looking for the largest suffix that fits.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2 with a giant payload');
    const huge = mkMsg('assistant', 'X'.repeat(40_000)); // ~10k tokens
    const smallTail = mkMsg('assistant', 'short answer');
    const msgs = [u1, a1, u2, huge, smallTail];
    const result = select(msgs, 100_000, 1);
    // The split walks from turn.start+1 forward; the first index whose
    // [i, end) slice fits the budget becomes the new keep. We don't assert
    // a specific id (depends on character math), only that compaction was
    // triggered (tail_start_id set, head non-empty) and that the head
    // doesn't include the final small message.
    expect(result.tail_start_id).toBeDefined();
    expect(result.head.length).toBeGreaterThan(0);
    expect(result.head).not.toContain(smallTail);
  });

  it('full-preserves when no split point fits', () => {
    // Single oversized turn; splitTurn walks but each suffix is still too
    // big. After the loop, keep is undefined → full-preserve sentinel.
    // Force this with a small (30k) context, whose preserve budget works
    // out to 6375 tokens (math below), and a single 40k-char message.
    const u = mkMsg('user', 'oversized');
    const a = mkMsg('assistant', 'Y'.repeat(40_000));
    const result = select([u, a], 30_000, 1);
    // v1.13.9: usable(30k) = floor(0.85*30k) = 25500 → budget =
    // min(8k, max(2k, floor(25500*0.25))) = min(8k, max(2k, 6375)) = 6375.
    // 40k chars ≈ 10k tokens. Still can't fit (10k > 6375).
    expect(result.tail_start_id).toBeUndefined();
    expect(result.head).toEqual([u, a]);
  });
});
|
||
|
||
// ---- buildPrompt ------------------------------------------------------------
|
||
|
||
// buildPrompt(previousSummary, extraContext) is checked for its opening
// sentence, the embedded SUMMARY_TEMPLATE, and where extra context lands.
describe('buildPrompt', () => {
  it('opens with the "create new" anchor when previousSummary is undefined', () => {
    const out = buildPrompt(undefined, []);
    expect(out.startsWith('Create a new anchored summary')).toBe(true);
    expect(out).toContain(SUMMARY_TEMPLATE);
    // No previous summary was supplied, so its wrapper tag must be absent.
    expect(out).not.toContain('<previous-summary>');
  });

  it('opens with the "update" anchor and embeds previousSummary verbatim', () => {
    const prev = '## Goal\n- finish v1.11 compaction';
    const out = buildPrompt(prev, []);
    expect(out.startsWith('Update the anchored summary')).toBe(true);
    expect(out).toContain('<previous-summary>');
    expect(out).toContain(prev);
    expect(out).toContain('</previous-summary>');
    expect(out).toContain(SUMMARY_TEMPLATE);
  });

  it('appends extra context strings after the template (reserved for plugin injection)', () => {
    const out = buildPrompt(undefined, ['extra-context-line']);
    expect(out.endsWith('extra-context-line')).toBe(true);
  });
});
|
||
|
||
// ---- buildHeadPayload (v1.13.6) -----------------------------------------------
|
||
|
||
// Covers how buildHeadPayload renders `reasoning_parts` into the assistant
// message content: prefixed onto existing content, standing alone when the
// content is empty, concatenated across parts, and omitted when absent.
describe('buildHeadPayload reasoning render', () => {
  it('emits reasoning as a <reasoning> tag prefixed onto the assistant content', () => {
    const out = buildHeadPayload([
      mkMsg('user', 'show me the file'),
      mkMsg('assistant', 'reading it now', {
        reasoning_parts: [{ text: 'user wants src/index.ts; I should view it' }],
      }),
    ]);
    expect(out).toHaveLength(2);
    expect(out[1]!.role).toBe('assistant');
    // The tag and the original content are separated by one blank line.
    expect(out[1]!.content).toBe(
      '<reasoning>user wants src/index.ts; I should view it</reasoning>\n\nreading it now',
    );
  });

  it('emits a standalone <reasoning> tag when reasoning is present but content is empty (tool-call-only turn)', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', '', {
        reasoning_parts: [{ text: 'jumping straight to grep' }],
        tool_calls: [{ id: 'c1', name: 'grep', args: { pattern: 'foo' } }],
      }),
    ]);
    expect(out).toHaveLength(1);
    // No trailing separator when there is no content to follow the tag.
    expect(out[0]!.content).toBe('<reasoning>jumping straight to grep</reasoning>');
    expect(out[0]!.tool_calls).toHaveLength(1);
    // The fixture's flat `name` is expected under `function.name` in the output.
    expect(out[0]!.tool_calls![0]!.function.name).toBe('grep');
  });

  it('joins multiple reasoning parts without separators (matches the streaming concat)', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', 'final answer', {
        // The only space between the two thoughts is the one trailing the
        // first part; the join itself adds nothing.
        reasoning_parts: [{ text: 'first thought ' }, { text: 'second thought' }],
      }),
    ]);
    expect(out[0]!.content).toBe(
      '<reasoning>first thought second thought</reasoning>\n\nfinal answer',
    );
  });

  it('omits the reasoning tag entirely when reasoning_parts is null or empty', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', 'plain answer', { reasoning_parts: null }),
      mkMsg('assistant', 'other answer', { reasoning_parts: [] }),
    ]);
    expect(out[0]!.content).toBe('plain answer');
    expect(out[1]!.content).toBe('other answer');
    expect(out[0]!.content).not.toContain('<reasoning>');
    expect(out[1]!.content).not.toContain('<reasoning>');
  });
});
|