v1.13.9: compaction overflow trigger — 0.85 × ctx_max early trigger
Opencode pattern (session/overflow.ts): fire compaction at 85% of ctx_max, replacing the v1.11.0-era `ctx_max - 20_000` formula.

Old formula: usable = ctx_max - 20_000
- ctx=262144 → trigger at 242144 (92.4%) — only 7.6% headroom
- ctx=100000 → trigger at 80000 (80.0%)
- ctx= 32000 → trigger at 12000 (37.5%) — over-eager
- ctx<=20000 → trigger at 0 — never fires

New formula: usable = floor(0.85 * ctx_max)
- ctx=262144 → trigger at 222822 (85.0%) — 15% headroom for summarizer
- ctx=100000 → trigger at 85000 (85.0%)
- ctx= 32000 → trigger at 27200 (85.0%)
- ctx= 8192 → trigger at 6963 (85.0%)

Ratio gives consistent headroom at any context scale. The qwen3.6 daily driver gets ~19k tokens more breathing room before overflow; small-ctx models no longer degenerate to never-triggering.

usable() is the only consumer of COMPACTION_BUFFER → constant deleted. New EARLY_TRIGGER_RATIO constant takes its place. isOverflow() and the maybeFlagForCompaction() call site at payload.ts:184 are unchanged — formula swap is internal to compaction.ts. payload.ts comment touched only to drop the stale COMPACTION_BUFFER reference (PRUNE_TRIGGER_TOKENS stays at 20k as the prune-freed threshold; independent of the overflow formula).

Tests: 4 new usable() corner cases (262k/100k/8k/zero+negative), plus 5 isOverflow() numbers shifted to match the 85k budget at ctx=100k. 195/195 server tests pass (was 194).

Smoke: ratio math verified by unit tests at all four corners. Live cap-hit verification deferred — requires accumulating >222k tokens in a session under qwen3.6-35b-a3b-mxfp4 (was >242k pre-fix); will surface organically in extended use.
This commit is contained in:
@@ -41,49 +41,58 @@ function mkMsg(
|
||||
|
||||
// ---- usable -----------------------------------------------------------------
|
||||
|
||||
describe('usable', () => {
|
||||
it('returns 0 when contextLimit is 0', () => {
|
||||
// v1.13.9: ratio-only early trigger at 0.85 × contextLimit. Replaces the
|
||||
// v1.11.0-era `contextLimit - 20_000` math, which degenerated to 0 for
|
||||
// contexts ≤20k and gave only 7-8% headroom at 262k.
|
||||
describe('usable() — ratio-only early trigger (v1.13.9)', () => {
|
||||
it('returns floor(0.85 * limit) for the qwen3.6 daily-driver context', () => {
|
||||
// floor(0.85 * 262144) = floor(222822.4) = 222822 — 15% headroom for
|
||||
// the summarizer to do its turn without itself overflowing.
|
||||
expect(usable(262144)).toBe(222822);
|
||||
});
|
||||
|
||||
it('returns 0.85× for a mid-sized context', () => {
|
||||
expect(usable(100_000)).toBe(85_000);
|
||||
});
|
||||
|
||||
it('returns 0.85× for a small context (no degenerate 0)', () => {
|
||||
// floor(0.85 * 8192) = 6963. Under the old formula this returned 0
|
||||
// (8192 - 20_000 clamped to 0), effectively disabling compaction for
|
||||
// small-context models. The ratio keeps the trigger active.
|
||||
expect(usable(8192)).toBe(6963);
|
||||
});
|
||||
|
||||
it('returns 0 for zero or negative contextLimit', () => {
|
||||
expect(usable(0)).toBe(0);
|
||||
});
|
||||
|
||||
it('returns 0 when contextLimit is below the 20k buffer', () => {
|
||||
// Math.max(0, x - 20000) clamps the subtraction so we never report
|
||||
// negative headroom. A 10k-context model reports 0 usable, which makes
|
||||
// isOverflow short-circuit to false (correct — we can't size the
|
||||
// compaction with no headroom).
|
||||
expect(usable(10_000)).toBe(0);
|
||||
expect(usable(19_999)).toBe(0);
|
||||
expect(usable(20_000)).toBe(0);
|
||||
});
|
||||
|
||||
it('subtracts the 20k buffer from a normal-sized context window', () => {
|
||||
expect(usable(100_000)).toBe(80_000);
|
||||
expect(usable(32_768)).toBe(12_768);
|
||||
expect(usable(-1)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ---- isOverflow -------------------------------------------------------------
|
||||
|
||||
describe('isOverflow', () => {
|
||||
it('returns false when usable is 0 (unknown / sub-buffer context)', () => {
|
||||
it('returns false when usable is 0 (unknown contextLimit)', () => {
|
||||
expect(isOverflow({ prompt_tokens: 999_999, completion_tokens: 0 }, 0)).toBe(false);
|
||||
expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, 10_000)).toBe(false);
|
||||
expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, -1)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false at 50% of usable', () => {
|
||||
// usable(100k) = 80k → 50% = 40k.
|
||||
// v1.13.9: usable(100k) = 85k → 50% = 42.5k; the 40k total below sits just under that.
|
||||
expect(isOverflow({ prompt_tokens: 30_000, completion_tokens: 10_000 }, 100_000)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false just under usable', () => {
|
||||
expect(isOverflow({ prompt_tokens: 79_000, completion_tokens: 999 }, 100_000)).toBe(false);
|
||||
// v1.13.9: 84_000 + 999 = 84_999 < 85_000 budget.
|
||||
expect(isOverflow({ prompt_tokens: 84_000, completion_tokens: 999 }, 100_000)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns true exactly at usable (>=, not strict >)', () => {
|
||||
expect(isOverflow({ prompt_tokens: 80_000, completion_tokens: 0 }, 100_000)).toBe(true);
|
||||
// v1.13.9: 85_000 == usable(100_000).
|
||||
expect(isOverflow({ prompt_tokens: 85_000, completion_tokens: 0 }, 100_000)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true above usable', () => {
|
||||
// 50_000 + 40_000 = 90_000 > 85_000.
|
||||
expect(isOverflow({ prompt_tokens: 50_000, completion_tokens: 40_000 }, 100_000)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -226,8 +235,9 @@ describe('select', () => {
|
||||
const u = mkMsg('user', 'oversized');
|
||||
const a = mkMsg('assistant', 'Y'.repeat(40_000));
|
||||
const result = select([u, a], 30_000, 1);
|
||||
// usable(30k) = 10k → budget = min(8k, max(2k, floor(10k*0.25))) =
|
||||
// min(8k, max(2k, 2500)) = 2500. 40k chars ≈ 10k tokens. Can't fit.
|
||||
// v1.13.9: usable(30k) = floor(0.85*30k) = 25500 → budget =
|
||||
// min(8k, max(2k, floor(25500*0.25))) = min(8k, max(2k, 6375)) = 6375.
|
||||
// 40k chars ≈ 10k tokens. Still can't fit (10k > 6375).
|
||||
expect(result.tail_start_id).toBeUndefined();
|
||||
expect(result.head).toEqual([u, a]);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user