v1.13.9: compaction overflow trigger — 0.85 × ctx_max early trigger

Opencode pattern (session/overflow.ts): fire compaction at 85% of ctx_max,
replacing the v1.11.0-era `ctx_max - 20_000` formula.

Old formula: usable = max(0, ctx_max - 20_000)
  - ctx=262144 → trigger at 242144 (92.4%) — only 7.6% headroom
  - ctx=100000 → trigger at  80000 (80.0%)
  - ctx= 32000 → trigger at  12000 (37.5%) — over-eager
  - ctx<=20000 → trigger at 0 — never fires

New formula: usable = floor(0.85 * ctx_max)
  - ctx=262144 → trigger at 222822 (85.0%) — 15% headroom for summarizer
  - ctx=100000 → trigger at  85000 (85.0%)
  - ctx= 32000 → trigger at  27200 (85.0%)
  - ctx=  8192 → trigger at   6963 (85.0%)

Ratio gives consistent headroom at any context scale. The qwen3.6 daily
driver gets ~19k tokens more breathing room before overflow; small-ctx
models no longer degenerate to never-triggering.

usable() is the only consumer of COMPACTION_BUFFER → constant deleted. New
EARLY_TRIGGER_RATIO constant takes its place. isOverflow() and the
maybeFlagForCompaction() call site at payload.ts:184 are unchanged — the
formula swap is internal to compaction.ts. The payload.ts comment is touched
only to drop the stale COMPACTION_BUFFER reference (PRUNE_TRIGGER_TOKENS
stays at 20k as the prune-freed threshold; it is independent of the overflow
formula).

Tests: 4 new usable() corner cases (262k/100k/8k/zero+negative), plus 5
isOverflow() numbers shifted to match the 85k budget at ctx=100k. 195/195
server tests pass (was 194).

Smoke: ratio math verified by unit tests at all four corners. Live cap-hit
verification deferred — it requires accumulating >222k tokens in a session
under qwen3.6-35b-a3b-mxfp4 (was >242k pre-fix); will surface organically in
extended use.
2026-05-22 13:59:14 +00:00
parent a0c8d212cb
commit b06a4a8e55
3 changed files with 53 additions and 34 deletions
--- a/apps/server/src/services/tests/compaction.test.ts
+++ b/apps/server/src/services/tests/compaction.test.ts
@@ -41,49 +41,58 @@ function mkMsg(

 // ---- usable -----------------------------------------------------------------

-describe('usable', () => {
-  it('returns 0 when contextLimit is 0', () => {
+// v1.13.9: ratio-only early trigger at 0.85 × contextLimit. Replaces the
+// v1.11.0-era `contextLimit - 20_000` math, which degenerated to 0 for
+// contexts ≤20k and gave only 7-8% headroom at 262k.
+describe('usable() — ratio-only early trigger (v1.13.9)', () => {
+  it('returns floor(0.85 * limit) for the qwen3.6 daily-driver context', () => {
+    // floor(0.85 * 262144) = floor(222822.4) = 222822 — 15% headroom for
+    // the summarizer to do its turn without itself overflowing.
+    expect(usable(262144)).toBe(222822);
+  });
+
+  it('returns 0.85× for a mid-sized context', () => {
+    expect(usable(100_000)).toBe(85_000);
+  });
+
+  it('returns 0.85× for a small context (no degenerate 0)', () => {
+    // floor(0.85 * 8192) = 6963. Under the old formula this returned 0
+    // (8192 - 20_000 clamped to 0), effectively disabling compaction for
+    // small-context models. The ratio keeps the trigger active.
+    expect(usable(8192)).toBe(6963);
+  });
+
+  it('returns 0 for zero or negative contextLimit', () => {
    expect(usable(0)).toBe(0);
-  });
-
-  it('returns 0 when contextLimit is below the 20k buffer', () => {
-    // Math.max(0, x - 20000) clamps the subtraction so we never report
-    // negative headroom. A 10k-context model reports 0 usable, which makes
-    // isOverflow short-circuit to false (correct — we can't size the
-    // compaction with no headroom).
-    expect(usable(10_000)).toBe(0);
-    expect(usable(19_999)).toBe(0);
-    expect(usable(20_000)).toBe(0);
-  });
-
-  it('subtracts the 20k buffer from a normal-sized context window', () => {
-    expect(usable(100_000)).toBe(80_000);
-    expect(usable(32_768)).toBe(12_768);
+    expect(usable(-1)).toBe(0);
  });
 });

 // ---- isOverflow -------------------------------------------------------------

 describe('isOverflow', () => {
-  it('returns false when usable is 0 (unknown / sub-buffer context)', () => {
+  it('returns false when usable is 0 (unknown contextLimit)', () => {
    expect(isOverflow({ prompt_tokens: 999_999, completion_tokens: 0 }, 0)).toBe(false);
-    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, 10_000)).toBe(false);
+    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, -1)).toBe(false);
  });

  it('returns false at 50% of usable', () => {
-    // usable(100k) = 80k → 50% = 40k.
+    // v1.13.9: usable(100k) = 85k → 50% = 42.5k; 30k + 10k = 40k sits below it.
    expect(isOverflow({ prompt_tokens: 30_000, completion_tokens: 10_000 }, 100_000)).toBe(false);
  });

  it('returns false just under usable', () => {
-    expect(isOverflow({ prompt_tokens: 79_000, completion_tokens: 999 }, 100_000)).toBe(false);
+    // v1.13.9: 84_000 + 999 = 84_999 < 85_000 budget.
+    expect(isOverflow({ prompt_tokens: 84_000, completion_tokens: 999 }, 100_000)).toBe(false);
  });

  it('returns true exactly at usable (>=, not strict >)', () => {
-    expect(isOverflow({ prompt_tokens: 80_000, completion_tokens: 0 }, 100_000)).toBe(true);
+    // v1.13.9: 85_000 == usable(100_000).
+    expect(isOverflow({ prompt_tokens: 85_000, completion_tokens: 0 }, 100_000)).toBe(true);
  });

  it('returns true above usable', () => {
+    // 50_000 + 40_000 = 90_000 > 85_000.
    expect(isOverflow({ prompt_tokens: 50_000, completion_tokens: 40_000 }, 100_000)).toBe(true);
  });
 });
@@ -226,8 +235,9 @@ describe('select', () => {
    const u = mkMsg('user', 'oversized');
    const a = mkMsg('assistant', 'Y'.repeat(40_000));
    const result = select([u, a], 30_000, 1);
-    // usable(30k) = 10k → budget = min(8k, max(2k, floor(10k*0.25))) =
-    // min(8k, max(2k, 2500)) = 2500. 40k chars ≈ 10k tokens. Can't fit.
+    // v1.13.9: usable(30k) = floor(0.85*30k) = 25500 → budget =
+    // min(8k, max(2k, floor(25500*0.25))) = min(8k, max(2k, 6375)) = 6375.
+    // 40k chars ≈ 10k tokens. Still can't fit (10k > 6375).
    expect(result.tail_start_id).toBeUndefined();
    expect(result.head).toEqual([u, a]);
  });