diff --git a/apps/server/src/services/__tests__/compaction.test.ts b/apps/server/src/services/__tests__/compaction.test.ts
index 0188f89..f65997d 100644
--- a/apps/server/src/services/__tests__/compaction.test.ts
+++ b/apps/server/src/services/__tests__/compaction.test.ts
@@ -41,49 +41,58 @@ function mkMsg(
 
 // ---- usable -----------------------------------------------------------------
 
-describe('usable', () => {
-  it('returns 0 when contextLimit is 0', () => {
+// v1.13.9: ratio-only early trigger at 0.85 × contextLimit. Replaces the
+// v1.11.0-era `contextLimit - 20_000` math, which degenerated to 0 for
+// contexts ≤20k and gave only 7-8% headroom at 262k.
+describe('usable() — ratio-only early trigger (v1.13.9)', () => {
+  it('returns floor(0.85 * limit) for the qwen3.6 daily-driver context', () => {
+    // floor(0.85 * 262144) = floor(222822.4) = 222822 — 15% headroom for
+    // the summarizer to do its turn without itself overflowing.
+    expect(usable(262144)).toBe(222822);
+  });
+
+  it('returns 0.85× for a mid-sized context', () => {
+    expect(usable(100_000)).toBe(85_000);
+  });
+
+  it('returns 0.85× for a small context (no degenerate 0)', () => {
+    // floor(0.85 * 8192) = 6963. Under the old formula this returned 0
+    // (8192 - 20_000 clamped to 0), effectively disabling compaction for
+    // small-context models. The ratio keeps the trigger active.
+    expect(usable(8192)).toBe(6963);
+  });
+
+  it('returns 0 for zero or negative contextLimit', () => {
     expect(usable(0)).toBe(0);
-  });
-
-  it('returns 0 when contextLimit is below the 20k buffer', () => {
-    // Math.max(0, x - 20000) clamps the subtraction so we never report
-    // negative headroom. A 10k-context model reports 0 usable, which makes
-    // isOverflow short-circuit to false (correct — we can't size the
-    // compaction with no headroom).
-    expect(usable(10_000)).toBe(0);
-    expect(usable(19_999)).toBe(0);
-    expect(usable(20_000)).toBe(0);
-  });
-
-  it('subtracts the 20k buffer from a normal-sized context window', () => {
-    expect(usable(100_000)).toBe(80_000);
-    expect(usable(32_768)).toBe(12_768);
+    expect(usable(-1)).toBe(0);
   });
 });
 
 // ---- isOverflow -------------------------------------------------------------
 
 describe('isOverflow', () => {
-  it('returns false when usable is 0 (unknown / sub-buffer context)', () => {
+  it('returns false when usable is 0 (unknown contextLimit)', () => {
     expect(isOverflow({ prompt_tokens: 999_999, completion_tokens: 0 }, 0)).toBe(false);
-    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, 10_000)).toBe(false);
+    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, -1)).toBe(false);
   });
 
   it('returns false at 50% of usable', () => {
-    // usable(100k) = 80k → 50% = 40k.
+    // v1.13.9: usable(100k) = 85k; 30k + 10k = 40k is ~47% of it (half would be 42.5k).
     expect(isOverflow({ prompt_tokens: 30_000, completion_tokens: 10_000 }, 100_000)).toBe(false);
   });
 
   it('returns false just under usable', () => {
-    expect(isOverflow({ prompt_tokens: 79_000, completion_tokens: 999 }, 100_000)).toBe(false);
+    // v1.13.9: 84_000 + 999 = 84_999 < 85_000 budget.
+    expect(isOverflow({ prompt_tokens: 84_000, completion_tokens: 999 }, 100_000)).toBe(false);
   });
 
   it('returns true exactly at usable (>=, not strict >)', () => {
-    expect(isOverflow({ prompt_tokens: 80_000, completion_tokens: 0 }, 100_000)).toBe(true);
+    // v1.13.9: 85_000 == usable(100_000).
+    expect(isOverflow({ prompt_tokens: 85_000, completion_tokens: 0 }, 100_000)).toBe(true);
   });
 
   it('returns true above usable', () => {
+    // 50_000 + 40_000 = 90_000 > 85_000.
     expect(isOverflow({ prompt_tokens: 50_000, completion_tokens: 40_000 }, 100_000)).toBe(true);
   });
 });
@@ -226,8 +235,9 @@ describe('select', () => {
     const u = mkMsg('user', 'oversized');
     const a = mkMsg('assistant', 'Y'.repeat(40_000));
     const result = select([u, a], 30_000, 1);
-    // usable(30k) = 10k → budget = min(8k, max(2k, floor(10k*0.25))) =
-    // min(8k, max(2k, 2500)) = 2500. 40k chars ≈ 10k tokens. Can't fit.
+    // v1.13.9: usable(30k) = floor(0.85*30k) = 25500 → budget =
+    // min(8k, max(2k, floor(25500*0.25))) = min(8k, max(2k, 6375)) = 6375.
+    // 40k chars ≈ 10k tokens. Still can't fit (10k > 6375).
     expect(result.tail_start_id).toBeUndefined();
     expect(result.head).toEqual([u, a]);
   });
diff --git a/apps/server/src/services/compaction.ts b/apps/server/src/services/compaction.ts
index 609decc..b941a7d 100644
--- a/apps/server/src/services/compaction.ts
+++ b/apps/server/src/services/compaction.ts
@@ -23,7 +23,13 @@ import type { Broker } from './broker.js';
 import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
 import * as modelContextLookup from './model-context.js';
 
-const COMPACTION_BUFFER = 20_000;
+// v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
+// (opencode session/overflow.ts pattern). Replaces the v1.11.0-era
+// `ctx_max - 20_000` formula which degenerated to 0 for contexts ≤20k and
+// gave only 7-8% headroom to the summarizer at 262k. Ratio gives consistent
+// 15% headroom at any scale, and small-ctx models no longer get an
+// effectively-disabled trigger.
+const EARLY_TRIGGER_RATIO = 0.85;
 const MIN_PRESERVE_RECENT_TOKENS = 2_000;
 const MAX_PRESERVE_RECENT_TOKENS = 8_000;
 const DEFAULT_TAIL_TURNS = 2;
@@ -50,13 +56,13 @@ export interface CompactionMessage {
 
 // === overflow ===
 
-// Tokens we hold in reserve for the model's response so a near-full context
-// can still produce a useful turn. Mirrors opencode's COMPACTION_BUFFER.
-// Returns 0 when the context limit is unknown (caller treats 0 as "do not
-// trigger overflow"); avoids dividing-by-zero downstream.
+// Returns the token budget at which overflow fires: floor(EARLY_TRIGGER_RATIO ×
+// contextLimit), i.e. 85% of it (opencode session/overflow.ts pattern). Returns 0
+// for an unknown limit (0, NaN, negative) — caller treats 0 as "do not trigger
+// overflow", keeping inference flowing rather than compacting a turn we can't size.
 export function usable(contextLimit: number): number {
   if (!contextLimit || contextLimit <= 0) return 0;
-  return Math.max(0, contextLimit - COMPACTION_BUFFER);
+  return Math.floor(EARLY_TRIGGER_RATIO * contextLimit);
 }
 
 export interface Usage {
diff --git a/apps/server/src/services/inference/payload.ts b/apps/server/src/services/inference/payload.ts
index edba789..0e7f56c 100644
--- a/apps/server/src/services/inference/payload.ts
+++ b/apps/server/src/services/inference/payload.ts
@@ -199,10 +199,13 @@ export async function maybeFlagForCompaction(
   );
   if (!overflow) return;
 
-  // v1.13.4: try the cheap prune first. If it freed at least the buffer
-  // worth of tokens (PRUNE_TRIGGER_TOKENS, identical to COMPACTION_BUFFER),
-  // we're below the threshold again — skip flagging summarize for the next
-  // turn. The next turn's overflow check will re-evaluate from scratch.
+  // v1.13.4: try the cheap prune first. If it freed at least
+  // PRUNE_TRIGGER_TOKENS (20k) worth of context, we're below the threshold
+  // again — skip flagging summarize for the next turn. The next turn's
+  // overflow check will re-evaluate from scratch.
+  // v1.13.9: the overflow trigger above is now 85% of ctx_max (was
+  // ctx_max - 20k). PRUNE_TRIGGER_TOKENS stays at 20k as the prune-freed
+  // threshold — independent of the overflow formula.
   // Prune failures (DB errors etc.) propagate so the surrounding inference
   // path sees them; the catch in finalizeCompletion / executeToolPhase
   // doesn't shield this — by design, we want to know if prune is broken.