// boocode/apps/server/src/services/__tests__/compaction.test.ts

import { describe, it, expect } from 'vitest';
import {
  usable,
  isOverflow,
  estimate,
  turns,
  select,
  buildPrompt,
  buildHeadPayload,
  deriveFilesRead,
  buildFilesReadContext,
  type CompactionMessage,
} from '../compaction.js';
import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';

// ---- fixture ----------------------------------------------------------------
// Tiny constructor for the message shape `compaction.ts` consumes. Default
// values match the post-CP1 schema (summary=false, kind='message', complete).
// Tests that need a summary row pass `summary: true`.

let counter = 0;
function mkMsg(
  role: CompactionMessage['role'],
  content: string,
  overrides: Partial<CompactionMessage> = {},
): CompactionMessage {
  counter += 1;
  return {
    id: `m${counter}`,
    role,
    content,
    kind: 'message',
    summary: false,
    status: 'complete',
    tool_calls: null,
    tool_results: null,
    reasoning_parts: null,
    metadata: null,
    created_at: new Date(counter * 1000).toISOString(),
    ...overrides,
  };
}

// ---- usable -----------------------------------------------------------------

// v1.13.9: ratio-only early trigger at 0.85 × contextLimit. Replaces the
// v1.11.0-era `contextLimit - 20_000` math, which degenerated to 0 for
// contexts ≤20k and gave only 7-8% headroom at 262k.
describe('usable() — ratio-only early trigger (v1.13.9)', () => {
  it('returns floor(0.85 * limit) for the qwen3.6 daily-driver context', () => {
    // floor(0.85 * 262144) = floor(222822.4) = 222822 — 15% headroom for
    // the summarizer to do its turn without itself overflowing.
    expect(usable(262144)).toBe(222822);
  });

  it('returns 0.85× for a mid-sized context', () => {
    expect(usable(100_000)).toBe(85_000);
  });

  it('returns 0.85× for a small context (no degenerate 0)', () => {
    // floor(0.85 * 8192) = 6963. Under the old formula this returned 0
    // (8192 - 20_000 clamped to 0), effectively disabling compaction for
    // small-context models. The ratio keeps the trigger active.
    expect(usable(8192)).toBe(6963);
  });

  it('returns 0 for zero or negative contextLimit', () => {
    expect(usable(0)).toBe(0);
    expect(usable(-1)).toBe(0);
  });
});

// ---- isOverflow -------------------------------------------------------------

describe('isOverflow', () => {
  it('returns false when usable is 0 (unknown contextLimit)', () => {
    expect(isOverflow({ prompt_tokens: 999_999, completion_tokens: 0 }, 0)).toBe(false);
    expect(isOverflow({ prompt_tokens: 0, completion_tokens: 999_999 }, -1)).toBe(false);
  });

  it('returns false at 50% of usable', () => {
    // v1.13.9: usable(100k) = 85k → 50% ≈ 42.5k.
    expect(isOverflow({ prompt_tokens: 30_000, completion_tokens: 10_000 }, 100_000)).toBe(false);
  });

  it('returns false just under usable', () => {
    // v1.13.9: 84_000 + 999 = 84_999 < 85_000 budget.
    expect(isOverflow({ prompt_tokens: 84_000, completion_tokens: 999 }, 100_000)).toBe(false);
  });

  it('returns true exactly at usable (>=, not strict >)', () => {
    // v1.13.9: 85_000 == usable(100_000).
    expect(isOverflow({ prompt_tokens: 85_000, completion_tokens: 0 }, 100_000)).toBe(true);
  });

  it('returns true above usable', () => {
    // 50_000 + 40_000 = 90_000 > 85_000.
    expect(isOverflow({ prompt_tokens: 50_000, completion_tokens: 40_000 }, 100_000)).toBe(true);
  });
});

// ---- estimate ---------------------------------------------------------------

describe('estimate', () => {
  it('returns a tiny value for an empty array (JSON.stringify([]) is "[]")', () => {
    // Math.ceil('[]'.length / 4) = 1. Documented here so the next reader
    // doesn't think "0" is the expected baseline — char-count/4 will never
    // be exactly 0 for any JSON-serializable input.
    expect(estimate([])).toBe(1);
  });

  it('scales roughly with content length', () => {
    const tiny = estimate([mkMsg('user', 'hi')]);
    const big = estimate([mkMsg('user', 'x'.repeat(4000))]);
    expect(big).toBeGreaterThan(tiny);
    expect(big).toBeGreaterThanOrEqual(1000); // 4000 chars / 4 = 1000 floor
  });

  it('is deterministic across repeated calls', () => {
    const msgs = [mkMsg('user', 'one'), mkMsg('assistant', 'two')];
    expect(estimate(msgs)).toBe(estimate(msgs));
  });
});

// ---- turns ------------------------------------------------------------------

describe('turns', () => {
  it('returns [] for an empty message list', () => {
    expect(turns([])).toEqual([]);
  });

  it('returns one turn for a single user message', () => {
    const u = mkMsg('user', 'hi');
    const result = turns([u]);
    expect(result).toHaveLength(1);
    expect(result[0]).toEqual({ start: 0, end: 1, id: u.id });
  });

  it('returns two turns for user/assistant/user/assistant', () => {
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2');
    const a2 = mkMsg('assistant', 'a2');
    const result = turns([u1, a1, u2, a2]);
    expect(result).toEqual([
      { start: 0, end: 2, id: u1.id },
      { start: 2, end: 4, id: u2.id },
    ]);
  });

  it('extends the final turn end to include trailing non-user messages', () => {
    // Spec wording: "user/assistant + trailing system → trailing included
    // in last turn's range". Single-turn variant: [user, assistant, system]
    // should produce one turn with end=3 (covers all three indices).
    const u = mkMsg('user', 'q');
    const a = mkMsg('assistant', 'a');
    const s = mkMsg('system', 'note');
    const result = turns([u, a, s]);
    expect(result).toEqual([{ start: 0, end: 3, id: u.id }]);
  });

  it('skips user rows flagged as summary (anchored-rolling rows)', () => {
    // Defense-in-depth — process() pre-filters summary rows, but turns()
    // also skips them so a misuse from another caller doesn't create a
    // bogus turn boundary on the summary row itself.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const sum = mkMsg('user', 'rolled-up', { summary: true });
    const u2 = mkMsg('user', 'q2');
    const result = turns([u1, a1, sum, u2]);
    expect(result.map((t) => t.id)).toEqual([u1.id, u2.id]);
  });
});

// ---- select -----------------------------------------------------------------

describe('select', () => {
  it('returns empty head + undefined tail for an empty message list', () => {
    const result = select([], 100_000);
    expect(result.head).toEqual([]);
    expect(result.tail_start_id).toBeUndefined();
  });

  it('full-preserves when there are fewer turns than tail_turns', () => {
    // 1 turn but tail_turns=2: keep === turn0 → keep.start === 0 →
    // sentinel-return path that signals "no compaction this round".
    const u = mkMsg('user', 'only');
    const a = mkMsg('assistant', 'a');
    const result = select([u, a], 100_000, 2);
    expect(result.head).toEqual([u, a]);
    expect(result.tail_start_id).toBeUndefined();
  });

  it('keeps the last tail_turns turns when they all fit the budget', () => {
    // 3 turns, all small. tail_turns=2 means keep the last 2; head =
    // messages[0..turn2.start] = just turn1's content.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2');
    const a2 = mkMsg('assistant', 'a2');
    const u3 = mkMsg('user', 'q3');
    const a3 = mkMsg('assistant', 'a3');
    const msgs = [u1, a1, u2, a2, u3, a3];
    const result = select(msgs, 100_000, 2);
    // Turn boundaries: [0,2), [2,4), [4,6). slice(-2) = turns at 2 and 4.
    // Walking backward: u3 fits, then u2 fits → keep={start:2, id:u2.id}.
    expect(result.tail_start_id).toBe(u2.id);
    expect(result.head).toEqual([u1, a1]);
  });

  it('splits a turn mid-stream when the whole turn would overflow the budget', () => {
    // tail_turns=1 so we look only at the most recent turn. Stuff it past
    // 8k of content (max preserve budget) and the splitter walks forward
    // looking for the largest suffix that fits.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2 with a giant payload');
    const huge = mkMsg('assistant', 'X'.repeat(40_000)); // ~10k tokens
    const smallTail = mkMsg('assistant', 'short answer');
    const msgs = [u1, a1, u2, huge, smallTail];
    const result = select(msgs, 100_000, 1);
    // The split walks from turn.start+1 forward; the first index whose
    // [i, end) slice fits the budget becomes the new keep. We don't assert
    // a specific id (depends on character math), only that compaction was
    // triggered (tail_start_id set, head non-empty) and that the head
    // doesn't include the final small message.
    expect(result.tail_start_id).toBeDefined();
    expect(result.head.length).toBeGreaterThan(0);
    expect(result.head).not.toContain(smallTail);
  });

  it('full-preserves when no split point fits', () => {
    // Single oversized turn; splitTurn walks but each suffix is still too
    // big. After the loop, keep is undefined → full-preserve sentinel.
    // Force this with a sub-buffer context so budget is the floor (2k),
    // and a single 40k-char message.
    const u = mkMsg('user', 'oversized');
    const a = mkMsg('assistant', 'Y'.repeat(40_000));
    const result = select([u, a], 30_000, 1);
    // v1.13.9: usable(30k) = floor(0.85*30k) = 25500 → budget =
    // min(8k, max(2k, floor(25500*0.25))) = min(8k, max(2k, 6375)) = 6375.
    // 40k chars ≈ 10k tokens. Still can't fit (10k > 6375).
    expect(result.tail_start_id).toBeUndefined();
    expect(result.head).toEqual([u, a]);
  });
});

// ---- buildPrompt ------------------------------------------------------------

describe('buildPrompt', () => {
  it('opens with the "create new" anchor when previousSummary is undefined', () => {
    const out = buildPrompt(undefined, []);
    expect(out.startsWith('Create a new anchored summary')).toBe(true);
    expect(out).toContain(SUMMARY_TEMPLATE);
    expect(out).not.toContain('<previous-summary>');
  });

  it('opens with the "update" anchor and embeds previousSummary verbatim', () => {
    const prev = '## Goal\n- finish v1.11 compaction';
    const out = buildPrompt(prev, []);
    expect(out.startsWith('Update the anchored summary')).toBe(true);
    expect(out).toContain('<previous-summary>');
    expect(out).toContain(prev);
    expect(out).toContain('</previous-summary>');
    expect(out).toContain(SUMMARY_TEMPLATE);
  });

  it('appends extra context strings after the template (reserved for plugin injection)', () => {
    const out = buildPrompt(undefined, ['extra-context-line']);
    expect(out.endsWith('extra-context-line')).toBe(true);
  });
});

// ---- buildHeadPayload (v1.13.6) -----------------------------------------------

describe('buildHeadPayload reasoning render', () => {
  it('emits reasoning as a <reasoning> tag prefixed onto the assistant content', () => {
    const out = buildHeadPayload([
      mkMsg('user', 'show me the file'),
      mkMsg('assistant', 'reading it now', {
        reasoning_parts: [{ text: 'user wants src/index.ts; I should view it' }],
      }),
    ]);
    expect(out).toHaveLength(2);
    expect(out[1]!.role).toBe('assistant');
    expect(out[1]!.content).toBe(
      '<reasoning>user wants src/index.ts; I should view it</reasoning>\n\nreading it now',
    );
  });

  it('emits a standalone <reasoning> tag when reasoning is present but content is empty (tool-call-only turn)', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', '', {
        reasoning_parts: [{ text: 'jumping straight to grep' }],
        tool_calls: [{ id: 'c1', name: 'grep', args: { pattern: 'foo' } }],
      }),
    ]);
    expect(out).toHaveLength(1);
    expect(out[0]!.content).toBe('<reasoning>jumping straight to grep</reasoning>');
    expect(out[0]!.tool_calls).toHaveLength(1);
    expect(out[0]!.tool_calls![0]!.function.name).toBe('grep');
  });

  it('joins multiple reasoning parts without separators (matches the streaming concat)', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', 'final answer', {
        reasoning_parts: [{ text: 'first thought ' }, { text: 'second thought' }],
      }),
    ]);
    expect(out[0]!.content).toBe(
      '<reasoning>first thought second thought</reasoning>\n\nfinal answer',
    );
  });

  it('omits the reasoning tag entirely when reasoning_parts is null or empty', () => {
    const out = buildHeadPayload([
      mkMsg('assistant', 'plain answer', { reasoning_parts: null }),
      mkMsg('assistant', 'other answer', { reasoning_parts: [] }),
    ]);
    expect(out[0]!.content).toBe('plain answer');
    expect(out[1]!.content).toBe('other answer');
    expect(out[0]!.content).not.toContain('<reasoning>');
    expect(out[1]!.content).not.toContain('<reasoning>');
  });
});

// ---- buildHeadPayload sentinel stripping (#12) -------------------------------

describe('buildHeadPayload strips all UI sentinels', () => {
  it('drops cap_hit, doom_loop, and mistake_recovery system rows', () => {
    const out = buildHeadPayload([
      mkMsg('user', 'do the thing'),
      mkMsg('system', 'budget reached', { metadata: { kind: 'cap_hit' } }),
      mkMsg('system', 'looping', { metadata: { kind: 'doom_loop' } }),
      mkMsg('system', 'repeated errors', { metadata: { kind: 'mistake_recovery' } }),
      mkMsg('assistant', 'answer'),
    ]);
    // Only the user + assistant rows survive; all three sentinels stripped.
    expect(out).toHaveLength(2);
    expect(out[0]!.role).toBe('user');
    expect(out[1]!.role).toBe('assistant');
  });

  it('keeps a non-sentinel system row (e.g. compact bridge) untouched', () => {
    const out = buildHeadPayload([
      mkMsg('system', 'legacy compact', { kind: 'compact', metadata: null }),
      mkMsg('user', 'q'),
    ]);
    expect(out[0]!.role).toBe('system');
    expect(out[0]!.content).toBe('legacy compact');
  });
});

// ---- file-provenance ledger (#12, Part B) -----------------------------------

describe('deriveFilesRead', () => {
  it('returns [] when the head has no read-tool calls', () => {
    expect(deriveFilesRead([mkMsg('user', 'hi'), mkMsg('assistant', 'hello')])).toEqual([]);
  });

  it('extracts the path arg from view_file / list_dir / grep / find_files', () => {
    const head = [
      mkMsg('assistant', '', {
        tool_calls: [
          { id: 'c1', name: 'view_file', args: { path: 'src/index.ts' } },
          { id: 'c2', name: 'list_dir', args: { path: 'src' } },
          { id: 'c3', name: 'grep', args: { pattern: 'TODO', path: 'apps' } },
          { id: 'c4', name: 'find_files', args: { pattern: '**/*.ts', path: 'lib' } },
        ],
      }),
    ];
    expect(deriveFilesRead(head)).toEqual(['apps', 'lib', 'src', 'src/index.ts']);
  });

  it('dedupes and sorts paths across multiple assistant turns', () => {
    const head = [
      mkMsg('assistant', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: { path: 'b.ts' } }] }),
      mkMsg('assistant', '', { tool_calls: [{ id: 'c2', name: 'view_file', args: { path: 'a.ts' } }] }),
      mkMsg('assistant', '', { tool_calls: [{ id: 'c3', name: 'view_file', args: { path: 'b.ts' } }] }),
    ];
    expect(deriveFilesRead(head)).toEqual(['a.ts', 'b.ts']);
  });

  it('ignores non-read tools and grep calls without a path arg', () => {
    const head = [
      mkMsg('assistant', '', {
        tool_calls: [
          { id: 'c1', name: 'web_search', args: { query: 'x' } },
          { id: 'c2', name: 'grep', args: { pattern: 'foo' } }, // no path → root, skipped
          { id: 'c3', name: 'view_file', args: { path: 'kept.ts' } },
        ],
      }),
    ];
    expect(deriveFilesRead(head)).toEqual(['kept.ts']);
  });

  it('ignores read-tool calls on non-assistant rows', () => {
    const head = [
      mkMsg('user', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: { path: 'nope.ts' } }] }),
    ];
    expect(deriveFilesRead(head)).toEqual([]);
  });
});

describe('buildFilesReadContext', () => {
  it('returns null when nothing was read (no empty section injected)', () => {
    expect(buildFilesReadContext([mkMsg('user', 'hi')])).toBeNull();
  });

  it('formats a ## Files Read block with sorted bullet paths', () => {
    const head = [
      mkMsg('assistant', '', {
        tool_calls: [
          { id: 'c1', name: 'view_file', args: { path: 'z.ts' } },
          { id: 'c2', name: 'view_file', args: { path: 'a.ts' } },
        ],
      }),
    ];
    expect(buildFilesReadContext(head)).toBe('## Files Read\n- a.ts\n- z.ts');
  });
});

describe('SUMMARY_TEMPLATE includes the Files Read section (#12)', () => {
  it('declares a ## Files Read section the model must maintain', () => {
    expect(SUMMARY_TEMPLATE).toContain('## Files Read');
  });
});