refactor: codebase audit cleanup — dead code, dedup, module splits

Multi-agent audit + aggressive cleanup across server/web/coder/booterm, delivered behind a DEFER discipline so none of the in-flight files were touched. Removes dead code/deps/columns, dedups server + coder helpers, and splits the oversized modules (tools.ts, opencode-server.ts, sentinel-summaries, turn.ts, TerminalPane.tsx) behind stable contracts. Adds 78 parity/unit tests (server 587, coder 323); fixes two latent bugs (ChatPane queue keys, FileViewerOverlay blank-line parity).

Intended tag: v2.7.12-audit-cleanup.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 21:10:06 +00:00
parent e5ce01ae72
commit 8c200216eb
143 changed files with 6729 additions and 6087 deletions
--- a/apps/server/src/services/tests/budget.test.ts
+++ b/apps/server/src/services/tests/budget.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+import { resolveToolBudget } from '../inference/budget.js';
+import type { Agent } from '../../types/api.js';
+
+const BASE_AGENT: Agent = { // baseline fixture; cases below spread it and override max_tool_calls
+  id: 'test-agent',
+  name: 'Test',
+  description: 'test',
+  system_prompt: '',
+  temperature: 0.7,
+  top_p: null,
+  top_k: null,
+  min_p: null,
+  presence_penalty: null,
+  top_n_sigma: null,
+  dry_multiplier: null,
+  dry_base: null,
+  dry_allowed_length: null,
+  dry_penalty_last_n: null,
+  tools: ['view_file'],
+  model: null,
+  source: 'global',
+  max_tool_calls: null, // null is the "no max_tool_calls override" case (expects 100)
+  steps: null,
+  llama_extra_args: null,
+};
+
+describe('resolveToolBudget', () => {
+  const withToolBudget = (cap: number): Agent => ({ ...BASE_AGENT, max_tool_calls: cap });
+
+  it('returns 100 when agent is null (no-agent raw chat)', () => {
+    expect(resolveToolBudget(null)).toBe(100);
+  });
+
+  it('returns 100 when agent has no max_tool_calls override', () => {
+    expect(resolveToolBudget(BASE_AGENT)).toBe(100);
+  });
+
+  it('returns max_tool_calls when agent overrides the default', () => {
+    expect(resolveToolBudget(withToolBudget(25))).toBe(25);
+  });
+
+  it('returns 0 when max_tool_calls is explicitly 0 (text-only mode)', () => {
+    expect(resolveToolBudget(withToolBudget(0))).toBe(0);
+  });
+});
--- a/apps/server/src/services/tests/inference-helpers.test.ts
+++ b/apps/server/src/services/tests/inference-helpers.test.ts
@@ -0,0 +1,149 @@
+import { describe, expect, it, vi, afterEach } from 'vitest';
+import { samplerOptsFromAgent } from '../inference/stream-phase.js';
+import { createContentFlusher } from '../inference/content-flusher.js';
+import type { Sql } from '../../db.js';
+import type { Agent } from '../../types/api.js';
+
+const BASE_AGENT: Agent = { // baseline fixture for the samplerOptsFromAgent cases below
+  id: 'test-agent',
+  name: 'Test',
+  description: 'test',
+  system_prompt: '',
+  temperature: 0.7, // the only sampler field with a numeric value in the baseline
+  top_p: null,
+  top_k: null,
+  min_p: null,
+  presence_penalty: null,
+  top_n_sigma: null,
+  dry_multiplier: null,
+  dry_base: null,
+  dry_allowed_length: null,
+  dry_penalty_last_n: null,
+  tools: ['view_file'], // set on the agent; one case asserts the sampler opts carry no `tools` key
+  model: null,
+  source: 'global',
+  max_tool_calls: null,
+  steps: null,
+  llama_extra_args: null,
+};
+
+describe('samplerOptsFromAgent', () => {
+  it('maps every nullable sampler field to undefined when agent is null', () => {
+    expect(samplerOptsFromAgent(null)).toEqual({ // NOTE: toEqual ignores undefined-valued keys, so absent vs. undefined is not distinguished
+      temperature: undefined,
+      top_p: undefined,
+      top_k: undefined,
+      min_p: undefined,
+      presence_penalty: undefined,
+      top_n_sigma: undefined,
+      dry_multiplier: undefined,
+      dry_base: undefined,
+      dry_allowed_length: undefined,
+      dry_penalty_last_n: undefined,
+    });
+  });
+
+  it('strips null sampler fields to undefined but keeps numeric values', () => {
+    const agent: Agent = { // mixes numeric and null knobs so both outcomes show up in one result
+      ...BASE_AGENT,
+      temperature: 0.5,
+      top_p: 0.9,
+      top_k: null,
+      min_p: 0.05,
+      presence_penalty: null,
+      top_n_sigma: 1,
+      dry_multiplier: null,
+      dry_base: 1.75,
+      dry_allowed_length: null,
+      dry_penalty_last_n: 256,
+    };
+    expect(samplerOptsFromAgent(agent)).toEqual({ // numbers pass through unchanged; each null above is expected as undefined
+      temperature: 0.5,
+      top_p: 0.9,
+      top_k: undefined,
+      min_p: 0.05,
+      presence_penalty: undefined,
+      top_n_sigma: 1,
+      dry_multiplier: undefined,
+      dry_base: 1.75,
+      dry_allowed_length: undefined,
+      dry_penalty_last_n: 256,
+    });
+  });
+
+  it('never includes a tools field (callers add it)', () => {
+    expect('tools' in samplerOptsFromAgent(BASE_AGENT)).toBe(false); // BASE_AGENT.tools is set, so a missing key means it was dropped
+  });
+});
+
+describe('createContentFlusher', () => {
+  afterEach(() => {
+    vi.useRealTimers(); // every case installs fake timers; put the real clock back
+  });
+
+  // Tagged-template stand-in for postgres' sql`...`: resolves to [] at once
+  // and records values[0] — the interpolated content snapshot — of each call.
+  function makeSqlSpy() {
+    const writes: string[] = [];
+    const sql = ((_strings: TemplateStringsArray, ...values: unknown[]) => {
+      writes.push(values[0] as string);
+      return Promise.resolve([]);
+    }) as unknown as Sql;
+    return { sql, writes };
+  }
+
+  it('debounces: many scheduleFlush calls in one window produce one write', async () => {
+    vi.useFakeTimers();
+    const { sql, writes } = makeSqlSpy();
+    let content = '';
+    const flusher = createContentFlusher(sql, 'msg-1', () => content, 500);
+
+    content = 'a';
+    flusher.scheduleFlush();
+    content = 'ab';
+    flusher.scheduleFlush();
+    content = 'abc';
+    flusher.scheduleFlush();
+
+    expect(writes).toHaveLength(0); // nothing before the interval elapses
+    vi.advanceTimersByTime(500);
+    await flusher.drain();
+
+    expect(writes).toHaveLength(1);
+    // snapshot is read at fire time → latest content, not the value at schedule time
+    expect(writes[0]).toBe('abc');
+  });
+
+  it('arms a fresh timer after a flush fires', async () => {
+    vi.useFakeTimers();
+    const { sql, writes } = makeSqlSpy();
+    let content = 'one';
+    const flusher = createContentFlusher(sql, 'msg-1', () => content, 500);
+
+    flusher.scheduleFlush();
+    // Async advance settles the fired flush instead of betting on a single microtask tick.
+    await vi.advanceTimersByTimeAsync(500);
+
+    content = 'two';
+    flusher.scheduleFlush();
+    vi.advanceTimersByTime(500);
+    await flusher.drain();
+
+    expect(writes).toEqual(['one', 'two']);
+  });
+
+  it('drain cancels a pending timer without performing a final flush', async () => {
+    vi.useFakeTimers();
+    const { sql, writes } = makeSqlSpy();
+    let content = 'pending';
+    const flusher = createContentFlusher(sql, 'msg-1', () => content, 500);
+
+    flusher.scheduleFlush();
+    // Drain before the timer fires — the pending flush is cancelled, not forced.
+    await flusher.drain();
+    // Async advance: a write issued after an await inside a stray flush would still be caught.
+    await vi.advanceTimersByTimeAsync(500);
+
+    expect(writes).toHaveLength(0);
+  });
+});
--- a/apps/server/src/services/tests/model-context.test.ts
+++ b/apps/server/src/services/tests/model-context.test.ts
@@ -9,12 +9,9 @@ import {

 const TEST_URL = 'http://llama-swap.test:8401';

-function mockOkProps(n_ctx: number, total_slots = 1) {
+function mockOkProps(n_ctx: number) {
  return new Response(
-    JSON.stringify({
-      default_generation_settings: { n_ctx },
-      total_slots,
-    }),
+    JSON.stringify({ default_generation_settings: { n_ctx } }),
    { status: 200, headers: { 'Content-Type': 'application/json' } },
  );
 }
@@ -33,12 +30,10 @@ afterEach(() => {

 describe('getModelContext — positive cache', () => {
  it('returns the parsed body on a 200 with valid shape', async () => {
-    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144, 1));
+    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144));
    const result = await getModelContext('qwen3.6');
    expect(result).not.toBeNull();
    expect(result!.n_ctx).toBe(262_144);
-    expect(result!.total_slots).toBe(1);
-    expect(typeof result!.fetched_at).toBe('number');
    // Verify the URL was constructed correctly — encodes the model name in
    // case it contains characters that would break the path.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
@@ -57,19 +52,6 @@ describe('getModelContext — positive cache', () => {
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

-  it('defaults total_slots to 1 when the server omits it', async () => {
-    // Mirror the docstring claim — total_slots is informational and we don't
-    // reject the response just because it's missing.
-    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
-      new Response(JSON.stringify({ default_generation_settings: { n_ctx: 8192 } }), {
-        status: 200,
-      }),
-    );
-    const result = await getModelContext('partial-model');
-    expect(result).not.toBeNull();
-    expect(result!.n_ctx).toBe(8192);
-    expect(result!.total_slots).toBe(1);
-  });
 });

 // ---- negative cache (single-shot) ------------------------------------------
--- a/apps/server/src/services/tests/sentinels.test.ts
+++ b/apps/server/src/services/tests/sentinels.test.ts
@@ -0,0 +1,87 @@
+import { describe, it, expect } from 'vitest';
+import { SENTINEL_KINDS, isAnySentinel, isCapHitSentinel, isDoomLoopSentinel, isMistakeRecoverySentinel } from '../inference/sentinels.js';
+import type { Message } from '../../types/api.js';
+
+function makeSentinel(kind: string): Message { // system-role Message fixture whose metadata.kind is `kind`
+  return {
+    id: 'msg-1',
+    session_id: 's',
+    chat_id: 'c',
+    role: 'system', // the "non-system role" case overrides this via spread
+    content: '',
+    kind: 'message', // message-level kind; separate from metadata.kind set below
+    tool_calls: null,
+    tool_results: null,
+    status: 'complete',
+    last_seq: 0,
+    tokens_used: null,
+    ctx_used: null,
+    ctx_max: null,
+    started_at: null,
+    finished_at: null,
+    created_at: new Date().toISOString(),
+    metadata: { kind } as unknown as import('../../types/api.js').MessageMetadata, // double cast so any string (e.g. 'unknown_kind') is accepted
+    summary: false,
+    tail_start_id: null,
+    compacted_at: null,
+  };
+}
+
+describe('SENTINEL_KINDS — single source of truth', () => {
+  it('contains the three known sentinel kinds', () => {
+    for (const known of ['cap_hit', 'doom_loop', 'mistake_recovery']) {
+      expect(SENTINEL_KINDS.has(known)).toBe(true);
+    }
+  });
+
+  it('does not contain arbitrary strings', () => {
+    for (const stranger of ['user', 'assistant', '']) {
+      expect(SENTINEL_KINDS.has(stranger)).toBe(false);
+    }
+  });
+});
+
+describe('isAnySentinel', () => {
+  const verdictFor = (kind: string): boolean => isAnySentinel(makeSentinel(kind));
+
+  it('returns true for cap_hit', () => {
+    expect(verdictFor('cap_hit')).toBe(true);
+  });
+
+  it('returns true for doom_loop', () => {
+    expect(verdictFor('doom_loop')).toBe(true);
+  });
+
+  it('returns true for mistake_recovery', () => {
+    expect(verdictFor('mistake_recovery')).toBe(true);
+  });
+
+  it('returns false for non-system role', () => {
+    expect(isAnySentinel({ ...makeSentinel('cap_hit'), role: 'user' as const })).toBe(false);
+  });
+
+  it('returns false for null metadata', () => {
+    expect(isAnySentinel({ ...makeSentinel('cap_hit'), metadata: null })).toBe(false);
+  });
+
+  it('returns false for unknown kind', () => {
+    expect(verdictFor('unknown_kind')).toBe(false);
+  });
+});
+
+describe('individual sentinel predicates still work', () => {
+  it('isCapHitSentinel matches cap_hit only', () => {
+    const verdicts = ['cap_hit', 'doom_loop'].map((kind) => isCapHitSentinel(makeSentinel(kind)));
+    expect(verdicts).toEqual([true, false]);
+  });
+
+  it('isDoomLoopSentinel matches doom_loop only', () => {
+    const verdicts = ['doom_loop', 'cap_hit'].map((kind) => isDoomLoopSentinel(makeSentinel(kind)));
+    expect(verdicts).toEqual([true, false]);
+  });
+
+  it('isMistakeRecoverySentinel matches mistake_recovery only', () => {
+    const verdicts = ['mistake_recovery', 'cap_hit'].map((kind) => isMistakeRecoverySentinel(makeSentinel(kind)));
+    expect(verdicts).toEqual([true, false]);
+  });
+});
--- a/apps/server/src/services/tests/step-decision.test.ts
+++ b/apps/server/src/services/tests/step-decision.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from 'vitest';
+import { resolveTurnConfig, MAX_STEPS } from '../inference/turn-config.js';
+import { decideStep, decidePostToolAction } from '../inference/step-decision.js';
+import { DOOM_LOOP_THRESHOLD } from '../inference/sentinels.js';
+import type { MistakeState } from '../inference/mistake-tracker.js';
+import type { Agent, ToolCall } from '../../types/api.js';
+
+const BASE_AGENT: Agent = { // baseline fixture; resolveTurnConfig cases spread it and override steps / max_tool_calls
+  id: 'test-agent',
+  name: 'Test',
+  description: 'test',
+  system_prompt: '',
+  temperature: 0.7,
+  top_p: null,
+  top_k: null,
+  min_p: null,
+  presence_penalty: null,
+  top_n_sigma: null,
+  dry_multiplier: null,
+  dry_base: null,
+  dry_allowed_length: null,
+  dry_penalty_last_n: null,
+  tools: ['view_file'],
+  model: null,
+  source: 'global',
+  max_tool_calls: null, // overridden to 12 in the budget case
+  steps: null, // overridden to 0, 5 and 9999 in the effectiveCap cases
+  llama_extra_args: null,
+};
+
+function call(name: string, args: Record<string, unknown> = {}): ToolCall {
+  return { id: ['tc', name, JSON.stringify(args)].join('-'), name, args };
+}
+
+describe('resolveTurnConfig', () => {
+  // Turn config for BASE_AGENT with the given fields overridden.
+  const configFor = (overrides: Partial<Agent>) => resolveTurnConfig({ ...BASE_AGENT, ...overrides });
+
+  it('no agent → budget 100, cap MAX_STEPS, not text-only', () => {
+    const expected = { effectiveCap: MAX_STEPS, budget: 100, isTextOnly: false };
+    expect(resolveTurnConfig(null)).toEqual(expected);
+  });
+
+  it('steps: 0 → effectiveCap 0 and isTextOnly true', () => {
+    const expected = { effectiveCap: 0, budget: 100, isTextOnly: true };
+    expect(configFor({ steps: 0 })).toEqual(expected);
+  });
+
+  it('steps below MAX_STEPS → effectiveCap is the agent value', () => {
+    const { effectiveCap } = configFor({ steps: 5 });
+    expect(effectiveCap).toBe(5);
+  });
+
+  it('steps above MAX_STEPS → effectiveCap clamps to MAX_STEPS', () => {
+    const { effectiveCap } = configFor({ steps: 9999 });
+    expect(effectiveCap).toBe(MAX_STEPS);
+  });
+
+  it('max_tool_calls overrides the budget', () => {
+    const { budget } = configFor({ max_tool_calls: 12 });
+    expect(budget).toBe(12);
+  });
+});
+
+describe('decideStep (top-of-loop gate)', () => {
+  it('returns stream when no doom loop and under budget', () => {
+    expect(decideStep({ recentToolCalls: [], toolsUsed: 0, budget: 30 })).toEqual({ kind: 'stream' });
+  });
+
+  it('returns budget when toolsUsed has reached the budget', () => {
+    expect(decideStep({ recentToolCalls: [], toolsUsed: 30, budget: 30 })).toEqual({ kind: 'budget' });
+  });
+
+  it('returns doom (with the looping call) on identical-repeat tail', () => {
+    const tail = Array.from({ length: DOOM_LOOP_THRESHOLD }, () => call('view_file', { path: '/a' }));
+    const verdict = decideStep({ recentToolCalls: tail, toolsUsed: 1, budget: 30 });
+    expect(verdict.kind).toBe('doom');
+    // Guard only narrows the union so `loop` is reachable; the expect above already fails otherwise.
+    if (verdict.kind !== 'doom') return;
+    expect(verdict.loop.name).toBe('view_file');
+    expect(verdict.loop.args).toEqual({ path: '/a' });
+  });
+
+  it('doom takes precedence over budget when both would trip', () => {
+    const tail = Array.from({ length: DOOM_LOOP_THRESHOLD }, () => call('grep', { q: 'x' }));
+    expect(decideStep({ recentToolCalls: tail, toolsUsed: 30, budget: 30 }).kind).toBe('doom');
+  });
+});
+
+describe('decidePostToolAction (post-tool decision)', () => {
+  const clean: MistakeState = { run: [], nudges: 0 };
+  // Fresh three-mistake tracker per call; `nudges` is the nudge count it carries.
+  const streak = (nudges: number): MistakeState => ({ run: ['zod_reject', 'tool_not_found', 'exec_error'], nudges });
+
+  it('non-continue actions stop the loop without consulting the tracker', () => {
+    expect(decidePostToolAction('paused', { run: ['exec_error', 'exec_error', 'exec_error'], nudges: 0 })).toBe('stop');
+    expect(decidePostToolAction('synthesis_done', clean)).toBe('stop');
+  });
+
+  it('continue with a clean tracker → continue', () => {
+    expect(decidePostToolAction('continue', clean)).toBe('continue');
+  });
+
+  it('continue with a threshold streak and no prior nudge → nudge', () => {
+    expect(decidePostToolAction('continue', streak(0))).toBe('nudge');
+  });
+
+  it('continue with a threshold streak after a nudge already fired → escalate', () => {
+    expect(decidePostToolAction('continue', streak(1))).toBe('escalate');
+  });
+});
--- a/apps/server/src/services/tests/tools-registry.test.ts
+++ b/apps/server/src/services/tests/tools-registry.test.ts
@@ -0,0 +1,68 @@
+import { describe, it, expect } from 'vitest';
+import { z } from 'zod';
+import {
+  ALL_TOOLS,
+  TOOLS_BY_NAME,
+  appendMcpTools,
+  toolJsonSchemas,
+  type ToolDef,
+} from '../tools.js';
+
+// Parity test for the register-through MCP-discovery contract (Phase 6 split).
+// `ALL_TOOLS` / `TOOLS_BY_NAME` are `let`-bound in tools/registry.ts and
+// reassigned by appendMcpTools() at startup; this barrel re-exports them.
+// apps/coder relies on this exact behavior: it imports `appendMcpTools` + the
+// live `ALL_TOOLS` binding from @boocode/server/tools, calls appendMcpTools()
+// once, then reads ALL_TOOLS. ESM live bindings must carry the mutation
+// through the barrel re-export — if the split ever snapshots the array instead
+// of re-exporting the live binding, these assertions fail. Each test file gets
+// an isolated module instance (vitest default), so mutating the registry here
+// does not leak into tools.test.ts.
+function makeFakeMcpTool(name: string): ToolDef<unknown> {
+  // One description string shared by the tool def and its JSON schema.
+  const description = `fake mcp tool ${name}`;
+  return {
+    name,
+    description,
+    inputSchema: z.object({}) as z.ZodType<unknown>,
+    jsonSchema: {
+      type: 'function',
+      function: {
+        name,
+        description,
+        parameters: { type: 'object', properties: {}, additionalProperties: false },
+      },
+    },
+    execute: async () => ({ ok: true }),
+  };
+}
+
+describe('appendMcpTools register-through contract', () => {
+  it('is a no-op for an empty array', () => {
+    const sizeBefore = ALL_TOOLS.length;
+    appendMcpTools([]);
+    expect(ALL_TOOLS).toHaveLength(sizeBefore);
+  });
+
+  it('mutates the live ALL_TOOLS / TOOLS_BY_NAME bindings observable through the barrel', () => {
+    const sizeBefore = ALL_TOOLS.length;
+    // Registered in reverse-alphabetical order: a plain concat would leave
+    // zeta ahead of alpha, so the sorted-order check below needs the re-sort.
+    const alpha = makeFakeMcpTool('mcp__alpha__probe');
+    const zeta = makeFakeMcpTool('mcp__zeta__probe');
+    appendMcpTools([zeta, alpha]);
+
+    expect(ALL_TOOLS).toHaveLength(sizeBefore + 2);
+    expect(TOOLS_BY_NAME[alpha.name]).toBe(alpha);
+    expect(TOOLS_BY_NAME[zeta.name]).toBe(zeta);
+
+    // Whole registry must remain alpha-sorted (prompt-cache stability invariant).
+    const registryNames = ALL_TOOLS.map((tool) => tool.name);
+    expect(registryNames).toEqual(registryNames.slice().sort((l, r) => l.localeCompare(r)));
+
+    // toolJsonSchemas() is expected to expose both new tools as well.
+    const exposed = toolJsonSchemas().map((schema) => schema.function.name);
+    expect(exposed).toContain(alpha.name);
+    expect(exposed).toContain(zeta.name);
+  });
+});