refactor: codebase audit cleanup — dead code, dedup, module splits

Multi-agent audit + aggressive cleanup across server/web/coder/booterm, delivered behind a DEFER discipline so none of the in-flight files were touched. Removes dead code/deps/columns, deduplicates server + coder helpers, and splits the oversized modules (tools.ts, opencode-server.ts, sentinel-summaries, turn.ts, TerminalPane.tsx) behind stable contracts. Adds 78 parity/unit tests (server 587, coder 323); fixes two latent bugs (ChatPane queue keys, FileViewerOverlay blank-line parity).

Intended tag: v2.7.12-audit-cleanup.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 21:10:06 +00:00
parent e5ce01ae72
commit 8c200216eb
143 changed files with 6729 additions and 6087 deletions
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -1,33 +1,21 @@
-import type { FastifyBaseLogger } from 'fastify';
-import type { Sql } from '../../db.js';
-import type { Config } from '../../config.js';
 import type {
  Agent,
-  ErrorReason,
  Message,
-  MessageMetadata,
  Project,
  Session,
-  ToolCall,
  UserStreamFrame,
 } from '../../types/api.js';
-import { ALL_TOOLS } from '../tools.js';
 import { resolveProjectRoot } from '../path_guard.js';
 import { maybeAutoNameChat } from '../auto_name.js';
 import { rewriteSearchQuery } from '../task-search-rewrite.js';
 import { getAgentById } from '../agents.js';
 import * as compaction from '../compaction.js';
-import type { Broker } from '../broker.js';
-import { resolveToolBudget } from './budget.js';
+import { resolveTurnConfig } from './turn-config.js';
+import { decideStep, decidePostToolAction } from './step-decision.js';
 import {
-  detectDoomLoop,
-} from './sentinels.js';
-import {
-  detectMistakePattern,
  freshMistakeState,
  recordStep,
  MISTAKE_RECOVERY_NOTE,
-  type MistakeState,
 } from './mistake-tracker.js';
 import {
  buildMessagesPayload,
@@ -35,13 +23,19 @@ import {
 } from './payload.js';
 import {
  finalizeCompletion,
+  finalizeEmpty,
  handleAbortOrError,
 } from './error-handler.js';
 import {
  executeStreamPhase,
 } from './stream-phase.js';
 import { executeToolPhase, type ToolPhaseResult } from './tool-phase.js';
-import type { StreamPhaseState } from './types.js';
+import type {
+  InferenceContext,
+  StreamPhaseState,
+  StreamResult,
+  TurnArgs,
+} from './types.js';
 import {
  runCapHitSummary,
  runDoomLoopSummary,
@@ -49,121 +43,24 @@ import {
  insertMistakeRecoverySentinel,
 } from './sentinel-summaries.js';

-// v1.14.0: hard ceiling on the number of stream-and-tool iterations per
-// user-message turn. Per-agent cap via agent.steps is the primary knob;
-// MAX_STEPS is the safety ceiling. 200 is 4x the effective budget ceiling
-// (50 tool calls) — in practice budget fires first unless the model makes
-// many 0-tool-call iterations (which exit the loop via the non-tool finish
-// path anyway).
-export const MAX_STEPS = 200;
+// P5: MAX_STEPS moved to ./turn-config.ts (with resolveTurnConfig). Re-exported
+// here so the public surface (index.ts → './turn.js') is unchanged.
+export { MAX_STEPS } from './turn-config.js';

 // v1.12.4: re-exported so external callers (tests, future consumers) keep
 // importing from services/inference.js as the public surface.
 export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js';
 export { buildMessagesPayload } from './payload.js';

-export interface InferenceFrame {
-  type:
-    | 'message_started'
-    | 'delta'
-    | 'tool_call'
-    | 'tool_result'
-    | 'message_complete'
-    | 'usage'
-    | 'messages_deleted'
-    | 'session_renamed'
-    | 'chat_renamed'
-    | 'error';
-  message_id?: string;
-  message_ids?: string[];
-  chat_id?: string;
-  tool_message_id?: string;
-  tool_call_id?: string;
-  // v1.8.2: 'system' added so cap-hit sentinel messages can announce themselves
-  // through the normal message_started → delta → message_complete sequence.
-  role?: 'assistant' | 'tool' | 'user' | 'system';
-  content?: string;
-  tool_call?: ToolCall;
-  output?: unknown;
-  truncated?: boolean;
-  error?: string;
-  // v1.8.2: structured error reason. Set on `type: 'error'` so the UI can
-  // surface a specific message; `error` stays the human-readable text.
-  reason?: ErrorReason;
-  // v1.8.2: piggybacks on `message_complete` so static or terminally-resolved
-  // messages can carry their persisted metadata to the live stream without a
-  // refetch (sentinels carry { kind: 'cap_hit', ... }; failed messages carry
-  // { kind: 'error', ... }).
-  metadata?: MessageMetadata | null;
-  tokens_used?: number | null;
-  ctx_used?: number | null;
-  ctx_max?: number | null;
-  completion_tokens?: number | null;
-  started_at?: string | null;
-  finished_at?: string | null;
-  model?: string;
-  session_id?: string;
-  name?: string;
-}
-
-export type FramePublisher = (sessionId: string, frame: InferenceFrame) => void;
-
-export interface InferenceContext {
-  sql: Sql;
-  config: Config;
-  log: FastifyBaseLogger;
-  publish: FramePublisher;
-  publishUser: (frame: UserStreamFrame) => void;
-  // v1.11: passed through so compaction.process can publish 'compacted'
-  // frames on the same session WS channel useSessionStream subscribes to.
-  // Compaction is the only path that needs the raw broker handle (regular
-  // inference goes through `publish`); keeping a separate field avoids
-  // tempting other code paths into bypassing the session-id binding.
-  broker: Broker;
-}
-
 // v1.12.4: payload assembly extracted to ./inference/payload.ts (tests
 // import buildMessagesPayload from this module, so the re-export above
 // preserves the public surface). Stream + tool phases extracted to
 // ./inference/stream-phase.ts and ./inference/tool-phase.ts.
-
-export interface StreamResult {
-  finishReason: string | null;
-  content: string;
-  toolCalls: ToolCall[];
-  promptTokens: number | null;
-  completionTokens: number | null;
-  // v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
-  // Empty string when the model doesn't emit reasoning (most cases).
-  reasoning: string;
-}
-
-
-export interface TurnArgs {
-  sessionId: string;
-  chatId: string;
-  assistantMessageId: string;
-  // v1.8.2: cumulative tool calls executed this run. Compared against the
-  // resolved budget at the top of each turn. Replaces the older `depth`
-  // counter (which counted iterations, not invocations).
-  toolsUsed: number;
-  // v1.11.6: ordered tool calls executed in this user-message turn (across
-  // recursive runAssistantTurn invocations). Reset to [] at user-message
-  // boundaries by runInference, same as toolsUsed. Doom-loop check at the
-  // top of runAssistantTurn slices the last DOOM_LOOP_THRESHOLD entries.
-  recentToolCalls: ToolCall[];
-  // v#12 MistakeTracker: heterogeneous-failure recovery state. Loop-local,
-  // reset per runInference (user-message boundary) like recentToolCalls. Folds
-  // tool-phase outcomes via recordStep each iteration; detectMistakePattern
-  // gates the nudge/escalate decision.
-  mistakeTracker: MistakeState;
-  // v#12: transient model-facing recovery note set when a nudge fires. Consumed
-  // (appended as a role:'system' message + cleared) on the NEXT payload build.
-  // Never persisted — mirrors how the cap-hit/doom-loop notes live only inside
-  // the summary call's messages array.
-  pendingRecoveryNote?: string;
-  signal: AbortSignal | undefined;
-}
+//
+// P5: the shared pipeline types (InferenceFrame / FramePublisher /
+// InferenceContext / StreamResult / TurnArgs) moved to ./types.js to break the
+// turn.ts type-hub-and-leaf near-cycle. They are re-exported from there via
+// inference/index.ts for the public surface.


 export async function runAssistantTurn(
@@ -184,17 +81,13 @@ export async function runAssistantTurn(
  const agent = session.agent_id
    ? await getAgentById(project.path, session.agent_id)
    : null;
-  const budget = resolveToolBudget(agent);
-
-  // v1.14.0: effectiveCap = min(agent.steps ?? Infinity, MAX_STEPS).
-  // steps: 0 means "no tool calls allowed" — the first stream phase runs
-  // but if it emits tool calls they are not executed (finalize as text-only).
-  const effectiveCap = Math.min(agent?.steps ?? Infinity, MAX_STEPS);
+  // P5: pure per-turn config (budget + cap math + text-only flag).
+  const { effectiveCap, budget, isTextOnly } = resolveTurnConfig(agent);

  // steps: 0 special case — model responds text-only. The while loop would
  // never enter (effectiveCap === 0), so we handle it explicitly before the
  // loop. The model always gets at least one chance to respond with text.
-  if (effectiveCap === 0) {
+  if (isTextOnly) {
    const loaded = await loadContext(ctx.sql, sessionId, chatId);
    if (loaded) {
      await runTextOnlyTurn(ctx, args, loaded.session, loaded.project, loaded.history, agent);
@@ -214,20 +107,18 @@ export async function runAssistantTurn(
  let pendingRecoveryNote: string | undefined = args.pendingRecoveryNote;

  while (stepNumber < effectiveCap) {
-    // ---- doom-loop check (moved from top-of-function) ----
-    const loop = detectDoomLoop(recentToolCalls);
-    if (loop) {
+    // ---- top-of-loop gate: doom-loop, then budget (pure decision) ----
+    const decision = decideStep({ recentToolCalls, toolsUsed, budget });
+    if (decision.kind === 'doom') {
      // Need fresh history for the summary.
      const loaded = await loadContext(ctx.sql, sessionId, chatId);
      if (loaded) {
        const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
-        await runDoomLoopSummary(ctx, iterArgs, loaded.session, loaded.project, loaded.history, agent, loop);
+        await runDoomLoopSummary(ctx, iterArgs, loaded.session, loaded.project, loaded.history, agent, decision.loop);
      }
      break;
    }
-
-    // ---- budget check (moved from top-of-function) ----
-    if (toolsUsed >= budget) {
+    if (decision.kind === 'budget') {
      const loaded = await loadContext(ctx.sql, sessionId, chatId);
      if (loaded) {
        const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
@@ -235,6 +126,7 @@ export async function runAssistantTurn(
      }
      break;
    }
+    // decision.kind === 'stream' → proceed with compaction + stream + tools.

    // ---- compaction check ----
    // v1.11: if the prior turn flagged this chat for compaction, run it
@@ -345,19 +237,17 @@ export async function runAssistantTurn(
      recordStep(mistakeTracker, o);
    }

-    if (toolPhaseResult.action !== 'continue') {
-      // 'paused' (user input) or 'synthesis_done' — stop the loop. The turn is
-      // already ending, so neither a nudge nor an escalate would change the
-      // control flow; we skip the mistake decision here.
+    // v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
+    // returned a non-'continue' action ('paused' for user input, or
+    // 'synthesis_done') — neither a nudge nor an escalate would change the
+    // control flow, so the mistake check is skipped. On 'continue' the
+    // heterogeneous-failure pattern gates nudge/escalate/continue. Complements
+    // the doom-loop gate above, which only catches *identical* repeats.
+    const post = decidePostToolAction(toolPhaseResult.action, mistakeTracker);
+    if (post === 'stop') {
      break;
    }
-
-    // v#12 MistakeTracker: heterogeneous-failure decision. Only evaluated on
-    // the 'continue' path (the only case where the loop would otherwise
-    // proceed to another step). Complements the doom-loop check above, which
-    // only catches *identical* repeats.
-    const mistake = detectMistakePattern(mistakeTracker);
-    if (mistake === 'nudge') {
+    if (post === 'nudge') {
      // Soft intervention: inject model-facing recovery guidance into the NEXT
      // step's payload, drop a UI sentinel, bump nudges, reset the streak, and
      // continue. The note is consumed (and cleared) at the top of the next
@@ -379,23 +269,16 @@ export async function runAssistantTurn(
      assistantMessageId = toolPhaseResult.nextAssistantId!;
      continue;
    }
-    if (mistake === 'escalate') {
+    if (post === 'escalate') {
      // The nudge didn't break the failure run — stop the turn (cap-hit-style)
      // to avoid burning the whole step budget on heterogeneous failures. The
-      // next assistant row is still 'streaming'; finalize it as a short note so
-      // the slot doesn't dangle, then drop the escalate sentinel.
+      // next assistant row is still 'streaming'; finalize it as an empty
+      // complete row so the slot doesn't dangle, then drop the escalate
+      // sentinel.
      const failureKinds = [...mistakeTracker.run];
      assistantMessageId = toolPhaseResult.nextAssistantId!;
-      await ctx.sql`
-        UPDATE messages
-        SET content = '', status = 'complete', finished_at = clock_timestamp()
-        WHERE id = ${assistantMessageId}
-      `;
-      ctx.publish(sessionId, {
-        type: 'message_complete',
-        message_id: assistantMessageId,
-        chat_id: chatId,
-      });
+      const escalateArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
+      await finalizeEmpty(ctx, escalateArgs);
      await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
        failureKinds,
        count: failureKinds.length,
@@ -562,4 +445,3 @@ export function createInferenceRunner(
  };
 }

-export const _toolNames = ALL_TOOLS.map((t) => t.name);