refactor: codebase audit cleanup — dead code, dedup, module splits

Multi-agent audit followed by an aggressive cleanup across server/web/coder/booterm, carried out under a DEFER discipline so that none of the in-flight files were touched. Removes dead code, dependencies, and columns; deduplicates server and coder helpers; and splits the oversized modules (tools.ts, opencode-server.ts, sentinel-summaries, turn.ts, TerminalPane.tsx) behind stable contracts. Adds 78 parity/unit tests (server 587, coder 323) and fixes two latent bugs (ChatPane queue keys, FileViewerOverlay blank-line parity). Intended tag: v2.7.12-audit-cleanup. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 21:10:06 +00:00
parent e5ce01ae72
commit 8c200216eb
143 changed files with 6729 additions and 6087 deletions
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -1,377 +1,34 @@
-import type {
-  Agent,
-  Session,
-  ToolCall,
-} from '../../types/api.js';
+// P5 (SPLIT SKETCH): stream-phase.ts is now the BooCode I/O layer for the
+// stream phase — `executeStreamPhase` owns the row UPDATE, message_started
+// frame, timer-coalesced content flush (via createContentFlusher), throttled
+// usage publish, model-context lookup, and tool-whitelist filter. The generic
+// AI-SDK adapter (streamCompletion / toModelMessages / buildAiTools / sampler
+// helpers) moved to ./stream-phase-adapter.ts, which has no SQL/broker/publish
+// deps and is unit-testable on its own. Its public names are re-exported
+// below so existing importers of './stream-phase.js' (sentinel-summaries,
+// synthesis pipeline, the helper tests) keep working unchanged.
+
+import type { Agent, Session } from '../../types/api.js';
 import * as modelContext from '../model-context.js';
 import { toolJsonSchemas, type ToolJsonSchema } from '../tools.js';
 import { matchToolGlob } from '../agents.js';
 import type { OpenAiMessage } from './payload.js';
-import { extractToolCallBlocks } from './tool-call-parser.js';
-import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
+import { createContentFlusher } from './content-flusher.js';
 import type {
+  StreamPhaseState,
  InferenceContext,
  StreamResult,
  TurnArgs,
-} from './turn.js';
-import { upstreamModel } from './provider.js';
-import {
-  jsonSchema,
-  streamText,
-  tool,
-  type JSONValue,
-  type ModelMessage,
-  type ToolCallRepairFunction,
-} from 'ai';
+} from './types.js';
+import { streamCompletion, samplerOptsFromAgent } from './stream-phase-adapter.js';

-interface StreamOptions {
-  // null = omit tools entirely (compact phase); [] = caller stripped all tools
-  // (rare; we still omit from the request body to avoid OpenAI 400).
-  tools: ToolJsonSchema[] | null;
-  temperature?: number;
-  top_p?: number | null;
-  top_k?: number | null;
-  min_p?: number | null;
-  presence_penalty?: number | null;
-  // v2.6 sampling-streamjson-tokens (#11): llama.cpp sampler extensions. These
-  // are NOT standard AI-SDK streamText options and are NOT serialized by the
-  // openai-compatible provider's standardized-settings path (topK is even
-  // explicitly dropped with an "unsupported feature: topK" warning). They reach
-  // llama-server only via providerOptions.openaiCompatible (see buildSamplerProviderOptions).
-  top_n_sigma?: number | null;
-  dry_multiplier?: number | null;
-  dry_base?: number | null;
-  dry_allowed_length?: number | null;
-  dry_penalty_last_n?: number | null;
-}
-
-// v2.6 #11: build the providerOptions.openaiCompatible extraBody object for the
-// llama.cpp sampler extensions. @ai-sdk/openai-compatible (2.0.47) merges every
-// non-reserved key under providerOptions.openaiCompatible straight into the
-// chat-completion request body (see its getArgs: the Object.fromEntries spread
-// filtered against openaiCompatibleLanguageModelChatOptions.shape). This is the
-// ONLY working passthrough for these params:
-//   - top_k / min_p were latently dropped before this: top_k was passed as the
-//     AI-SDK `topK` setting which the openai-compatible provider rejects as
-//     unsupported; min_p was never passed to streamText at all.
-//   - top_n_sigma + the dry_* family have no AI-SDK equivalent.
-// Keys use llama-server's snake_case body names so they land verbatim.
-function buildSamplerProviderOptions(opts: StreamOptions): Record<string, number> | undefined {
-  const body: Record<string, number> = {};
-  if (typeof opts.top_k === 'number') body.top_k = opts.top_k;
-  if (typeof opts.min_p === 'number') body.min_p = opts.min_p;
-  if (typeof opts.top_n_sigma === 'number') body.top_n_sigma = opts.top_n_sigma;
-  if (typeof opts.dry_multiplier === 'number') body.dry_multiplier = opts.dry_multiplier;
-  if (typeof opts.dry_base === 'number') body.dry_base = opts.dry_base;
-  if (typeof opts.dry_allowed_length === 'number') body.dry_allowed_length = opts.dry_allowed_length;
-  if (typeof opts.dry_penalty_last_n === 'number') body.dry_penalty_last_n = opts.dry_penalty_last_n;
-  return Object.keys(body).length > 0 ? body : undefined;
-}
-
-// v1.13.1-A: convert BooCode's OpenAI-shaped history into AI SDK
-// ModelMessage[]. Tool result messages need a `toolName` field that the
-// OpenAI shape doesn't carry; we look it up by scanning earlier assistant
-// `tool_calls` entries for a matching id.
-function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
-  const toolNameById = new Map<string, string>();
-  for (const m of messages) {
-    if (m.role === 'assistant' && m.tool_calls) {
-      for (const tc of m.tool_calls) {
-        toolNameById.set(tc.id, tc.function.name);
-      }
-    }
-  }
-  const out: ModelMessage[] = [];
-  for (const m of messages) {
-    if (m.role === 'system' || m.role === 'user') {
-      out.push({ role: m.role, content: m.content ?? '' });
-      continue;
-    }
-    if (m.role === 'assistant') {
-      const hasTools = m.tool_calls && m.tool_calls.length > 0;
-      const hasReasoning = typeof m.reasoning === 'string' && m.reasoning.length > 0;
-      if (!hasTools && !hasReasoning) {
-        // Bare text assistant (string content). null content + no tool_calls
-        // is degenerate but harmless to forward.
-        out.push({ role: 'assistant', content: m.content ?? '' });
-        continue;
-      }
-      // v1.13.1-C: AI SDK ReasoningPart precedes text + tool-calls in the
-      // assistant content array. Reasoning models (qwen3.6) consume their
-      // prior reasoning context to resume mid-thought across tool boundaries.
-      const parts: Array<
-        | { type: 'reasoning'; text: string }
-        | { type: 'text'; text: string }
-        | { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown }
-      > = [];
-      if (hasReasoning) {
-        parts.push({ type: 'reasoning', text: m.reasoning! });
-      }
-      if (m.content && m.content.length > 0) {
-        parts.push({ type: 'text', text: m.content });
-      }
-      for (const tc of m.tool_calls ?? []) {
-        let input: unknown = {};
-        try {
-          input = tc.function.arguments.length > 0 ? JSON.parse(tc.function.arguments) : {};
-        } catch {
-          // Malformed args from a prior turn: pass through as a raw blob so
-          // the model sees the same shape it emitted. Wraps the string under
-          // _raw to match the buildMessagesPayload upstream convention.
-          input = { _raw: tc.function.arguments };
-        }
-        parts.push({ type: 'tool-call', toolCallId: tc.id, toolName: tc.function.name, input });
-      }
-      out.push({ role: 'assistant', content: parts });
-      continue;
-    }
-    if (m.role === 'tool') {
-      const toolCallId = m.tool_call_id ?? '';
-      const toolName = toolNameById.get(toolCallId) ?? 'unknown';
-      const raw = m.content ?? '';
-      let output: { type: 'text'; value: string } | { type: 'json'; value: JSONValue };
-      try {
-        // JSON.parse returns `any`; cast to JSONValue since the upstream
-        // tool_results column is already JSON-serializable by construction.
-        output = { type: 'json', value: JSON.parse(raw) as JSONValue };
-      } catch {
-        output = { type: 'text', value: raw };
-      }
-      out.push({
-        role: 'tool',
-        content: [{ type: 'tool-result', toolCallId, toolName, output }],
-      });
-      continue;
-    }
-  }
-  return out;
-}
-
-// Build the AI SDK tools record from BooCode's JSON-schema tool definitions.
-// No `execute` field: BooCode runs tools itself in tool-phase.ts; streamText
-// surfaces the tool-call parts via fullStream and we capture them for the
-// outer loop to dispatch.
-function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<typeof tool>> {
-  const out: Record<string, ReturnType<typeof tool>> = {};
-  for (const s of schemas) {
-    out[s.function.name] = tool({
-      description: s.function.description,
-      inputSchema: jsonSchema(s.function.parameters),
-    });
-  }
-  return out;
-}
-
-// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
-// llama-swap) emit tool calls as inline XML inside delta.content rather than
-// the structured tool_calls field. We extract them out of the streamed text
-// before flushing it to the client.
-//
-// Qwen shape:
-//   <tool_call>
-//   <function=NAME>
-//   <parameter=KEY>VALUE</parameter>
-//   ...
-//   </function>
-//   </tool_call>
-//
-// v1.13.16: also recognize Anthropic <invoke> markup that qwen3.6-35b-a3b-mxfp4
-// drifts to (training-data residue from Claude Code documentation):
-//   <invoke name="NAME">
-//   <parameter name="KEY">VALUE</parameter>
-//   </invoke>
-// Both formats share the synthetic xml_call_${idx} ID space; the counter
-// increments across whichever opener appears first. Multiple blocks may
-// appear back-to-back in either format and they never nest.
-export async function streamCompletion(
-  ctx: InferenceContext,
-  model: string,
-  messages: OpenAiMessage[],
-  opts: StreamOptions,
-  onDelta: (content: string) => void,
-  onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
-  signal?: AbortSignal,
-  agent?: Agent | null,
-): Promise<StreamResult> {
-  const aiMessages = toModelMessages(messages);
-  const hasTools = opts.tools !== null && opts.tools.length > 0;
-  const aiTools = hasTools ? buildAiTools(opts.tools!) : undefined;
-
-  const startedAt = Date.now();
-  // v1.13.1-C: accumulate reasoning text across reasoning-delta parts.
-  // qwen3.6 emits these on a separate channel from text content; we capture
-  // them per stream so finalizeCompletion can dual-write a 'reasoning' part.
-  // Replaces the v1.13.1-A counter-only diagnostic.
-  let reasoningAccumulated = '';
-
-  // v1.13.3: experimental_repairToolCall keeps the stream alive when the
-  // model emits a malformed tool call (bad JSON args, unknown name, etc.).
-  // Without a repair function streamText throws and the WHOLE stream dies;
-  // with one, the SDK invokes us and we route the bad call through normally.
-  // Strategy: pass through unmodified. executeToolPhase's existing error
-  // path (unknown tool name → "unknown tool: X" result; zod-reject → tool
-  // 'X' rejected — fieldname: required) already gives the model a clean
-  // recovery surface on the next turn. Logging gives us visibility into
-  // how often qwen3.6 actually emits broken calls.
-  const repairToolCall: ToolCallRepairFunction<NonNullable<typeof aiTools>> = async ({
-    toolCall,
-    error,
-  }) => {
-    ctx.log.warn(
-      {
-        toolCallId: toolCall.toolCallId,
-        toolName: toolCall.toolName,
-        error: error.message,
-      },
-      'malformed tool call surfaced via repairToolCall',
-    );
-    return toolCall;
-  };
-
-  // v2.6 #11: llama.cpp sampler extensions (top_k, min_p, top_n_sigma, dry_*)
-  // ride providerOptions.openaiCompatible — they are NOT standardized streamText
-  // settings. NB: top_k used to be passed below as the AI-SDK `topK` setting;
-  // the openai-compatible provider dropped it with an "unsupported feature: topK"
-  // warning and min_p was never wired at all, so both were dead on the wire
-  // before this. They now go through the same extraBody path as the new params.
-  const samplerBody = buildSamplerProviderOptions(opts);
-
-  const result = streamText({
-    model: upstreamModel(ctx.config, model, agent ?? null),
-    messages: aiMessages,
-    ...(aiTools
-      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
-      : {}),
-    ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
-    ...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
-    ...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
-    ...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
-    abortSignal: signal,
-  });
-
-  let content = '';
-  let pendingBuffer = '';
-  let finishReason: string | null = null;
-  // v1.13.1-A: AI SDK emits one `tool-call` part per fully-aggregated call,
-  // so we no longer need the OpenAI-index reassembly map the manual SSE
-  // parser used. XML tool calls extracted from text content go into the
-  // same flat list and keep the v1.10.5 synthetic id convention.
-  const toolCalls: ToolCall[] = [];
-
-  for await (const part of result.fullStream) {
-    switch (part.type) {
-      case 'text-delta': {
-        pendingBuffer += part.text;
-        // v1.13.16: unified extraction. The helper finds the earliest-opening
-        // complete <tool_call> or <invoke> block, flushes prose between/around
-        // them, holds any partial opener for the next chunk, and silently
-        // drops blocks that fail to parse (matches pre-v1.13.16 behavior).
-        const extracted = extractToolCallBlocks(pendingBuffer);
-        if (extracted.flushed.length > 0) {
-          content += extracted.flushed;
-          onDelta(extracted.flushed);
-        }
-        for (const call of extracted.calls) {
-          const synthIdx = toolCalls.length;
-          toolCalls.push({
-            id: `xml_call_${synthIdx}`,
-            name: call.name,
-            args: call.args,
-          });
-        }
-        pendingBuffer = extracted.remaining;
-        break;
-      }
-      case 'tool-call': {
-        // AI SDK has already parsed the input into an object. Match the
-        // ToolCall shape BooCode passes around in toolCallsBuffer downstream.
-        toolCalls.push({
-          id: part.toolCallId,
-          name: part.toolName,
-          args: (part.input ?? {}) as Record<string, unknown>,
-        });
-        break;
-      }
-      case 'reasoning-delta': {
-        // v1.13.1-C: accumulate; finalizeCompletion / executeToolPhase
-        // dual-write the resulting text as a kind='reasoning' part.
-        if (typeof part.text === 'string') {
-          reasoningAccumulated += part.text;
-        }
-        break;
-      }
-      case 'finish': {
-        if (typeof part.finishReason === 'string') {
-          finishReason = part.finishReason;
-        }
-        break;
-      }
-      case 'error': {
-        const err = part.error;
-        throw err instanceof Error ? err : new Error(String(err));
-      }
-      // Intentional no-op: start, start-step, text-start, text-end,
-      // reasoning-start, reasoning-end, source, file, tool-input-start,
-      // tool-input-delta, tool-input-end, tool-result, tool-error,
-      // finish-step, raw. We only care about the aggregated tool-call and
-      // text-delta paths above; the rest are AI SDK lifecycle/streaming
-      // breadcrumbs that don't change BooCode's persistence or WS contract.
-      default:
-        break;
-    }
-  }
-
-  // v1.13.1-A: drain any buffered partial XML opener as plain text. The
-  // pre-AI-SDK path did this on stream end too — better to leak `<tool_c`
-  // than vanish the text.
-  if (pendingBuffer.length > 0) {
-    content += pendingBuffer;
-    onDelta(pendingBuffer);
-    pendingBuffer = '';
-  }
-
-  // AI SDK v6 fullStream returns normally on abort; check signal explicitly.
-  // Without this throw the row would land as status='complete' with partial
-  // content instead of going through handleAbortOrError → status='cancelled'.
-  // Smoke D caught this in v1.13.1-A — don't refactor it away.
-  if (signal?.aborted) {
-    const abortErr = new Error('aborted');
-    abortErr.name = 'AbortError';
-    throw abortErr;
-  }
-
-  // Usage lands as a promise on the result; awaiting after fullStream is
-  // drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
-  let promptTokens: number | null = null;
-  let completionTokens: number | null = null;
-  try {
-    const usage = await result.usage;
-    if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
-    if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
-  } catch {
-    // Some providers omit usage on partial streams; leave both null.
-  }
-
-  if (onUsage && (promptTokens !== null || completionTokens !== null)) {
-    onUsage(promptTokens, completionTokens);
-  }
-
-  if (reasoningAccumulated.length > 0) {
-    ctx.log.debug(
-      { reasoningChars: reasoningAccumulated.length, model, elapsed_ms: Date.now() - startedAt },
-      'streamCompletion: captured reasoning',
-    );
-  }
-
-  return {
-    finishReason,
-    content,
-    toolCalls,
-    promptTokens,
-    completionTokens,
-    reasoning: reasoningAccumulated,
-  };
-}
+export {
+  streamCompletion,
+  samplerOptsFromAgent,
+  type StreamOptions,
+  type SamplerOpts,
+  type StreamAdapterContext,
+} from './stream-phase-adapter.js';

 export async function executeStreamPhase(
  ctx: InferenceContext,
@@ -401,27 +58,7 @@ export async function executeStreamPhase(
    role: 'assistant',
  });

-  let pendingFlushTimer: NodeJS.Timeout | null = null;
-  let flushPromise: Promise<unknown> = Promise.resolve();
-
-  const flushNow = () => {
-    if (pendingFlushTimer) {
-      clearTimeout(pendingFlushTimer);
-      pendingFlushTimer = null;
-    }
-    const snapshot = state.accumulated;
-    flushPromise = flushPromise.then(() =>
-      ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`
-    );
-  };
-
-  const scheduleFlush = () => {
-    if (pendingFlushTimer) return;
-    pendingFlushTimer = setTimeout(() => {
-      pendingFlushTimer = null;
-      flushNow();
-    }, DB_FLUSH_INTERVAL_MS);
-  };
+  const flusher = createContentFlusher(ctx.sql, assistantMessageId, () => state.accumulated);

  // Tool whitelist: if an agent is set, filter the global tool list to only the
  // tool names it allows. v1.15.0-mcp-multi: uses matchToolGlob for glob
@@ -434,17 +71,6 @@ export async function executeStreamPhase(
    ? toolJsonSchemas().filter((t) => matchToolGlob(t.function.name, agent.tools))
    : toolJsonSchemas()
  ).filter((t) => webToolsEnabled || !WEB_TOOL_NAMES.has(t.function.name));
-  const effectiveTemperature = agent?.temperature;
-  const effectiveTopP = agent?.top_p ?? undefined;
-  const effectiveTopK = agent?.top_k ?? undefined;
-  const effectiveMinP = agent?.min_p ?? undefined;
-  const effectivePresencePenalty = agent?.presence_penalty ?? undefined;
-  // v2.6 #11: llama.cpp sampler extensions, threaded the same way as top_k/min_p.
-  const effectiveTopNSigma = agent?.top_n_sigma ?? undefined;
-  const effectiveDryMultiplier = agent?.dry_multiplier ?? undefined;
-  const effectiveDryBase = agent?.dry_base ?? undefined;
-  const effectiveDryAllowedLength = agent?.dry_allowed_length ?? undefined;
-  const effectiveDryPenaltyLastN = agent?.dry_penalty_last_n ?? undefined;

  // v1.12.2: ctx_max lookup is cached after the first hit per model, so this
  // is a Map probe in steady state. We capture nCtx once at the top of the
@@ -484,16 +110,7 @@ export async function executeStreamPhase(
      messages,
      {
        tools: effectiveTools,
-        temperature: effectiveTemperature,
-        top_p: effectiveTopP,
-        top_k: effectiveTopK,
-        min_p: effectiveMinP,
-        presence_penalty: effectivePresencePenalty,
-        top_n_sigma: effectiveTopNSigma,
-        dry_multiplier: effectiveDryMultiplier,
-        dry_base: effectiveDryBase,
-        dry_allowed_length: effectiveDryAllowedLength,
-        dry_penalty_last_n: effectiveDryPenaltyLastN,
+        ...samplerOptsFromAgent(agent),
      },
      (delta) => {
        state.accumulated += delta;
@@ -504,7 +121,7 @@ export async function executeStreamPhase(
          content: delta,
        });
        ctx.log.debug({ sessionId, delta }, 'inference delta');
-        scheduleFlush();
+        flusher.scheduleFlush();
      },
      (prompt, completion) => {
        pendingUsage = { p: prompt, c: completion };
@@ -522,14 +139,10 @@ export async function executeStreamPhase(
      agent,
    );
  } finally {
-    if (pendingFlushTimer) {
-      clearTimeout(pendingFlushTimer);
-      pendingFlushTimer = null;
-    }
    if (usageTimer) {
      clearTimeout(usageTimer);
      usageTimer = null;
    }
-    await flushPromise;
+    await flusher.drain();
  }
 }