v1.13.3: cleanup bundle — statement timeout + alpha ordering + stuck-row sweeper + repairToolCall

Four independent items, all owed from prior dispatches. - statement_timeout at the database level via: ALTER DATABASE boocode SET statement_timeout = '30s'; Applied operationally; documented as a comment at the top of schema.sql (ALTER DATABASE can't run inside a DO block, so it's not idempotent inside applySchema). Re-apply after a volume reset. - Tool registry alpha-sorted at module load. llama.cpp's prompt cache hits on byte-identical prefixes; any reordering of the tool list near the top of the system prompt would invalidate every cached turn. Single-source sort at the ALL_TOOLS export so toolJsonSchemas() and TOOLS_BY_NAME inherit the order automatically. New tools.test.ts asserts the invariant; total tests 173 (was 172). - Periodic in-process stuck-row sweeper. Runs every 60s, marks 'streaming' rows older than 5 minutes as 'failed', and publishes chat_status='idle' on the user channel so the UI dot drops without a refresh. Closes the mid-session crash UX gap; the v1.12.1 boot sweep only fires once at startup, so sessions used to stay stuck until next reboot. setInterval cleaned up via app.addHook('onClose'). Mirrors handleAbortOrError's publish pattern. - experimental_repairToolCall wired through AI SDK v6 streamText. Pass-through implementation: log + return the original toolCall so the stream keeps going. executeToolPhase's existing error paths (unknown tool name → 'unknown tool: X' result; zod-reject → 'tool X rejected — field: required') already surface bad calls to the model; the value here is preventing the AI SDK from THROWING on parse errors and killing the whole stream. Owed since v1.13.1-A. Smoke verified: - statement_timeout = '30s' confirmed via SHOW. - Tool path normal flow intact (list_dir prompt → tool_call → result → final assistant). No malformed tool calls in the test run; repair log will surface them when qwen3.6 actually emits one. - Alpha order verified at runtime via the dist bundle: match: true. 
- Sweeper logic not traffic-tested (no stuck rows to find), but the SQL UPDATE + broker.publishUser pattern is identical to handleAbort and the boot sweep — synthesis-only verification. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 06:46:03 +00:00
5 changed files with 102 additions and 3 deletions
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -201,6 +201,46 @@ async function main() {
    app.log.info(`serving static frontend from ${webDist}`);
  }
  // v1.13.3: periodic in-process sweeper for streaming rows orphaned by a
  // mid-session crash. The boot sweep (above) only fires once at startup;
  // this loop catches the in-flight case. 60s cadence + 5-min threshold
  // matches the boot sweep so behavior is consistent. Publishes
  // chat_status='idle' on the user channel so the UI dot drops without a
  // refresh — same pattern as handleAbortOrError.
  // How often the stale-row sweep runs, in milliseconds.
  const SWEEP_INTERVAL_MS = 60_000;

  /**
   * Marks every `messages` row that is still 'streaming' and was created more
   * than five minutes ago as 'failed', then publishes one
   * `chat_status: 'idle'` event per affected chat on the 'default' user
   * channel. Any error is logged and swallowed so a failed sweep never
   * escapes into the timer callback.
   *
   * NOTE(review): the cutoff is measured from `created_at`, so a turn that is
   * still actively generating after five minutes would also match — confirm
   * no live stream can legitimately run that long.
   */
  const sweepStaleStreaming = async (): Promise<void> => {
    try {
      const swept = await sql<{ id: string; chat_id: string }[]>`
        UPDATE messages
        SET status = 'failed', finished_at = clock_timestamp()
        WHERE status = 'streaming'
          AND created_at < NOW() - INTERVAL '5 minutes'
        RETURNING id, chat_id
      `;
      if (swept.length > 0) {
        app.log.warn(
          { swept: swept.length, ids: swept.map((message) => message.id) },
          'swept stale streaming rows',
        );
        // One timestamp shared by every event emitted in this sweep.
        const at = new Date().toISOString();
        // A Set keeps first-seen order, so each chat is notified exactly once
        // even when several of its rows were swept.
        const affectedChats = new Set(swept.map((message) => message.chat_id));
        for (const chatId of affectedChats) {
          broker.publishUser('default', {
            type: 'chat_status',
            chat_id: chatId,
            status: 'idle',
            at,
          });
        }
      }
    } catch (err) {
      app.log.error({ err }, 'stuck-row sweeper failed');
    }
  };

  // Fire-and-forget on every tick; the sweep handles its own errors.
  const sweepTimer = setInterval(() => {
    void sweepStaleStreaming();
  }, SWEEP_INTERVAL_MS);
  // Stop the timer when the server instance closes.
  app.addHook('onClose', async () => {
    clearInterval(sweepTimer);
  });
  const shutdown = async (signal: string) => {
    app.log.info(`received ${signal}, shutting down`);
    try {
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -1,3 +1,10 @@
 -- v1.13.3: statement_timeout is set at database level via:
 --   ALTER DATABASE boocode SET statement_timeout = '30s';
 -- ALTER DATABASE can't run inside a DO block, so this is an operational
 -- step rather than schema. Re-apply after a volume reset (the setting
 -- is stored in the pg_db_role_setting catalog, which survives
 -- `docker compose up --build` but NOT a `docker volume rm boocode_pgdata`).
 CREATE TABLE IF NOT EXISTS projects (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  name TEXT NOT NULL,
--- a/apps/server/src/services/tests/tools.test.ts
+++ b/apps/server/src/services/tests/tools.test.ts
@@ -0,0 +1,14 @@
 import { describe, it, expect } from 'vitest';
 import { ALL_TOOLS } from '../tools.js';
 describe('ALL_TOOLS registry', () => {
  // v1.13.3: the registry must already be alphabetised by tool name when the
  // module is loaded. The tool list sits near the top of the system prompt
  // and llama.cpp's prompt cache only hits on byte-identical prefixes, so a
  // reordered registry would invalidate every cached turn. Sorting happens
  // once in the registry; toolJsonSchemas, TOOLS_BY_NAME and buildAiTools
  // pick up that order.
  it('exports tools in alphabetical order by name', () => {
    const actual = ALL_TOOLS.map((entry) => entry.name);
    // Sort a copy so the comparison is against the untouched export order.
    const expected = actual.slice().sort((left, right) => left.localeCompare(right));
    expect(actual).toEqual(expected);
  });
 });
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -19,7 +19,14 @@ import type {
  TurnArgs,
 } from './turn.js';
 import { upstreamModel } from './provider.js';
-import { jsonSchema, streamText, tool, type JSONValue, type ModelMessage } from 'ai';
+import {
  jsonSchema,
  streamText,
  tool,
  type JSONValue,
  type ModelMessage,
  type ToolCallRepairFunction,
 } from 'ai';
 interface StreamOptions {
  // null = omit tools entirely (compact phase); [] = caller stripped all tools
@@ -155,10 +162,36 @@ export async function streamCompletion(
  // Replaces the v1.13.1-A counter-only diagnostic.
  let reasoningAccumulated = '';
  // v1.13.3: experimental_repairToolCall keeps the stream alive when the
  // model emits a malformed tool call (bad JSON args, unknown name, etc.).
  // Without a repair function streamText throws and the WHOLE stream dies;
  // with one, the SDK invokes us and we route the bad call through normally.
  // Strategy: pass through unmodified. executeToolPhase's existing error
  // path (unknown tool name → "unknown tool: X" result; zod-reject → tool
  // 'X' rejected — fieldname: required) already gives the model a clean
  // recovery surface on the next turn. Logging gives us visibility into
  // how often qwen3.6 actually emits broken calls.
  // Pass-through repair hook handed to streamText as
  // experimental_repairToolCall: records the offending call's id, tool name
  // and error message at warn level, then returns the same toolCall object
  // without modifying it.
  const repairToolCall: ToolCallRepairFunction<NonNullable<typeof aiTools>> = async (
    options,
  ) => {
    const { toolCall, error } = options;
    const { toolCallId, toolName } = toolCall;
    ctx.log.warn(
      { toolCallId, toolName, error: error.message },
      'malformed tool call surfaced via repairToolCall',
    );
    return toolCall;
  };
  const result = streamText({
    model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
    messages: aiMessages,
-    ...(aiTools ? { tools: aiTools, toolChoice: 'auto' as const } : {}),
+    ...(aiTools
      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
      : {}),
    ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
    abortSignal: signal,
  });
--- a/apps/server/src/services/tools.ts
+++ b/apps/server/src/services/tools.ts
@@ -527,6 +527,11 @@ export const askUserInput: ToolDef<AskUserInputInputT> = {
  },
 };
 // v1.13.3: alpha-sorted by tool.name at module load. llama.cpp's prompt
 // cache hits on byte-identical prefixes; the tool list lives near the top
 // of the system prompt, so any order drift would invalidate every cached
 // turn. Single source of truth for ordering lives here — toolJsonSchemas()
 // and TOOLS_BY_NAME inherit it.
 export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
  viewFile as ToolDef<unknown>,
  listDir as ToolDef<unknown>,
@@ -553,7 +558,7 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
  watchChanges as ToolDef<unknown>,
  getSemanticNeighborhoods as ToolDef<unknown>,
  getFrameworkAnalysis as ToolDef<unknown>,
-];
+].sort((a, b) => a.name.localeCompare(b.name));
 // v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is
 // fully contained in this set gets a generous default tool budget (30);