v1.13.3: cleanup bundle — statement timeout + alpha ordering + stuck-row sweeper + repairToolCall

Four independent items, all owed from prior dispatches.

- statement_timeout at the database level via: ALTER DATABASE boocode SET statement_timeout = '30s'; Applied operationally; documented as a comment at the top of schema.sql (ALTER DATABASE can't run inside a DO block, so it's not idempotent inside applySchema). Re-apply after a volume reset.
- Tool registry alpha-sorted at module load. llama.cpp's prompt cache hits on byte-identical prefixes; any reordering of the tool list near the top of the system prompt would invalidate every cached turn. Single-source sort at the ALL_TOOLS export so toolJsonSchemas() and TOOLS_BY_NAME inherit the order automatically. New tools.test.ts asserts the invariant; total tests 173 (was 172).
- Periodic in-process stuck-row sweeper. Runs every 60s, marks 'streaming' rows older than 5 minutes as 'failed', and publishes chat_status='idle' on the user channel so the UI dot drops without a refresh. Closes the mid-session crash UX gap; the v1.12.1 boot sweep only fires once at startup, so sessions used to stay stuck until next reboot. setInterval cleaned up via app.addHook('onClose'). Mirrors handleAbortOrError's publish pattern.
- experimental_repairToolCall wired through AI SDK v6 streamText. Pass-through implementation: log + return the original toolCall so the stream keeps going. executeToolPhase's existing error paths (unknown tool name → 'unknown tool: X' result; zod-reject → 'tool X rejected — field: required') already surface bad calls to the model; the value here is preventing the AI SDK from THROWING on parse errors and killing the whole stream. Owed since v1.13.1-A.

Smoke verified:

- statement_timeout = '30s' confirmed via SHOW.
- Tool path normal flow intact (list_dir prompt → tool_call → result → final assistant). No malformed tool calls in the test run; repair log will surface them when qwen3.6 actually emits one.
- Alpha order verified at runtime via the dist bundle: match: true.
- Sweeper logic not traffic-tested (no stuck rows to find), but the SQL UPDATE + broker.publishUser pattern is identical to handleAbort and the boot sweep — synthesis-only verification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 06:46:03 +00:00
5 changed files with 102 additions and 3 deletions
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -201,6 +201,46 @@ async function main() {
    app.log.info(`serving static frontend from ${webDist}`);
  }

+  // v1.13.3: periodic in-process sweeper for streaming rows orphaned by a
+  // mid-session crash. The boot sweep (above) only fires once at startup;
+  // this loop catches the in-flight case. 60s cadence + 5-min threshold
+  // matches the boot sweep so behavior is consistent. Publishes
+  // chat_status='idle' on the user channel so the UI dot drops without a
+  // refresh — same pattern as handleAbortOrError. NOTE(review): staleness is measured from created_at, so a stream still alive after 5 minutes would also be marked failed — confirm that cannot happen.
+  const SWEEP_INTERVAL_MS = 60_000; // one sweep per minute
+  const sweepStaleStreaming = async (): Promise<void> => { // flips stale 'streaming' rows to 'failed', then notifies the UI
+    try { // failures are logged in the catch below and never rethrown
+      const rows = await sql<{ id: string; chat_id: string }[]>`
+        UPDATE messages
+        SET status = 'failed', finished_at = clock_timestamp()
+        WHERE status = 'streaming'
+          AND created_at < NOW() - INTERVAL '5 minutes'
+        RETURNING id, chat_id
+      `;
+      if (rows.length === 0) return; // nothing stale: skip logging and publishing
+      app.log.warn(
+        { swept: rows.length, ids: rows.map((r) => r.id) },
+        'swept stale streaming rows',
+      );
+      const seenChats = new Set<string>(); // a chat with several swept rows gets a single status event
+      const now = new Date().toISOString(); // one timestamp shared by every event from this sweep
+      for (const row of rows) {
+        if (seenChats.has(row.chat_id)) continue;
+        seenChats.add(row.chat_id);
+        broker.publishUser('default', { // 'default' is a literal channel key — presumably the only user; confirm
+          type: 'chat_status',
+          chat_id: row.chat_id,
+          status: 'idle',
+          at: now,
+        });
+      }
+    } catch (err) {
+      app.log.error({ err }, 'stuck-row sweeper failed');
+    }
+  };
+  const sweepTimer = setInterval(() => { void sweepStaleStreaming(); }, SWEEP_INTERVAL_MS); // void: fire-and-forget; the sweeper catches its own errors
+  app.addHook('onClose', async () => { clearInterval(sweepTimer); }); // stop the timer when the app closes
+
  const shutdown = async (signal: string) => {
    app.log.info(`received ${signal}, shutting down`);
    try {
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -1,3 +1,10 @@
+-- v1.13.3: statement_timeout is set at database level via:
+--   ALTER DATABASE boocode SET statement_timeout = '30s';
+-- ALTER DATABASE can't run inside a DO block, so this is an operational
+-- step rather than schema. Re-apply after a volume reset (the setting
+-- lives in the pg_db_role_setting catalog, which survives
+-- `docker compose up --build` but NOT a `docker volume rm boocode_pgdata`).
+
 CREATE TABLE IF NOT EXISTS projects (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  name TEXT NOT NULL,
--- a/apps/server/src/services/tests/tools.test.ts
+++ b/apps/server/src/services/tests/tools.test.ts
@@ -0,0 +1,14 @@
+import { describe, it, expect } from 'vitest';
+import { ALL_TOOLS } from '../tools.js';
+
+describe('ALL_TOOLS registry', () => {
+  // v1.13.3: tools must be alpha-sorted at module load. llama.cpp's prompt
+  // cache hits on byte-identical prefixes; the tool list lives near the
+  // top of the system prompt, so any order drift invalidates every cached
+  // turn. The registry sort is the single source of truth; downstream
+  // helpers (toolJsonSchemas, TOOLS_BY_NAME, buildAiTools) inherit it. NOTE(review): localeCompare with no locale argument uses the runtime's default locale — confirm the order is stable across hosts.
+  it('exports tools in alphabetical order by name', () => {
+    const names = ALL_TOOLS.map((t) => t.name); // tool names in export order
+    expect(names).toEqual([...names].sort((a, b) => a.localeCompare(b))); // sorts a copy, using the same localeCompare comparator as the ALL_TOOLS sort in tools.ts
+  });
+});
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -19,7 +19,14 @@ import type {
  TurnArgs,
 } from './turn.js';
 import { upstreamModel } from './provider.js';
-import { jsonSchema, streamText, tool, type JSONValue, type ModelMessage } from 'ai';
+import {
+  jsonSchema,
+  streamText,
+  tool,
+  type JSONValue,
+  type ModelMessage,
+  type ToolCallRepairFunction,
+} from 'ai';

 interface StreamOptions {
  // null = omit tools entirely (compact phase); [] = caller stripped all tools
@@ -155,10 +162,36 @@ export async function streamCompletion(
  // Replaces the v1.13.1-A counter-only diagnostic.
  let reasoningAccumulated = '';

+  // v1.13.3: experimental_repairToolCall keeps the stream alive when the
+  // model emits a malformed tool call (bad JSON args, unknown name, etc.).
+  // Without a repair function streamText throws and the WHOLE stream dies;
+  // with one, the SDK invokes us and we route the bad call through normally.
+  // Strategy: pass through unmodified. executeToolPhase's existing error
+  // path (unknown tool name → "unknown tool: X" result; zod-reject → tool
+  // 'X' rejected — fieldname: required) already gives the model a clean
+  // recovery surface on the next turn. Logging gives us visibility into
+  // how often qwen3.6 actually emits broken calls.
+  const repairToolCall: ToolCallRepairFunction<NonNullable<typeof aiTools>> = async ({
+    toolCall, // the call the SDK could not accept
+    error, // the failure the SDK reported for that call
+  }) => {
+    ctx.log.warn(
+      {
+        toolCallId: toolCall.toolCallId,
+        toolName: toolCall.toolName,
+        error: error.message, // message only — the stack is not logged
+      },
+      'malformed tool call surfaced via repairToolCall',
+    );
+    return toolCall; // pass-through: same call handed back, no repair attempted. NOTE(review): confirm the AI SDK version in use tolerates re-validating an unchanged call.
+  };
+
  const result = streamText({
    model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
    messages: aiMessages,
-    ...(aiTools ? { tools: aiTools, toolChoice: 'auto' as const } : {}),
+    ...(aiTools
+      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
+      : {}),
    ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
    abortSignal: signal,
  });
--- a/apps/server/src/services/tools.ts
+++ b/apps/server/src/services/tools.ts
@@ -527,6 +527,11 @@ export const askUserInput: ToolDef<AskUserInputInputT> = {
  },
 };

+// v1.13.3: alpha-sorted by tool.name at module load. llama.cpp's prompt
+// cache hits on byte-identical prefixes; the tool list lives near the top
+// of the system prompt, so any order drift would invalidate every cached
+// turn. Single source of truth for ordering lives here — toolJsonSchemas()
+// and TOOLS_BY_NAME inherit it.
 export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
  viewFile as ToolDef<unknown>,
  listDir as ToolDef<unknown>,
@@ -553,7 +558,7 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
  watchChanges as ToolDef<unknown>,
  getSemanticNeighborhoods as ToolDef<unknown>,
  getFrameworkAnalysis as ToolDef<unknown>,
-];
+].sort((a, b) => a.name.localeCompare(b.name));

 // v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is
 // fully contained in this set gets a generous default tool budget (30);