From a08d809b737a538324ea3368ef32187783f9d1e7 Mon Sep 17 00:00:00 2001
From: indifferentketchup <samkintop@gmail.com>
Date: Fri, 22 May 2026 06:46:03 +0000
Subject: [PATCH] =?UTF-8?q?v1.13.3:=20cleanup=20bundle=20=E2=80=94=20state?=
 =?UTF-8?q?ment=20timeout=20+=20alpha=20ordering=20+=20stuck-row=20sweeper?=
 =?UTF-8?q?=20+=20repairToolCall?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four independent items, all owed from prior dispatches.

- statement_timeout at the database level via:
    ALTER DATABASE boocode SET statement_timeout = '30s';
  Applied operationally; documented as a comment at the top of schema.sql
  (ALTER DATABASE can't run inside a DO block, so it's not idempotent
  inside applySchema). Re-apply after a volume reset.

- Tool registry alpha-sorted at module load. llama.cpp's prompt cache
  hits on byte-identical prefixes; any reordering of the tool list near
  the top of the system prompt would invalidate every cached turn.
  Single-source sort at the ALL_TOOLS export so toolJsonSchemas() and
  TOOLS_BY_NAME inherit the order automatically. New tools.test.ts
  asserts the invariant; total tests 173 (was 172).

- Periodic in-process stuck-row sweeper. Runs every 60s, marks
  'streaming' rows older than 5 minutes as 'failed', and publishes
  chat_status='idle' on the user channel so the UI dot drops without a
  refresh. Closes the mid-session crash UX gap; the v1.12.1 boot sweep
  only fires once at startup, so sessions used to stay stuck until next
  reboot. setInterval cleaned up via app.addHook('onClose'). Mirrors
  handleAbortOrError's publish pattern.

- experimental_repairToolCall wired through AI SDK v6 streamText. Pass-
  through implementation: log + return the original toolCall so the
  stream keeps going. executeToolPhase's existing error paths (unknown
  tool name → 'unknown tool: X' result; zod-reject → 'tool X rejected
  — field: required') already surface bad calls to the model; the value
  here is preventing the AI SDK from THROWING on parse errors and
  killing the whole stream. Owed since v1.13.1-A.

Smoke verified:
- statement_timeout = '30s' confirmed via SHOW.
- Tool path normal flow intact (list_dir prompt → tool_call → result
  → final assistant). No malformed tool calls in the test run; repair
  log will surface them when qwen3.6 actually emits one.
- Alpha order verified at runtime via the dist bundle: match: true.
- Sweeper logic not traffic-tested (no stuck rows to find), but the
  SQL UPDATE + broker.publishUser pattern is identical to
  handleAbortOrError and the boot sweep — verified by inspection only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/server/src/index.ts                      | 40 +++++++++++++++++++
 apps/server/src/schema.sql                    |  7 ++++
 .../src/services/__tests__/tools.test.ts      | 14 +++++++
 .../src/services/inference/stream-phase.ts    | 37 ++++++++++++++++-
 apps/server/src/services/tools.ts             |  7 +++-
 5 files changed, 102 insertions(+), 3 deletions(-)
 create mode 100644 apps/server/src/services/__tests__/tools.test.ts
diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts
index 1c49de5..eba3f0c 100644
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -201,6 +201,46 @@ async function main() {
     app.log.info(`serving static frontend from ${webDist}`);
   }
 
+  // v1.13.3: periodic in-process sweeper for streaming rows orphaned by a
+  // mid-session crash. The boot sweep (above) only fires once at startup;
+  // this loop catches the in-flight case. It runs every 60s and reuses the
+  // boot sweep's 5-minute threshold, measured from created_at. Publishes
+  // chat_status='idle' on the user channel so the UI dot drops without a
+  // refresh — same pattern as handleAbortOrError.
+  const SWEEP_INTERVAL_MS = 60_000;
+  const sweepStaleStreaming = async (): Promise<void> => {
+    try {
+      const rows = await sql<{ id: string; chat_id: string }[]>`
+        UPDATE messages
+        SET status = 'failed', finished_at = clock_timestamp()
+        WHERE status = 'streaming'
+          AND created_at < NOW() - INTERVAL '5 minutes'
+        RETURNING id, chat_id
+      `;
+      if (rows.length === 0) return;
+      app.log.warn(
+        { swept: rows.length, ids: rows.map((r) => r.id) },
+        'swept stale streaming rows',
+      );
+      const seenChats = new Set<string>();
+      const now = new Date().toISOString();
+      for (const row of rows) {
+        if (seenChats.has(row.chat_id)) continue;
+        seenChats.add(row.chat_id);
+        broker.publishUser('default', {
+          type: 'chat_status',
+          chat_id: row.chat_id,
+          status: 'idle',
+          at: now,
+        });
+      }
+    } catch (err) {
+      app.log.error({ err }, 'stuck-row sweeper failed');
+    }
+  };
+  const sweepTimer = setInterval(() => { void sweepStaleStreaming(); }, SWEEP_INTERVAL_MS);
+  app.addHook('onClose', async () => { clearInterval(sweepTimer); });
+
   const shutdown = async (signal: string) => {
     app.log.info(`received ${signal}, shutting down`);
     try {
diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql
index 9157ed9..78e9ac5 100644
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -1,3 +1,10 @@
+-- v1.13.3: statement_timeout is set at database level via:
+--   ALTER DATABASE boocode SET statement_timeout = '30s';
+-- ALTER DATABASE can't run inside a DO block, so this is an operational
+-- step rather than schema. Re-apply after a volume reset (the setting
+-- lives in pg_db_role_setting, which survives `docker compose up --build`
+-- but NOT a `docker volume rm boocode_pgdata`).
+
 CREATE TABLE IF NOT EXISTS projects (
   id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
   name TEXT NOT NULL,
diff --git a/apps/server/src/services/__tests__/tools.test.ts b/apps/server/src/services/__tests__/tools.test.ts
new file mode 100644
index 0000000..aecf7a2
--- /dev/null
+++ b/apps/server/src/services/__tests__/tools.test.ts
@@ -0,0 +1,14 @@
+import { describe, it, expect } from 'vitest';
+import { ALL_TOOLS } from '../tools.js';
+
+describe('ALL_TOOLS registry', () => {
+  // v1.13.3: tools must be alpha-sorted at module load. llama.cpp's prompt
+  // cache hits on byte-identical prefixes; the tool list lives near the
+  // top of the system prompt, so any order drift invalidates every cached
+  // turn. The registry sort is the single source of truth; downstream
+  // helpers (toolJsonSchemas, TOOLS_BY_NAME, buildAiTools) inherit it.
+  it('exports tools in alphabetical order by name', () => {
+    const names = ALL_TOOLS.map((t) => t.name);
+    expect(names).toEqual([...names].sort((a, b) => a.localeCompare(b)));
+  });
+});
diff --git a/apps/server/src/services/inference/stream-phase.ts b/apps/server/src/services/inference/stream-phase.ts
index 1f3055f..8ec399b 100644
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -19,7 +19,14 @@ import type {
   TurnArgs,
 } from './turn.js';
 import { upstreamModel } from './provider.js';
-import { jsonSchema, streamText, tool, type JSONValue, type ModelMessage } from 'ai';
+import {
+  jsonSchema,
+  streamText,
+  tool,
+  type JSONValue,
+  type ModelMessage,
+  type ToolCallRepairFunction,
+} from 'ai';
 
 interface StreamOptions {
   // null = omit tools entirely (compact phase); [] = caller stripped all tools
@@ -155,10 +162,36 @@ export async function streamCompletion(
   // Replaces the v1.13.1-A counter-only diagnostic.
   let reasoningAccumulated = '';
 
+  // v1.13.3: experimental_repairToolCall keeps the stream alive when the
+  // model emits a malformed tool call (bad JSON args, unknown name, etc.).
+  // Without a repair function streamText throws and the WHOLE stream dies;
+  // with one, the SDK invokes us and we route the bad call through normally.
+  // Strategy: pass through unmodified. executeToolPhase's existing error
+  // path (unknown tool name → "unknown tool: X" result; zod-reject →
+  // "tool 'X' rejected — fieldname: required") already gives the model a
+  // clean recovery surface on the next turn. Logging gives us visibility
+  // into how often qwen3.6 actually emits broken calls.
+  const repairToolCall: ToolCallRepairFunction<NonNullable<typeof aiTools>> = async ({
+    toolCall,
+    error,
+  }) => {
+    ctx.log.warn(
+      {
+        toolCallId: toolCall.toolCallId,
+        toolName: toolCall.toolName,
+        error: error.message,
+      },
+      'malformed tool call surfaced via repairToolCall',
+    );
+    return toolCall;
+  };
+
   const result = streamText({
     model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
     messages: aiMessages,
-    ...(aiTools ? { tools: aiTools, toolChoice: 'auto' as const } : {}),
+    ...(aiTools
+      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
+      : {}),
     ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
     abortSignal: signal,
   });
diff --git a/apps/server/src/services/tools.ts b/apps/server/src/services/tools.ts
index 725dfef..b407054 100644
--- a/apps/server/src/services/tools.ts
+++ b/apps/server/src/services/tools.ts
@@ -527,6 +527,11 @@ export const askUserInput: ToolDef<AskUserInputInputT> = {
   },
 };
 
+// v1.13.3: sorted by tool.name (localeCompare) at module load. llama.cpp's
+// prompt cache hits on byte-identical prefixes and the tool list sits near
+// the top of the system prompt, so order drift invalidates every cached
+// turn. This is the single source of truth for ordering; toolJsonSchemas()
+// and TOOLS_BY_NAME inherit it.
 export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
   viewFile as ToolDef<unknown>,
   listDir as ToolDef<unknown>,
@@ -553,7 +558,7 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
   watchChanges as ToolDef<unknown>,
   getSemanticNeighborhoods as ToolDef<unknown>,
   getFrameworkAnalysis as ToolDef<unknown>,
-];
+].sort((a, b) => a.name.localeCompare(b.name));
 
 // v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is
 // fully contained in this set gets a generous default tool budget (30);