From a08d809b737a538324ea3368ef32187783f9d1e7 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Fri, 22 May 2026 06:46:03 +0000 Subject: [PATCH] =?UTF-8?q?v1.13.3:=20cleanup=20bundle=20=E2=80=94=20state?= =?UTF-8?q?ment=20timeout=20+=20alpha=20ordering=20+=20stuck-row=20sweeper?= =?UTF-8?q?=20+=20repairToolCall?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four independent items, all owed from prior dispatches. - statement_timeout at the database level via: ALTER DATABASE boocode SET statement_timeout = '30s'; Applied operationally; documented as a comment at the top of schema.sql (ALTER DATABASE can't run inside a DO block, so it's not idempotent inside applySchema). Re-apply after a volume reset. - Tool registry alpha-sorted at module load. llama.cpp's prompt cache hits on byte-identical prefixes; any reordering of the tool list near the top of the system prompt would invalidate every cached turn. Single-source sort at the ALL_TOOLS export so toolJsonSchemas() and TOOLS_BY_NAME inherit the order automatically. New tools.test.ts asserts the invariant; total tests 173 (was 172). - Periodic in-process stuck-row sweeper. Runs every 60s, marks 'streaming' rows older than 5 minutes as 'failed', and publishes chat_status='idle' on the user channel so the UI dot drops without a refresh. Closes the mid-session crash UX gap; the v1.12.1 boot sweep only fires once at startup, so sessions used to stay stuck until next reboot. setInterval cleaned up via app.addHook('onClose'). Mirrors handleAbortOrError's publish pattern. - experimental_repairToolCall wired through AI SDK v6 streamText. Pass-through implementation: log + return the original toolCall so the stream keeps going. 
executeToolPhase's existing error paths (unknown tool name → 'unknown tool: X' result; zod-reject → 'tool X rejected — field: required') already surface bad calls to the model; the value here is preventing the AI SDK from THROWING on parse errors and killing the whole stream. Owed since v1.13.1-A. Smoke verified: - statement_timeout = '30s' confirmed via SHOW. - Tool path normal flow intact (list_dir prompt → tool_call → result → final assistant). No malformed tool calls in the test run; repair log will surface them when qwen3.6 actually emits one. - Alpha order verified at runtime via the dist bundle: match: true. - Sweeper logic not traffic-tested (no stuck rows to find), but the SQL UPDATE + broker.publishUser pattern is identical to handleAbort and the boot sweep — synthesis-only verification. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/server/src/index.ts | 40 +++++++++++++++++++ apps/server/src/schema.sql | 7 ++++ .../src/services/__tests__/tools.test.ts | 14 +++++++ .../src/services/inference/stream-phase.ts | 37 ++++++++++++++++- apps/server/src/services/tools.ts | 7 +++- 5 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 apps/server/src/services/__tests__/tools.test.ts diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index 1c49de5..eba3f0c 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -201,6 +201,46 @@ async function main() { app.log.info(`serving static frontend from ${webDist}`); } + // v1.13.3: periodic in-process sweeper for streaming rows orphaned by a + // mid-session crash. The boot sweep (above) only fires once at startup; + // this loop catches the in-flight case. 60s cadence + 5-min threshold + // matches the boot sweep so behavior is consistent. Publishes + // chat_status='idle' on the user channel so the UI dot drops without a + // refresh — same pattern as handleAbortOrError. 
+  const SWEEP_INTERVAL_MS = 60_000; // one sweep per minute
+  const sweepStaleStreaming = async (): Promise<void> => {
+    try {
+      const rows = await sql<{ id: string; chat_id: string }[]>`
+        UPDATE messages
+        SET status = 'failed', finished_at = clock_timestamp()
+        WHERE status = 'streaming'
+          AND created_at < NOW() - INTERVAL '5 minutes'
+        RETURNING id, chat_id
+      `;
+      if (rows.length === 0) return;
+      app.log.warn(
+        { swept: rows.length, ids: rows.map((r) => r.id) },
+        'swept stale streaming rows',
+      );
+      const seenChats = new Set<string>(); // one idle event per chat, not per row
+      const now = new Date().toISOString();
+      for (const row of rows) {
+        if (seenChats.has(row.chat_id)) continue;
+        seenChats.add(row.chat_id);
+        broker.publishUser('default', {
+          type: 'chat_status',
+          chat_id: row.chat_id,
+          status: 'idle',
+          at: now,
+        });
+      }
+    } catch (err) {
+      app.log.error({ err }, 'stuck-row sweeper failed');
+    }
+  };
+  const sweepTimer = setInterval(() => { void sweepStaleStreaming(); }, SWEEP_INTERVAL_MS);
+  app.addHook('onClose', async () => { clearInterval(sweepTimer); });
+
   const shutdown = async (signal: string) => {
     app.log.info(`received ${signal}, shutting down`);
     try {
diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql
index 9157ed9..78e9ac5 100644
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -1,3 +1,10 @@
+-- v1.13.3: statement_timeout is set at database level via:
+--   ALTER DATABASE boocode SET statement_timeout = '30s';
+-- ALTER DATABASE can't run inside a DO block, so this is an operational
+-- step rather than schema. Re-apply after a volume reset (the setting
+-- lives in pg_db_role_setting, which survives `docker compose up --build`
+-- but NOT a `docker volume rm boocode_pgdata`).
+ CREATE TABLE IF NOT EXISTS projects ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name TEXT NOT NULL, diff --git a/apps/server/src/services/__tests__/tools.test.ts b/apps/server/src/services/__tests__/tools.test.ts new file mode 100644 index 0000000..aecf7a2 --- /dev/null +++ b/apps/server/src/services/__tests__/tools.test.ts @@ -0,0 +1,14 @@ +import { describe, it, expect } from 'vitest'; +import { ALL_TOOLS } from '../tools.js'; + +describe('ALL_TOOLS registry', () => { + // v1.13.3: tools must be alpha-sorted at module load. llama.cpp's prompt + // cache hits on byte-identical prefixes; the tool list lives near the + // top of the system prompt, so any order drift invalidates every cached + // turn. The registry sort is the single source of truth; downstream + // helpers (toolJsonSchemas, TOOLS_BY_NAME, buildAiTools) inherit it. + it('exports tools in alphabetical order by name', () => { + const names = ALL_TOOLS.map((t) => t.name); + expect(names).toEqual([...names].sort((a, b) => a.localeCompare(b))); + }); +}); diff --git a/apps/server/src/services/inference/stream-phase.ts b/apps/server/src/services/inference/stream-phase.ts index 1f3055f..8ec399b 100644 --- a/apps/server/src/services/inference/stream-phase.ts +++ b/apps/server/src/services/inference/stream-phase.ts @@ -19,7 +19,14 @@ import type { TurnArgs, } from './turn.js'; import { upstreamModel } from './provider.js'; -import { jsonSchema, streamText, tool, type JSONValue, type ModelMessage } from 'ai'; +import { + jsonSchema, + streamText, + tool, + type JSONValue, + type ModelMessage, + type ToolCallRepairFunction, +} from 'ai'; interface StreamOptions { // null = omit tools entirely (compact phase); [] = caller stripped all tools @@ -155,10 +162,36 @@ export async function streamCompletion( // Replaces the v1.13.1-A counter-only diagnostic. 
let reasoningAccumulated = ''; + // v1.13.3: experimental_repairToolCall keeps the stream alive when the + // model emits a malformed tool call (bad JSON args, unknown name, etc.). + // Without a repair function streamText throws and the WHOLE stream dies; + // with one, the SDK invokes us and we route the bad call through normally. + // Strategy: pass through unmodified. executeToolPhase's existing error + // path (unknown tool name → "unknown tool: X" result; zod-reject → tool + // 'X' rejected — fieldname: required) already gives the model a clean + // recovery surface on the next turn. Logging gives us visibility into + // how often qwen3.6 actually emits broken calls. + const repairToolCall: ToolCallRepairFunction> = async ({ + toolCall, + error, + }) => { + ctx.log.warn( + { + toolCallId: toolCall.toolCallId, + toolName: toolCall.toolName, + error: error.message, + }, + 'malformed tool call surfaced via repairToolCall', + ); + return toolCall; + }; + const result = streamText({ model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model), messages: aiMessages, - ...(aiTools ? { tools: aiTools, toolChoice: 'auto' as const } : {}), + ...(aiTools + ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall } + : {}), ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}), abortSignal: signal, }); diff --git a/apps/server/src/services/tools.ts b/apps/server/src/services/tools.ts index 725dfef..b407054 100644 --- a/apps/server/src/services/tools.ts +++ b/apps/server/src/services/tools.ts @@ -527,6 +527,11 @@ export const askUserInput: ToolDef = { }, }; +// v1.13.3: alpha-sorted by tool.name at module load. llama.cpp's prompt +// cache hits on byte-identical prefixes; the tool list lives near the top +// of the system prompt, so any order drift would invalidate every cached +// turn. Single source of truth for ordering lives here — toolJsonSchemas() +// and TOOLS_BY_NAME inherit it. 
export const ALL_TOOLS: ReadonlyArray> = [ viewFile as ToolDef, listDir as ToolDef, @@ -553,7 +558,7 @@ export const ALL_TOOLS: ReadonlyArray> = [ watchChanges as ToolDef, getSemanticNeighborhoods as ToolDef, getFrameworkAnalysis as ToolDef, -]; +].sort((a, b) => a.name.localeCompare(b.name)); // v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is // fully contained in this set gets a generous default tool budget (30);