From ac1a71f583ac8ddc8281864de51fdabe3312acd3 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Fri, 22 May 2026 06:34:10 +0000 Subject: [PATCH] v1.13.1-C: port ask_user_input correlation to parts + wire reasoning_parts end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass 1 — ask_user_input correlation port (messages.ts:478, :549): - The two correlation queries that backed the elicitation flow used to scan messages.tool_calls and messages.tool_results JSON columns directly. They now JOIN message_parts on payload->>'id' (for the caller assistant) and payload->>'tool_call_id' (for the pending tool row). Semantics preserved: ORDER BY m.created_at DESC LIMIT 1 still picks the latest issuance, the already-answered 409 guard now reads payload.output, and the UPDATE + parts replace inside sql.begin is unchanged from v1.13.0. - Pre-v1.13.0 history has no parts rows and is unreachable to this lookup path (404). Acceptable per dispatch decision — no pending elicitation from before v1.13.0 will still be open. JSON-column fallback can land as a hotfix if it ever surfaces. Pass 2 — reasoning_parts wired end-to-end: - types.ts/StreamResult gains `reasoning: string`. stream-phase.ts accumulates reasoning-delta text per stream (replacing the v1.13.1-A counter-only diagnostic) and returns it on the result. - parts.ts/partsFromAssistantMessage gains an optional `reasoning` param. When present it emits a kind='reasoning' part at sequence 0, ahead of the text and tool_call parts. - error-handler.ts/finalizeCompletion and tool-phase.ts/executeToolPhase both thread result.reasoning into the dual-write call so reasoning-channel models (qwen3.6) get persistent reasoning rows. - payload.ts: loadContext SELECT pulls reasoning_parts from the v1.13.1-B view; OpenAiMessage gains an optional `reasoning` field; buildMessagesPayload collapses reasoning_parts into a single string per assistant message. - stream-phase.ts/toModelMessages converts assistant messages with reasoning into an AI SDK ModelMessage content array starting with a ReasoningPart, matching the @ai-sdk/provider-utils AssistantContent union. Reasoning models can now replay prior reasoning context across tool-call boundaries. - types/api.ts and apps/web/src/api/types.ts Message interface gain reasoning_parts (optional, nullable). Frontend doesn't render this yet — field reserved for a v1.14 UI surface. Tests: 2 new in parts.test.ts cover reasoning-at-sequence-0 with and without text content. 172 tests pass (170 prior + 2 new). Smoke verified against the live container: - A reasoning-prompt ("walk through 17 × 23 step by step") produced one message with kind='reasoning' (361 chars) at sequence 0 and kind='text' (429 chars) at sequence 1. Adapter log confirmed reasoning capture. - The new correlation SQL was validated against existing tool_call / tool_result parts: returns the expected message_id + payload shape with pending state correctly identified via payload.output IS NULL. - ask_user_input end-to-end through the UI is Sam's smoke — the Prompt Builder agent does not always trigger ask_user_input for these prompts, so synthetic verification via SQL substituted for traffic-driven cover. Annotation: the v1.13.1-A abort-throw site in stream-phase.ts got a one-liner comment ("AI SDK v6 fullStream returns normally on abort; check signal explicitly.") to prevent a future refactor removing it. v1.13.2 drops the dual-write + the JSON columns + collapses the view. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/server/src/routes/messages.ts | 80 +++++++++++-------- .../src/services/__tests__/parts.test.ts | 30 +++++++ .../src/services/inference/error-handler.ts | 8 +- apps/server/src/services/inference/parts.ts | 17 +++- apps/server/src/services/inference/payload.ts | 19 ++++- .../src/services/inference/stream-phase.ts | 51 ++++++++---- .../src/services/inference/tool-phase.ts | 14 +++- apps/server/src/services/inference/turn.ts | 3 + apps/server/src/types/api.ts | 5 ++ apps/web/src/api/types.ts | 5 ++ 10 files changed, 169 insertions(+), 63 deletions(-) diff --git a/apps/server/src/routes/messages.ts b/apps/server/src/routes/messages.ts index fbbe607..4274a0e 100644 --- a/apps/server/src/routes/messages.ts +++ b/apps/server/src/routes/messages.ts @@ -470,30 +470,36 @@ export function registerMessageRoutes( const chat = chatRows[0]!; const sessionId = chat.session_id; - // Find the assistant message that emitted this tool_call. Scoped by - // chat_id + role to avoid cross-chat lookups; ordered by created_at DESC - // because the most recent issuance wins when an LLM reuses call IDs - // across turns (the older, already-answered one is a different row with - // populated tool_results downstream). - const callerRows = await sql<{ id: string; tool_calls: ToolCall[] | null }[]>` - SELECT id, tool_calls FROM messages - WHERE chat_id = ${chat.id} - AND role = 'assistant' - AND tool_calls IS NOT NULL - ORDER BY created_at DESC + // v1.13.1-C: find the assistant's tool_call by indexing message_parts + // directly on payload->>'id'. Scoped by chat_id + role via the JOIN. + // Pre-v1.13.0 history has no parts rows — those tool_calls become + // unreachable here (404). Acceptable per the dispatch decision: any + // pending elicitation from before v1.13.0 is long timed out by now; + // promote to a hotfix with a JSON-column fallback if it ever surfaces. + const callerRows = await sql<{ + message_id: string; + payload: { id: string; name: string; args: Record }; + }[]>` + SELECT p.message_id, p.payload + FROM message_parts p + JOIN messages m ON m.id = p.message_id + WHERE m.chat_id = ${chat.id} + AND m.role = 'assistant' + AND p.kind = 'tool_call' + AND p.payload->>'id' = ${tool_call_id} + ORDER BY m.created_at DESC + LIMIT 1 `; - let foundCall: ToolCall | null = null; - for (const row of callerRows) { - const match = row.tool_calls?.find((tc) => tc.id === tool_call_id); - if (match) { - foundCall = match; - break; - } - } - if (!foundCall) { + const callerRow = callerRows[0]; + if (!callerRow) { reply.code(404); return { error: 'unknown_tool_call_id' }; } + const foundCall: ToolCall = { + id: callerRow.payload.id, + name: callerRow.payload.name, + args: callerRow.payload.args, + }; if (foundCall.name !== 'ask_user_input') { reply.code(400); return { error: 'tool_call_not_ask_user_input' }; @@ -540,18 +546,21 @@ export function registerMessageRoutes( } } - // Find the pending tool row. ORDER BY created_at DESC + LIMIT 1 picks - // the most recent row with this tool_call_id; the already-answered - // check below guards against UPDATE-ing a stale answer. + // v1.13.1-C: find the pending tool row via message_parts on + // payload->>'tool_call_id'. Same fallback caveat as the caller lookup + // above — pre-v1.13.0 rows are unreachable here. const toolRows = await sql<{ - id: string; - tool_results: { tool_call_id: string; output: unknown } | null; + message_id: string; + payload: { tool_call_id: string; output: unknown }; }[]>` - SELECT id, tool_results FROM messages - WHERE chat_id = ${chat.id} - AND role = 'tool' - AND tool_results->>'tool_call_id' = ${tool_call_id} - ORDER BY created_at DESC + SELECT p.message_id, p.payload + FROM message_parts p + JOIN messages m ON m.id = p.message_id + WHERE m.chat_id = ${chat.id} + AND m.role = 'tool' + AND p.kind = 'tool_result' + AND p.payload->>'tool_call_id' = ${tool_call_id} + ORDER BY m.created_at DESC LIMIT 1 `; const toolRow = toolRows[0]; @@ -559,7 +568,7 @@ export function registerMessageRoutes( reply.code(404); return { error: 'unknown_tool_call_id', detail: 'tool message not found' }; } - if (toolRow.tool_results && toolRow.tool_results.output !== null) { + if (toolRow.payload && toolRow.payload.output !== null) { reply.code(409); return { error: 'tool_call_already_answered' }; } @@ -571,20 +580,21 @@ export function registerMessageRoutes( truncated: false, }; + const toolMessageId = toolRow.message_id; const result = await sql.begin(async (tx) => { await tx` UPDATE messages SET tool_results = ${tx.json(newToolResults as never)} - WHERE id = ${toolRow.id} + WHERE id = ${toolMessageId} `; // v1.13.0: replace the pending tool_result part inserted at message // creation (tool-phase.ts) with the answered one. Delete-then-insert // is simpler than UPDATE because parts are append-style elsewhere; // the UNIQUE (message_id, sequence) constraint blocks plain insert. - await tx`DELETE FROM message_parts WHERE message_id = ${toolRow.id} AND kind = 'tool_result'`; + await tx`DELETE FROM message_parts WHERE message_id = ${toolMessageId} AND kind = 'tool_result'`; await tx` INSERT INTO message_parts (message_id, sequence, kind, payload) - VALUES (${toolRow.id}, 0, 'tool_result', ${tx.json(newToolResults as never)}) + VALUES (${toolMessageId}, 0, 'tool_result', ${tx.json(newToolResults as never)}) `; const [assistantMsg] = await tx<{ id: string }[]>` INSERT INTO messages (session_id, chat_id, role, content, status, created_at) @@ -594,7 +604,7 @@ export function registerMessageRoutes( await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`; await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`; return { - tool_message_id: toolRow.id, + tool_message_id: toolMessageId, assistant_message_id: assistantMsg!.id, }; }); diff --git a/apps/server/src/services/__tests__/parts.test.ts b/apps/server/src/services/__tests__/parts.test.ts index bf94fa0..3d4a4b8 100644 --- a/apps/server/src/services/__tests__/parts.test.ts +++ b/apps/server/src/services/__tests__/parts.test.ts @@ -52,6 +52,36 @@ describe('partsFromAssistantMessage', () => { it('returns empty array for empty content + null tool_calls', () => { expect(partsFromAssistantMessage({ content: '', tool_calls: null })).toEqual([]); }); + + it('v1.13.1-C: reasoning lands at sequence 0 before text + tool_calls', () => { + const tc: ToolCall = { id: 'call_r', name: 'view_file', args: { path: 'x.ts' } }; + const parts = partsFromAssistantMessage({ + content: 'inspecting now', + tool_calls: [tc], + reasoning: 'user asked about x.ts; I should view it', + }); + expect(parts.map((p) => [p.sequence, p.kind])).toEqual([ + [0, 'reasoning'], + [1, 'text'], + [2, 'tool_call'], + ]); + expect(parts[0]!.payload).toEqual({ + text: 'user asked about x.ts; I should view it', + }); + }); + + it('v1.13.1-C: reasoning + empty content + tool_calls preserves seq 0 reasoning', () => { + const tc: ToolCall = { id: 'call_r2', name: 'grep', args: { pattern: 'foo' } }; + const parts = partsFromAssistantMessage({ + content: '', + tool_calls: [tc], + reasoning: 'jumping straight to grep', + }); + expect(parts.map((p) => [p.sequence, p.kind])).toEqual([ + [0, 'reasoning'], + [1, 'tool_call'], + ]); + }); }); describe('partsFromToolMessage', () => { diff --git a/apps/server/src/services/inference/error-handler.ts b/apps/server/src/services/inference/error-handler.ts index 9b260b5..d1637d5 100644 --- a/apps/server/src/services/inference/error-handler.ts +++ b/apps/server/src/services/inference/error-handler.ts @@ -116,11 +116,17 @@ export async function finalizeCompletion( // v1.13.0: dual-write the text part. finalizeCompletion is the terminal // path for text-only assistant turns (no tool calls); tool_calls are null // here by construction (the tool-bearing path goes through executeToolPhase). + // v1.13.1-C: include result.reasoning so reasoning-channel models capture + // a kind='reasoning' part alongside the text. // TODO(v1.13.1): wrap the UPDATE above and this insertParts in a single // sql.begin before flipping read authority to message_parts. await insertParts( ctx.sql, - partsFromAssistantMessage({ content, tool_calls: null }).map((p) => ({ + partsFromAssistantMessage({ + content, + tool_calls: null, + reasoning: result.reasoning, + }).map((p) => ({ ...p, message_id: assistantMessageId, })), diff --git a/apps/server/src/services/inference/parts.ts b/apps/server/src/services/inference/parts.ts index f62eea3..6d23a10 100644 --- a/apps/server/src/services/inference/parts.ts +++ b/apps/server/src/services/inference/parts.ts @@ -38,16 +38,25 @@ export async function insertParts(sql: Sql, parts: PartInsert[]): Promise } // Derive parts from the canonical messages row for an assistant message. -// content (when non-empty) becomes a 'text' part at sequence 0; each tool_call -// becomes a 'tool_call' part with payload { id, name, args } where args is -// the parsed object (we use the in-memory ToolCall shape, not the OpenAI -// stringified one). +// reasoning (when non-empty) becomes a 'reasoning' part at sequence 0 — +// it precedes user-visible content logically. content (when non-empty) +// becomes a 'text' part next; each tool_call becomes a 'tool_call' part +// with payload { id, name, args } where args is the parsed object (we +// use the in-memory ToolCall shape, not the OpenAI stringified one). export function partsFromAssistantMessage(args: { content: string; tool_calls: ToolCall[] | null; + // v1.13.1-C: optional reasoning text streamed alongside the answer. + // Most rows have none — only models with separate reasoning channels + // (qwen3.6 etc.) populate this. + reasoning?: string; }): Omit[] { const out: Omit[] = []; let seq = 0; + if (args.reasoning && args.reasoning.length > 0) { + out.push({ sequence: seq, kind: 'reasoning', payload: { text: args.reasoning } }); + seq += 1; + } if (args.content && args.content.length > 0) { out.push({ sequence: seq, kind: 'text', payload: { text: args.content } }); seq += 1; diff --git a/apps/server/src/services/inference/payload.ts b/apps/server/src/services/inference/payload.ts index 7404f8d..84c579b 100644 --- a/apps/server/src/services/inference/payload.ts +++ b/apps/server/src/services/inference/payload.ts @@ -19,6 +19,12 @@ export interface OpenAiMessage { function: { name: string; arguments: string }; }>; tool_call_id?: string; + // v1.13.1-C: reasoning text from a prior assistant turn, sourced from + // message_parts kind='reasoning' rows joined in via reasoning_parts on + // the messages_with_parts view. stream-phase.ts/toModelMessages threads + // this into the AI SDK ReasoningPart when forwarding to the model so + // reasoning models can resume mid-thought across tool-call boundaries. + reasoning?: string; } // v1.12: buildSystemPrompt lives in services/system-prompt.ts. It awaits the @@ -83,6 +89,12 @@ export async function buildMessagesPayload( function: { name: tc.name, arguments: JSON.stringify(tc.args) }, })); } + // v1.13.1-C: collapse reasoning_parts into a single string. The view + // returns them ordered by sequence; multiple reasoning parts on one + // message are rare but concat preserves ordering. Skip when absent. + if (m.reasoning_parts && m.reasoning_parts.length > 0) { + msg.reasoning = m.reasoning_parts.map((p) => p.text ?? '').join(''); + } out.push(msg); continue; } @@ -117,11 +129,12 @@ export async function loadContext( // can show history with the summary card inline); only LLM payloads skip // compacted rows. compacted_at IS NULL keeps the active summary + tail. // v1.13.1-B: reads tool_calls/tool_results via the parts-merged view. - // v1.13.1-C will extend the Message type with reasoning_parts and pull - // it from the same view; deferred here so the type contract stays clean. + // v1.13.1-C: also pull reasoning_parts so assistant messages from + // reasoning models can be replayed with their reasoning context preserved. const history = await sql` SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, - tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata + tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, + reasoning_parts FROM messages_with_parts WHERE chat_id = ${chatId} AND compacted_at IS NULL ORDER BY created_at ASC, id ASC diff --git a/apps/server/src/services/inference/stream-phase.ts b/apps/server/src/services/inference/stream-phase.ts index 7be694d..1f3055f 100644 --- a/apps/server/src/services/inference/stream-phase.ts +++ b/apps/server/src/services/inference/stream-phase.ts @@ -49,20 +49,28 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] { } if (m.role === 'assistant') { const hasTools = m.tool_calls && m.tool_calls.length > 0; - if (!hasTools) { + const hasReasoning = typeof m.reasoning === 'string' && m.reasoning.length > 0; + if (!hasTools && !hasReasoning) { // Bare text assistant (string content). null content + no tool_calls // is degenerate but harmless to forward. out.push({ role: 'assistant', content: m.content ?? '' }); continue; } + // v1.13.1-C: AI SDK ReasoningPart precedes text + tool-calls in the + // assistant content array. Reasoning models (qwen3.6) consume their + // prior reasoning context to resume mid-thought across tool boundaries. const parts: Array< + | { type: 'reasoning'; text: string } | { type: 'text'; text: string } | { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown } > = []; + if (hasReasoning) { + parts.push({ type: 'reasoning', text: m.reasoning! }); + } if (m.content && m.content.length > 0) { parts.push({ type: 'text', text: m.content }); } - for (const tc of m.tool_calls!) { + for (const tc of m.tool_calls ?? []) { let input: unknown = {}; try { input = tc.function.arguments.length > 0 ? JSON.parse(tc.function.arguments) : {}; @@ -141,7 +149,11 @@ export async function streamCompletion( const aiTools = hasTools ? buildAiTools(opts.tools!) : undefined; const startedAt = Date.now(); - let reasoningDeltaCount = 0; + // v1.13.1-C: accumulate reasoning text across reasoning-delta parts. + // qwen3.6 emits these on a separate channel from text content; we capture + // them per stream so finalizeCompletion can dual-write a 'reasoning' part. + // Replaces the v1.13.1-A counter-only diagnostic. + let reasoningAccumulated = ''; const result = streamText({ model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model), @@ -218,10 +230,11 @@ export async function streamCompletion( break; } case 'reasoning-delta': { - // v1.13.1-A: reasoning parts are dropped for now. v1.13.1-C will - // persist them as `kind='reasoning'` rows in message_parts. Counter - // is logged at finish so we know whether qwen3.6 actually emits any. - reasoningDeltaCount += 1; + // v1.13.1-C: accumulate; finalizeCompletion / executeToolPhase + // dual-write the resulting text as a kind='reasoning' part. + if (typeof part.text === 'string') { + reasoningAccumulated += part.text; + } break; } case 'finish': { @@ -254,11 +267,10 @@ export async function streamCompletion( pendingBuffer = ''; } - // v1.13.1-A: AI SDK v6 swallows the abort signal — the fullStream iterator - // exits cleanly and we'd otherwise return a successful StreamResult, which - // makes executeStreamPhase call finalizeCompletion and write status='complete'. - // Detect post-iteration abort and throw an AbortError so handleAbortOrError - // owns the row instead, matching v1.12.x stop-button behavior. + // AI SDK v6 fullStream returns normally on abort; check signal explicitly. + // Without this throw the row would land as status='complete' with partial + // content instead of going through handleAbortOrError → status='cancelled'. + // Smoke D caught this in v1.13.1-A — don't refactor it away. if (signal?.aborted) { const abortErr = new Error('aborted'); abortErr.name = 'AbortError'; @@ -281,14 +293,21 @@ export async function streamCompletion( onUsage(promptTokens, completionTokens); } - if (reasoningDeltaCount > 0) { + if (reasoningAccumulated.length > 0) { ctx.log.debug( - { reasoningDeltaCount, model, elapsed_ms: Date.now() - startedAt }, - 'streamCompletion: reasoning deltas dropped (captured in v1.13.1-C)', + { reasoningChars: reasoningAccumulated.length, model, elapsed_ms: Date.now() - startedAt }, + 'streamCompletion: captured reasoning', ); } - return { finishReason, content, toolCalls, promptTokens, completionTokens }; + return { + finishReason, + content, + toolCalls, + promptTokens, + completionTokens, + reasoning: reasoningAccumulated, + }; } export async function executeStreamPhase( diff --git a/apps/server/src/services/inference/tool-phase.ts b/apps/server/src/services/inference/tool-phase.ts index 69d6da6..1f0a3fa 100644 --- a/apps/server/src/services/inference/tool-phase.ts +++ b/apps/server/src/services/inference/tool-phase.ts @@ -98,16 +98,22 @@ export async function executeToolPhase( WHERE id = ${assistantMessageId} RETURNING tokens_used, ctx_used, ctx_max, finished_at `; - // v1.13.0: dual-write to message_parts. Reads still consume the JSON - // columns above; this mirrors the same data into the granular table so - // the AI SDK migration can swap reads later without a backfill window. + // v1.13.0: dual-write to message_parts. v1.13.1-B made parts authoritative + // for reads via the messages_with_parts view; the JSON column write above + // remains for v1.13.1 fallback compatibility (dropped in v1.13.2). + // v1.13.1-C: include result.reasoning so models with separate reasoning + // channels (qwen3.6) get a kind='reasoning' part at sequence 0. // TODO(v1.13.1): wrap the UPDATE above and this insertParts in a single // sql.begin before flipping read authority to message_parts. Without the // transaction, a crash between the two leaves an orphan message that // becomes invisible in the parts-authoritative read path. await insertParts( ctx.sql, - partsFromAssistantMessage({ content, tool_calls: toolCalls }).map((p) => ({ + partsFromAssistantMessage({ + content, + tool_calls: toolCalls, + reasoning: result.reasoning, + }).map((p) => ({ ...p, message_id: assistantMessageId, })), diff --git a/apps/server/src/services/inference/turn.ts b/apps/server/src/services/inference/turn.ts index 64b9c83..3450328 100644 --- a/apps/server/src/services/inference/turn.ts +++ b/apps/server/src/services/inference/turn.ts @@ -118,6 +118,9 @@ export interface StreamResult { toolCalls: ToolCall[]; promptTokens: number | null; completionTokens: number | null; + // v1.13.1-C: reasoning text accumulated across reasoning-delta parts. + // Empty string when the model doesn't emit reasoning (most cases). + reasoning: string; } diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts index 6874a39..de5bf9c 100644 --- a/apps/server/src/types/api.ts +++ b/apps/server/src/types/api.ts @@ -186,6 +186,11 @@ export interface Message { // v1.8.2: per-message metadata. See MessageMetadata for the discriminated // shapes currently in use. metadata: MessageMetadata | null; + // v1.13.1-C: reasoning content captured from the model's reasoning stream + // (qwen3.6 etc.). Populated from message_parts via the messages_with_parts + // view's reasoning_parts column. Optional — most rows have no reasoning + // and the API may omit the field on legacy responses. + reasoning_parts?: Array<{ text: string }> | null; // v1.11: anchored rolling compaction. Optional so consumers that SELECT // the pre-v1.11 column set still type-check. See compaction.ts + // schema.sql for semantics. diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index d8fdff5..1e332f3 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -161,6 +161,11 @@ export interface Message { // v1.8.2: per-message metadata; see MessageMetadata. null for the vast // majority of messages. metadata: MessageMetadata | null; + // v1.13.1-C: reasoning content captured from models that stream reasoning + // tokens separately (qwen3.6 etc.). Backend populates from message_parts; + // optional on the wire — frontend doesn't render this yet (reserved for + // a v1.14 UI surface). + reasoning_parts?: Array<{ text: string }> | null; // v1.11: anchored rolling compaction fields. Optional on the wire so that // older API responses (or test fixtures) parse without explicit nulls. // summary — true on the assistant row that holds the active