import type { MessageMetadata, Session } from '../../types/api.js';
import {
  decideHtmlArtifactWrite,
  detectHtmlArtifact,
  deriveHtmlTitle,
  HTML_ARTIFACT_MAX_BYTES,
} from '../artifacts.js';
import * as modelContext from '../model-context.js';
import { maybeFlagForCompaction } from './payload.js';
import { insertParts, partsFromAssistantMessage } from './parts.js';
import type { PartInsert } from './parts.js';
import { stripToolMarkup } from './tool-call-parser.js';
import type { InferenceContext, StreamResult, TurnArgs } from './types.js';

/**
 * Finalizes an assistant message row after the turn threw.
 *
 * An `Error` whose `name` is `'AbortError'` is treated as a cancellation
 * (status `'cancelled'`, chat goes `'idle'`, a bare `message_complete` frame is
 * published). Anything else is a failure (status `'failed'`, error metadata is
 * persisted, chat goes `'error'`, an `error` frame is published).
 *
 * @param ctx - Inference context supplying `sql`, `publish`, `publishUser` and `log`.
 * @param args - Identifiers of the session, chat and assistant message being finalized.
 * @param accumulated - Content gathered before the throw; tool markup is stripped
 *   before it is written to the row.
 * @param err - The thrown value; non-`Error` values are stringified for the message.
 */
export async function handleAbortOrError(
  ctx: InferenceContext,
  args: TurnArgs,
  accumulated: string,
  err: unknown
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const isAbort = err instanceof Error && err.name === 'AbortError';
  const finalStatus = isAbort ? 'cancelled' : 'failed';
  const errMsg = err instanceof Error ? err.message : String(err);
  // Work on a local rather than reassigning the parameter.
  const finalContent = stripToolMarkup(accumulated, { final: true });

  // v1.8.2: persist a structured error metadata blob on genuine failures so
  // the bubble can render the reason on reload without re-deriving from the
  // (one-shot) WS error frame. User-initiated abort skips this — there's no
  // "reason" to surface for a stop the user already explicitly chose.
  const errorMetadata: MessageMetadata | null = isAbort
    ? null
    : { kind: 'error', error_reason: 'llm_provider_error', error_text: errMsg };

  if (errorMetadata) {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${finalContent},
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errorMetadata as never)}
      WHERE id = ${assistantMessageId}
    `;
  } else {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${finalContent},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
  }

  const [failSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions
    SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  if (failSessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: failSessRow.project_id,
      name: failSessRow.name,
      updated_at: failSessRow.updated_at,
    });
  } else {
    // The UPDATE matched no session row. Don't throw from the error handler:
    // the chat_status / error frames below must still go out.
    ctx.log.warn(
      { sessionId, chatId, assistantMessageId },
      'session row not found; skipping session_updated'
    );
  }

  // v1.8 mobile-tabs: cancellation is a user-initiated stop, treat as idle;
  // genuine errors flip the dot red. v1.8.2: error path also carries a
  // machine-readable `reason` so the UI can render specifics inline.
  if (isAbort) {
    // v1.12.1: defensive cancellation write. The status=${finalStatus} UPDATE
    // above already sets 'cancelled' for the AbortError case, but a row can
    // leak as 'streaming' when the abort fires between the post-tool-phase
    // INSERT (executeToolPhase) and the next runAssistantTurn's stream setup,
    // bypassing the try/catch around executeStreamPhase. The status guard
    // makes this a no-op when the earlier write already landed.
    await ctx.sql`
      UPDATE messages
      SET status = 'cancelled',
          content = ${finalContent},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
        AND status = 'streaming'
    `;
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'idle',
      at: new Date().toISOString(),
    });
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
    ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: 'llm_provider_error',
    });
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: errMsg,
      reason: 'llm_provider_error',
    });
    ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
  }
}

// P5: the success-finalize atom shared by the wrap-up summaries
// (sentinel-summaries.ts) and the synthesis pass (synthesisPipeline.ts). Both
// previously hand-rolled this exact ceremony — n_ctx lookup, the complete
// UPDATE (content/status/tokens/ctx/ctx_max/finished_at; NO model column), and
// the message_complete frame with the full token fields. Single-sourcing it
// means a message_complete frame-contract change lands in one place instead of
// silently skipping the summary/synthesis paths.
//
// `beforeComplete` runs AFTER the UPDATE and BEFORE the message_complete frame
// — synthesis uses it to write its kind='synthesis' part in the original order
// (UPDATE → insertParts → message_complete), preserving timing exactly.
//
// NOTE: finalizeCompletion does NOT use this — it additionally writes the
// `model` column, the text/reasoning/html_artifact parts, the compaction flag,
// and the session_updated bump, which this atom deliberately omits (the summary
// and synthesis paths handle those — or not — themselves).
/**
 * Marks a message row `'complete'` with its final content and token counts,
 * then publishes the `message_complete` frame on the session channel.
 *
 * Order of effects: model-context lookup → UPDATE → optional `beforeComplete`
 * hook → publish. The row's `model` column is not written here.
 *
 * @param ctx - Inference context supplying `sql` and `publish`.
 * @param opts - Target identifiers, final content, token counts, and an
 *   optional `beforeComplete` hook awaited between the UPDATE and the publish.
 */
export async function finalizeStreamedRow(
  ctx: InferenceContext,
  opts: {
    sessionId: string;
    chatId: string;
    messageId: string;
    model: string;
    content: string;
    completionTokens: number | null;
    promptTokens: number | null;
    startedAt: string | null;
    cacheTokens?: number | null;
    reasoningTokens?: number | null;
    beforeComplete?: () => Promise<void>;
  },
): Promise<void> {
  // v1.11.3: see executeToolPhase for the rationale.
  const mctx = await modelContext.getModelContext(opts.model);
  const nCtx = mctx?.n_ctx ?? null;

  // Normalize the optional counters once; the UPDATE and the frame share them.
  const cacheTokens = opts.cacheTokens ?? null;
  const reasoningTokens = opts.reasoningTokens ?? null;

  const [updated] = await ctx.sql<
    {
      tokens_used: number | null;
      ctx_used: number | null;
      ctx_max: number | null;
      finished_at: string | null;
    }[]
  >`
    UPDATE messages
    SET content = ${opts.content},
        status = 'complete',
        tokens_used = ${opts.completionTokens},
        ctx_used = ${opts.promptTokens},
        ctx_max = ${nCtx},
        cache_tokens = ${cacheTokens},
        reasoning_tokens = ${reasoningTokens},
        finished_at = clock_timestamp()
    WHERE id = ${opts.messageId}
    RETURNING tokens_used, ctx_used, ctx_max, finished_at
  `;

  if (opts.beforeComplete) await opts.beforeComplete();

  // `updated` is undefined when the UPDATE matched no row; the frame then
  // carries nulls for the row-derived fields.
  ctx.publish(opts.sessionId, {
    type: 'message_complete',
    message_id: opts.messageId,
    chat_id: opts.chatId,
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
    cache_tokens: cacheTokens,
    reasoning_tokens: reasoningTokens,
    started_at: opts.startedAt,
    finished_at: updated?.finished_at ?? null,
    model: opts.model,
  });
}

// P5: minimal empty-finalize for the mistake-escalate path. The escalate
// branch in runAssistantTurn stops the turn cap-hit-style; the next assistant
// row is still 'streaming', so it's finalized as an empty complete row (no
// tokens, no parts, no session bump — the escalate branch handles the sentinel
// + chat_status itself). Centralizing the status-column write + message_complete
// frame here keeps it next to the other finalize paths so a status-column
// change is found in one place.
/**
 * Finalizes the assistant row as an empty `'complete'` message and publishes a
 * bare `message_complete` frame (no token fields, no parts, no session bump).
 *
 * @param ctx - Inference context supplying `sql` and `publish`.
 * @param args - Identifiers of the session, chat and assistant message.
 */
export async function finalizeEmpty(
  ctx: InferenceContext,
  args: TurnArgs,
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  await ctx.sql`
    UPDATE messages
    SET content = '',
        status = 'complete',
        finished_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
  `;
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
    chat_id: chatId,
  });
}

/**
 * Finalizes a successfully streamed assistant turn.
 *
 * Order of effects: write the row as `'complete'` (including the `model`
 * column) → insert message parts (plus an `html_artifact` sibling part when
 * HTML is detected and within the size cap) → flag for compaction → bump the
 * session's `updated_at` → publish `session_updated`, `chat_status: idle` and
 * `message_complete` → log.
 *
 * @param ctx - Inference context supplying `sql`, `publish`, `publishUser` and `log`.
 * @param args - Identifiers of the session, chat and assistant message.
 * @param result - Stream result; its content has tool markup stripped before persisting.
 * @param startedAt - Start timestamp echoed in the `message_complete` frame.
 * @param session - Session whose `model` is written to the row and the frame.
 */
export async function finalizeCompletion(
  ctx: InferenceContext,
  args: TurnArgs,
  result: StreamResult,
  startedAt: string | null,
  session: Session
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const content = stripToolMarkup(result.content, { final: true });
  const { finishReason, promptTokens, completionTokens, cacheReadTokens, reasoningTokens } = result;

  // v1.11.3: see executeToolPhase for the rationale.
  const mctx = await modelContext.getModelContext(session.model);
  const nCtx = mctx?.n_ctx ?? null;

  const [updated] = await ctx.sql<
    {
      tokens_used: number | null;
      ctx_used: number | null;
      ctx_max: number | null;
      finished_at: string | null;
    }[]
  >`
    UPDATE messages
    SET content = ${content},
        status = 'complete',
        tokens_used = ${completionTokens},
        ctx_used = ${promptTokens},
        ctx_max = ${nCtx},
        cache_tokens = ${cacheReadTokens ?? null},
        reasoning_tokens = ${reasoningTokens ?? null},
        model = ${session.model},
        finished_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING tokens_used, ctx_used, ctx_max, finished_at
  `;

  // v1.13.0: dual-write the text part. finalizeCompletion is the terminal
  // path for text-only assistant turns (no tool calls); tool_calls are null
  // here by construction (the tool-bearing path goes through executeToolPhase).
  // v1.13.1-C: include result.reasoning so reasoning-channel models capture
  // a kind='reasoning' part alongside the text.
  // TODO(v1.13.1): wrap the UPDATE above and this insertParts in a single
  // sql.begin before flipping read authority to message_parts.
  const baseParts: PartInsert[] = partsFromAssistantMessage({
    content,
    tool_calls: null,
    reasoning: result.reasoning,
  }).map((p) => ({
    ...p,
    message_id: assistantMessageId,
  }));

  // v1.14.x-html-artifact-panes: opportunistic HTML detection. Adds a
  // SIBLING html_artifact part — never replaces the text part. 1MB cap is
  // graceful: oversized payloads are skipped and the assistant message
  // lands as plain content (warn logged).
  const htmlContent = detectHtmlArtifact(content);
  if (htmlContent !== null) {
    const decision = decideHtmlArtifactWrite(htmlContent);
    if (!decision.write) {
      ctx.log.warn(
        { assistantMessageId, byteLen: decision.byteLen, cap: HTML_ARTIFACT_MAX_BYTES },
        'html_artifact exceeded 1MB cap; skipping artifact part',
      );
    } else {
      const title = deriveHtmlTitle(htmlContent);
      // Next sequence is one past the highest existing part (0 when there are none).
      const nextSeq = baseParts.reduce((m, p) => Math.max(m, p.sequence), -1) + 1;
      baseParts.push({
        message_id: assistantMessageId,
        sequence: nextSeq,
        kind: 'html_artifact',
        payload: {
          html_content: htmlContent,
          char_count: htmlContent.length,
          title,
        },
      });
    }
  }
  await insertParts(ctx.sql, baseParts);

  // v1.11: flag for compaction on the terminal turn too. Catches the common
  // case of a turn that hit the limit without invoking tools.
  await maybeFlagForCompaction(ctx, chatId, updated);

  const [completeSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions
    SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  if (completeSessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: completeSessRow.project_id,
      name: completeSessRow.name,
      updated_at: completeSessRow.updated_at,
    });
  } else {
    // The UPDATE matched no session row. The message is already persisted as
    // complete, so keep going and still emit chat_status / message_complete
    // rather than throwing on an undefined row.
    ctx.log.warn(
      { sessionId, chatId, assistantMessageId },
      'session row not found; skipping session_updated'
    );
  }

  ctx.publishUser({
    type: 'chat_status',
    chat_id: chatId,
    status: 'idle',
    at: new Date().toISOString(),
  });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
    chat_id: chatId,
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
    cache_tokens: cacheReadTokens ?? null,
    reasoning_tokens: reasoningTokens ?? null,
    started_at: startedAt,
    finished_at: updated?.finished_at ?? null,
    model: session.model,
  });

  ctx.log.info(
    {
      sessionId,
      chatId,
      assistantMessageId,
      finishReason,
      chars: content.length,
      tokens_used: updated?.tokens_used,
      ctx_used: updated?.ctx_used,
    },
    'inference complete'
  );
}