// v1.13.13: forced second-inference synthesis pass for codecontext
// overview/analysis tools. Triggered from tool-phase.ts after a codecontext
// tool call lands and BEFORE the normal recursive runAssistantTurn fires.
//
// Inputs to the synthesis stream:
//   1. The codecontext tool's result text.
//   2. Top-N source files referenced in that text, fetched via view_file.
//   3. Project documentation auto-fetched from the repo root.
//   4. The original user message that triggered the turn.
//
// Output: a NEW assistant message whose sole part is kind='synthesis'.
// Streams to the client as deltas exactly like a normal assistant turn.
//
// Failure modes (all but user-abort fall through to recursive runAssistantTurn):
//   - SYNTHESIS_TOOLS membership check fails -> return false immediately.
//   - File-fetch / doc-fetch errors -> silent skip, continue with what we have.
//   - Stream error / timeout -> mark synth message status='failed', return false.
//   - User-abort -> mark synth message status='failed' and re-throw so the
//     outer abort handler runs.

import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { TOOLS_BY_NAME } from './tools.js';
import { streamCompletion } from './inference/stream-phase.js';
import { SYNTHESIS_SYSTEM_PROMPT } from './synthesisPrompt.js';
import { insertParts } from './inference/parts.js';
import * as modelContext from './model-context.js';
import { readTruncation } from './truncate.js';
import type { Session } from '../types/api.js';
import type { OpenAiMessage } from './inference/payload.js';
import type { InferenceContext, TurnArgs } from './inference/turn.js';

/** Tool names whose results trigger the synthesis pass (see runSynthesisPass). */
export const SYNTHESIS_TOOLS: ReadonlySet<string> = new Set([
  'get_codebase_overview',
  'get_framework_analysis',
  'get_semantic_neighborhoods',
]);

// Maximum number of referenced files fetched, and per-file / per-doc line caps
// passed to view_file as end_line.
const TOP_N_FILES = 5;
const FILE_LINE_CAP = 200;
const DOC_LINE_CAP = 500;

// Token budget for the auto-fetched content (files + docs combined). Estimated
// via chars/4 — a rough but stable proxy that doesn't require a tokenizer dep.
const TOKEN_BUDGET = 32_000;
const CHARS_PER_TOKEN = 4;

// 90s per synthesis call. Long enough for a thoughtful overview against a
// large auto-fetched payload; short enough that a hung upstream falls through
// to the normal recursive turn within a typical user attention window.
const SYNTH_TIMEOUT_MS = 90_000;

// File-extension regex for referenced-file extraction. Limited to source-
// language extensions so we don't pull in lockfiles, images, etc.
const FILE_PATH_RE =
  /(?:^|[`'"<\s$\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<$\]\s,;:]|$)/gm;

/** Everything runSynthesisPass needs from the calling tool phase. */
export interface SynthesisParams {
  ctx: InferenceContext;
  args: TurnArgs;
  session: Session;
  projectRoot: string;
  toolName: string;
  toolResultText: string;
  // v1.13.15-b: when codecontext's wrapper hit its 32k inline-truncation
  // limit, we expand the full content via readTruncation for reference-file
  // extraction only. toolResultText (the truncated head) still ships to the
  // synth model — preserves the 32k payload-budget contract.
  truncated?: boolean;
  // opaque id (tr_<…>), not a filesystem path — see truncate.ts naming note
  outputPath?: string;
}

/** A referenced source file and the content view_file returned for it. */
interface FetchedFile {
  path: string;
  content: string;
}

/** Project docs found in the project root; absent keys were missing or empty. */
interface DocsCollection {
  boochat?: string;
  agents?: string;
  context?: string;
  roadmap?: string;
}

/**
 * Runs the forced synthesis pass for a codecontext tool result.
 *
 * Builds a payload from the tool output, the top referenced files, the project
 * docs and the latest user message, then streams a completion into a new
 * assistant message whose single part has kind='synthesis'.
 *
 * @param p - Context, turn arguments and tool result for this pass.
 * @returns `true` when the synthesis message completed; `false` when the
 *   caller should fall through to the normal recursive turn (tool not in
 *   SYNTHESIS_TOOLS, no user message, timeout, or any non-abort failure).
 * @throws Re-throws an `AbortError` that was not caused by the synthesis
 *   timeout (i.e. a user-initiated abort), after marking the row failed.
 */
export async function runSynthesisPass(p: SynthesisParams): Promise<boolean> {
  if (!SYNTHESIS_TOOLS.has(p.toolName)) return false;

  // Tracked outside the try so the catch block can clean up a half-written row.
  let synthMessageId: string | null = null;
  let accumulated = '';
  let timedOut = false;

  const synthCtrl = new AbortController();
  const timer = setTimeout(() => {
    timedOut = true;
    synthCtrl.abort();
  }, SYNTH_TIMEOUT_MS);

  try {
    const userMessage = await fetchOriginalUserMessage(p.ctx, p.args.chatId);
    if (!userMessage) {
      p.ctx.log.warn({ chatId: p.args.chatId }, 'synthesis: no user message found; falling through');
      return false;
    }

    // v1.13.15-b: when the tool result was inline-truncated by the wrapper
    // (32k cap, see codecontext_client.ts:114), expand the full content from
    // tmpfs for reference-file extraction. The synth payload still ships the
    // truncated head (see buildPayload call below) so the token-budget
    // contract holds. Graceful degradation: if readTruncation returns null
    // (missing id, ENOENT) or throws, fall back to the truncated head.
    let extractionSource = p.toolResultText;
    if (p.truncated && p.outputPath) {
      try {
        const full = await readTruncation(p.outputPath);
        if (full !== null) {
          extractionSource = full;
          p.ctx.log.info(
            {
              chatId: p.args.chatId,
              toolName: p.toolName,
              originalChars: p.toolResultText.length,
              fullChars: full.length,
            },
            'synthesis: expanded truncated tool output',
          );
        }
      } catch (err) {
        p.ctx.log.warn(
          { chatId: p.args.chatId, toolName: p.toolName, err: String(err) },
          'synthesis: readTruncation failed, using truncated output',
        );
      }
    }

    const refFiles = extractReferencedFiles(extractionSource);
    const files = await fetchTopFiles(refFiles, p.projectRoot);
    const docs = await fetchProjectDocs(p.projectRoot);
    const { files: budgetedFiles, docs: budgetedDocs } = applyTokenBudget(files, docs);

    const synthMessages = buildPayload(
      p.toolName,
      // Truncated head only — full content was used for reference extraction above
      p.toolResultText,
      budgetedFiles,
      budgetedDocs,
      userMessage,
    );

    // Insert + announce the synthesis assistant message. From here on, any
    // exception must clean up via the catch block so the row doesn't linger
    // in 'streaming' status (the 5min stale-streaming sweeper catches it
    // eventually, but explicit cleanup is better).
    const [synthRow] = await p.ctx.sql<
      { id: string; started_at: string }[]
    >`
      INSERT INTO messages (session_id, chat_id, role, content, status, started_at, created_at)
      VALUES (${p.args.sessionId}, ${p.args.chatId}, 'assistant', '', 'streaming', clock_timestamp(), clock_timestamp())
      RETURNING id, started_at
    `;
    // Explicit guard instead of non-null assertions: a missing row is handled
    // by the catch block below like any other pre-stream failure.
    if (!synthRow) {
      throw new Error('synthesis: INSERT returned no row');
    }
    const messageId = synthRow.id;
    const startedAt = synthRow.started_at;
    synthMessageId = messageId;

    p.ctx.publish(p.args.sessionId, {
      type: 'message_started',
      message_id: messageId,
      chat_id: p.args.chatId,
      role: 'assistant',
    });

    // Combine the user-abort signal with our synthesis-specific timeout so
    // either fires correctly. The `timedOut` flag in scope tells us which one
    // tripped after streamCompletion throws.
    const combinedSignal: AbortSignal | undefined = p.args.signal
      ? AbortSignal.any([p.args.signal, synthCtrl.signal])
      : synthCtrl.signal;

    // Forward each delta to the client and keep a running copy so a failed
    // stream can still persist whatever text arrived.
    const onDelta = (delta: string): void => {
      accumulated += delta;
      p.ctx.publish(p.args.sessionId, {
        type: 'delta',
        message_id: messageId,
        chat_id: p.args.chatId,
        content: delta,
      });
    };

    const streamResult = await streamCompletion(
      p.ctx,
      p.session.model,
      synthMessages,
      { tools: null },
      onDelta,
      undefined,
      combinedSignal,
    );

    const mctx = await modelContext.getModelContext(p.session.model);
    const nCtx = mctx?.n_ctx ?? null;

    const [updated] = await p.ctx.sql<
      {
        tokens_used: number | null;
        ctx_used: number | null;
        ctx_max: number | null;
        finished_at: string | null;
      }[]
    >`
      UPDATE messages
      SET content = ${streamResult.content},
          status = 'complete',
          tokens_used = ${streamResult.completionTokens},
          ctx_used = ${streamResult.promptTokens},
          ctx_max = ${nCtx},
          finished_at = clock_timestamp()
      WHERE id = ${messageId}
      RETURNING tokens_used, ctx_used, ctx_max, finished_at
    `;

    await insertParts(p.ctx.sql, [
      {
        message_id: messageId,
        sequence: 0,
        kind: 'synthesis',
        payload: { text: streamResult.content },
      },
    ]);

    p.ctx.publish(p.args.sessionId, {
      type: 'message_complete',
      message_id: messageId,
      chat_id: p.args.chatId,
      tokens_used: updated?.tokens_used ?? null,
      ctx_used: updated?.ctx_used ?? null,
      ctx_max: updated?.ctx_max ?? null,
      started_at: startedAt,
      finished_at: updated?.finished_at ?? null,
      model: p.session.model,
    });

    p.ctx.publishUser({
      type: 'chat_status',
      chat_id: p.args.chatId,
      status: 'idle',
      at: new Date().toISOString(),
    });

    p.ctx.log.info(
      {
        chatId: p.args.chatId,
        synthMessageId,
        toolName: p.toolName,
        chars: streamResult.content.length,
        files: budgetedFiles.length,
      },
      'synthesis pass complete',
    );
    return true;
  } catch (err) {
    // Best-effort cleanup: a failing cleanup UPDATE is logged, never thrown,
    // so it cannot mask the original error handling below.
    await markSynthFailed(p, synthMessageId, accumulated).catch((cleanupErr) => {
      p.ctx.log.warn({ cleanupErr: String(cleanupErr) }, 'synthesis cleanup UPDATE failed');
    });

    if (err instanceof Error && err.name === 'AbortError') {
      if (timedOut) {
        p.ctx.log.warn(
          { toolName: p.toolName, chatId: p.args.chatId },
          'synthesis pass timed out; falling through to recursive turn',
        );
        return false;
      }
      // User-initiated abort: propagate so the outer error handler marks the
      // parent turn cancelled. The synth message is already marked failed by
      // markSynthFailed above.
      throw err;
    }

    p.ctx.log.warn(
      { err: String(err), toolName: p.toolName, chatId: p.args.chatId },
      'synthesis pass failed; falling through to recursive turn',
    );
    return false;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Marks the synthesis message row as failed and tells the client it ended.
 * No-op when the row was never inserted.
 *
 * @param p - Synthesis parameters (for sql, publish, session and chat ids).
 * @param synthMessageId - Id of the inserted row, or null if none exists yet.
 * @param accumulated - Text streamed so far; stored as the row's content.
 */
async function markSynthFailed(
  p: SynthesisParams,
  synthMessageId: string | null,
  accumulated: string,
): Promise<void> {
  if (synthMessageId === null) return;

  await p.ctx.sql`
    UPDATE messages
    SET content = ${accumulated},
        status = 'failed',
        finished_at = clock_timestamp()
    WHERE id = ${synthMessageId}
  `;

  // Republish so the frontend's live state flips from 'streaming' to
  // terminal. message_complete carries no error reason — the row's status
  // column is the truth. The 5-state chat_status dot has 'error' but we
  // don't fire that here because the broader inference is about to retry
  // via recursion; flipping the user-channel status to 'error' would race
  // the recursive turn's 'streaming' announcement.
  p.ctx.publish(p.args.sessionId, {
    type: 'message_complete',
    message_id: synthMessageId,
    chat_id: p.args.chatId,
    model: p.session.model,
  });
}

/**
 * Returns the content of the most recently created user message in the chat,
 * or null when the chat has no user message.
 */
async function fetchOriginalUserMessage(
  ctx: InferenceContext,
  chatId: string,
): Promise<string | null> {
  const rows = await ctx.sql<{ content: string }[]>`
    SELECT content
    FROM messages
    WHERE chat_id = ${chatId} AND role = 'user'
    ORDER BY created_at DESC
    LIMIT 1
  `;
  return rows[0]?.content ?? null;
}

/**
 * Extracts file paths matching FILE_PATH_RE from `text`, de-duplicated and in
 * first-occurrence order. Paths under node_modules, /dist/, /test/, /tests/
 * and *.test.* / *.spec.* files are skipped.
 */
function extractReferencedFiles(text: string): string[] {
  const seen = new Set<string>();
  const order: string[] = [];

  // matchAll iterates over a private copy of the regex, so the shared
  // module-level /g regex's lastIndex is never mutated by this function.
  for (const match of text.matchAll(FILE_PATH_RE)) {
    const candidate = match[1];
    if (candidate === undefined || seen.has(candidate)) continue;

    if (
      candidate.includes('node_modules') ||
      candidate.includes('/dist/') ||
      candidate.includes('/test/') ||
      candidate.includes('/tests/') ||
      /\.(test|spec)\.[a-z]+$/.test(candidate)
    ) {
      continue;
    }

    seen.add(candidate);
    order.push(candidate);
  }
  return order;
}

/**
 * Fetches the first TOP_N_FILES referenced paths through the view_file tool,
 * capped at FILE_LINE_CAP lines each. Files that fail to load or come back
 * empty are omitted, so the result may hold fewer than TOP_N_FILES entries.
 *
 * @param refs - Candidate paths in priority order.
 * @param projectRoot - Base for relative paths; also passed to the tool.
 */
async function fetchTopFiles(refs: string[], projectRoot: string): Promise<FetchedFile[]> {
  const tool = TOOLS_BY_NAME['view_file'];
  if (!tool) return [];

  const out: FetchedFile[] = [];
  for (const ref of refs.slice(0, TOP_N_FILES)) {
    const absPath = ref.startsWith('/') ? ref : join(projectRoot, ref);
    try {
      const r = await tool.execute({ path: absPath, end_line: FILE_LINE_CAP }, projectRoot);
      const content = (r as { content?: string }).content ?? '';
      if (content) out.push({ path: ref, content });
    } catch {
      // path-scope blocked, secret-filtered, file too large, or missing —
      // skip silently. The remaining files (or none) still produce a
      // meaningful synthesis input.
    }
  }
  return out;
}

/**
 * Loads BOOCHAT.md, AGENTS.md, CONTEXT.md and the first *roadmap*.md from the
 * project root through the view_file tool, capped at DOC_LINE_CAP lines each.
 * Missing or unreadable docs are simply left out of the result.
 */
async function fetchProjectDocs(projectRoot: string): Promise<DocsCollection> {
  const tool = TOOLS_BY_NAME['view_file'];
  if (!tool) return {};

  const docs: DocsCollection = {};

  for (const [filename, key] of [
    ['BOOCHAT.md', 'boochat'],
    ['AGENTS.md', 'agents'],
    ['CONTEXT.md', 'context'],
  ] as const) {
    try {
      const r = await tool.execute(
        { path: join(projectRoot, filename), end_line: DOC_LINE_CAP },
        projectRoot,
      );
      const content = (r as { content?: string }).content;
      if (content) docs[key] = content;
    } catch {
      // missing doc — skip
    }
  }

  // Case-insensitive *roadmap*.md glob. readdir() order is filesystem-
  // dependent, so sort first to make the pick deterministic (first match in
  // code-unit order); typical projects have at most one roadmap doc.
  try {
    const entries = await fs.readdir(projectRoot);
    const roadmap = [...entries]
      .sort()
      .find((e) => /roadmap/i.test(e) && e.toLowerCase().endsWith('.md'));
    if (roadmap) {
      const r = await tool.execute(
        { path: join(projectRoot, roadmap), end_line: DOC_LINE_CAP },
        projectRoot,
      );
      const content = (r as { content?: string }).content;
      if (content) docs.roadmap = content;
    }
  } catch {
    // unreadable project root — skip
  }

  return docs;
}

/** Rough token estimate: ceil(chars / CHARS_PER_TOKEN); 0 for empty/undefined. */
function estTokens(s: string | undefined): number {
  return s ? Math.ceil(s.length / CHARS_PER_TOKEN) : 0;
}

/**
 * Trims the fetched files and docs until their estimated token total fits
 * TOKEN_BUDGET. Inputs are not mutated; when already within budget they are
 * returned as-is.
 *
 * @param files - Fetched files in priority order (index 0 = highest).
 * @param docs - Fetched project docs.
 * @returns The files and docs that survived the budget.
 */
function applyTokenBudget(
  files: FetchedFile[],
  docs: DocsCollection,
): { files: FetchedFile[]; docs: DocsCollection } {
  let total = 0;
  for (const f of files) total += estTokens(f.content);
  total +=
    estTokens(docs.boochat) +
    estTokens(docs.agents) +
    estTokens(docs.context) +
    estTokens(docs.roadmap);

  if (total <= TOKEN_BUDGET) return { files, docs };

  // Drop priority (lowest priority dropped first):
  //   1. top-2..N files (keep top-1)
  //   2. top-1 file
  //   3. roadmap (+ CONTEXT.md grouped here — dispatch listed roadmap above
  //      AGENTS.md, CONTEXT.md was not in the priority list)
  //   4. AGENTS.md
  //   5. BOOCHAT.md (never dropped — truncate to budget if alone exceeds)
  let outFiles = files.slice();
  const outDocs: DocsCollection = { ...docs };

  while (total > TOKEN_BUDGET && outFiles.length > 1) {
    const last = outFiles.pop()!;
    total -= estTokens(last.content);
  }
  if (total <= TOKEN_BUDGET) return { files: outFiles, docs: outDocs };

  if (outFiles[0]) {
    total -= estTokens(outFiles[0].content);
    outFiles = [];
  }
  if (total <= TOKEN_BUDGET) return { files: outFiles, docs: outDocs };

  // roadmap and CONTEXT.md are dropped together, with no budget check between.
  if (outDocs.roadmap) {
    total -= estTokens(outDocs.roadmap);
    delete outDocs.roadmap;
  }
  if (outDocs.context) {
    total -= estTokens(outDocs.context);
    delete outDocs.context;
  }
  if (total <= TOKEN_BUDGET) return { files: outFiles, docs: outDocs };

  if (outDocs.agents) {
    total -= estTokens(outDocs.agents);
    delete outDocs.agents;
  }
  if (total <= TOKEN_BUDGET) return { files: outFiles, docs: outDocs };

  if (outDocs.boochat) {
    const maxChars = TOKEN_BUDGET * CHARS_PER_TOKEN;
    if (outDocs.boochat.length > maxChars) {
      outDocs.boochat = outDocs.boochat.slice(0, maxChars);
    }
  }
  return { files: outFiles, docs: outDocs };
}

/**
 * Assembles the two-message synthesis payload: the fixed system prompt plus a
 * single user message made of markdown sections (tool output, fetched files,
 * project docs, original question) joined by blank lines.
 *
 * @param toolName - Name shown in the tool-output heading.
 * @param toolResultText - Tool output text to embed.
 * @param files - Files to embed; the section is omitted when empty.
 * @param docs - Docs to embed; the section is omitted when none are present.
 * @param userMessage - The user's original question.
 */
function buildPayload(
  toolName: string,
  toolResultText: string,
  files: FetchedFile[],
  docs: DocsCollection,
  userMessage: string,
): OpenAiMessage[] {
  const sections: string[] = [];

  sections.push(`## Codecontext tool output (${toolName})\n\n${toolResultText}`);

  if (files.length > 0) {
    sections.push(`---\n\n## Auto-fetched source files`);
    for (const f of files) {
      sections.push(`### ${f.path}\n\n\`\`\`\n${f.content}\n\`\`\``);
    }
  }

  const docEntries: Array<[string, string | undefined]> = [
    ['BOOCHAT.md', docs.boochat],
    ['AGENTS.md', docs.agents],
    ['CONTEXT.md', docs.context],
    ['roadmap', docs.roadmap],
  ];
  const presentDocs = docEntries.filter(([, v]) => Boolean(v));
  if (presentDocs.length > 0) {
    sections.push(`---\n\n## Project documentation`);
    for (const [name, v] of presentDocs) {
      sections.push(`### ${name}\n\n${v!}`);
    }
  }

  sections.push(`---\n\n## Original user question\n\n${userMessage}`);

  return [
    { role: 'system', content: SYNTHESIS_SYSTEM_PROMPT },
    { role: 'user', content: sections.join('\n\n') },
  ];
}