boocode/apps/server/src/services/synthesisPipeline.ts

// v1.13.13: forced second-inference synthesis pass for codecontext
// overview/analysis tools. Triggered from tool-phase.ts after a codecontext
// tool call lands and BEFORE the normal recursive runAssistantTurn fires.
//
// Inputs to the synthesis stream:
//   1. The codecontext tool's result text.
//   2. Top-N source files referenced in that text, fetched via view_file.
//   3. Project documentation auto-fetched from the repo root.
//   4. The original user message that triggered the turn.
//
// Output: a NEW assistant message whose sole part is kind='synthesis'.
// Streams to the client as deltas exactly like a normal assistant turn.
//
// Failure modes (all except user-abort fall through to recursive runAssistantTurn):
//   - SYNTHESIS_TOOLS membership check fails -> return false immediately.
//   - File-fetch / doc-fetch errors -> silent skip, continue with what we have.
//   - Stream error / timeout -> mark synth message status='failed', return false.
//   - User-abort -> mark synth message status='failed' and re-throw so the
//     outer abort handler runs (it marks the parent turn cancelled).

import { promises as fs } from 'node:fs';
import { join } from 'node:path';

import { TOOLS_BY_NAME } from './tools.js';
import { streamCompletion } from './inference/stream-phase.js';
import { SYNTHESIS_SYSTEM_PROMPT } from './synthesisPrompt.js';
import { insertParts } from './inference/parts.js';
import { finalizeStreamedRow } from './inference/error-handler.js';
import { readTruncation } from './truncate.js';

import type { Session } from '../types/api.js';
import type { OpenAiMessage } from './inference/payload.js';
import type { InferenceContext, TurnArgs } from './inference/types.js';

/**
 * Names of the codecontext tools whose results trigger a synthesis pass.
 * `runSynthesisPass` returns `false` right away for any tool not listed here.
 */
export const SYNTHESIS_TOOLS: ReadonlySet<string> = new Set<string>()
  .add('get_codebase_overview')
  .add('get_framework_analysis')
  .add('get_semantic_neighborhoods');

// How many referenced source files are auto-fetched, and the `end_line` cap
// passed to view_file for each of them.
const TOP_N_FILES = 5;
const FILE_LINE_CAP = 200;
// `end_line` cap passed to view_file for each auto-fetched project document.
const DOC_LINE_CAP = 500;
// Combined token allowance for the auto-fetched content (source files plus
// docs). Tokens are approximated as chars / CHARS_PER_TOKEN: coarse, but
// stable, and it avoids taking on a tokenizer dependency.
const CHARS_PER_TOKEN = 4;
const TOKEN_BUDGET = 32 * 1_000;
// Synthesis timeout: 90 seconds. Generous enough for a considered overview
// over a large auto-fetched payload, yet short enough that a hung upstream
// drops back to the normal recursive turn while the user is still waiting.
const SYNTH_TIMEOUT_MS = 90 * 1_000;

// Regex for referenced-file extraction. Matches path-looking tokens that end
// in an allow-listed extension:
//   - Leading delimiter (consumed, not captured): start of a line, whitespace,
//     or one of ` ' " < ( [
//   - Capture group 1: the path itself — letters, digits and _ . / @ - —
//     ending in one of the listed extensions (source code plus md / json /
//     yaml / yml / sql / sh / html / css). Images and other binary assets
//     never match; note that *.json / *.yaml lockfiles still do.
//   - Trailing lookahead: one of ` ' " < ) ] , ; : or whitespace, or end of a
//     line. A path immediately followed by "." (e.g. at the end of a
//     sentence) is therefore not matched.
// The `g` flag makes this object stateful (lastIndex) when driven via exec();
// the `m` flag lets ^ and $ anchor at every line of the scanned text.
const FILE_PATH_RE =
  /(?:^|[`'"<\s\(\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<\)\]\s,;:]|$)/gm;

/** Inputs for one `runSynthesisPass` invocation. */
export interface SynthesisParams {
  /** Inference context; this module uses its `sql`, `log`, `publish` and `publishUser` members. */
  ctx: InferenceContext;
  /** Turn arguments; this module reads `chatId`, `sessionId` and the optional abort `signal`. */
  args: TurnArgs;
  /** Session the turn belongs to; `session.model` is the model used for the synthesis stream. */
  session: Session;
  /** Directory that relative file references and project docs are resolved against. */
  projectRoot: string;
  /** Name of the tool that just ran; the pass only proceeds if it is in SYNTHESIS_TOOLS. */
  toolName: string;
  /** Tool result text that is embedded in the synthesis payload. */
  toolResultText: string;
  // v1.13.15-b: when codecontext's wrapper hit its 32k inline-truncation
  // limit, we expand the full content via readTruncation for reference-file
  // extraction only. toolResultText (the truncated head) still ships to the
  // synth model — preserves the 32k payload-budget contract.
  truncated?: boolean;
  // opaque id (tr_<…>), not a filesystem path — see truncate.ts naming note
  outputPath?: string;
}

/**
 * One auto-fetched source file: the path as it was referenced in the tool
 * output, together with the text returned by view_file.
 */
type FetchedFile = {
  path: string;
  content: string;
};

/**
 * Project documentation gathered from the repo root. Every entry is optional
 * and is only set when the corresponding file yielded content:
 *   boochat -> BOOCHAT.md, agents -> AGENTS.md, context -> CONTEXT.md,
 *   roadmap -> a root-level *roadmap*.md file.
 */
type DocsCollection = Partial<Record<'boochat' | 'agents' | 'context' | 'roadmap', string>>;

/**
 * Runs the forced synthesis pass for a codecontext tool result.
 *
 * Flow: look up the latest user message, extract and fetch referenced source
 * files plus project docs, trim them to the token budget, insert a new
 * assistant message row in 'streaming' status, stream the completion into it,
 * then finalize the row with a single kind='synthesis' part.
 *
 * @param p - Context, turn arguments and tool result for this pass.
 * @returns `true` when the synthesis message was streamed and finalized;
 *   `false` when the tool is not in SYNTHESIS_TOOLS, no user message was
 *   found, the pass timed out, or any non-abort error occurred.
 * @throws Re-throws an `AbortError` that was not caused by this function's
 *   own timeout, after the synthesis row has been marked failed.
 */
export async function runSynthesisPass(p: SynthesisParams): Promise<boolean> {
  if (!SYNTHESIS_TOOLS.has(p.toolName)) return false;

  // Declared outside the try so the catch block can see how far the pass got:
  // the row id (null until the INSERT succeeds) and the text streamed so far.
  let synthMessageId: string | null = null;
  let accumulated = '';
  let timedOut = false;
  const synthCtrl = new AbortController();
  // The timer is armed before any fetching, so time spent on the lookups
  // below counts against SYNTH_TIMEOUT_MS. The abort itself is only observed
  // through the signal handed to streamCompletion.
  const timer = setTimeout(() => {
    timedOut = true;
    synthCtrl.abort();
  }, SYNTH_TIMEOUT_MS);

  try {
    const userMessage = await fetchOriginalUserMessage(p.ctx, p.args.chatId);
    if (!userMessage) {
      p.ctx.log.warn({ chatId: p.args.chatId }, 'synthesis: no user message found; falling through');
      return false;
    }

    // v1.13.15-b: when the tool result was inline-truncated by the wrapper
    // (32k cap, see codecontext_client.ts:114), expand the full content from
    // tmpfs for reference-file extraction. The synth payload still ships the
    // truncated head (see buildPayload call below) so the token-budget
    // contract holds. Graceful degradation: if readTruncation returns null
    // (missing id, ENOENT) or throws, fall back to the truncated head.
    let extractionSource = p.toolResultText;
    if (p.truncated && p.outputPath) {
      try {
        const full = await readTruncation(p.outputPath);
        if (full !== null) {
          extractionSource = full;
          p.ctx.log.info(
            {
              chatId: p.args.chatId,
              toolName: p.toolName,
              originalChars: p.toolResultText.length,
              fullChars: full.length,
            },
            'synthesis: expanded truncated tool output',
          );
        }
      } catch (err) {
        p.ctx.log.warn(
          { chatId: p.args.chatId, toolName: p.toolName, err: String(err) },
          'synthesis: readTruncation failed, using truncated output',
        );
      }
    }

    // Gather the auto-fetched context, then trim it to the token budget
    // before assembling the two-message (system + user) payload.
    const refFiles = extractReferencedFiles(extractionSource);
    const files = await fetchTopFiles(refFiles, p.projectRoot);
    const docs = await fetchProjectDocs(p.projectRoot);
    const { files: budgetedFiles, docs: budgetedDocs } = applyTokenBudget(files, docs);
    const synthMessages = buildPayload(
      p.toolName,
      // Truncated head only — full content was used for reference extraction above
      p.toolResultText,
      budgetedFiles,
      budgetedDocs,
      userMessage,
    );

    // Insert + announce the synthesis assistant message. From here on, any
    // exception must clean up via the catch block so the row doesn't linger
    // in 'streaming' status (the 5min stale-streaming sweeper catches it
    // eventually, but explicit cleanup is better).
    const [synthRow] = await p.ctx.sql<
      { id: string; started_at: string }[]
    >`
      INSERT INTO messages (session_id, chat_id, role, content, status, started_at, created_at)
      VALUES (${p.args.sessionId}, ${p.args.chatId}, 'assistant', '', 'streaming', clock_timestamp(), clock_timestamp())
      RETURNING id, started_at
    `;
    synthMessageId = synthRow!.id;
    const startedAt = synthRow!.started_at;

    p.ctx.publish(p.args.sessionId, {
      type: 'message_started',
      message_id: synthMessageId,
      chat_id: p.args.chatId,
      role: 'assistant',
    });

    // Combine the user-abort signal with our synthesis-specific timeout so
    // either fires correctly. The `timedOut` flag in scope tells us which one
    // tripped after streamCompletion throws.
    const combinedSignal: AbortSignal | undefined = p.args.signal
      ? AbortSignal.any([p.args.signal, synthCtrl.signal])
      : synthCtrl.signal;

    // Forward each streamed chunk to the client and keep a running copy so a
    // failure mid-stream can still persist the partial text.
    const onDelta = (delta: string): void => {
      accumulated += delta;
      p.ctx.publish(p.args.sessionId, {
        type: 'delta',
        message_id: synthMessageId!,
        chat_id: p.args.chatId,
        content: delta,
      });
    };

    // NOTE(review): `{ tools: null }` presumably disables tool calling for
    // the synthesis stream — confirm against streamCompletion's contract.
    const streamResult = await streamCompletion(
      p.ctx,
      p.session.model,
      synthMessages,
      { tools: null },
      onDelta,
      undefined,
      combinedSignal,
    );

    // P5: the n_ctx lookup + complete UPDATE + message_complete frame are the
    // shared success-finalize atom (finalizeStreamedRow). beforeComplete writes
    // the kind='synthesis' part in the original order (UPDATE → insertParts →
    // message_complete), preserving timing exactly.
    await finalizeStreamedRow(p.ctx, {
      sessionId: p.args.sessionId,
      chatId: p.args.chatId,
      messageId: synthMessageId,
      model: p.session.model,
      content: streamResult.content,
      completionTokens: streamResult.completionTokens,
      promptTokens: streamResult.promptTokens,
      startedAt,
      beforeComplete: () =>
        insertParts(p.ctx.sql, [
          {
            message_id: synthMessageId!,
            sequence: 0,
            kind: 'synthesis',
            payload: { text: streamResult.content },
          },
        ]),
    });
    p.ctx.publishUser({
      type: 'chat_status',
      chat_id: p.args.chatId,
      status: 'idle',
      at: new Date().toISOString(),
    });
    p.ctx.log.info(
      {
        chatId: p.args.chatId,
        synthMessageId,
        toolName: p.toolName,
        chars: streamResult.content.length,
        files: budgetedFiles.length,
      },
      'synthesis pass complete',
    );
    return true;
  } catch (err) {
    // Cleanup runs for every failure path. markSynthFailed is a no-op while
    // synthMessageId is still null, and a failing cleanup is only logged so
    // it cannot mask the original error handling below.
    await markSynthFailed(p, synthMessageId, accumulated).catch((cleanupErr) => {
      p.ctx.log.warn({ cleanupErr: String(cleanupErr) }, 'synthesis cleanup UPDATE failed');
    });
    if (err instanceof Error && err.name === 'AbortError') {
      if (timedOut) {
        p.ctx.log.warn(
          { toolName: p.toolName, chatId: p.args.chatId },
          'synthesis pass timed out; falling through to recursive turn',
        );
        return false;
      }
      // User-initiated abort: propagate so the outer error handler marks the
      // parent turn cancelled. The synth message is already marked failed by
      // markSynthFailed above.
      throw err;
    }
    p.ctx.log.warn(
      { err: String(err), toolName: p.toolName, chatId: p.args.chatId },
      'synthesis pass failed; falling through to recursive turn',
    );
    return false;
  } finally {
    // Always disarm the timeout, on success, failure and early return alike.
    clearTimeout(timer);
  }
}

/**
 * Cleanup for a synthesis message that did not complete: stores whatever
 * text was streamed so far, sets the row to 'failed', and tells the client
 * the message is finished.
 *
 * @param p - Pass parameters (context, turn args, session).
 * @param synthMessageId - Id of the synthesis row, or `null` if it was never inserted.
 * @param accumulated - Text streamed before the failure; saved as the row content.
 */
async function markSynthFailed(
  p: SynthesisParams,
  synthMessageId: string | null,
  accumulated: string,
): Promise<void> {
  // No row was inserted, so there is nothing to repair or announce.
  if (synthMessageId === null) return;

  const { ctx, args, session } = p;
  await ctx.sql`
    UPDATE messages
    SET content = ${accumulated},
        status = 'failed',
        finished_at = clock_timestamp()
    WHERE id = ${synthMessageId}
  `;

  // Move the frontend's live state out of 'streaming'. The message_complete
  // frame has no error reason; the status column on the row is authoritative.
  // The user-channel chat_status is deliberately NOT set to 'error' here:
  // the surrounding inference is about to retry through recursion, and an
  // 'error' status would race that recursive turn's 'streaming' announcement.
  ctx.publish(args.sessionId, {
    type: 'message_complete',
    message_id: synthMessageId,
    chat_id: args.chatId,
    model: session.model,
  });
}

/**
 * Looks up the content of the most recently created user message in a chat.
 *
 * @param ctx - Inference context providing the `sql` tag.
 * @param chatId - Chat whose messages are searched.
 * @returns The message content, or `null` when the chat has no user message.
 */
async function fetchOriginalUserMessage(
  ctx: InferenceContext,
  chatId: string,
): Promise<string | null> {
  const matches = await ctx.sql<{ content: string }[]>`
    SELECT content FROM messages
    WHERE chat_id = ${chatId} AND role = 'user'
    ORDER BY created_at DESC
    LIMIT 1
  `;
  const newest = matches[0];
  if (newest == null) return null;
  return newest.content ?? null;
}

/**
 * Pulls file paths out of tool output, de-duplicated and in first-seen order.
 *
 * Paths pointing at dependency, build, or test locations are skipped:
 *   - anything containing `node_modules`,
 *   - any path with a `dist`, `test`, or `tests` directory segment — including
 *     root-relative forms such as `tests/foo.ts` or `dist/index.js`, which a
 *     plain `'/tests/'` substring check would let through,
 *   - `*.test.*` / `*.spec.*` files.
 *
 * @param text - Raw text to scan with FILE_PATH_RE.
 * @returns Unique candidate paths, exactly as written in `text`.
 */
function extractReferencedFiles(text: string): string[] {
  const excludedDirs = new Set(['dist', 'test', 'tests']);
  const testFileRe = /\.(test|spec)\.[a-z]+$/;

  const isExcluded = (candidate: string): boolean => {
    if (candidate.includes('node_modules')) return true;
    if (testFileRe.test(candidate)) return true;
    // Compare whole directory segments (everything before the file name) so
    // the check works whether or not the path starts with a slash.
    const dirSegments = candidate.split('/').slice(0, -1);
    return dirSegments.some((segment) => excludedDirs.has(segment));
  };

  // A Set keeps insertion order, giving de-duplication and first-seen
  // ordering in one structure.
  const unique = new Set<string>();
  // matchAll iterates over a private clone of the regex, so the shared /g
  // regex's lastIndex is never advanced by this function.
  for (const match of text.matchAll(FILE_PATH_RE)) {
    const candidate = match[1];
    if (candidate === undefined || isExcluded(candidate)) continue;
    unique.add(candidate);
  }
  return [...unique];
}

/**
 * Reads the first TOP_N_FILES referenced paths through the `view_file` tool,
 * requesting at most FILE_LINE_CAP lines of each.
 *
 * Paths that do not start with `/` are resolved against `projectRoot`. Any
 * path whose read throws, or that yields no content, is left out.
 *
 * @param refs - Candidate paths in priority order.
 * @param projectRoot - Root passed to the tool and used to resolve relative paths.
 * @returns The successfully read files, keyed by the path as originally referenced.
 */
async function fetchTopFiles(refs: string[], projectRoot: string): Promise<FetchedFile[]> {
  const viewFile = TOOLS_BY_NAME['view_file'];
  if (!viewFile) return [];

  const fetched: FetchedFile[] = [];
  const candidates = refs.slice(0, TOP_N_FILES);
  for (const ref of candidates) {
    const target = ref.startsWith('/') ? ref : join(projectRoot, ref);
    try {
      const result = await viewFile.execute({ path: target, end_line: FILE_LINE_CAP }, projectRoot);
      const text = (result as { content?: string }).content ?? '';
      if (text) {
        fetched.push({ path: ref, content: text });
      }
    } catch {
      // Blocked by path scoping, filtered as a secret, too large, or simply
      // absent: drop this one quietly. Whatever else was fetched (possibly
      // nothing) is still usable synthesis input.
    }
  }
  return fetched;
}

/**
 * Collects project documentation from the root of the project via the
 * `view_file` tool, requesting at most DOC_LINE_CAP lines per document.
 *
 * Looks for BOOCHAT.md, AGENTS.md and CONTEXT.md by exact name, plus one
 * roadmap document: the first root-level entry (after sorting) whose name
 * contains "roadmap" (case-insensitive) and ends in ".md". Missing or
 * unreadable documents are skipped; this function does not throw for them.
 *
 * @param projectRoot - Directory to read the documents from.
 * @returns The documents that were found; empty if the tool is unavailable.
 */
async function fetchProjectDocs(projectRoot: string): Promise<DocsCollection> {
  const tool = TOOLS_BY_NAME['view_file'];
  if (!tool) return {};

  // Reads one root-level document. Resolves to undefined when the read throws
  // (e.g. the file is missing) or yields no content.
  const readDoc = async (filename: string): Promise<string | undefined> => {
    try {
      const r = await tool.execute(
        { path: join(projectRoot, filename), end_line: DOC_LINE_CAP },
        projectRoot,
      );
      return (r as { content?: string }).content || undefined;
    } catch {
      return undefined;
    }
  };

  const docs: DocsCollection = {};
  for (const [filename, key] of [
    ['BOOCHAT.md', 'boochat'],
    ['AGENTS.md', 'agents'],
    ['CONTEXT.md', 'context'],
  ] as const) {
    const content = await readDoc(filename);
    if (content) docs[key] = content;
  }

  let entries: string[];
  try {
    entries = await fs.readdir(projectRoot);
  } catch {
    // unreadable project root — no roadmap lookup possible
    return docs;
  }

  // Case-insensitive *roadmap*.md match. readdir() does not promise any
  // particular order, so sort the matches (default code-unit order) to make
  // the pick deterministic across platforms when several files qualify.
  const roadmap = entries
    .filter((e) => /roadmap/i.test(e) && e.toLowerCase().endsWith('.md'))
    .sort()[0];
  if (roadmap) {
    const content = await readDoc(roadmap);
    if (content) docs.roadmap = content;
  }
  return docs;
}

/**
 * Rough token estimate for a piece of text: its length divided by
 * CHARS_PER_TOKEN, rounded up. Empty or missing text counts as zero.
 */
function estTokens(s: string | undefined): number {
  if (!s) return 0;
  return Math.ceil(s.length / CHARS_PER_TOKEN);
}

/**
 * Trims the auto-fetched files and docs until their estimated token total
 * fits TOKEN_BUDGET.
 *
 * When everything already fits, the inputs are returned as-is. Otherwise
 * copies are trimmed in this order, stopping as soon as the total fits:
 *   1. files from the end of the list, keeping the first one;
 *   2. the first file;
 *   3. the roadmap doc together with CONTEXT.md;
 *   4. AGENTS.md;
 *   5. BOOCHAT.md is never removed, only cut to TOKEN_BUDGET * CHARS_PER_TOKEN
 *      characters if it is longer than that.
 *
 * @param files - Fetched files in priority order (most important first).
 * @param docs - Fetched project documents.
 * @returns The files and docs that remain after trimming.
 */
function applyTokenBudget(
  files: FetchedFile[],
  docs: DocsCollection,
): { files: FetchedFile[]; docs: DocsCollection } {
  const fileTokens = files.reduce((sum, file) => sum + estTokens(file.content), 0);
  const docTokens =
    estTokens(docs.boochat) + estTokens(docs.agents) + estTokens(docs.context) + estTokens(docs.roadmap);
  let remaining = fileTokens + docTokens;
  if (remaining <= TOKEN_BUDGET) return { files, docs };

  let keptFiles = [...files];
  const keptDocs: DocsCollection = { ...docs };
  const fits = (): boolean => remaining <= TOKEN_BUDGET;
  const snapshot = (): { files: FetchedFile[]; docs: DocsCollection } => ({
    files: keptFiles,
    docs: keptDocs,
  });
  // Removes one doc (if present) and credits its tokens back.
  const dropDoc = (key: keyof DocsCollection): void => {
    const text = keptDocs[key];
    if (!text) return;
    remaining -= estTokens(text);
    delete keptDocs[key];
  };

  // Step 1: shed lower-priority files from the tail, sparing the first.
  while (!fits() && keptFiles.length > 1) {
    const dropped = keptFiles.pop();
    if (dropped) remaining -= estTokens(dropped.content);
  }
  if (fits()) return snapshot();

  // Step 2: give up the top file as well.
  const [topFile] = keptFiles;
  if (topFile) {
    remaining -= estTokens(topFile.content);
    keptFiles = [];
  }
  if (fits()) return snapshot();

  // Step 3: roadmap and CONTEXT.md go together.
  dropDoc('roadmap');
  dropDoc('context');
  if (fits()) return snapshot();

  // Step 4: AGENTS.md.
  dropDoc('agents');
  if (fits()) return snapshot();

  // Step 5: only BOOCHAT.md can be left; clip it rather than drop it.
  const charLimit = TOKEN_BUDGET * CHARS_PER_TOKEN;
  if (keptDocs.boochat && keptDocs.boochat.length > charLimit) {
    keptDocs.boochat = keptDocs.boochat.slice(0, charLimit);
  }
  return snapshot();
}

/**
 * Assembles the two-message payload for the synthesis stream: the synthesis
 * system prompt, followed by a single user message.
 *
 * The user message is a markdown document whose sections are separated by
 * blank lines, in this order: the tool output, the auto-fetched source files
 * (each in a fenced block), the project documentation that is present, and
 * finally the original user question.
 *
 * @param toolName - Tool name shown in the first heading.
 * @param toolResultText - Tool output placed under that heading.
 * @param files - Source files to embed; the section is omitted when empty.
 * @param docs - Project docs to embed; the section is omitted when none are set.
 * @param userMessage - The user's question, placed last.
 */
function buildPayload(
  toolName: string,
  toolResultText: string,
  files: FetchedFile[],
  docs: DocsCollection,
  userMessage: string,
): OpenAiMessage[] {
  const fileSections: string[] =
    files.length === 0
      ? []
      : [
          `---\n\n## Auto-fetched source files`,
          ...files.map(({ path, content }) => `### ${path}\n\n\`\`\`\n${content}\n\`\`\``),
        ];

  const docSections: string[] = [];
  for (const [label, body] of [
    ['BOOCHAT.md', docs.boochat],
    ['AGENTS.md', docs.agents],
    ['CONTEXT.md', docs.context],
    ['roadmap', docs.roadmap],
  ] as const) {
    if (body) docSections.push(`### ${label}\n\n${body}`);
  }
  // The section heading only appears when at least one doc is present.
  if (docSections.length > 0) {
    docSections.unshift(`---\n\n## Project documentation`);
  }

  const userContent = [
    `## Codecontext tool output (${toolName})\n\n${toolResultText}`,
    ...fileSections,
    ...docSections,
    `---\n\n## Original user question\n\n${userMessage}`,
  ].join('\n\n');

  return [
    { role: 'system', content: SYNTHESIS_SYSTEM_PROMPT },
    { role: 'user', content: userContent },
  ];
}