boocode/apps/coder/src/services/arena-analyzer-helpers.ts

/**
 * Pure, side-effect-free helpers for the Arena analyzer.
 * No DB, no IO, no network — safe to unit-test directly.
 *
 * Covers: digest-prompt assembly, judge-prompt assembly, winner extraction
 * from the judge output, the <2-survivors no-winner rule, and the
 * cross-examination prompt.
 */

// ─── Shared types ─────────────────────────────────────────────────────────────

/**
 * Raw material for a single contestant's digest call; consumed by
 * buildDigestPrompt().
 */
export interface ContestantDigestInput {
  /** Contestant identity; rendered with `model` in the "# Contestant: identity / model" heading. */
  identity: string;
  /** Model name; rendered next to `identity` in the contestant heading. */
  model: string;
  /**
   * The contestant's result text (presumably Markdown, going by the name).
   * Only the first 8,000 characters are placed in the digest prompt.
   */
  resultMd: string;
  /**
   * Optional diff text. When present and non-empty, its first 5,000 characters
   * are placed in the digest prompt inside a ```diff fence.
   */
  diffPatch?: string;
  /** Text inserted verbatim after "Benchmark: " in the digest prompt. */
  benchmarkLine: string;
}

/**
 * One contestant's entry as shown to the judge and the cross-examiner:
 * a heading, a benchmark line, and the digest text.
 */
export interface ContestantDigest {
  /** Contestant identity; rendered with `model` in the "## identity / model" heading. */
  identity: string;
  /** Model name; rendered next to `identity` in the contestant heading. */
  model: string;
  /**
   * Digest text inserted verbatim under the contestant heading
   * (presumably the output of the digest call — confirm against the caller).
   */
  digest: string;
  /** Text inserted verbatim after "Benchmark: " under the contestant heading. */
  benchmarkLine: string;
}

// ─── Digest stage ─────────────────────────────────────────────────────────────

/**
 * Assemble the prompt pair for one contestant's digest call.
 *
 * The user prompt holds a contestant heading, the benchmark line, the result
 * text (first 8,000 characters) and, when a non-empty diff is supplied, the
 * diff (first 5,000 characters) inside a ```diff fence. Keeping each digest
 * call small means the judge later only has to read digests, not raw diffs.
 *
 * @param input - Contestant data to summarise.
 * @returns The `system` and `user` prompt strings.
 */
export function buildDigestPrompt(input: ContestantDigestInput): { system: string; user: string } {
  const { identity, model, benchmarkLine, resultMd, diffPatch } = input;

  // Sentences are joined with a single space to form the system prompt.
  const system = [
    'You are an expert technical analyst evaluating the output of an AI coding or Q&A battle.',
    'Produce a concise structured digest (under 300 words, Markdown bullet points) covering:',
    '(1) correctness and quality, (2) completeness, (3) notable strengths, (4) notable weaknesses or issues.',
    'Do not reference the battle or other contestants — focus only on this submission.',
  ].join(' ');

  // The diff section is only emitted for a non-empty patch.
  const diffSection = diffPatch
    ? ['\n## Code Changes (diff)\n```diff', diffPatch.slice(0, 5_000), '```']
    : [];

  const user = [
    `# Contestant: ${identity} / ${model}`,
    `\nBenchmark: ${benchmarkLine}`,
    '\n## Result\n',
    resultMd.slice(0, 8_000),
    ...diffSection,
  ].join('\n');

  return { system, user };
}

// ─── Judge stage ──────────────────────────────────────────────────────────────

/**
 * Assemble the prompt pair for the comparative judge call.
 *
 * The judge is shown contestant digests (never raw diffs) so its context
 * stays bounded. `digests.length` is treated as the number of contestants
 * that succeeded: when shouldNameWinner() accepts it, the judge is told to
 * emit a `WINNER: <identity>/<model>` line; otherwise it is told to emit
 * `NO_WINNER`. The caller parses the reply with extractWinner().
 *
 * @param originalPrompt - The task prompt; only its first 2,000 characters are included.
 * @param digests - One digest per contestant to compare.
 * @returns The `system` and `user` prompt strings.
 */
export function buildJudgePrompt(
  originalPrompt: string,
  digests: ContestantDigest[],
): { system: string; user: string } {
  let verdictRule: string;
  if (shouldNameWinner(digests.length)) {
    verdictRule = [
      'After your comparative analysis, name the best submission on its own line in this exact format:',
      'WINNER: <identity>/<model>',
      'where <identity> and <model> exactly match the heading above. No other text on that line.',
    ].join('\n');
  } else {
    verdictRule =
      'Fewer than 2 contestants succeeded. Do NOT name a winner. Write the following on its own line:\nNO_WINNER';
  }

  const system = `You are an expert judge for an AI battle. You have received digest summaries of each contestant's work on the same task. Write a comparative analysis, then follow these instructions:\n${verdictRule}`;

  // Each contestant contributes three lines: heading, benchmark, digest body.
  const digestSections = digests.flatMap((entry) => [
    `\n## ${entry.identity} / ${entry.model}`,
    `Benchmark: ${entry.benchmarkLine}`,
    entry.digest,
  ]);

  const user = [
    '# Original Task Prompt\n',
    originalPrompt.slice(0, 2_000),
    '\n# Contestant Digests\n',
    ...digestSections,
    '\n# Instructions\nCompare the contestants and follow the winner-naming instructions above.',
  ].join('\n');

  return { system, user };
}

// ─── No-winner rule ───────────────────────────────────────────────────────────

/**
 * Decide whether a winner may be named.
 *
 * A comparison needs at least two successful contestants; with zero or one
 * success there is nothing meaningful to compare, so no winner is named.
 *
 * @param succeededCount - Number of contestants that produced a result.
 * @returns `true` when `succeededCount` is 2 or more, otherwise `false`.
 */
export function shouldNameWinner(succeededCount: number): boolean {
  const minimumSuccesses = 2;
  return minimumSuccesses <= succeededCount;
}

// ─── Winner extraction ────────────────────────────────────────────────────────

/**
 * Parse the judge's text output and extract the declared winner.
 *
 * Scans line by line for `WINNER: <identity>/<model>`. The keyword is matched
 * case-insensitively and surrounding whitespace is ignored. The first `/`
 * separates identity from model, so a model name may itself contain `/`.
 *
 * A `WINNER:` line that is malformed (nothing after the keyword, no `/`, or an
 * empty identity or model) is skipped rather than ending the scan, so a later
 * well-formed line is still honoured. An explicit `WINNER: NO_WINNER` line is
 * treated as final.
 *
 * @param judgeOutput - Raw text returned by the judge call.
 * @returns The first well-formed winner, or `null` when `WINNER: NO_WINNER`
 *   appears before any well-formed winner line, or when no well-formed winner
 *   line exists at all (including output that only contains a bare `NO_WINNER`).
 */
export function extractWinner(judgeOutput: string): { identity: string; model: string } | null {
  const keyword = 'WINNER:';

  for (const rawLine of judgeOutput.split('\n')) {
    const line = rawLine.trim();
    if (!line.toUpperCase().startsWith(keyword)) continue;

    const declared = line.slice(keyword.length).trim();

    // An explicit no-winner declaration ends the search.
    if (declared.toUpperCase() === 'NO_WINNER') return null;

    // Malformed line (empty, or no identity/model separator): keep scanning
    // instead of giving up, so it cannot hide a later valid WINNER line.
    const slashIdx = declared.indexOf('/');
    if (slashIdx === -1) continue;

    const identity = declared.slice(0, slashIdx).trim();
    const model = declared.slice(slashIdx + 1).trim();
    if (identity && model) return { identity, model };
  }
  return null;
}

// ─── Cross-examination stage ──────────────────────────────────────────────────

/**
 * Build the system + user prompts for a cross-examination call.
 *
 * The cross-examiner sees the original task prompt (first 2,000 characters),
 * every contestant digest, the proposed analysis (first 5,000 characters) and
 * a note about the proposed winner, and is asked to challenge the result and
 * finish with a `VERDICT:` line.
 *
 * When no winner was proposed, the note only cites "fewer than 2 contestants
 * succeeded" if that is actually the case (`digests.length < 2`, the same
 * threshold shouldNameWinner() applies). With two or more digests the judge
 * simply did not yield a usable winner, so no reason is asserted.
 *
 * @param opts.originalPrompt - The task the contestants were given.
 * @param opts.digests - One digest per contestant shown to the examiner.
 * @param opts.analysisContent - The judge's analysis being challenged.
 * @param opts.proposedWinner - Winner label to show, or `null` when none was proposed.
 * @param opts.examinerIdentity - Identity the examiner is told it has.
 * @param opts.examinerModel - Model name the examiner is told it is.
 * @returns The `system` and `user` prompt strings.
 */
export function buildCrossExamPrompt(opts: {
  originalPrompt: string;
  digests: ContestantDigest[];
  analysisContent: string;
  proposedWinner: string | null;
  examinerIdentity: string;
  examinerModel: string;
}): { system: string; user: string } {
  const system =
    `You are ${opts.examinerIdentity} (model: ${opts.examinerModel}), acting as an independent ` +
    'cross-examiner in an AI battle. Your role is to critically challenge the proposed analysis ' +
    'and winner, then give your own verdict. Be rigorous but fair. ' +
    'End your response with your verdict on its own line:\n' +
    'VERDICT: <identity>/<model>  — if you agree or disagree with the proposed winner but can name one\n' +
    'VERDICT: NO_WINNER  — if no clear winner exists';

  const parts: string[] = [
    '# Original Task Prompt\n',
    opts.originalPrompt.slice(0, 2_000),
    '\n# Contestant Digests\n',
  ];

  for (const d of opts.digests) {
    parts.push(`\n## ${d.identity} / ${d.model}`);
    parts.push(`Benchmark: ${d.benchmarkLine}`);
    parts.push(d.digest);
  }

  parts.push('\n# Proposed Analysis\n');
  parts.push(opts.analysisContent.slice(0, 5_000));

  if (opts.proposedWinner) {
    parts.push(`\n*(Proposed winner: ${opts.proposedWinner})*`);
  } else if (opts.digests.length < 2) {
    // Genuinely too few survivors to compare.
    parts.push('\n*(No winner was proposed — fewer than 2 contestants succeeded.)*');
  } else {
    // Enough survivors, but the judge named no winner; do not claim a false reason.
    parts.push('\n*(No winner was proposed.)*');
  }

  parts.push(
    '\n# Your Cross-Examination\n' +
      'Challenge the analysis above, then give your independent verdict (VERDICT: … on its own line).',
  );

  return { system, user: parts.join('\n') };
}