/**
 * Pure, side-effect-free helpers for the Arena analyzer.
 * No DB, no IO, no network — safe to unit-test directly.
 *
 * Covers: digest-prompt assembly, judge-prompt assembly, winner extraction
 * from the judge output, the <2-survivors no-winner rule, and the
 * cross-examination prompt.
 */

// ─── Shared types ─────────────────────────────────────────────────────────────

/** Raw material for one contestant's digest call. */
export interface ContestantDigestInput {
  identity: string;
  model: string;
  /** Contestant's result as Markdown; truncated before being sent. */
  resultMd: string;
  /** Optional unified diff; omitted from the prompt when absent or empty. */
  diffPatch?: string;
  benchmarkLine: string;
}

/** A contestant's digest as consumed by the judge and cross-examiner prompts. */
export interface ContestantDigest {
  identity: string;
  model: string;
  digest: string;
  benchmarkLine: string;
}

// ─── Limits & tokens ──────────────────────────────────────────────────────────

/** Max UTF-16 code units of `resultMd` placed in a digest prompt. */
const MAX_RESULT_CHARS = 8_000;
/** Max UTF-16 code units of `diffPatch` placed in a digest prompt. */
const MAX_DIFF_CHARS = 5_000;
/** Max UTF-16 code units of the original task prompt placed in judge / cross-exam prompts. */
const MAX_TASK_PROMPT_CHARS = 2_000;
/** Max UTF-16 code units of the proposed analysis placed in a cross-exam prompt. */
const MAX_ANALYSIS_CHARS = 5_000;

/** Keyword that introduces the judge's winner line. */
const WINNER_KEYWORD = 'WINNER:';
/** Token the judge emits when it declines to name a winner. */
const NO_WINNER_TOKEN = 'NO_WINNER';
/** Leading/trailing whitespace, `*` and backticks (Markdown emphasis / inline code). */
const EMPHASIS_EDGES = /^[\s*`]+|[\s*`]+$/g;

/** Remove whitespace and Markdown emphasis/backtick characters from both ends of `text`. */
function stripEmphasis(text: string): string {
  return text.replace(EMPHASIS_EDGES, '');
}

/**
 * Render each digest as three prompt lines: a `## identity / model` heading,
 * the benchmark line, and the digest body. Shared by the judge and
 * cross-examination prompts so both present contestants identically.
 */
function renderDigestSections(digests: readonly ContestantDigest[]): string[] {
  const lines: string[] = [];
  for (const d of digests) {
    lines.push(`\n## ${d.identity} / ${d.model}`);
    lines.push(`Benchmark: ${d.benchmarkLine}`);
    lines.push(d.digest);
  }
  return lines;
}

// ─── Digest stage ─────────────────────────────────────────────────────────────

/**
 * Build the system + user prompts for the per-contestant digest call.
 * The digest is a short structured summary; it keeps each call's context small
 * so the downstream judge only sees digests (not raw diffs).
 *
 * @param input - Contestant data. `resultMd` and `diffPatch` are truncated to
 *   fixed character limits; the diff section is only emitted when `diffPatch`
 *   is a non-empty string.
 * @returns The `system` and `user` prompt strings.
 */
export function buildDigestPrompt(input: ContestantDigestInput): { system: string; user: string } {
  const system =
    'You are an expert technical analyst evaluating the output of an AI coding or Q&A battle. ' +
    'Produce a concise structured digest (under 300 words, Markdown bullet points) covering: ' +
    '(1) correctness and quality, (2) completeness, (3) notable strengths, (4) notable weaknesses or issues. ' +
    'Do not reference the battle or other contestants — focus only on this submission.';

  const parts: string[] = [
    `# Contestant: ${input.identity} / ${input.model}`,
    `\nBenchmark: ${input.benchmarkLine}`,
    '\n## Result\n',
    input.resultMd.slice(0, MAX_RESULT_CHARS),
  ];

  if (input.diffPatch) {
    parts.push('\n## Code Changes (diff)\n```diff');
    parts.push(input.diffPatch.slice(0, MAX_DIFF_CHARS));
    parts.push('```');
  }

  return { system, user: parts.join('\n') };
}

// ─── Judge stage ──────────────────────────────────────────────────────────────

/**
 * Build the system + user prompts for the comparative judge call.
 * Receives contestant digests (NOT raw diffs) to keep context bounded.
 *
 * The judge is instructed to emit either a `WINNER: <identity>/<model>` line
 * or a `NO_WINNER` line; which one depends on `shouldNameWinner(digests.length)`.
 * The caller extracts the result with `extractWinner()`.
 *
 * @param originalPrompt - The task all contestants worked on (truncated).
 * @param digests - One digest per contestant that produced a result.
 * @returns The `system` and `user` prompt strings.
 */
export function buildJudgePrompt(
  originalPrompt: string,
  digests: ContestantDigest[],
): { system: string; user: string } {
  const canName = shouldNameWinner(digests.length);

  // The placeholders must be spelled out: without them the judge is shown a
  // bare "WINNER: /" and has no format to follow.
  const winnerInstruction = canName
    ? 'After your comparative analysis, name the best submission on its own line in this exact format:\n' +
      'WINNER: <identity>/<model>\n' +
      'where <identity> and <model> exactly match the heading above. No other text on that line.'
    : 'Fewer than 2 contestants succeeded. Do NOT name a winner. Write the following on its own line:\nNO_WINNER';

  const system =
    'You are an expert judge for an AI battle. You have received digest summaries of each ' +
    "contestant's work on the same task. Write a comparative analysis, then follow these instructions:\n" +
    winnerInstruction;

  const parts: string[] = [
    '# Original Task Prompt\n',
    originalPrompt.slice(0, MAX_TASK_PROMPT_CHARS),
    '\n# Contestant Digests\n',
    ...renderDigestSections(digests),
    '\n# Instructions\nCompare the contestants and follow the winner-naming instructions above.',
  ];

  return { system, user: parts.join('\n') };
}

// ─── No-winner rule ───────────────────────────────────────────────────────────

/**
 * Returns true when enough contestants succeeded to name a winner.
 * Rule: at least 2 must have produced a result. With 0 or 1 success the
 * analysis must NOT name a winner (no meaningful comparison possible).
 *
 * @param succeededCount - Number of contestants that produced a result.
 */
export function shouldNameWinner(succeededCount: number): boolean {
  return succeededCount >= 2;
}

// ─── Winner extraction ────────────────────────────────────────────────────────

/**
 * Parse the judge's text output and extract the declared winner.
 *
 * Lines are scanned top to bottom and the first decisive line wins:
 * - a bare `NO_WINNER` line, or `WINNER: NO_WINNER`, yields `null`;
 * - `WINNER: <identity>/<model>` with both parts non-empty yields the pair.
 *   The split happens at the FIRST `/`, so the model may itself contain slashes.
 *
 * A `WINNER:` line that is malformed (nothing after the keyword, no slash, or
 * an empty identity/model) is skipped rather than ending the scan, so a later
 * well-formed line is still found.
 *
 * The parse is lenient on surrounding whitespace, keyword case, and Markdown
 * emphasis/backticks wrapped around the line or around the names.
 *
 * @param judgeOutput - Full text returned by the judge.
 * @returns The winner, or `null` when none is declared.
 */
export function extractWinner(judgeOutput: string): { identity: string; model: string } | null {
  for (const rawLine of judgeOutput.split('\n')) {
    const line = stripEmphasis(rawLine);
    const upper = line.toUpperCase();

    // Explicit refusal in the bare form the judge prompt asks for.
    if (upper === NO_WINNER_TOKEN) return null;
    if (!upper.startsWith(WINNER_KEYWORD)) continue;

    const rest = stripEmphasis(line.slice(WINNER_KEYWORD.length));
    if (rest.toUpperCase() === NO_WINNER_TOKEN) return null;

    const slashIdx = rest.indexOf('/');
    if (slashIdx === -1) continue; // malformed (or empty) — keep looking

    const identity = stripEmphasis(rest.slice(0, slashIdx));
    const model = stripEmphasis(rest.slice(slashIdx + 1));
    if (identity && model) return { identity, model };
  }
  return null;
}

// ─── Cross-examination stage ──────────────────────────────────────────────────

/**
 * Build the system + user prompts for a cross-examination call.
 * The cross-examiner sees the original prompt, contestant digests, and the
 * proposed analysis, and is asked to challenge the result.
 *
 * @param opts.originalPrompt - The task all contestants worked on (truncated).
 * @param opts.digests - One digest per contestant that produced a result.
 * @param opts.analysisContent - The judge's analysis under challenge (truncated).
 * @param opts.proposedWinner - Display string of the proposed winner; `null`
 *   (or empty) when none was proposed.
 * @param opts.examinerIdentity - Identity the examiner is addressed as.
 * @param opts.examinerModel - Model name shown alongside the examiner identity.
 * @returns The `system` and `user` prompt strings.
 */
export function buildCrossExamPrompt(opts: {
  originalPrompt: string;
  digests: ContestantDigest[];
  analysisContent: string;
  proposedWinner: string | null;
  examinerIdentity: string;
  examinerModel: string;
}): { system: string; user: string } {
  const system =
    `You are ${opts.examinerIdentity} (model: ${opts.examinerModel}), acting as an independent ` +
    'cross-examiner in an AI battle. Your role is to critically challenge the proposed analysis ' +
    'and winner, then give your own verdict. Be rigorous but fair. ' +
    'End your response with your verdict on its own line:\n' +
    'VERDICT: <identity>/<model> — if you agree or disagree with the proposed winner but can name one\n' +
    'VERDICT: NO_WINNER — if no clear winner exists';

  const parts: string[] = [
    '# Original Task Prompt\n',
    opts.originalPrompt.slice(0, MAX_TASK_PROMPT_CHARS),
    '\n# Contestant Digests\n',
    ...renderDigestSections(opts.digests),
    '\n# Proposed Analysis\n',
    opts.analysisContent.slice(0, MAX_ANALYSIS_CHARS),
  ];

  if (opts.proposedWinner) {
    parts.push(`\n*(Proposed winner: ${opts.proposedWinner})*`);
  } else if (shouldNameWinner(opts.digests.length)) {
    // Enough contestants succeeded, so the "fewer than 2" explanation would be
    // false; state only that no winner was proposed.
    parts.push('\n*(No winner was proposed.)*');
  } else {
    parts.push('\n*(No winner was proposed — fewer than 2 contestants succeeded.)*');
  }

  parts.push(
    '\n# Your Cross-Examination\n' +
      'Challenge the analysis above, then give your independent verdict (VERDICT: … on its own line).',
  );

  return { system, user: parts.join('\n') };
}