/**
 * Unit tests for the pure prompt-building / verdict-parsing helpers exported
 * by `arena-analyzer-helpers`. Every test calls a helper directly with inline
 * fixtures and asserts on the returned value only.
 */
import { describe, it, expect } from 'vitest';
import {
  buildDigestPrompt,
  buildJudgePrompt,
  buildCrossExamPrompt,
  extractWinner,
  shouldNameWinner,
  type ContestantDigest,
  type ContestantDigestInput,
} from '../arena-analyzer-helpers.js';

// ─── shouldNameWinner ─────────────────────────────────────────────────────────

describe('shouldNameWinner', () => {
  it('returns false with 0 succeeded contestants', () => {
    expect(shouldNameWinner(0)).toBe(false);
  });

  it('returns false with exactly 1 succeeded contestant', () => {
    expect(shouldNameWinner(1)).toBe(false);
  });

  it('returns true with exactly 2 succeeded contestants', () => {
    expect(shouldNameWinner(2)).toBe(true);
  });

  it('returns true with more than 2 succeeded contestants', () => {
    expect(shouldNameWinner(3)).toBe(true);
    expect(shouldNameWinner(6)).toBe(true);
  });
});

// ─── extractWinner ────────────────────────────────────────────────────────────

describe('extractWinner', () => {
  it('extracts identity and model from a WINNER: line', () => {
    const output = 'Some analysis\n\nWINNER: claude/opus-4-5\n\nMore text.';
    expect(extractWinner(output)).toEqual({ identity: 'claude', model: 'opus-4-5' });
  });

  it('is case-insensitive for the WINNER keyword', () => {
    expect(extractWinner('winner: boocode/qwen3.6-35b')).toEqual({
      identity: 'boocode',
      model: 'qwen3.6-35b',
    });
    expect(extractWinner('Winner: opencode/some-model')).toEqual({
      identity: 'opencode',
      model: 'some-model',
    });
  });

  it('returns null when NO_WINNER is declared', () => {
    expect(extractWinner('WINNER: NO_WINNER')).toBeNull();
    expect(extractWinner('winner: no_winner')).toBeNull();
  });

  it('returns null when no WINNER line is present', () => {
    expect(extractWinner('Just some analysis text with no verdict.')).toBeNull();
    expect(extractWinner('')).toBeNull();
  });

  it('returns null when the WINNER line has no slash separator', () => {
    expect(extractWinner('WINNER: justidentity')).toBeNull();
  });

  it('returns null when the WINNER line is empty after the colon', () => {
    expect(extractWinner('WINNER:')).toBeNull();
    expect(extractWinner('WINNER: ')).toBeNull();
  });

  it('handles leading and trailing whitespace around the slash parts', () => {
    const result = extractWinner('WINNER: claude / opus-4-5 ');
    expect(result).toEqual({ identity: 'claude', model: 'opus-4-5' });
  });

  it('picks the first WINNER line when multiple are present', () => {
    const output = 'WINNER: claude/opus-4-5\nWINNER: opencode/other-model';
    expect(extractWinner(output)).toEqual({ identity: 'claude', model: 'opus-4-5' });
  });

  it('handles model names that contain slashes by splitting at the first slash only', () => {
    // Edge case: a model name that itself contains a slash must still be split
    // at the first slash, giving identity = 'native' and
    // model = 'llama-swap/qwen3.6'.
    const result = extractWinner('WINNER: native/llama-swap/qwen3.6');
    expect(result).toEqual({ identity: 'native', model: 'llama-swap/qwen3.6' });
  });
});

// ─── buildDigestPrompt ────────────────────────────────────────────────────────

describe('buildDigestPrompt', () => {
  // Minimal valid input; individual tests spread over it to vary one field.
  const base: ContestantDigestInput = {
    identity: 'claude',
    model: 'opus-4-5',
    resultMd: '# Output\n\nSome result content.',
    benchmarkLine: '12000ms',
  };

  it('returns an object with non-empty system and user strings', () => {
    const { system, user } = buildDigestPrompt(base);
    expect(system.length).toBeGreaterThan(0);
    expect(user.length).toBeGreaterThan(0);
  });

  it('includes the contestant identity and model in the user prompt', () => {
    const { user } = buildDigestPrompt(base);
    expect(user).toContain('claude');
    expect(user).toContain('opus-4-5');
  });

  it('includes the benchmark line in the user prompt', () => {
    const { user } = buildDigestPrompt(base);
    expect(user).toContain('12000ms');
  });

  it('includes the result.md content in the user prompt', () => {
    const { user } = buildDigestPrompt(base);
    expect(user).toContain('Some result content.');
  });

  it('includes the diff.patch when provided', () => {
    const input: ContestantDigestInput = {
      ...base,
      diffPatch: '--- a/foo.ts\n+++ b/foo.ts\n+added',
    };
    const { user } = buildDigestPrompt(input);
    expect(user).toContain('added');
    expect(user).toContain('```diff');
  });

  it('omits the diff section when diffPatch is undefined', () => {
    const { user } = buildDigestPrompt(base);
    expect(user).not.toContain('```diff');
  });

  it('truncates resultMd longer than 8000 characters', () => {
    const longResult = 'x'.repeat(10_000);
    const { user } = buildDigestPrompt({ ...base, resultMd: longResult });
    // A length-only bound of 15_000 is also satisfied by the untruncated
    // 10_000-char input, so it cannot detect a missing truncation. Assert that
    // no run of the filler character longer than the 8000-char limit survives.
    expect(user).not.toContain('x'.repeat(8_001));
    // Retained sanity bound: the prompt as a whole must not balloon.
    expect(user.length).toBeLessThan(15_000);
  });

  it('truncates diffPatch longer than 5000 characters', () => {
    const longDiff = '+' + 'x'.repeat(10_000);
    const { user } = buildDigestPrompt({ ...base, diffPatch: longDiff });
    // Same reasoning as the resultMd case: the 16_000 length bound alone would
    // pass without any truncation, so check the surviving run length directly.
    expect(user).not.toContain('x'.repeat(5_001));
    expect(user.length).toBeLessThan(16_000);
  });
});

// ─── buildJudgePrompt ─────────────────────────────────────────────────────────

describe('buildJudgePrompt', () => {
  const digests: ContestantDigest[] = [
    { identity: 'claude', model: 'opus-4-5', digest: 'Good result.', benchmarkLine: '5000ms' },
    { identity: 'opencode', model: 'qwen3.6', digest: 'Decent result.', benchmarkLine: '8000ms' },
  ];

  it('includes the original prompt in the user section', () => {
    const { user } = buildJudgePrompt('Write a sorting algorithm', digests);
    expect(user).toContain('Write a sorting algorithm');
  });

  it('includes each contestant heading in the user section', () => {
    const { user } = buildJudgePrompt('prompt', digests);
    expect(user).toContain('claude');
    expect(user).toContain('opus-4-5');
    expect(user).toContain('opencode');
    expect(user).toContain('qwen3.6');
  });

  it('includes each contestant digest text', () => {
    const { user } = buildJudgePrompt('prompt', digests);
    expect(user).toContain('Good result.');
    expect(user).toContain('Decent result.');
  });

  it('instructs the model to name a WINNER when 2+ digests are provided', () => {
    const { system } = buildJudgePrompt('prompt', digests);
    expect(system).toContain('WINNER:');
  });

  it('instructs the model NOT to name a winner when fewer than 2 digests are provided', () => {
    const oneDigest = digests.slice(0, 1);
    const { system } = buildJudgePrompt('prompt', oneDigest);
    expect(system).toContain('NO_WINNER');
    expect(system).not.toContain('WINNER: ');
  });

  it('instructs NO_WINNER when digests list is empty', () => {
    const { system } = buildJudgePrompt('prompt', []);
    expect(system).toContain('NO_WINNER');
  });

  it('truncates originalPrompt longer than 2000 characters', () => {
    const longPrompt = 'p'.repeat(5_000);
    const { user } = buildJudgePrompt(longPrompt, digests);
    // No run of the filler character longer than the 2000-char limit may
    // survive anywhere in the user prompt.
    expect(user).not.toContain('p'.repeat(2_001));
    // Everything before the digests heading is the prompt section; it must
    // stay well under the 5000 chars that were passed in.
    const promptSection = user.split('# Contestant Digests')[0] ?? '';
    expect(promptSection.length).toBeLessThan(3_000);
  });
});

// ─── buildCrossExamPrompt ─────────────────────────────────────────────────────

describe('buildCrossExamPrompt', () => {
  const digests: ContestantDigest[] = [
    { identity: 'claude', model: 'opus-4-5', digest: 'Strong result.', benchmarkLine: '5000ms' },
    {
      identity: 'boocode',
      model: 'qwen3.6-35b',
      digest: 'Decent result.',
      benchmarkLine: '12000ms',
    },
  ];

  const baseOpts = {
    originalPrompt: 'Write a sorting algorithm.',
    digests,
    analysisContent: '# Arena Analysis\n\nClaude did better.\n\nWINNER: claude/opus-4-5',
    proposedWinner: 'claude/opus-4-5',
    examinerIdentity: 'goose',
    examinerModel: 'gpt-4o',
  };

  it('includes the examiner identity and model in the system prompt', () => {
    const { system } = buildCrossExamPrompt(baseOpts);
    expect(system).toContain('goose');
    expect(system).toContain('gpt-4o');
  });

  it('includes the original prompt in the user section', () => {
    const { user } = buildCrossExamPrompt(baseOpts);
    expect(user).toContain('Write a sorting algorithm.');
  });

  it('includes each contestant digest', () => {
    const { user } = buildCrossExamPrompt(baseOpts);
    expect(user).toContain('Strong result.');
    expect(user).toContain('Decent result.');
  });

  it('includes the proposed analysis content', () => {
    const { user } = buildCrossExamPrompt(baseOpts);
    expect(user).toContain('Claude did better.');
  });

  it('includes the proposed winner when set', () => {
    // baseOpts.proposedWinner also appears verbatim inside analysisContent, so
    // asserting on it would pass even if proposedWinner were never rendered.
    // Use a sentinel that occurs nowhere else in the fixture.
    const sentinelWinner = 'sentinel-identity/sentinel-model';
    const { user } = buildCrossExamPrompt({ ...baseOpts, proposedWinner: sentinelWinner });
    expect(user).toContain(sentinelWinner);
  });

  it('notes that no winner was proposed when proposedWinner is null', () => {
    const { user } = buildCrossExamPrompt({ ...baseOpts, proposedWinner: null });
    expect(user).toContain('No winner was proposed');
  });

  it('instructs the examiner to provide a VERDICT line', () => {
    const { system } = buildCrossExamPrompt(baseOpts);
    expect(system).toContain('VERDICT:');
  });
});