import { describe, it, expect } from 'vitest';
import {
  classifyLane,
  nextLocalContestant,
  isBattleComplete,
  computeBenchmark,
  sanitizeSlug,
  buildBattleSlug,
  buildContestantDir,
  reconcileContestantResume,
  reconcileContestants,
  type ContestantSlot,
} from '../arena-decisions.js';

/**
 * Unit tests for the decision helpers exported by `../arena-decisions.js`.
 *
 * Every helper is exercised with plain in-memory values only (strings, Dates,
 * Sets, Maps, object literals); nothing here touches I/O or mocks.
 */

// Local models = what the llama-swap server actually serves.
// The explicit <string> type argument is required: `ReadonlySet` is generic and
// cannot be used as a bare type annotation.
const LOCAL_MODELS: ReadonlySet<string> = new Set([
  'qwen3.6-35b-a3b-mxfp4',
  'qwen2.5-coder-7b',
]);

// ─── classifyLane ────────────────────────────────────────────────────────────

describe('classifyLane', () => {
  it('classifies qa battles as local regardless of identity or model', () => {
    expect(classifyLane('qa', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('qa', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('qa', 'Debugger', 'cloud-model', new Set())).toBe('local');
    expect(classifyLane('qa', 'opencode', 'any-model', LOCAL_MODELS)).toBe('local');
  });

  it('classifies coding contestants as local when model is in localModels', () => {
    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('coding', 'opencode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('coding', 'qwen', 'qwen2.5-coder-7b', LOCAL_MODELS)).toBe('local');
  });

  it('classifies coding contestants as cloud when model is not in localModels', () => {
    expect(classifyLane('coding', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
    expect(classifyLane('coding', 'opencode', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
    expect(classifyLane('coding', 'goose', 'gpt-4o', LOCAL_MODELS)).toBe('cloud');
    expect(classifyLane('coding', 'qwen', 'unknown-remote-model', LOCAL_MODELS)).toBe('cloud');
  });

  it('uses the injected localModels set, not a hardcoded list', () => {
    const custom = new Set(['my-local-model']);
    expect(classifyLane('coding', 'any-agent', 'my-local-model', custom)).toBe('local');
    expect(classifyLane('coding', 'boocode', 'other-model', custom)).toBe('cloud');
  });

  it('defaults to cloud for an empty localModels set', () => {
    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', new Set())).toBe('cloud');
    expect(classifyLane('coding', 'native', 'any-local-model', new Set())).toBe('cloud');
  });
});

// ─── nextLocalContestant ─────────────────────────────────────────────────────

describe('nextLocalContestant', () => {
  it('returns null for an empty list', () => {
    expect(nextLocalContestant([])).toBeNull();
  });

  it('returns null when no local contestants are queued', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'running' },
      { id: 'c2', lane: 'cloud', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBeNull();
  });

  it('returns the first queued local contestant in order', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'done' },
      { id: 'c2', lane: 'local', status: 'queued' },
      { id: 'c3', lane: 'local', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBe('c2');
  });

  it('skips done/error local contestants and cloud contestants', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'cloud', status: 'queued' },
      { id: 'c2', lane: 'local', status: 'error' },
      { id: 'c3', lane: 'local', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBe('c3');
  });

  it('returns null when all local contestants are done or error', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'done' },
      { id: 'c2', lane: 'local', status: 'error' },
    ];
    expect(nextLocalContestant(slots)).toBeNull();
  });
});

// ─── isBattleComplete ────────────────────────────────────────────────────────

describe('isBattleComplete', () => {
  it('returns false for an empty list', () => {
    expect(isBattleComplete([])).toBe(false);
  });

  it('returns true when all contestants are done', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'done' }])).toBe(true);
  });

  it('returns true when all contestants are error', () => {
    expect(isBattleComplete([{ status: 'error' }, { status: 'error' }])).toBe(true);
  });

  it('returns true for a mixed done/error result', () => {
    expect(
      isBattleComplete([{ status: 'done' }, { status: 'error' }, { status: 'done' }]),
    ).toBe(true);
  });

  it('returns false while any contestant is still running', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'running' }])).toBe(false);
  });

  it('returns false while any contestant is still queued', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'queued' }])).toBe(false);
  });
});

// ─── computeBenchmark ────────────────────────────────────────────────────────

describe('computeBenchmark', () => {
  // Shared fixture: two timestamps exactly five seconds apart.
  const t0 = new Date('2026-06-06T10:00:00.000Z');
  const t1 = new Date('2026-06-06T10:00:05.000Z'); // +5 000ms

  it('computes duration in ms for both lanes', () => {
    const local = computeBenchmark(t0, t1, 100, 'local');
    expect(local.durationMs).toBe(5000);

    const cloud = computeBenchmark(t0, t1, null, 'cloud');
    expect(cloud.durationMs).toBe(5000);
  });

  it('computes tokens/sec for local lane when costTokens is known', () => {
    const bench = computeBenchmark(t0, t1, 500, 'local');
    expect(bench.tokensPerSec).toBeCloseTo(100, 5); // 500 / 5 = 100 tok/s
  });

  it('omits tokens/sec for cloud lane regardless of costTokens', () => {
    const bench = computeBenchmark(t0, t1, 500, 'cloud');
    expect(bench.tokensPerSec).toBeNull();
  });

  it('omits tokens/sec for local lane when costTokens is null', () => {
    const bench = computeBenchmark(t0, t1, null, 'local');
    expect(bench.tokensPerSec).toBeNull();
  });

  it('returns durationMs = 0 and null tokensPerSec when timestamps are equal', () => {
    const bench = computeBenchmark(t0, t0, 100, 'local');
    expect(bench.durationMs).toBe(0);
    expect(bench.tokensPerSec).toBeNull();
  });

  it('clamps negative duration to 0 (clock skew)', () => {
    // Arguments deliberately reversed: the end timestamp precedes the start.
    const bench = computeBenchmark(t1, t0, 50, 'local');
    expect(bench.durationMs).toBe(0);
    expect(bench.tokensPerSec).toBeNull();
  });

  it('includes token breakdown when provided', () => {
    const breakdown = {
      system: 10,
      user: 20,
      assistant: 30,
      tools: 40,
      reasoning: 5,
      total: 105,
    };
    const bench = computeBenchmark(t0, t1, 500, 'local', breakdown);
    expect(bench.tokenBreakdown).toEqual(breakdown);
  });

  it('defaults token breakdown to null when omitted', () => {
    const bench = computeBenchmark(t0, t1, 500, 'local');
    expect(bench.tokenBreakdown).toBeNull();
  });
});

// ─── sanitizeSlug ────────────────────────────────────────────────────────────

describe('sanitizeSlug', () => {
  it('lowercases and preserves alphanumeric + hyphens', () => {
    expect(sanitizeSlug('claude')).toBe('claude');
    expect(sanitizeSlug('claude-opus-4-5')).toBe('claude-opus-4-5');
  });

  it('replaces spaces and special characters with hyphens', () => {
    expect(sanitizeSlug('Code Reviewer')).toBe('code-reviewer');
    expect(sanitizeSlug('native/boocode')).toBe('native-boocode');
    expect(sanitizeSlug('qwen2.5-coder-35b')).toBe('qwen2-5-coder-35b');
  });

  it('collapses consecutive non-alphanumeric runs to a single hyphen', () => {
    expect(sanitizeSlug('foo bar---baz')).toBe('foo-bar-baz');
  });

  it('strips leading and trailing hyphens', () => {
    expect(sanitizeSlug('---foo---')).toBe('foo');
  });

  it('truncates to 64 characters', () => {
    const long = 'a'.repeat(100);
    expect(sanitizeSlug(long).length).toBe(64);
  });
});

// ─── buildBattleSlug ─────────────────────────────────────────────────────────

describe('buildBattleSlug', () => {
  it('builds a deterministic dated slug from id, type, and createdAt', () => {
    const id = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890';
    const createdAt = new Date('2026-06-06T12:00:00.000Z');
    const slug = buildBattleSlug(id, 'coding', createdAt);
    expect(slug).toBe('2026-06-06-coding-a1b2c3d4');
  });

  it('includes the battle type in the slug', () => {
    const id = 'aaaaaaaa-0000-0000-0000-000000000000';
    const createdAt = new Date('2026-01-01T00:00:00.000Z');
    expect(buildBattleSlug(id, 'qa', createdAt)).toContain('-qa-');
    expect(buildBattleSlug(id, 'coding', createdAt)).toContain('-coding-');
  });

  it('uses the first 8 hex chars of the uuid (dashes stripped)', () => {
    const id = 'deadbeef-0000-0000-0000-000000000000';
    const slug = buildBattleSlug(id, 'coding', new Date('2026-06-06T00:00:00Z'));
    expect(slug.endsWith('-deadbeef')).toBe(true);
  });
});

// ─── buildContestantDir ──────────────────────────────────────────────────────

describe('buildContestantDir', () => {
  it('joins sanitized identity and model with a hyphen', () => {
    expect(buildContestantDir('claude', 'claude-opus-4-5')).toBe('claude-claude-opus-4-5');
  });

  it('sanitizes both parts independently', () => {
    expect(buildContestantDir('Code Reviewer', 'qwen2.5-35b')).toBe('code-reviewer-qwen2-5-35b');
  });
});

// ─── reconcileContestantResume ───────────────────────────────────────────────

describe('reconcileContestantResume', () => {
  it('keeps non-running contestants regardless of task state', () => {
    for (const status of ['queued', 'done', 'error']) {
      expect(reconcileContestantResume(status, 'tid', 'completed')).toBe('keep');
      expect(reconcileContestantResume(status, null, null)).toBe('keep');
    }
  });

  it('re-dispatches a running contestant with no task_id', () => {
    expect(reconcileContestantResume('running', null, null)).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant whose task row is absent', () => {
    expect(reconcileContestantResume('running', 'tid', null)).toBe('re-dispatch');
  });

  it('marks done when the task completed before the terminal callback ran', () => {
    expect(reconcileContestantResume('running', 'tid', 'completed')).toBe('mark-done');
  });

  it('marks error when the task failed', () => {
    expect(reconcileContestantResume('running', 'tid', 'failed')).toBe('mark-error');
  });

  it('marks cancelled when the task was cancelled', () => {
    expect(reconcileContestantResume('running', 'tid', 'cancelled')).toBe('mark-cancelled');
  });

  it('keeps a running contestant whose task is pending (dispatcher handles it)', () => {
    expect(reconcileContestantResume('running', 'tid', 'pending')).toBe('keep');
  });

  it('re-dispatches when the task is stuck running (process died)', () => {
    expect(reconcileContestantResume('running', 'tid', 'running')).toBe('re-dispatch');
  });

  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
    expect(reconcileContestantResume('running', 'tid', 'blocked')).toBe('re-dispatch');
  });
});

// ─── reconcileContestants ────────────────────────────────────────────────────

describe('reconcileContestants', () => {
  it('returns one decision per contestant', () => {
    const contestants = [
      { contestantId: 'c1', taskId: null, status: 'done' },
      { contestantId: 'c2', taskId: 't1', status: 'running' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
    ];
    const taskStates = new Map([
      ['t1', 'completed'],
      ['t2', 'running'],
    ]);

    const decisions = reconcileContestants(contestants, taskStates);

    // Decisions are expected in the same order as the input contestants.
    expect(decisions).toHaveLength(3);
    expect(decisions[0]).toEqual({ contestantId: 'c1', action: 'keep' });
    expect(decisions[1]).toEqual({ contestantId: 'c2', action: 'mark-done' });
    expect(decisions[2]).toEqual({ contestantId: 'c3', action: 're-dispatch' });
  });

  it('re-dispatches a running contestant whose taskId is absent from taskStates', () => {
    const contestants = [{ contestantId: 'c1', taskId: 'orphan', status: 'running' }];
    const decisions = reconcileContestants(contestants, new Map());
    expect(decisions[0]?.action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant with null taskId', () => {
    const contestants = [{ contestantId: 'c1', taskId: null, status: 'running' }];
    const decisions = reconcileContestants(contestants, new Map());
    expect(decisions[0]?.action).toBe('re-dispatch');
  });

  it('returns empty array for no contestants', () => {
    expect(reconcileContestants([], new Map())).toEqual([]);
  });

  it('keeps a running contestant whose task is pending', () => {
    const contestants = [{ contestantId: 'c1', taskId: 't1', status: 'running' }];
    const taskStates = new Map([['t1', 'pending']]);
    const decisions = reconcileContestants(contestants, taskStates);
    expect(decisions[0]?.action).toBe('keep');
  });

  it('handles a mixed battle: done/queued kept, stale running re-dispatched', () => {
    const contestants = [
      { contestantId: 'c1', taskId: 't1', status: 'done' },
      { contestantId: 'c2', taskId: null, status: 'queued' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
      { contestantId: 'c4', taskId: 't3', status: 'running' },
    ];
    const taskStates = new Map([
      ['t1', 'completed'],
      ['t2', 'running'], // stuck — process dead
      ['t3', 'pending'], // dispatcher will handle
    ]);

    const decisions = reconcileContestants(contestants, taskStates);

    // Looked up by id rather than by index, so this test does not depend on output order.
    expect(decisions.find((d) => d.contestantId === 'c1')?.action).toBe('keep');
    expect(decisions.find((d) => d.contestantId === 'c2')?.action).toBe('keep');
    expect(decisions.find((d) => d.contestantId === 'c3')?.action).toBe('re-dispatch');
    expect(decisions.find((d) => d.contestantId === 'c4')?.action).toBe('keep');
  });
});