// File: boocode/apps/coder/src/services/__tests__/arena-decisions.test.ts
// (file-viewer listing: 351 lines, 15 KiB, TypeScript)

import { describe, it, expect } from 'vitest';
import {
classifyLane,
nextLocalContestant,
isBattleComplete,
computeBenchmark,
sanitizeSlug,
buildBattleSlug,
buildContestantDir,
reconcileContestantResume,
reconcileContestants,
type ContestantSlot,
} from '../arena-decisions.js';
/**
 * Model ids treated as "local" by the tests in this file, i.e. the models
 * the llama-swap server actually serves. Passed to `classifyLane` as its
 * local-model set.
 */
const LOCAL_MODELS: ReadonlySet<string> = new Set<string>([
  'qwen3.6-35b-a3b-mxfp4',
  'qwen2.5-coder-7b',
]);
// ─── classifyLane ────────────────────────────────────────────────────────────
// Expectations pinned here: qa battles always classify as 'local'; coding
// battles classify as 'local' only when the model is a member of the
// local-model set handed in, and as 'cloud' otherwise.
describe('classifyLane', () => {
  it('classifies qa battles as local regardless of identity or model', () => {
    const emptyModels = new Set<string>();
    // [identity, model, local-model set] — includes a cloud model and an empty set.
    const pairings = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS],
      ['claude', 'claude-opus-4-5', LOCAL_MODELS],
      ['Debugger', 'cloud-model', emptyModels],
      ['opencode', 'any-model', LOCAL_MODELS],
    ] as const;

    for (const [identity, model, models] of pairings) {
      expect(classifyLane('qa', identity, model, models)).toBe('local');
    }
  });

  it('classifies coding contestants as local when model is in localModels', () => {
    const servedLocally = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4'],
      ['opencode', 'qwen3.6-35b-a3b-mxfp4'],
      ['qwen', 'qwen2.5-coder-7b'],
    ] as const;

    for (const [identity, model] of servedLocally) {
      expect(classifyLane('coding', identity, model, LOCAL_MODELS)).toBe('local');
    }
  });

  it('classifies coding contestants as cloud when model is not in localModels', () => {
    const servedRemotely = [
      ['claude', 'claude-opus-4-5'],
      ['opencode', 'claude-opus-4-5'],
      ['goose', 'gpt-4o'],
      ['qwen', 'unknown-remote-model'],
    ] as const;

    for (const [identity, model] of servedRemotely) {
      expect(classifyLane('coding', identity, model, LOCAL_MODELS)).toBe('cloud');
    }
  });

  it('uses the injected localModels set, not a hardcoded list', () => {
    // A set unrelated to LOCAL_MODELS: only its single member may count as local.
    const injected = new Set(['my-local-model']);

    const knownToSet = classifyLane('coding', 'any-agent', 'my-local-model', injected);
    expect(knownToSet).toBe('local');

    const unknownToSet = classifyLane('coding', 'boocode', 'other-model', injected);
    expect(unknownToSet).toBe('cloud');
  });

  it('defaults to cloud for an empty localModels set', () => {
    const probes = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4'],
      ['native', 'any-local-model'],
    ] as const;

    for (const [identity, model] of probes) {
      // A fresh empty set per call, as in a caller that knows of no local models.
      expect(classifyLane('coding', identity, model, new Set<string>())).toBe('cloud');
    }
  });
});
// ─── nextLocalContestant ─────────────────────────────────────────────────────
// Expectations pinned here: the result is the id of the first slot that is
// both in the local lane and queued, or null when no such slot exists.
describe('nextLocalContestant', () => {
  // Compact builder so each scenario reads as one row per contestant.
  const slot = (
    id: ContestantSlot['id'],
    lane: ContestantSlot['lane'],
    status: ContestantSlot['status'],
  ): ContestantSlot => ({ id, lane, status });

  it('returns null for an empty list', () => {
    const nobody: ContestantSlot[] = [];
    expect(nextLocalContestant(nobody)).toBeNull();
  });

  it('returns null when no local contestants are queued', () => {
    const lineup: ContestantSlot[] = [
      slot('c1', 'local', 'running'),
      slot('c2', 'cloud', 'queued'),
    ];
    const picked = nextLocalContestant(lineup);
    expect(picked).toBeNull();
  });

  it('returns the first queued local contestant in order', () => {
    const lineup: ContestantSlot[] = [
      slot('c1', 'local', 'done'),
      slot('c2', 'local', 'queued'),
      slot('c3', 'local', 'queued'),
    ];
    const picked = nextLocalContestant(lineup);
    expect(picked).toBe('c2');
  });

  it('skips done/error local contestants and cloud contestants', () => {
    const lineup: ContestantSlot[] = [
      slot('c1', 'cloud', 'queued'),
      slot('c2', 'local', 'error'),
      slot('c3', 'local', 'queued'),
    ];
    const picked = nextLocalContestant(lineup);
    expect(picked).toBe('c3');
  });

  it('returns null when all local contestants are done or error', () => {
    const lineup: ContestantSlot[] = [
      slot('c1', 'local', 'done'),
      slot('c2', 'local', 'error'),
    ];
    const picked = nextLocalContestant(lineup);
    expect(picked).toBeNull();
  });
});
// ─── isBattleComplete ────────────────────────────────────────────────────────
// Expectations pinned here: a battle counts as complete only when it has at
// least one contestant and every contestant is either 'done' or 'error'.
describe('isBattleComplete', () => {
  it('returns false for an empty list', () => {
    const complete = isBattleComplete([]);
    expect(complete).toBe(false);
  });

  it('returns true when all contestants are done', () => {
    const complete = isBattleComplete([{ status: 'done' }, { status: 'done' }]);
    expect(complete).toBe(true);
  });

  it('returns true when all contestants are error', () => {
    const complete = isBattleComplete([{ status: 'error' }, { status: 'error' }]);
    expect(complete).toBe(true);
  });

  it('returns true for a mixed done/error result', () => {
    const complete = isBattleComplete([
      { status: 'done' },
      { status: 'error' },
      { status: 'done' },
    ]);
    expect(complete).toBe(true);
  });

  it('returns false while any contestant is still running', () => {
    const complete = isBattleComplete([{ status: 'done' }, { status: 'running' }]);
    expect(complete).toBe(false);
  });

  it('returns false while any contestant is still queued', () => {
    const complete = isBattleComplete([{ status: 'done' }, { status: 'queued' }]);
    expect(complete).toBe(false);
  });
});
// ─── computeBenchmark ────────────────────────────────────────────────────────
// Expectations pinned here: durationMs is the end-minus-start difference
// clamped at 0; tokensPerSec is reported only for the local lane with a known
// token count and a positive duration; tokenBreakdown is passed through, or
// null when not supplied.
describe('computeBenchmark', () => {
  const startedAt = new Date('2026-06-06T10:00:00.000Z');
  const finishedAt = new Date('2026-06-06T10:00:05.000Z'); // 5 000 ms after startedAt

  it('computes duration in ms for both lanes', () => {
    const { durationMs: localMs } = computeBenchmark(startedAt, finishedAt, 100, 'local');
    expect(localMs).toBe(5000);

    const { durationMs: cloudMs } = computeBenchmark(startedAt, finishedAt, null, 'cloud');
    expect(cloudMs).toBe(5000);
  });

  it('computes tokens/sec for local lane when costTokens is known', () => {
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, 500, 'local');
    // 500 tokens over 5 seconds → 100 tok/s
    expect(tokensPerSec).toBeCloseTo(100, 5);
  });

  it('omits tokens/sec for cloud lane regardless of costTokens', () => {
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, 500, 'cloud');
    expect(tokensPerSec).toBeNull();
  });

  it('omits tokens/sec for local lane when costTokens is null', () => {
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, null, 'local');
    expect(tokensPerSec).toBeNull();
  });

  it('returns durationMs = 0 and null tokensPerSec when timestamps are equal', () => {
    const result = computeBenchmark(startedAt, startedAt, 100, 'local');
    expect(result.durationMs).toBe(0);
    expect(result.tokensPerSec).toBeNull();
  });

  it('clamps negative duration to 0 (clock skew)', () => {
    // Arguments deliberately swapped so the end precedes the start.
    const result = computeBenchmark(finishedAt, startedAt, 50, 'local');
    expect(result.durationMs).toBe(0);
    expect(result.tokensPerSec).toBeNull();
  });

  it('includes token breakdown when provided', () => {
    const perRoleTokens = {
      system: 10,
      user: 20,
      assistant: 30,
      tools: 40,
      reasoning: 5,
      total: 105,
    };
    const { tokenBreakdown } = computeBenchmark(
      startedAt,
      finishedAt,
      500,
      'local',
      perRoleTokens,
    );
    expect(tokenBreakdown).toEqual(perRoleTokens);
  });

  it('defaults token breakdown to null when omitted', () => {
    const { tokenBreakdown } = computeBenchmark(startedAt, finishedAt, 500, 'local');
    expect(tokenBreakdown).toBeNull();
  });
});
// ─── sanitizeSlug ────────────────────────────────────────────────────────────
// Expectations pinned here: output is lowercase; runs of characters other
// than letters and digits collapse to a single hyphen; leading and trailing
// hyphens are removed; the result is cut to 64 characters.
describe('sanitizeSlug', () => {
  it('lowercases and preserves alphanumeric + hyphens', () => {
    // [input, expected] — already-clean slugs come back unchanged.
    const unchanged = [
      ['claude', 'claude'],
      ['claude-opus-4-5', 'claude-opus-4-5'],
    ] as const;

    for (const [raw, slug] of unchanged) {
      expect(sanitizeSlug(raw)).toBe(slug);
    }
  });

  it('replaces spaces and special characters with hyphens', () => {
    const rewritten = [
      ['Code Reviewer', 'code-reviewer'],
      ['native/boocode', 'native-boocode'],
      ['qwen2.5-coder-35b', 'qwen2-5-coder-35b'],
    ] as const;

    for (const [raw, slug] of rewritten) {
      expect(sanitizeSlug(raw)).toBe(slug);
    }
  });

  it('collapses consecutive non-alphanumeric runs to a single hyphen', () => {
    const slug = sanitizeSlug('foo bar---baz');
    expect(slug).toBe('foo-bar-baz');
  });

  it('strips leading and trailing hyphens', () => {
    const slug = sanitizeSlug('---foo---');
    expect(slug).toBe('foo');
  });

  it('truncates to 64 characters', () => {
    const slug = sanitizeSlug('a'.repeat(100));
    expect(slug.length).toBe(64);
  });
});
// ─── buildBattleSlug ─────────────────────────────────────────────────────────
// Expectations pinned here: the slug has the form
// `<YYYY-MM-DD>-<battle type>-<first 8 characters of the id>`.
describe('buildBattleSlug', () => {
  it('builds a deterministic dated slug from id, type, and createdAt', () => {
    const battleId = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890';
    const when = new Date('2026-06-06T12:00:00.000Z');

    expect(buildBattleSlug(battleId, 'coding', when)).toBe('2026-06-06-coding-a1b2c3d4');
  });

  it('includes the battle type in the slug', () => {
    const battleId = 'aaaaaaaa-0000-0000-0000-000000000000';
    const when = new Date('2026-01-01T00:00:00.000Z');

    const qaSlug = buildBattleSlug(battleId, 'qa', when);
    expect(qaSlug).toContain('-qa-');

    const codingSlug = buildBattleSlug(battleId, 'coding', when);
    expect(codingSlug).toContain('-coding-');
  });

  it('uses the first 8 hex chars of the uuid (dashes stripped)', () => {
    const battleId = 'deadbeef-0000-0000-0000-000000000000';
    const when = new Date('2026-06-06T00:00:00Z');

    const hasIdSuffix = buildBattleSlug(battleId, 'coding', when).endsWith('-deadbeef');
    expect(hasIdSuffix).toBe(true);
  });
});
// ─── buildContestantDir ──────────────────────────────────────────────────────
// Expectations pinned here: the directory name is the slug-sanitized identity
// and the slug-sanitized model, joined by a single hyphen.
describe('buildContestantDir', () => {
  it('joins sanitized identity and model with a hyphen', () => {
    const dir = buildContestantDir('claude', 'claude-opus-4-5');
    expect(dir).toBe('claude-claude-opus-4-5');
  });

  it('sanitizes both parts independently', () => {
    // The space in the identity and the dot in the model each become a hyphen.
    const dir = buildContestantDir('Code Reviewer', 'qwen2.5-35b');
    expect(dir).toBe('code-reviewer-qwen2-5-35b');
  });
});
// ─── reconcileContestantResume ───────────────────────────────────────────────
// Expectations pinned here, per (contestant status, task id, task state):
// contestants that are not 'running' are always kept; a 'running' contestant
// is re-dispatched, marked done/error/cancelled, or kept depending on the
// state of its task.
describe('reconcileContestantResume', () => {
  it('keeps non-running contestants regardless of task state', () => {
    ['queued', 'done', 'error'].forEach((status) => {
      const withFinishedTask = reconcileContestantResume(status, 'tid', 'completed');
      expect(withFinishedTask).toBe('keep');

      const withoutTask = reconcileContestantResume(status, null, null);
      expect(withoutTask).toBe('keep');
    });
  });

  it('re-dispatches a running contestant with no task_id', () => {
    const action = reconcileContestantResume('running', null, null);
    expect(action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant whose task row is absent', () => {
    const action = reconcileContestantResume('running', 'tid', null);
    expect(action).toBe('re-dispatch');
  });

  it('marks done when the task completed before the terminal callback ran', () => {
    const action = reconcileContestantResume('running', 'tid', 'completed');
    expect(action).toBe('mark-done');
  });

  it('marks error when the task failed', () => {
    const action = reconcileContestantResume('running', 'tid', 'failed');
    expect(action).toBe('mark-error');
  });

  it('marks cancelled when the task was cancelled', () => {
    const action = reconcileContestantResume('running', 'tid', 'cancelled');
    expect(action).toBe('mark-cancelled');
  });

  it('keeps a running contestant whose task is pending (dispatcher handles it)', () => {
    const action = reconcileContestantResume('running', 'tid', 'pending');
    expect(action).toBe('keep');
  });

  it('re-dispatches when the task is stuck running (process died)', () => {
    const action = reconcileContestantResume('running', 'tid', 'running');
    expect(action).toBe('re-dispatch');
  });

  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
    const action = reconcileContestantResume('running', 'tid', 'blocked');
    expect(action).toBe('re-dispatch');
  });
});
// ─── reconcileContestants ────────────────────────────────────────────────────
// Expectations pinned here: one `{ contestantId, action }` decision per
// contestant, in input order, with each action chosen from the contestant's
// status and the state recorded for its task id in the supplied map.
describe('reconcileContestants', () => {
  it('returns one decision per contestant', () => {
    const roster = [
      { contestantId: 'c1', taskId: null, status: 'done' },
      { contestantId: 'c2', taskId: 't1', status: 'running' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
    ];
    const stateByTask = new Map([
      ['t1', 'completed'],
      ['t2', 'running'],
    ]);

    const outcome = reconcileContestants(roster, stateByTask);

    expect(outcome).toHaveLength(3);
    const [first, second, third] = outcome;
    expect(first).toEqual({ contestantId: 'c1', action: 'keep' });
    expect(second).toEqual({ contestantId: 'c2', action: 'mark-done' });
    expect(third).toEqual({ contestantId: 'c3', action: 're-dispatch' });
  });

  it('re-dispatches a running contestant whose taskId is absent from taskStates', () => {
    const roster = [{ contestantId: 'c1', taskId: 'orphan', status: 'running' }];
    const [only] = reconcileContestants(roster, new Map());
    expect(only?.action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant with null taskId', () => {
    const roster = [{ contestantId: 'c1', taskId: null, status: 'running' }];
    const [only] = reconcileContestants(roster, new Map());
    expect(only?.action).toBe('re-dispatch');
  });

  it('returns empty array for no contestants', () => {
    const outcome = reconcileContestants([], new Map());
    expect(outcome).toEqual([]);
  });

  it('keeps a running contestant whose task is pending', () => {
    const roster = [{ contestantId: 'c1', taskId: 't1', status: 'running' }];
    const stateByTask = new Map([['t1', 'pending']]);
    const [only] = reconcileContestants(roster, stateByTask);
    expect(only?.action).toBe('keep');
  });

  it('handles a mixed battle: done/queued kept, stale running re-dispatched', () => {
    const roster = [
      { contestantId: 'c1', taskId: 't1', status: 'done' },
      { contestantId: 'c2', taskId: null, status: 'queued' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
      { contestantId: 'c4', taskId: 't3', status: 'running' },
    ];
    const stateByTask = new Map([
      ['t1', 'completed'],
      ['t2', 'running'], // stuck — process dead
      ['t3', 'pending'], // dispatcher will handle
    ]);

    const outcome = reconcileContestants(roster, stateByTask);
    // Look decisions up by contestant id rather than by position.
    const actionFor = (id: string) =>
      outcome.find((decision) => decision.contestantId === id)?.action;

    expect(actionFor('c1')).toBe('keep');
    expect(actionFor('c2')).toBe('keep');
    expect(actionFor('c3')).toBe('re-dispatch');
    expect(actionFor('c4')).toBe('keep');
  });
});