351 lines
15 KiB
TypeScript
351 lines
15 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
classifyLane,
|
|
nextLocalContestant,
|
|
isBattleComplete,
|
|
computeBenchmark,
|
|
sanitizeSlug,
|
|
buildBattleSlug,
|
|
buildContestantDir,
|
|
reconcileContestantResume,
|
|
reconcileContestants,
|
|
type ContestantSlot,
|
|
} from '../arena-decisions.js';
|
|
|
|
// Local models = what the llama-swap server actually serves.
// Shared fixture: passed to classifyLane as the injected `localModels` set.
const LOCAL_MODELS: ReadonlySet<string> = new Set([
  'qwen3.6-35b-a3b-mxfp4',
  'qwen2.5-coder-7b',
]);
|
|
|
|
// ─── classifyLane ────────────────────────────────────────────────────────────
|
|
|
|
// Pins lane classification as exercised here: 'qa' battles are always 'local';
// 'coding' contestants are 'local' only when their model is a member of the
// localModels set passed as the fourth argument, otherwise 'cloud'.
describe('classifyLane', () => {
  it('classifies qa battles as local regardless of identity or model', () => {
    expect(classifyLane('qa', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('qa', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('local');
    // Even with an empty local-model set a qa battle stays local.
    expect(classifyLane('qa', 'Debugger', 'cloud-model', new Set())).toBe('local');
    expect(classifyLane('qa', 'opencode', 'any-model', LOCAL_MODELS)).toBe('local');
  });

  it('classifies coding contestants as local when model is in localModels', () => {
    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('coding', 'opencode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
    expect(classifyLane('coding', 'qwen', 'qwen2.5-coder-7b', LOCAL_MODELS)).toBe('local');
  });

  it('classifies coding contestants as cloud when model is not in localModels', () => {
    expect(classifyLane('coding', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
    // Same identity as a local case above; only the model differs.
    expect(classifyLane('coding', 'opencode', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
    expect(classifyLane('coding', 'goose', 'gpt-4o', LOCAL_MODELS)).toBe('cloud');
    expect(classifyLane('coding', 'qwen', 'unknown-remote-model', LOCAL_MODELS)).toBe('cloud');
  });

  it('uses the injected localModels set, not a hardcoded list', () => {
    const custom = new Set(['my-local-model']);
    expect(classifyLane('coding', 'any-agent', 'my-local-model', custom)).toBe('local');
    expect(classifyLane('coding', 'boocode', 'other-model', custom)).toBe('cloud');
  });

  it('defaults to cloud for an empty localModels set', () => {
    // A model that is in LOCAL_MODELS must still be cloud when the injected set is empty.
    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', new Set())).toBe('cloud');
    expect(classifyLane('coding', 'native', 'any-local-model', new Set())).toBe('cloud');
  });
});
|
|
|
|
// ─── nextLocalContestant ─────────────────────────────────────────────────────
|
|
|
|
// Pins slot selection as exercised here: the result is the id of the first
// slot (in array order) whose lane is 'local' and status is 'queued', or null
// when there is no such slot.
describe('nextLocalContestant', () => {
  it('returns null for an empty list', () => {
    expect(nextLocalContestant([])).toBeNull();
  });

  it('returns null when no local contestants are queued', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'running' },
      // Queued, but on the cloud lane, so it must not be picked.
      { id: 'c2', lane: 'cloud', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBeNull();
  });

  it('returns the first queued local contestant in order', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'done' },
      { id: 'c2', lane: 'local', status: 'queued' },
      { id: 'c3', lane: 'local', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBe('c2');
  });

  it('skips done/error local contestants and cloud contestants', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'cloud', status: 'queued' },
      { id: 'c2', lane: 'local', status: 'error' },
      { id: 'c3', lane: 'local', status: 'queued' },
    ];
    expect(nextLocalContestant(slots)).toBe('c3');
  });

  it('returns null when all local contestants are done or error', () => {
    const slots: ContestantSlot[] = [
      { id: 'c1', lane: 'local', status: 'done' },
      { id: 'c2', lane: 'local', status: 'error' },
    ];
    expect(nextLocalContestant(slots)).toBeNull();
  });
});
|
|
|
|
// ─── isBattleComplete ────────────────────────────────────────────────────────
|
|
|
|
// Pins battle completion as exercised here: true only when the list is
// non-empty and every status is 'done' or 'error'; any 'running' or 'queued'
// entry, or an empty list, yields false.
describe('isBattleComplete', () => {
  it('returns false for an empty list', () => {
    expect(isBattleComplete([])).toBe(false);
  });

  it('returns true when all contestants are done', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'done' }])).toBe(true);
  });

  it('returns true when all contestants are error', () => {
    expect(isBattleComplete([{ status: 'error' }, { status: 'error' }])).toBe(true);
  });

  it('returns true for a mixed done/error result', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'error' }, { status: 'done' }])).toBe(true);
  });

  it('returns false while any contestant is still running', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'running' }])).toBe(false);
  });

  it('returns false while any contestant is still queued', () => {
    expect(isBattleComplete([{ status: 'done' }, { status: 'queued' }])).toBe(false);
  });
});
|
|
|
|
// ─── computeBenchmark ────────────────────────────────────────────────────────
|
|
|
|
// Pins the benchmark record as exercised here. Arguments in call order: start
// time, end time, token count (nullable), lane, and an optional token
// breakdown. The result exposes durationMs, tokensPerSec and tokenBreakdown.
describe('computeBenchmark', () => {
  // Fixed timestamps exactly five seconds apart keep the arithmetic exact.
  const t0 = new Date('2026-06-06T10:00:00.000Z');
  const t1 = new Date('2026-06-06T10:00:05.000Z'); // +5 000ms

  it('computes duration in ms for both lanes', () => {
    const local = computeBenchmark(t0, t1, 100, 'local');
    expect(local.durationMs).toBe(5000);
    const cloud = computeBenchmark(t0, t1, null, 'cloud');
    expect(cloud.durationMs).toBe(5000);
  });

  it('computes tokens/sec for local lane when costTokens is known', () => {
    const bench = computeBenchmark(t0, t1, 500, 'local');
    expect(bench.tokensPerSec).toBeCloseTo(100, 5); // 500 / 5 = 100 tok/s
  });

  it('omits tokens/sec for cloud lane regardless of costTokens', () => {
    const bench = computeBenchmark(t0, t1, 500, 'cloud');
    expect(bench.tokensPerSec).toBeNull();
  });

  it('omits tokens/sec for local lane when costTokens is null', () => {
    const bench = computeBenchmark(t0, t1, null, 'local');
    expect(bench.tokensPerSec).toBeNull();
  });

  it('returns durationMs = 0 and null tokensPerSec when timestamps are equal', () => {
    // Zero elapsed time: a rate would be a division by zero, so null is expected.
    const bench = computeBenchmark(t0, t0, 100, 'local');
    expect(bench.durationMs).toBe(0);
    expect(bench.tokensPerSec).toBeNull();
  });

  it('clamps negative duration to 0 (clock skew)', () => {
    // Arguments deliberately swapped so the end time precedes the start time.
    const bench = computeBenchmark(t1, t0, 50, 'local');
    expect(bench.durationMs).toBe(0);
    expect(bench.tokensPerSec).toBeNull();
  });

  it('includes token breakdown when provided', () => {
    const breakdown = {
      system: 10,
      user: 20,
      assistant: 30,
      tools: 40,
      reasoning: 5,
      total: 105,
    };
    const bench = computeBenchmark(t0, t1, 500, 'local', breakdown);
    expect(bench.tokenBreakdown).toEqual(breakdown);
  });

  it('defaults token breakdown to null when omitted', () => {
    const bench = computeBenchmark(t0, t1, 500, 'local');
    expect(bench.tokenBreakdown).toBeNull();
  });
});
|
|
|
|
// ─── sanitizeSlug ────────────────────────────────────────────────────────────
|
|
|
|
// Pins slug sanitisation as exercised here: output is lowercase, runs of
// non-alphanumeric characters become a single hyphen, leading/trailing hyphens
// are stripped, and the result is capped at 64 characters.
describe('sanitizeSlug', () => {
  it('lowercases and preserves alphanumeric + hyphens', () => {
    expect(sanitizeSlug('claude')).toBe('claude');
    expect(sanitizeSlug('claude-opus-4-5')).toBe('claude-opus-4-5');
    // Mixed-case input so the "lowercases" half of the title is actually exercised.
    expect(sanitizeSlug('Claude-Opus-4-5')).toBe('claude-opus-4-5');
  });

  it('replaces spaces and special characters with hyphens', () => {
    expect(sanitizeSlug('Code Reviewer')).toBe('code-reviewer');
    expect(sanitizeSlug('native/boocode')).toBe('native-boocode');
    // Dots count as special characters too.
    expect(sanitizeSlug('qwen2.5-coder-35b')).toBe('qwen2-5-coder-35b');
  });

  it('collapses consecutive non-alphanumeric runs to a single hyphen', () => {
    expect(sanitizeSlug('foo bar---baz')).toBe('foo-bar-baz');
  });

  it('strips leading and trailing hyphens', () => {
    expect(sanitizeSlug('---foo---')).toBe('foo');
  });

  it('truncates to 64 characters', () => {
    const long = 'a'.repeat(100);
    const slug = sanitizeSlug(long);
    expect(slug.length).toBe(64);
    // Pin the content as well as the length: truncation must keep the leading characters.
    expect(slug).toBe('a'.repeat(64));
  });
});
|
|
|
|
// ─── buildBattleSlug ─────────────────────────────────────────────────────────
|
|
|
|
// Pins the battle slug shape as exercised here: `<YYYY-MM-DD>-<type>-<8 id chars>`,
// built from the battle id, the battle type and the creation date.
describe('buildBattleSlug', () => {
  it('builds a deterministic dated slug from id, type, and createdAt', () => {
    const id = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890';
    const createdAt = new Date('2026-06-06T12:00:00.000Z');
    const slug = buildBattleSlug(id, 'coding', createdAt);
    expect(slug).toBe('2026-06-06-coding-a1b2c3d4');
  });

  it('includes the battle type in the slug', () => {
    const id = 'aaaaaaaa-0000-0000-0000-000000000000';
    const createdAt = new Date('2026-01-01T00:00:00.000Z');
    expect(buildBattleSlug(id, 'qa', createdAt)).toContain('-qa-');
    expect(buildBattleSlug(id, 'coding', createdAt)).toContain('-coding-');
  });

  it('uses the first 8 hex chars of the uuid (dashes stripped)', () => {
    const id = 'deadbeef-0000-0000-0000-000000000000';
    const slug = buildBattleSlug(id, 'coding', new Date('2026-06-06T00:00:00Z'));
    expect(slug.endsWith('-deadbeef')).toBe(true);
  });
});
|
|
|
|
// ─── buildContestantDir ──────────────────────────────────────────────────────
|
|
|
|
// Pins the contestant directory name as exercised here: the sanitized identity
// and the sanitized model, joined by a single hyphen.
describe('buildContestantDir', () => {
  it('joins sanitized identity and model with a hyphen', () => {
    expect(buildContestantDir('claude', 'claude-opus-4-5')).toBe('claude-claude-opus-4-5');
  });

  it('sanitizes both parts independently', () => {
    // Space in the identity and dot in the model are each turned into hyphens.
    expect(buildContestantDir('Code Reviewer', 'qwen2.5-35b')).toBe('code-reviewer-qwen2-5-35b');
  });
});
|
|
|
|
// ─── reconcileContestantResume ───────────────────────────────────────────────
|
|
|
|
// Pins the per-contestant resume decision as exercised here. Arguments in call
// order: contestant status, task id (nullable) and task state (nullable; null
// is used below for "no task row"). Expected actions are 'keep',
// 're-dispatch', 'mark-done', 'mark-error' and 'mark-cancelled'.
describe('reconcileContestantResume', () => {
  it('keeps non-running contestants regardless of task state', () => {
    for (const status of ['queued', 'done', 'error']) {
      expect(reconcileContestantResume(status, 'tid', 'completed')).toBe('keep');
      expect(reconcileContestantResume(status, null, null)).toBe('keep');
    }
  });

  it('re-dispatches a running contestant with no task_id', () => {
    expect(reconcileContestantResume('running', null, null)).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant whose task row is absent', () => {
    expect(reconcileContestantResume('running', 'tid', null)).toBe('re-dispatch');
  });

  it('marks done when the task completed before the terminal callback ran', () => {
    expect(reconcileContestantResume('running', 'tid', 'completed')).toBe('mark-done');
  });

  it('marks error when the task failed', () => {
    expect(reconcileContestantResume('running', 'tid', 'failed')).toBe('mark-error');
  });

  it('marks cancelled when the task was cancelled', () => {
    expect(reconcileContestantResume('running', 'tid', 'cancelled')).toBe('mark-cancelled');
  });

  it('keeps a running contestant whose task is pending (dispatcher handles it)', () => {
    expect(reconcileContestantResume('running', 'tid', 'pending')).toBe('keep');
  });

  it('re-dispatches when the task is stuck running (process died)', () => {
    expect(reconcileContestantResume('running', 'tid', 'running')).toBe('re-dispatch');
  });

  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
    expect(reconcileContestantResume('running', 'tid', 'blocked')).toBe('re-dispatch');
  });
});
|
|
|
|
// ─── reconcileContestants ────────────────────────────────────────────────────
|
|
|
|
// Pins the batch reconciliation as exercised here: given contestant rows
// ({ contestantId, taskId, status }) and a Map of task id -> task state, the
// result holds one { contestantId, action } decision per contestant, in input order.
describe('reconcileContestants', () => {
  it('returns one decision per contestant', () => {
    const contestants = [
      { contestantId: 'c1', taskId: null, status: 'done' },
      { contestantId: 'c2', taskId: 't1', status: 'running' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
    ];
    const taskStates = new Map([['t1', 'completed'], ['t2', 'running']]);
    const decisions = reconcileContestants(contestants, taskStates);
    expect(decisions).toHaveLength(3);
    expect(decisions[0]).toEqual({ contestantId: 'c1', action: 'keep' });
    expect(decisions[1]).toEqual({ contestantId: 'c2', action: 'mark-done' });
    expect(decisions[2]).toEqual({ contestantId: 'c3', action: 're-dispatch' });
  });

  it('re-dispatches a running contestant whose taskId is absent from taskStates', () => {
    // The task id is set on the contestant but the map has no entry for it.
    const contestants = [{ contestantId: 'c1', taskId: 'orphan', status: 'running' }];
    const decisions = reconcileContestants(contestants, new Map());
    expect(decisions[0]?.action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant with null taskId', () => {
    const contestants = [{ contestantId: 'c1', taskId: null, status: 'running' }];
    const decisions = reconcileContestants(contestants, new Map());
    expect(decisions[0]?.action).toBe('re-dispatch');
  });

  it('returns empty array for no contestants', () => {
    expect(reconcileContestants([], new Map())).toEqual([]);
  });

  it('keeps a running contestant whose task is pending', () => {
    const contestants = [{ contestantId: 'c1', taskId: 't1', status: 'running' }];
    const taskStates = new Map([['t1', 'pending']]);
    const decisions = reconcileContestants(contestants, taskStates);
    expect(decisions[0]?.action).toBe('keep');
  });

  it('handles a mixed battle: done/queued kept, stale running re-dispatched', () => {
    const contestants = [
      { contestantId: 'c1', taskId: 't1', status: 'done' },
      { contestantId: 'c2', taskId: null, status: 'queued' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
      { contestantId: 'c4', taskId: 't3', status: 'running' },
    ];
    const taskStates = new Map([
      ['t1', 'completed'],
      ['t2', 'running'], // stuck — process dead
      ['t3', 'pending'], // dispatcher will handle
    ]);
    const decisions = reconcileContestants(contestants, taskStates);
    // Look decisions up by id so the assertions do not depend on result order.
    expect(decisions.find((d) => d.contestantId === 'c1')?.action).toBe('keep');
    expect(decisions.find((d) => d.contestantId === 'c2')?.action).toBe('keep');
    expect(decisions.find((d) => d.contestantId === 'c3')?.action).toBe('re-dispatch');
    expect(decisions.find((d) => d.contestantId === 'c4')?.action).toBe('keep');
  });
});
|