// boocode/apps/coder/src/services/__tests__/arena-decisions.test.ts

import { describe, it, expect } from 'vitest';
import {
  classifyLane,
  nextLocalContestant,
  isBattleComplete,
  computeBenchmark,
  sanitizeSlug,
  buildBattleSlug,
  buildContestantDir,
  reconcileContestantResume,
  reconcileContestants,
  type ContestantSlot,
} from '../arena-decisions.js';

// Model names these tests treat as locally hosted, i.e. what the llama-swap
// server actually serves. Passed into classifyLane as its injected set.
const LOCAL_MODELS: ReadonlySet<string> = new Set<string>(['qwen3.6-35b-a3b-mxfp4', 'qwen2.5-coder-7b']);

// ─── classifyLane ────────────────────────────────────────────────────────────

// Lane classification: 'qa' battles are always expected to be local; 'coding'
// contestants are expected to be local only when their model is a member of
// the injected localModels set.
describe('classifyLane', () => {
  // [identity, model] pairs checked against a fixed localModels set.
  type AgentModel = [string, string];

  it('classifies qa battles as local regardless of identity or model', () => {
    // Rows carry their own localModels so the empty-set case fits the table.
    const qaCases: Array<[string, string, ReadonlySet<string>]> = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS],
      ['claude', 'claude-opus-4-5', LOCAL_MODELS],
      ['Debugger', 'cloud-model', new Set<string>()],
      ['opencode', 'any-model', LOCAL_MODELS],
    ];
    for (const [identity, model, localModels] of qaCases) {
      expect(classifyLane('qa', identity, model, localModels)).toBe('local');
    }
  });

  it('classifies coding contestants as local when model is in localModels', () => {
    const locallyServed: AgentModel[] = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4'],
      ['opencode', 'qwen3.6-35b-a3b-mxfp4'],
      ['qwen', 'qwen2.5-coder-7b'],
    ];
    for (const [identity, model] of locallyServed) {
      expect(classifyLane('coding', identity, model, LOCAL_MODELS)).toBe('local');
    }
  });

  it('classifies coding contestants as cloud when model is not in localModels', () => {
    const remotelyServed: AgentModel[] = [
      ['claude', 'claude-opus-4-5'],
      ['opencode', 'claude-opus-4-5'],
      ['goose', 'gpt-4o'],
      ['qwen', 'unknown-remote-model'],
    ];
    for (const [identity, model] of remotelyServed) {
      expect(classifyLane('coding', identity, model, LOCAL_MODELS)).toBe('cloud');
    }
  });

  it('uses the injected localModels set, not a hardcoded list', () => {
    const injected = new Set(['my-local-model']);

    const member = classifyLane('coding', 'any-agent', 'my-local-model', injected);
    expect(member).toBe('local');

    const nonMember = classifyLane('coding', 'boocode', 'other-model', injected);
    expect(nonMember).toBe('cloud');
  });

  it('defaults to cloud for an empty localModels set', () => {
    const candidates: AgentModel[] = [
      ['boocode', 'qwen3.6-35b-a3b-mxfp4'],
      ['native', 'any-local-model'],
    ];
    for (const [identity, model] of candidates) {
      // A fresh empty set per call, so nothing can match.
      expect(classifyLane('coding', identity, model, new Set<string>())).toBe('cloud');
    }
  });
});

// ─── nextLocalContestant ─────────────────────────────────────────────────────

// Picking the next local contestant: the expected result is the id of the
// first slot that is both in the local lane and queued, or null if none is.
describe('nextLocalContestant', () => {
  // Compact ContestantSlot factory so each scenario reads as one row per slot.
  const slot = (
    id: ContestantSlot['id'],
    lane: ContestantSlot['lane'],
    status: ContestantSlot['status'],
  ): ContestantSlot => ({ id, lane, status });

  it('returns null for an empty list', () => {
    const picked = nextLocalContestant([]);
    expect(picked).toBeNull();
  });

  it('returns null when no local contestants are queued', () => {
    // The only queued slot is in the cloud lane.
    const roster = [slot('c1', 'local', 'running'), slot('c2', 'cloud', 'queued')];
    const picked = nextLocalContestant(roster);
    expect(picked).toBeNull();
  });

  it('returns the first queued local contestant in order', () => {
    const roster = [
      slot('c1', 'local', 'done'),
      slot('c2', 'local', 'queued'),
      slot('c3', 'local', 'queued'),
    ];
    const picked = nextLocalContestant(roster);
    expect(picked).toBe('c2');
  });

  it('skips done/error local contestants and cloud contestants', () => {
    const roster = [
      slot('c1', 'cloud', 'queued'),
      slot('c2', 'local', 'error'),
      slot('c3', 'local', 'queued'),
    ];
    const picked = nextLocalContestant(roster);
    expect(picked).toBe('c3');
  });

  it('returns null when all local contestants are done or error', () => {
    const roster = [slot('c1', 'local', 'done'), slot('c2', 'local', 'error')];
    const picked = nextLocalContestant(roster);
    expect(picked).toBeNull();
  });
});

// ─── isBattleComplete ────────────────────────────────────────────────────────

// Battle completion: expected to be true only for a non-empty list in which
// every contestant status is 'done' or 'error'.
describe('isBattleComplete', () => {
  it('returns false for an empty list', () => {
    const finished = isBattleComplete([]);
    expect(finished).toBe(false);
  });

  it('returns true when all contestants are done', () => {
    const finished = isBattleComplete([{ status: 'done' }, { status: 'done' }]);
    expect(finished).toBe(true);
  });

  it('returns true when all contestants are error', () => {
    const finished = isBattleComplete([{ status: 'error' }, { status: 'error' }]);
    expect(finished).toBe(true);
  });

  it('returns true for a mixed done/error result', () => {
    const finished = isBattleComplete([
      { status: 'done' },
      { status: 'error' },
      { status: 'done' },
    ]);
    expect(finished).toBe(true);
  });

  it('returns false while any contestant is still running', () => {
    const finished = isBattleComplete([{ status: 'done' }, { status: 'running' }]);
    expect(finished).toBe(false);
  });

  it('returns false while any contestant is still queued', () => {
    const finished = isBattleComplete([{ status: 'done' }, { status: 'queued' }]);
    expect(finished).toBe(false);
  });
});

// ─── computeBenchmark ────────────────────────────────────────────────────────

// Benchmark computation: duration is expected for both lanes, tokens/sec only
// for the local lane with a known token count and a positive duration.
describe('computeBenchmark', () => {
  // Two fixed instants exactly 5 000 ms apart.
  const startedAt = new Date('2026-06-06T10:00:00.000Z');
  const finishedAt = new Date('2026-06-06T10:00:05.000Z');

  it('computes duration in ms for both lanes', () => {
    const { durationMs: localDuration } = computeBenchmark(startedAt, finishedAt, 100, 'local');
    expect(localDuration).toBe(5000);

    const { durationMs: cloudDuration } = computeBenchmark(startedAt, finishedAt, null, 'cloud');
    expect(cloudDuration).toBe(5000);
  });

  it('computes tokens/sec for local lane when costTokens is known', () => {
    // 500 tokens over 5 seconds → 100 tok/s.
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, 500, 'local');
    expect(tokensPerSec).toBeCloseTo(100, 5);
  });

  it('omits tokens/sec for cloud lane regardless of costTokens', () => {
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, 500, 'cloud');
    expect(tokensPerSec).toBeNull();
  });

  it('omits tokens/sec for local lane when costTokens is null', () => {
    const { tokensPerSec } = computeBenchmark(startedAt, finishedAt, null, 'local');
    expect(tokensPerSec).toBeNull();
  });

  it('returns durationMs = 0 and null tokensPerSec when timestamps are equal', () => {
    const { durationMs, tokensPerSec } = computeBenchmark(startedAt, startedAt, 100, 'local');
    expect(durationMs).toBe(0);
    expect(tokensPerSec).toBeNull();
  });

  it('clamps negative duration to 0 (clock skew)', () => {
    // Arguments deliberately swapped: the end instant precedes the start.
    const { durationMs, tokensPerSec } = computeBenchmark(finishedAt, startedAt, 50, 'local');
    expect(durationMs).toBe(0);
    expect(tokensPerSec).toBeNull();
  });

  it('includes token breakdown when provided', () => {
    const perRole = {
      system: 10,
      user: 20,
      assistant: 30,
      tools: 40,
      reasoning: 5,
      total: 105,
    };
    const { tokenBreakdown } = computeBenchmark(startedAt, finishedAt, 500, 'local', perRole);
    expect(tokenBreakdown).toEqual(perRole);
  });

  it('defaults token breakdown to null when omitted', () => {
    const { tokenBreakdown } = computeBenchmark(startedAt, finishedAt, 500, 'local');
    expect(tokenBreakdown).toBeNull();
  });
});

// ─── sanitizeSlug ────────────────────────────────────────────────────────────

// Slug sanitisation: lowercase output, non-alphanumeric runs become a single
// hyphen, edge hyphens are removed, and the result is capped at 64 characters.
describe('sanitizeSlug', () => {
  // Asserts sanitizeSlug(input) === expected for every [input, expected] row.
  const expectSlugs = (rows: Array<[string, string]>): void => {
    for (const [input, expected] of rows) {
      expect(sanitizeSlug(input)).toBe(expected);
    }
  };

  it('lowercases and preserves alphanumeric + hyphens', () => {
    expectSlugs([
      ['claude', 'claude'],
      ['claude-opus-4-5', 'claude-opus-4-5'],
    ]);
  });

  it('replaces spaces and special characters with hyphens', () => {
    expectSlugs([
      ['Code Reviewer', 'code-reviewer'],
      ['native/boocode', 'native-boocode'],
      ['qwen2.5-coder-35b', 'qwen2-5-coder-35b'],
    ]);
  });

  it('collapses consecutive non-alphanumeric runs to a single hyphen', () => {
    expectSlugs([['foo  bar---baz', 'foo-bar-baz']]);
  });

  it('strips leading and trailing hyphens', () => {
    expectSlugs([['---foo---', 'foo']]);
  });

  it('truncates to 64 characters', () => {
    // 100 characters in, 64 expected out.
    const oversized = 'a'.repeat(100);
    const { length } = sanitizeSlug(oversized);
    expect(length).toBe(64);
  });
});

// ─── buildBattleSlug ─────────────────────────────────────────────────────────

// Battle slugs: expected shape is `<YYYY-MM-DD>-<type>-<first 8 chars of id>`.
describe('buildBattleSlug', () => {
  it('builds a deterministic dated slug from id, type, and createdAt', () => {
    const battleId = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890';
    const noonUtc = new Date('2026-06-06T12:00:00.000Z');
    expect(buildBattleSlug(battleId, 'coding', noonUtc)).toBe('2026-06-06-coding-a1b2c3d4');
  });

  it('includes the battle type in the slug', () => {
    const battleId = 'aaaaaaaa-0000-0000-0000-000000000000';
    const newYear = new Date('2026-01-01T00:00:00.000Z');

    const qaSlug = buildBattleSlug(battleId, 'qa', newYear);
    expect(qaSlug).toContain('-qa-');

    const codingSlug = buildBattleSlug(battleId, 'coding', newYear);
    expect(codingSlug).toContain('-coding-');
  });

  it('uses the first 8 hex chars of the uuid (dashes stripped)', () => {
    const battleId = 'deadbeef-0000-0000-0000-000000000000';
    const midnightUtc = new Date('2026-06-06T00:00:00Z');
    const hasIdSuffix = buildBattleSlug(battleId, 'coding', midnightUtc).endsWith('-deadbeef');
    expect(hasIdSuffix).toBe(true);
  });
});

// ─── buildContestantDir ──────────────────────────────────────────────────────

// Contestant directory names: expected to be the sanitized identity and the
// sanitized model joined by a hyphen.
describe('buildContestantDir', () => {
  it('joins sanitized identity and model with a hyphen', () => {
    const dirName = buildContestantDir('claude', 'claude-opus-4-5');
    expect(dirName).toBe('claude-claude-opus-4-5');
  });

  it('sanitizes both parts independently', () => {
    // The space in the identity and the dot in the model both become hyphens.
    const dirName = buildContestantDir('Code Reviewer', 'qwen2.5-35b');
    expect(dirName).toBe('code-reviewer-qwen2-5-35b');
  });
});

// ─── reconcileContestantResume ───────────────────────────────────────────────

// Resume reconciliation for one contestant. Arguments are
// (contestant status, task id or null, task state or null); the result is the
// action expected on resume.
describe('reconcileContestantResume', () => {
  it('keeps non-running contestants regardless of task state', () => {
    ['queued', 'done', 'error'].forEach((status) => {
      const withFinishedTask = reconcileContestantResume(status, 'tid', 'completed');
      expect(withFinishedTask).toBe('keep');

      const withoutTask = reconcileContestantResume(status, null, null);
      expect(withoutTask).toBe('keep');
    });
  });

  it('re-dispatches a running contestant with no task_id', () => {
    const action = reconcileContestantResume('running', null, null);
    expect(action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant whose task row is absent', () => {
    const action = reconcileContestantResume('running', 'tid', null);
    expect(action).toBe('re-dispatch');
  });

  it('marks done when the task completed before the terminal callback ran', () => {
    const action = reconcileContestantResume('running', 'tid', 'completed');
    expect(action).toBe('mark-done');
  });

  it('marks error when the task failed', () => {
    const action = reconcileContestantResume('running', 'tid', 'failed');
    expect(action).toBe('mark-error');
  });

  it('marks cancelled when the task was cancelled', () => {
    const action = reconcileContestantResume('running', 'tid', 'cancelled');
    expect(action).toBe('mark-cancelled');
  });

  it('keeps a running contestant whose task is pending (dispatcher handles it)', () => {
    const action = reconcileContestantResume('running', 'tid', 'pending');
    expect(action).toBe('keep');
  });

  it('re-dispatches when the task is stuck running (process died)', () => {
    const action = reconcileContestantResume('running', 'tid', 'running');
    expect(action).toBe('re-dispatch');
  });

  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
    const action = reconcileContestantResume('running', 'tid', 'blocked');
    expect(action).toBe('re-dispatch');
  });
});

// ─── reconcileContestants ────────────────────────────────────────────────────

// Batch reconciliation: each contestant is paired with the state of its task
// (looked up by taskId in the supplied map) and gets exactly one decision.
describe('reconcileContestants', () => {
  it('returns one decision per contestant', () => {
    const roster = [
      { contestantId: 'c1', taskId: null, status: 'done' },
      { contestantId: 'c2', taskId: 't1', status: 'running' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
    ];
    const stateByTask = new Map([
      ['t1', 'completed'],
      ['t2', 'running'],
    ]);

    const outcome = reconcileContestants(roster, stateByTask);
    expect(outcome).toHaveLength(3);

    // Decisions are expected in the same order as the input contestants.
    const [first, second, third] = outcome;
    expect(first).toEqual({ contestantId: 'c1', action: 'keep' });
    expect(second).toEqual({ contestantId: 'c2', action: 'mark-done' });
    expect(third).toEqual({ contestantId: 'c3', action: 're-dispatch' });
  });

  it('re-dispatches a running contestant whose taskId is absent from taskStates', () => {
    const roster = [{ contestantId: 'c1', taskId: 'orphan', status: 'running' }];
    const [only] = reconcileContestants(roster, new Map());
    expect(only?.action).toBe('re-dispatch');
  });

  it('re-dispatches a running contestant with null taskId', () => {
    const roster = [{ contestantId: 'c1', taskId: null, status: 'running' }];
    const [only] = reconcileContestants(roster, new Map());
    expect(only?.action).toBe('re-dispatch');
  });

  it('returns empty array for no contestants', () => {
    const outcome = reconcileContestants([], new Map());
    expect(outcome).toEqual([]);
  });

  it('keeps a running contestant whose task is pending', () => {
    const roster = [{ contestantId: 'c1', taskId: 't1', status: 'running' }];
    const stateByTask = new Map([['t1', 'pending']]);
    const [only] = reconcileContestants(roster, stateByTask);
    expect(only?.action).toBe('keep');
  });

  it('handles a mixed battle: done/queued kept, stale running re-dispatched', () => {
    const roster = [
      { contestantId: 'c1', taskId: 't1', status: 'done' },
      { contestantId: 'c2', taskId: null, status: 'queued' },
      { contestantId: 'c3', taskId: 't2', status: 'running' },
      { contestantId: 'c4', taskId: 't3', status: 'running' },
    ];
    const stateByTask = new Map([
      ['t1', 'completed'],
      ['t2', 'running'], // stuck — process dead
      ['t3', 'pending'], // dispatcher will handle
    ]);
    const outcome = reconcileContestants(roster, stateByTask);

    // Looks up the action decided for a given contestant id (first match).
    const actionFor = (id: string) => outcome.find((d) => d.contestantId === id)?.action;

    expect(actionFor('c1')).toBe('keep');
    expect(actionFor('c2')).toBe('keep');
    expect(actionFor('c3')).toBe('re-dispatch');
    expect(actionFor('c4')).toBe('keep');
  });
});