/**
 * Pure scheduling and classification decisions for the Arena battle-runner.
 * No database, no IO. Mirrors the pattern of flow-runner-decisions.ts.
 *
 * Vocabulary:
 *   local lane — llama-swap-backed contestants, run strictly one at a time
 *   cloud lane — cloud-backed contestants, run all in parallel
 *
 * A contestant's status lifecycle:
 *   queued → running → done | error
 */

import type { BattleType, ContestantLane, TokenBreakdown } from '@boocode/contracts/arena';

// ─── Lane classification ──────────────────────────────────────────────────────

/**
 * Classify a contestant into a lane.
 *
 * Q&A contestants always run on the native (llama-swap) backend → local.
 * Coding contestants: their MODEL is checked against the localModels set
 * (all model IDs served by the local llama-swap server). This means an
 * opencode or qwen contestant pointed at a local model counts as local,
 * which correctly captures GPU-contention and fair benchmarking (ADR 0001).
 *
 * @param battleType  'coding' | 'qa'
 * @param _identity   backend name (coding) or persona name (qa) — not used for lane logic
 * @param model       the contestant's model id
 * @param localModels set of model IDs served by the local llama-swap server
 * @returns 'local' for every Q&A contestant and for coding contestants whose
 *          model is in `localModels`; 'cloud' otherwise
 */
export function classifyLane(
  battleType: BattleType,
  _identity: string,
  model: string,
  localModels: ReadonlySet<string>,
): ContestantLane {
  if (battleType === 'qa') return 'local';
  return localModels.has(model) ? 'local' : 'cloud';
}

// ─── Local-lane queue ─────────────────────────────────────────────────────────

/** Minimal view of a contestant row needed to schedule the local lane. */
export interface ContestantSlot {
  id: string;
  lane: ContestantLane;
  status: string;
}

/**
 * The next queued local contestant to dispatch — the first 'queued' contestant
 * in the local lane, in creation order (caller must supply rows in created_at ASC).
 *
 * @param contestants contestant rows, already ordered by the caller
 * @returns the id of the first local contestant with status 'queued', or null
 *          when the local queue is empty or all local slots are non-queued
 */
export function nextLocalContestant(contestants: readonly ContestantSlot[]): string | null {
  for (const c of contestants) {
    if (c.lane === 'local' && c.status === 'queued') return c.id;
  }
  return null;
}

// ─── Battle completion ────────────────────────────────────────────────────────

/**
 * True when every contestant has reached a terminal state (done | error).
 * Returns false for an empty list — a battle with no contestants never completes.
 *
 * @param contestants rows exposing at least a `status` field
 */
export function isBattleComplete(contestants: readonly { status: string }[]): boolean {
  if (contestants.length === 0) return false;
  return contestants.every((c) => c.status === 'done' || c.status === 'error');
}

// ─── Benchmark ────────────────────────────────────────────────────────────────

/** Result of {@link computeBenchmark}. */
export interface Benchmark {
  /** Wall-clock duration in milliseconds; never negative. */
  durationMs: number;
  /** Tokens per second, or null when it is not computed (see computeBenchmark). */
  tokensPerSec: number | null;
  /** Passed through unchanged from the caller. */
  tokenBreakdown: TokenBreakdown | null;
}

/**
 * Compute the benchmark for a contestant.
 * Wall-clock duration is captured for every contestant; tokens/sec is only
 * meaningful for local (llama-swap) contestants where the model has sole
 * access to the GPU and the measurement is fair.
 *
 * @param startedAt      when the contestant started
 * @param endedAt        when the contestant finished
 * @param costTokens     token count used for the rate, or null when unknown
 * @param lane           the contestant's lane; only 'local' yields a rate
 * @param tokenBreakdown optional breakdown, returned as-is (defaults to null)
 * @returns duration clamped to >= 0, plus tokens/sec which is null unless the
 *          lane is 'local', `costTokens` is non-null and the duration is positive
 */
export function computeBenchmark(
  startedAt: Date,
  endedAt: Date,
  costTokens: number | null,
  lane: ContestantLane,
  tokenBreakdown: TokenBreakdown | null = null,
): Benchmark {
  // Clamp so an end timestamp earlier than the start never yields a negative duration.
  const durationMs = Math.max(0, endedAt.getTime() - startedAt.getTime());
  // `durationMs > 0` also guards the division against a zero-length run.
  const tokensPerSec =
    lane === 'local' && costTokens !== null && durationMs > 0
      ? (costTokens / durationMs) * 1000
      : null;
  return { durationMs, tokensPerSec, tokenBreakdown };
}

// ─── Slug / path helpers ──────────────────────────────────────────────────────

/** Maximum length of a sanitized slug component. */
const MAX_SLUG_LENGTH = 64;

/**
 * Sanitize a string for use as a directory name component.
 * Lowercases, replaces non-alphanumeric runs with '-', trims leading/trailing
 * dashes, and caps at 64 characters.
 *
 * The result never ends with a dash: if the 64-character cut lands on a
 * separator, that trailing dash is removed as well (so the result may be
 * shorter than 64 characters).
 *
 * @param s arbitrary input text
 * @returns a string made only of `a-z`, `0-9` and single interior dashes;
 *          empty when the input contains no ASCII alphanumerics
 */
export function sanitizeSlug(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, MAX_SLUG_LENGTH)
    // Truncation can expose an interior dash as the last character; trim it again.
    .replace(/-+$/, '');
}

/**
 * Build the dated battle slug used as the Arena results folder name.
 * Format: `YYYY-MM-DD-<battleType>-<shortId>`, where the date is the UTC date
 * of `createdAt` and `<shortId>` is the first 8 characters of `battleId` with
 * dashes removed.
 * Deterministic: callers can rebuild it from (id, type, created_at) on resume.
 *
 * @param battleId   the battle's id
 * @param battleType the battle type, embedded verbatim
 * @param createdAt  the battle's creation time
 */
export function buildBattleSlug(battleId: string, battleType: BattleType, createdAt: Date): string {
  // toISOString() is always UTC; the first 10 characters are YYYY-MM-DD.
  const date = createdAt.toISOString().slice(0, 10);
  const shortId = battleId.replace(/-/g, '').slice(0, 8);
  return `${date}-${battleType}-${shortId}`;
}

/**
 * Build the per-contestant results directory name within a battle folder.
 * Format: `<identity>-<model>`, each part passed through {@link sanitizeSlug}.
 *
 * @param identity backend name or persona name of the contestant
 * @param model    the contestant's model id
 */
export function buildContestantDir(identity: string, model: string): string {
  return `${sanitizeSlug(identity)}-${sanitizeSlug(model)}`;
}

// ─── Resume reconciliation ────────────────────────────────────────────────────

/** What startup resume should do with a single contestant. */
export type ContestantResumeAction =
  | 'keep'
  | 're-dispatch'
  | 'mark-done'
  | 'mark-error'
  | 'mark-cancelled';

/** A resume action paired with the contestant it applies to. */
export interface ContestantResumeDecision {
  contestantId: string;
  action: ContestantResumeAction;
}

/**
 * Decide what to do with ONE contestant during startup resume.
 * Mirrors reconcileResumeStep from flow-runner-decisions.ts.
 *
 * Only contestants whose status is 'running' are reconciled; every other
 * status is left untouched ('keep').
 *
 * @param status    contestants.status
 * @param taskId    contestants.task_id (null when not yet dispatched)
 * @param taskState tasks.state for taskId, or null if the task row is absent
 * @returns the action to apply to this contestant
 */
export function reconcileContestantResume(
  status: string,
  taskId: string | null,
  taskState: string | null,
): ContestantResumeAction {
  if (status !== 'running') return 'keep';
  // Marked running but no task id, or the task row is missing: dispatch again.
  if (!taskId || taskState === null) return 're-dispatch';
  switch (taskState) {
    case 'completed':
      return 'mark-done';
    case 'failed':
      return 'mark-error';
    case 'cancelled':
      return 'mark-cancelled';
    case 'pending':
      return 'keep'; // dispatcher startup poll will run it normally
    default:
      return 're-dispatch'; // 'running'/'blocked' — process is dead
  }
}

/**
 * Reconcile every contestant of an in-flight battle for startup resume.
 * Returns one decision per contestant, in input order. Pure — no IO.
 *
 * @param contestants contestant rows of the battle
 * @param taskStates  task id → tasks.state; a missing entry is treated as an
 *                    absent task row
 */
export function reconcileContestants(
  contestants: ReadonlyArray<{ contestantId: string; taskId: string | null; status: string }>,
  taskStates: ReadonlyMap<string, string>,
): ContestantResumeDecision[] {
  return contestants.map((c) => ({
    contestantId: c.contestantId,
    action: reconcileContestantResume(
      c.status,
      c.taskId,
      c.taskId ? (taskStates.get(c.taskId) ?? null) : null,
    ),
  }));
}