Files
boocode/apps/coder/src/services/arena-decisions.ts

189 lines
7.0 KiB
TypeScript

/**
* Pure scheduling and classification decisions for the Arena battle-runner.
* No database, no IO. Mirrors the pattern of flow-runner-decisions.ts.
*
* Vocabulary:
* local lane — llama-swap-backed contestants, run strictly one at a time
* cloud lane — cloud-backed contestants, run all in parallel
*
* A contestant's status lifecycle:
* queued → running → done | error
*/
import type { BattleType, ContestantLane, TokenBreakdown } from '@boocode/contracts/arena';
// ─── Lane classification ──────────────────────────────────────────────────────
/**
 * Decide which lane a contestant belongs to.
 *
 * Q&A contestants are always local: they run on the native (llama-swap)
 * backend. For coding contestants only the MODEL matters — a contestant is
 * local exactly when its model id is a member of `localModels`. An opencode
 * or qwen contestant pointed at a local model is therefore local too, which
 * captures GPU contention and keeps benchmarking fair (ADR 0001).
 *
 * @param battleType 'coding' | 'qa'
 * @param _identity backend name (coding) or persona name (qa); accepted but
 *   never consulted for lane logic
 * @param model the contestant's model id
 * @param localModels set of model IDs served by the local llama-swap server
 * @returns 'local' or 'cloud'
 */
export function classifyLane(
  battleType: BattleType,
  _identity: string,
  model: string,
  localModels: ReadonlySet<string>,
): ContestantLane {
  // Short-circuit: the model set is only consulted for non-Q&A battles.
  const runsLocally = battleType === 'qa' || localModels.has(model);
  if (runsLocally) {
    return 'local';
  }
  return 'cloud';
}
// ─── Local-lane queue ─────────────────────────────────────────────────────────
/**
* Minimal view of a contestant as consumed by nextLocalContestant.
*/
export interface ContestantSlot {
// Identifier handed back by nextLocalContestant when this slot is selected.
id: string;
// Lane the contestant runs in; nextLocalContestant only considers 'local'.
lane: ContestantLane;
// Lifecycle status (the file header lists queued → running → done | error).
// Typed as a plain string, so values are not checked at compile time.
status: string;
}
/**
 * Pick the next local-lane contestant to dispatch.
 *
 * Selects the first entry whose lane is 'local' and whose status is 'queued',
 * honouring the order of the supplied array. Callers are responsible for
 * passing rows in creation order (created_at ASC).
 *
 * @param contestants contestant slots in dispatch order
 * @returns the id of the first queued local contestant, or null when there is
 *   none (empty input, or no local slot is still queued)
 */
export function nextLocalContestant(contestants: readonly ContestantSlot[]): string | null {
  const candidate = contestants.find(
    (slot) => slot.lane === 'local' && slot.status === 'queued',
  );
  return candidate === undefined ? null : candidate.id;
}
// ─── Battle completion ────────────────────────────────────────────────────────
/**
 * Report whether a battle has finished.
 *
 * A battle is complete once it has at least one contestant and none of them
 * is in a non-terminal state. Only 'done' and 'error' count as terminal.
 * An empty list yields false — a battle with no contestants never completes.
 *
 * NOTE(review): reconcileContestantResume can return 'mark-cancelled'; if a
 * caller persists a 'cancelled' status, it would not be treated as terminal
 * here — confirm against the caller.
 *
 * @param contestants objects exposing each contestant's status
 * @returns true when the list is non-empty and every status is terminal
 */
export function isBattleComplete(contestants: readonly { status: string }[]): boolean {
  const hasContestants = contestants.length > 0;
  const anyStillActive = contestants.some(
    (entry) => entry.status !== 'done' && entry.status !== 'error',
  );
  return hasContestants && !anyStillActive;
}
// ─── Benchmark ────────────────────────────────────────────────────────────────
/**
* Result produced by computeBenchmark for one contestant.
*/
export interface Benchmark {
// Wall-clock run time in milliseconds; computeBenchmark floors negative differences at 0.
durationMs: number;
// Throughput in tokens per second; null unless the lane is 'local', a token
// count is available, and the duration is positive.
tokensPerSec: number | null;
// Token breakdown passed through unchanged by computeBenchmark; null when none was supplied.
tokenBreakdown: TokenBreakdown | null;
}
/**
 * Compute the benchmark for a contestant.
 *
 * Wall-clock duration is captured for every contestant; tokens/sec is only
 * meaningful for local (llama-swap) contestants where the model has sole
 * access to the GPU and the measurement is fair.
 *
 * Non-finite inputs never leak into the result: if either date is an Invalid
 * Date the duration is reported as 0, and a non-finite token count yields a
 * null tokens/sec rather than NaN.
 *
 * @param startedAt when the contestant started running
 * @param endedAt when the contestant finished
 * @param costTokens token count used for throughput, or null when unknown
 * @param lane the contestant's lane; throughput is only computed for 'local'
 * @param tokenBreakdown optional breakdown, passed through unchanged
 * @returns duration in ms (never negative, never NaN), tokens/sec or null,
 *   and the supplied token breakdown
 */
export function computeBenchmark(
  startedAt: Date,
  endedAt: Date,
  costTokens: number | null,
  lane: ContestantLane,
  tokenBreakdown: TokenBreakdown | null = null,
): Benchmark {
  const elapsedMs = endedAt.getTime() - startedAt.getTime();
  // getTime() is NaN for an Invalid Date and Math.max(0, NaN) stays NaN, so
  // finiteness has to be checked explicitly before clamping.
  const durationMs = Number.isFinite(elapsedMs) ? Math.max(0, elapsedMs) : 0;
  const tokensPerSec =
    lane === 'local' && costTokens !== null && Number.isFinite(costTokens) && durationMs > 0
      ? (costTokens / durationMs) * 1000
      : null;
  return { durationMs, tokensPerSec, tokenBreakdown };
}
// ─── Slug / path helpers ──────────────────────────────────────────────────────
/**
 * Sanitize a string for use as a directory name component.
 *
 * Lowercases, replaces every run of characters outside [a-z0-9] with a single
 * '-', strips leading dashes, caps the result at 64 characters, and then
 * strips trailing dashes. Trailing dashes are removed AFTER the cap so that a
 * truncation landing on a separator cannot leave the slug ending in '-'.
 *
 * @param s arbitrary input text
 * @returns a slug of at most 64 characters drawn from [a-z0-9-], with no
 *   leading or trailing dash; empty when the input has no ASCII alphanumerics
 */
export function sanitizeSlug(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 64)
    .replace(/-+$/, '');
}
/**
 * Build the dated battle slug used as the Arena results folder name.
 *
 * Format: YYYY-MM-DD-<battleType>-<first 8 characters of the id with dashes removed>
 * The date part is the first 10 characters of `createdAt.toISOString()`.
 * Deterministic: the same (id, type, created_at) always yields the same slug,
 * so callers can rebuild it on resume.
 *
 * @param battleId the battle's id (a UUID)
 * @param battleType the battle type, embedded verbatim
 * @param createdAt the battle's creation timestamp
 * @returns the folder-name slug
 * @throws RangeError from `toISOString()` when `createdAt` is an Invalid Date
 */
export function buildBattleSlug(battleId: string, battleType: BattleType, createdAt: Date): string {
  const isoDay = createdAt.toISOString().substring(0, 10);
  const compactId = battleId.split('-').join('');
  const idPrefix = compactId.substring(0, 8);
  return `${isoDay}-${battleType}-${idPrefix}`;
}
/**
 * Build the per-contestant results directory name within a battle folder.
 *
 * Format: <sanitized-identity>-<sanitized-model>, where each half is passed
 * through sanitizeSlug independently. If either input sanitizes to an empty
 * string, the result starts or ends with the joining '-'.
 *
 * @param identity the contestant's identity (backend or persona name)
 * @param model the contestant's model id
 * @returns the directory name
 */
export function buildContestantDir(identity: string, model: string): string {
  const identityPart = sanitizeSlug(identity);
  const modelPart = sanitizeSlug(model);
  return `${identityPart}-${modelPart}`;
}
// ─── Resume reconciliation ────────────────────────────────────────────────────
/**
* Action selected for a contestant during startup resume, as returned by
* reconcileContestantResume. How each action is applied is up to the caller.
*/
export type ContestantResumeAction =
// Returned when the contestant is not 'running', or its task is still 'pending'.
| 'keep'
// Returned when a running contestant has no task id, no task row, or a task in any other state.
| 're-dispatch'
// Returned when the task state is 'completed'.
| 'mark-done'
// Returned when the task state is 'failed'.
| 'mark-error'
// Returned when the task state is 'cancelled'.
| 'mark-cancelled';
/**
* One resume decision: a contestant paired with the action chosen for it.
* reconcileContestants produces one of these per input contestant.
*/
export interface ContestantResumeDecision {
// The contestantId of the input row this decision applies to.
contestantId: string;
// Action selected by reconcileContestantResume for that contestant.
action: ContestantResumeAction;
}
/**
 * Decide what to do with ONE contestant during startup resume.
 * Mirrors reconcileResumeStep from flow-runner-decisions.ts.
 *
 * Only contestants whose status is 'running' are reconciled; every other
 * status is kept as-is. A running contestant with no task id (null or empty)
 * or no task row is re-dispatched. Otherwise the task state decides:
 *   completed → mark-done, failed → mark-error, cancelled → mark-cancelled,
 *   pending → keep, anything else → re-dispatch.
 *
 * @param status contestants.status
 * @param taskId contestants.task_id (null when not yet dispatched)
 * @param taskState tasks.state for taskId, or null if the task row is absent
 * @returns the action to apply to this contestant
 */
export function reconcileContestantResume(
  status: string,
  taskId: string | null,
  taskState: string | null,
): ContestantResumeAction {
  if (status !== 'running') {
    return 'keep';
  }

  const hasTaskRow = Boolean(taskId) && taskState !== null;
  if (!hasTaskRow) {
    return 're-dispatch';
  }

  if (taskState === 'completed') {
    return 'mark-done';
  }
  if (taskState === 'failed') {
    return 'mark-error';
  }
  if (taskState === 'cancelled') {
    return 'mark-cancelled';
  }
  if (taskState === 'pending') {
    // The dispatcher's startup poll will run it normally.
    return 'keep';
  }
  // 'running' / 'blocked' (or any unrecognised state) — the process is dead.
  return 're-dispatch';
}
/**
 * Reconcile every contestant of an in-flight battle for startup resume.
 *
 * For each contestant, looks up the state of its task (if it has a task id)
 * in `taskStates` and delegates the decision to reconcileContestantResume.
 * A contestant without a task id, or whose task id is missing from the map,
 * is reconciled with a null task state. Pure — no IO.
 *
 * @param contestants the battle's contestants
 * @param taskStates task state keyed by task id
 * @returns one decision per contestant, in input order
 */
export function reconcileContestants(
  contestants: ReadonlyArray<{ contestantId: string; taskId: string | null; status: string }>,
  taskStates: ReadonlyMap<string, string>,
): ContestantResumeDecision[] {
  return contestants.map(({ contestantId, taskId, status }) => {
    let taskState: string | null = null;
    if (taskId) {
      taskState = taskStates.get(taskId) ?? null;
    }
    return {
      contestantId,
      action: reconcileContestantResume(status, taskId, taskState),
    };
  });
}