/**
 * Pure scheduling and classification decisions for the Arena battle-runner.
 * No database, no IO. Mirrors the pattern of flow-runner-decisions.ts.
 *
 * Vocabulary:
 *   local lane — llama-swap-backed contestants, run strictly one at a time
 *   cloud lane — cloud-backed contestants, run all in parallel
 *
 * A contestant's status lifecycle:
 *   queued → running → done | error
 */

import type { BattleType, ContestantLane, TokenBreakdown } from '@boocode/contracts/arena';

// ─── Lane classification ──────────────────────────────────────────────────────

/**
 * Classify a contestant into a scheduling lane.
 *
 * Q&A contestants always run on the native (llama-swap) backend → local.
 * Coding contestants are classified by their MODEL, not their backend: the
 * model id is looked up in `localModels` (all model IDs served by the local
 * llama-swap server). An opencode or qwen contestant pointed at a local model
 * therefore counts as local, which captures GPU contention and keeps
 * benchmarking fair (ADR 0001).
 *
 * @param battleType  'coding' | 'qa'
 * @param _identity   backend name (coding) or persona name (qa) — not used for lane logic
 * @param model       the contestant's model id
 * @param localModels set of model IDs served by the local llama-swap server
 * @returns 'local' for every Q&A contestant and for coding contestants whose
 *          model is in `localModels`; 'cloud' otherwise
 */
export function classifyLane(
  battleType: BattleType,
  _identity: string,
  model: string,
  localModels: ReadonlySet<string>,
): ContestantLane {
  // Q&A is local regardless of the model id, so the set is not consulted.
  if (battleType === 'qa') {
    return 'local';
  }
  return localModels.has(model) ? 'local' : 'cloud';
}

// ─── Local-lane queue ─────────────────────────────────────────────────────────

/** Minimal view of a contestant needed to pick the next local-lane dispatch. */
export interface ContestantSlot {
  /** Contestant identifier. */
  id: string;
  /** Scheduling lane the contestant belongs to. */
  lane: ContestantLane;
  /** Lifecycle status, e.g. 'queued', 'running', 'done' or 'error'. */
  status: string;
}

/**
 * Pick the next local-lane contestant to dispatch.
 *
 * Returns the id of the first contestant, in the order supplied, whose lane is
 * 'local' and whose status is 'queued'. The function does not sort: the caller
 * must supply rows in creation order (created_at ASC) for the result to be the
 * oldest queued contestant.
 *
 * @param contestants candidate contestants, in dispatch-priority order
 * @returns the matching contestant's id, or null when the list is empty or no
 *          local contestant is still queued
 */
export function nextLocalContestant(contestants: readonly ContestantSlot[]): string | null {
  const next = contestants.find((c) => c.lane === 'local' && c.status === 'queued');
  return next?.id ?? null;
}

// ─── Battle completion ────────────────────────────────────────────────────────

/**
 * True when every contestant has reached a terminal state (done | error).
 * Returns false for an empty list — a battle with no contestants never completes.
 *
 * NOTE(review): only 'done' and 'error' count as terminal here. The resume
 * logic in this file can also produce a 'mark-cancelled' action; if that is
 * persisted as a distinct status, such a contestant would keep its battle
 * incomplete — confirm against the caller.
 *
 * @param contestants objects exposing each contestant's current status
 * @returns whether the battle as a whole is finished
 */
export function isBattleComplete(contestants: readonly { status: string }[]): boolean {
  // `every` is vacuously true on an empty array, so the empty case is explicit.
  if (contestants.length === 0) {
    return false;
  }
  return contestants.every(({ status }) => status === 'done' || status === 'error');
}

// ─── Benchmark ────────────────────────────────────────────────────────────────

/** Timing and throughput figures recorded for one contestant run. */
export interface Benchmark {
  /** Wall-clock duration of the run in milliseconds. */
  durationMs: number;
  /** Throughput in tokens per second, or null when no rate is reported. */
  tokensPerSec: number | null;
  /** Token breakdown for the run, or null when none is available. */
  tokenBreakdown: TokenBreakdown | null;
}

/**
 * Compute the benchmark for a contestant.
 *
 * Wall-clock duration is captured for every contestant; tokens/sec is only
 * meaningful for local (llama-swap) contestants, where the model has sole
 * access to the GPU and the measurement is fair.
 *
 * @param startedAt      when the contestant started running
 * @param endedAt        when the contestant finished
 * @param costTokens     token count for the run, or null when unknown
 * @param lane           the contestant's lane; only 'local' yields a rate
 * @param tokenBreakdown optional breakdown, returned unchanged (defaults to null)
 * @returns duration in ms (never negative), tokens/sec or null, and the
 *          breakdown as given
 */
export function computeBenchmark(
  startedAt: Date,
  endedAt: Date,
  costTokens: number | null,
  lane: ContestantLane,
  tokenBreakdown: TokenBreakdown | null = null,
): Benchmark {
  // Clamp at zero so an end timestamp earlier than the start never yields a
  // negative duration.
  const durationMs = Math.max(0, endedAt.getTime() - startedAt.getTime());

  // A rate is reported only for local runs with a known token count; the
  // durationMs > 0 guard also rules out division by zero.
  const tokensPerSec =
    lane === 'local' && costTokens !== null && durationMs > 0
      ? (costTokens / durationMs) * 1000
      : null;

  return { durationMs, tokensPerSec, tokenBreakdown };
}

// ─── Slug / path helpers ──────────────────────────────────────────────────────

/**
 * Sanitize a string for use as a directory name component.
 *
 * Lowercases, replaces every run of characters outside [a-z0-9] with a single
 * '-', strips leading dashes, caps the result at 64 characters, and strips
 * trailing dashes.
 *
 * Trailing dashes are stripped AFTER the cap: a cut that lands right after a
 * separator would otherwise leave the slug ending in '-'.
 *
 * @param s arbitrary input text
 * @returns a slug of at most 64 characters drawn from [a-z0-9-] that neither
 *          starts nor ends with '-'; empty when `s` has no ASCII alphanumerics
 */
export function sanitizeSlug(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 64)
    .replace(/-+$/, '');
}

/**
 * Build the dated battle slug used as the Arena results folder name.
 *
 * Format: YYYY-MM-DD-<battleType>-<first 8 characters of the id, dashes removed>
 * Deterministic: callers can rebuild it from (id, type, created_at) on resume.
 *
 * @param battleId   the battle's id (a UUID in practice)
 * @param battleType 'coding' | 'qa'
 * @param createdAt  the battle's creation time
 * @returns the folder slug
 * @throws RangeError if `createdAt` is an invalid Date (from `toISOString`)
 */
export function buildBattleSlug(battleId: string, battleType: BattleType, createdAt: Date): string {
  // toISOString() is always UTC, so the date prefix does not depend on the
  // host timezone.
  const datePart = createdAt.toISOString().slice(0, 10);
  const idPart = battleId.replace(/-/g, '').slice(0, 8);
  return `${datePart}-${battleType}-${idPart}`;
}

/**
 * Build the per-contestant results directory name within a battle folder.
 *
 * Format: <sanitized-identity>-<sanitized-model>
 * Each part is passed through sanitizeSlug independently before joining.
 *
 * @param identity backend name or persona name of the contestant
 * @param model    the contestant's model id
 * @returns the directory name
 */
export function buildContestantDir(identity: string, model: string): string {
  const identityPart = sanitizeSlug(identity);
  const modelPart = sanitizeSlug(model);
  return `${identityPart}-${modelPart}`;
}

// ─── Resume reconciliation ────────────────────────────────────────────────────

/**
 * What the startup resume pass should do with a single contestant.
 *
 *   keep           — leave the contestant untouched
 *   re-dispatch    — start the contestant's work again
 *   mark-done      — record the contestant as finished successfully
 *   mark-error     — record the contestant as failed
 *   mark-cancelled — record the contestant as cancelled
 */
export type ContestantResumeAction =
  | 'keep'
  | 're-dispatch'
  | 'mark-done'
  | 'mark-error'
  | 'mark-cancelled';

/** The resume action chosen for one contestant. */
export interface ContestantResumeDecision {
  /** Id of the contestant the decision applies to. */
  contestantId: string;
  /** What the resume pass should do with that contestant. */
  action: ContestantResumeAction;
}

/**
 * Decide what to do with ONE contestant during startup resume.
 * Mirrors reconcileResumeStep from flow-runner-decisions.ts.
 *
 * Only contestants whose status is 'running' are reconciled; any other status
 * is kept as-is. A running contestant with no task id, or whose task row is
 * absent, is re-dispatched. Otherwise the task's state decides the action.
 *
 * @param status    contestants.status
 * @param taskId    contestants.task_id (null when not yet dispatched)
 * @param taskState tasks.state for taskId, or null if the task row is absent
 * @returns the action to apply to this contestant
 */
export function reconcileContestantResume(
  status: string,
  taskId: string | null,
  taskState: string | null,
): ContestantResumeAction {
  // Anything that was not mid-run needs no reconciliation.
  if (status !== 'running') return 'keep';

  // Marked running but with no task to inspect: start it again.
  if (!taskId || taskState === null) return 're-dispatch';

  switch (taskState) {
    case 'completed':
      return 'mark-done';
    case 'failed':
      return 'mark-error';
    case 'cancelled':
      return 'mark-cancelled';
    case 'pending':
      return 'keep'; // dispatcher startup poll will run it normally
    default:
      return 're-dispatch'; // 'running'/'blocked' — process is dead
  }
}

/**
 * Reconcile every contestant of an in-flight battle for startup resume.
 * Returns one decision per contestant, in input order. Pure — no IO.
 *
 * @param contestants the battle's contestants with their task id and status
 * @param taskStates  task state keyed by task id; a task id with no entry is
 *                    treated as an absent task row
 * @returns one decision per contestant, produced by reconcileContestantResume
 */
export function reconcileContestants(
  contestants: ReadonlyArray<{ contestantId: string; taskId: string | null; status: string }>,
  taskStates: ReadonlyMap<string, string>,
): ContestantResumeDecision[] {
  return contestants.map((c) => {
    // No task id, or no entry in the map, both resolve to a null task state.
    const taskState = c.taskId ? (taskStates.get(c.taskId) ?? null) : null;
    return {
      contestantId: c.contestantId,
      action: reconcileContestantResume(c.status, c.taskId, taskState),
    };
  });
}