// boocode/apps/coder/src/services/arena-decisions.ts

/**
 * Pure scheduling and classification decisions for the Arena battle-runner.
 * No database, no IO. Mirrors the pattern of flow-runner-decisions.ts.
 *
 * Vocabulary:
 *   local lane  — llama-swap-backed contestants, run strictly one at a time
 *   cloud lane  — cloud-backed contestants, run all in parallel
 *
 * A contestant's status lifecycle:
 *   queued → running → done | error
 */
import type { BattleType, ContestantLane, TokenBreakdown } from '@boocode/contracts/arena';

// ─── Lane classification ──────────────────────────────────────────────────────

/**
 * Decide which lane a contestant runs in.
 *
 * Q&A battles are always 'local': they run on the native (llama-swap)
 * backend, so the model id is not even looked at. For coding battles the lane
 * follows the MODEL rather than the backend: a model id present in
 * `localModels` is 'local', anything else is 'cloud'. An opencode or qwen
 * contestant pointed at a local model therefore counts as local, which
 * captures GPU contention and keeps benchmarking fair (ADR 0001).
 *
 * @param battleType  'coding' | 'qa'
 * @param _identity   backend name (coding) or persona name (qa); not consulted
 *                    by the lane logic
 * @param model       the contestant's model id
 * @param localModels set of model IDs served by the local llama-swap server
 * @returns 'local' for every Q&A contestant and for coding contestants whose
 *          model is in `localModels`; 'cloud' otherwise
 */
export function classifyLane(
  battleType: BattleType,
  _identity: string,
  model: string,
  localModels: ReadonlySet<string>,
): ContestantLane {
  // Q&A short-circuits before any model lookup.
  if (battleType === 'qa') {
    return 'local';
  }

  // Coding: membership in the locally served model set decides the lane.
  if (localModels.has(model)) {
    return 'local';
  }
  return 'cloud';
}

// ─── Local-lane queue ─────────────────────────────────────────────────────────

/**
 * Minimal view of a contestant as consumed by nextLocalContestant.
 */
export interface ContestantSlot {
  /** Identifier returned by nextLocalContestant when this slot is selected. */
  id: string;
  /** Lane of the contestant; only slots whose lane is 'local' are eligible for the local queue. */
  lane: ContestantLane;
  /** Current status; nextLocalContestant treats only 'queued' as dispatchable. */
  status: string;
}

/**
 * Pick the local-lane contestant that should be dispatched next.
 *
 * Scans the list front to back and selects the first slot that is both in the
 * 'local' lane and still 'queued'. Ordering is the caller's responsibility:
 * rows must be supplied in created_at ASC so that "first" means "oldest".
 *
 * @param contestants contestant slots in creation order
 * @returns the id of the first queued local slot, or null when there is none
 *          (empty list, no local slots, or every local slot is non-queued)
 */
export function nextLocalContestant(contestants: readonly ContestantSlot[]): string | null {
  const head = contestants.find((slot) => slot.status === 'queued' && slot.lane === 'local');
  return head === undefined ? null : head.id;
}

// ─── Battle completion ────────────────────────────────────────────────────────

/**
 * Report whether a battle has finished.
 *
 * A battle is complete once no contestant is left outside the terminal
 * states 'done' and 'error'. An empty contestant list is never considered
 * complete — a battle with no contestants never completes.
 *
 * @param contestants objects exposing each contestant's status
 * @returns true only for a non-empty list in which every status is 'done' or 'error'
 */
export function isBattleComplete(contestants: readonly { status: string }[]): boolean {
  if (contestants.length === 0) {
    return false;
  }
  const anyInFlight = contestants.some((c) => c.status !== 'done' && c.status !== 'error');
  return !anyInFlight;
}

// ─── Benchmark ────────────────────────────────────────────────────────────────

/**
 * Result shape returned by computeBenchmark.
 */
export interface Benchmark {
  /** Elapsed wall-clock time in milliseconds; computeBenchmark clamps negative spans to 0. */
  durationMs: number;
  /**
   * Throughput in tokens per second. computeBenchmark sets this only for the
   * 'local' lane with a non-null token count and a positive duration; null otherwise.
   */
  tokensPerSec: number | null;
  /** Token breakdown handed to computeBenchmark, passed through as-is; null when none was supplied. */
  tokenBreakdown: TokenBreakdown | null;
}

/**
 * Derive the benchmark figures for one contestant run.
 *
 * Wall-clock duration is recorded for every contestant. Tokens/sec is only
 * reported for local (llama-swap) contestants, where the model has sole
 * access to the GPU and the measurement is fair.
 *
 * @param startedAt      when the run started
 * @param endedAt        when the run ended
 * @param costTokens     token count for the run, or null when unknown
 * @param lane           the contestant's lane
 * @param tokenBreakdown optional breakdown, returned unchanged (defaults to null)
 * @returns duration in ms (never negative), tokens/sec or null, and the breakdown
 */
export function computeBenchmark(
  startedAt: Date,
  endedAt: Date,
  costTokens: number | null,
  lane: ContestantLane,
  tokenBreakdown: TokenBreakdown | null = null,
): Benchmark {
  // An end time earlier than the start time is reported as a zero-length run.
  const elapsed = endedAt.getTime() - startedAt.getTime();
  const durationMs = Math.max(0, elapsed);

  let tokensPerSec: number | null = null;
  // Throughput needs a local lane, a known token count, and a non-zero span
  // (the latter also avoids dividing by zero).
  if (lane === 'local' && costTokens !== null && durationMs > 0) {
    tokensPerSec = (costTokens / durationMs) * 1000;
  }

  return { durationMs, tokensPerSec, tokenBreakdown };
}

// ─── Slug / path helpers ──────────────────────────────────────────────────────

/**
 * Sanitize a string for use as a directory name component.
 *
 * Lowercases the input, collapses every run of characters outside [a-z0-9]
 * into a single '-', trims leading/trailing dashes, and caps the result at
 * 64 characters. The cap can land immediately after a separator, so trailing
 * dashes are trimmed once more after truncation — the result never starts or
 * ends with '-'.
 *
 * @param s arbitrary input text
 * @returns a slug of at most 64 characters drawn from [a-z0-9-]; may be the
 *          empty string when the input contains no ASCII letters or digits
 */
export function sanitizeSlug(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 64)
    // Truncation may expose a separator at position 64; drop it.
    .replace(/-+$/, '');
}

/**
 * Build the dated battle slug used as the Arena results folder name.
 *
 * Format: YYYY-MM-DD-<battleType>-<shortId>, where the date is the first ten
 * characters of createdAt's ISO-8601 (UTC) representation and shortId is the
 * first 8 characters of battleId with its dashes removed (the first 8 hex
 * digits for a UUID).
 *
 * Deterministic: callers can rebuild it from (id, type, created_at) on resume.
 *
 * @param battleId   the battle's id
 * @param battleType the battle type, embedded verbatim
 * @param createdAt  the battle's creation timestamp
 * @returns the slug string
 */
export function buildBattleSlug(battleId: string, battleType: BattleType, createdAt: Date): string {
  const isoStamp = createdAt.toISOString();
  const day = isoStamp.substring(0, 10);

  const compactId = battleId.split('-').join('');
  const idPrefix = compactId.substring(0, 8);

  return `${day}-${battleType}-${idPrefix}`;
}

/**
 * Build the per-contestant results directory name within a battle folder.
 *
 * Both parts are passed through sanitizeSlug independently and then joined
 * with a single dash: <sanitized-identity>-<sanitized-model>.
 *
 * @param identity the contestant's identity
 * @param model    the contestant's model id
 * @returns the directory name
 */
export function buildContestantDir(identity: string, model: string): string {
  const identityPart = sanitizeSlug(identity);
  const modelPart = sanitizeSlug(model);
  return identityPart + '-' + modelPart;
}

// ─── Resume reconciliation ────────────────────────────────────────────────────

/**
 * Outcome of reconciling one contestant during startup resume, as produced by
 * reconcileContestantResume:
 *   'keep'           — contestant is not 'running', or its task is still 'pending'
 *   're-dispatch'    — contestant is 'running' but has no task id, no task row,
 *                      or a task in any other state (e.g. 'running'/'blocked')
 *   'mark-done'      — the task is 'completed'
 *   'mark-error'     — the task is 'failed'
 *   'mark-cancelled' — the task is 'cancelled'
 */
export type ContestantResumeAction =
  | 'keep'
  | 're-dispatch'
  | 'mark-done'
  | 'mark-error'
  | 'mark-cancelled';

/**
 * One entry of the list returned by reconcileContestants: a contestant paired
 * with the resume action chosen for it.
 */
export interface ContestantResumeDecision {
  /** The contestantId of the input row this decision belongs to. */
  contestantId: string;
  /** The action reconcileContestantResume selected for that contestant. */
  action: ContestantResumeAction;
}

/**
 * Choose the startup-resume action for a single contestant.
 * Mirrors reconcileResumeStep from flow-runner-decisions.ts.
 *
 * Only contestants that were 'running' need attention; every other status is
 * kept untouched. A running contestant is then judged by its task's state.
 *
 * @param status    contestants.status
 * @param taskId    contestants.task_id (null when not yet dispatched)
 * @param taskState tasks.state for taskId, or null if the task row is absent
 * @returns the action to apply to this contestant
 */
export function reconcileContestantResume(
  status: string,
  taskId: string | null,
  taskState: string | null,
): ContestantResumeAction {
  if (status !== 'running') {
    return 'keep';
  }

  // Running, but never dispatched or the task row is gone: dispatch again.
  const hasTask = Boolean(taskId) && taskState !== null;
  if (!hasTask) {
    return 're-dispatch';
  }

  if (taskState === 'completed') return 'mark-done';
  if (taskState === 'failed') return 'mark-error';
  if (taskState === 'cancelled') return 'mark-cancelled';
  // A pending task is left alone: the dispatcher startup poll will run it normally.
  if (taskState === 'pending') return 'keep';

  // Anything else ('running'/'blocked') — the process is dead, so start over.
  return 're-dispatch';
}

/**
 * Reconcile every contestant of an in-flight battle for startup resume.
 *
 * Each contestant's task state is looked up in `taskStates` by its task id; a
 * contestant without a task id, or whose id has no entry in the map, is
 * reconciled with a null task state. Pure — no IO.
 *
 * @param contestants the battle's contestants (id, task id, status)
 * @param taskStates  task id → tasks.state
 * @returns one decision per contestant, in input order
 */
export function reconcileContestants(
  contestants: ReadonlyArray<{ contestantId: string; taskId: string | null; status: string }>,
  taskStates: ReadonlyMap<string, string>,
): ContestantResumeDecision[] {
  return contestants.map(({ contestantId, taskId, status }) => {
    let taskState: string | null = null;
    if (taskId) {
      taskState = taskStates.get(taskId) ?? null;
    }
    const action = reconcileContestantResume(status, taskId, taskState);
    return { contestantId, action };
  });
}