feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt

Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: local lane (llama-swap models, serial) and cloud lane (parallel). Added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame. Backend (apps/coder): - arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override - arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL - arena-decisions: status transitions and resume logic (unit-tested) - arena-analyzer-helpers: pure helper functions (unit-tested) - arena-model-call: model call utility for analysis - arena routes: create/get/list/stop/analyze/cross-examine/winner/diff - schema: battles, contestants, cross_examinations tables (idempotent) - remove old /api/arena* routes and tasks.arena_id column Frontend (apps/web): - ArenaLauncherDialog: battle type, prompt, contestant selection - ArenaPane: live roster, streaming output, analysis, cross-exam - DiffView: unified diff with line-by-line color for coding contests - Winner override per-row dropdown (Trophy icon) - battle_updated WS handler for live winner/analysis updates - arena pane kind in Workspace, ChatTabBar, useSidebar Cross-app: - ArenaState and ArenaContestantShape/WsFrame types (contracts) - battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame - manifest.json written per battle results folder - /Arena added to .gitignore Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 23:25:29 +00:00
parent e04d0fdaa8
commit d6d246c15b
34 changed files with 4581 additions and 146 deletions
--- a/apps/coder/src/index.ts
+++ b/apps/coder/src/index.ts
@@ -23,8 +23,8 @@ import { registerAgentSessionRoutes } from './routes/agent-sessions.js';
 import { registerTaskRoutes } from './routes/tasks.js';
 import { registerInboxRoutes } from './routes/inbox.js';
 import { registerStatsRoutes } from './routes/stats.js';
-import { registerArenaRoutes } from './routes/arena.js';
 import { registerRunsRoutes } from './routes/runs.js';
+import { registerArenaRoutes } from './routes/arena.js';
 import { registerProviderRoutes } from './routes/providers.js';
 import { registerWorktreeSafetyRoutes } from './routes/worktree-safety.js';
 import { registerLifecycleRoutes } from './routes/lifecycle.js';
@@ -34,10 +34,13 @@ import { createDispatcher } from './services/dispatcher.js';
 // Orchestrator (Phase 2): DB-backed flow-runner; advances on the dispatcher's
 // onTaskTerminal hook.
 import { createFlowRunner } from './services/flow-runner.js';
+// Arena: DB-backed battle-runner; also advances on the onTaskTerminal hook.
+import { createBattleRunner, type DispatchContestantFn } from './services/arena-runner.js';
+import { createAnalyzer } from './services/arena-analyzer.js';
 import { agentPool } from './services/agent-pool.js';
 import { createOrphanWorktreeReaper } from './services/orphan-worktree-reaper.js';
 import { probeAgents } from './services/agent-probe.js';
-import { getProviderSnapshot, persistProbedModels } from './services/provider-snapshot.js';
+import { getProviderSnapshot, persistProbedModels, fetchLlamaSwapModels } from './services/provider-snapshot.js';
 import { setPermissionHooks } from './services/permission-waiter.js';
 import { publishAgentStatus } from './services/agent-status-publish.js';
 import { homedir } from 'node:os';
@@ -220,31 +223,119 @@ async function main() {

  // Orchestrator (Phase 2): the flow-runner reacts to the dispatcher's
  // onTaskTerminal hook to advance flow_runs. Created before the dispatcher so its
-  // terminal callback can be wired in. Its launch() is driven by the runs route
-  // (a later phase); resume on startup is a later phase too.
+  // terminal callback can be wired in.
  const flowRunner = createFlowRunner({ sql, broker, log: app.log, config });

-  // Phase 4: dispatcher — polls tasks table and runs inference. onTaskTerminal
-  // notifies the flow-runner when a step's task settles (D-2).
+  // Arena SEAM (a): build the local-model set from the live llama-swap model list.
+  // Both bare IDs ('qwen3.6-35b') and prefixed IDs ('llama-swap/qwen3.6-35b') are
+  // included so opencode-style prefixed contestants and native-style bare contestants
+  // both classify correctly as local.
+  const localModelsList = await fetchLlamaSwapModels(config).catch(() => []);
+  const localModels = new Set([
+    ...localModelsList.map((m) => m.id),
+    ...localModelsList.map((m) => `llama-swap/${m.id}`),
+  ]);
+
+  // Arena dispatch function — Phase 4 SEAM (b).
+  // Coding: insert a tasks row with agent=identity (null for native/boocode);
+  //   the dispatcher creates a worktree and runs the external agent (or native).
+  // Q&A: pre-create a session with agent_id stamped to the persona slug so native
+  //   inference loads the persona's system_prompt + tools from AGENTS.md;
+  //   task.session_id is pre-set so runNativeInference reuses the session.
+  const dispatchContestant: DispatchContestantFn = async ({
+    projectId,
+    prompt,
+    identity,
+    model,
+    battleType,
+  }) => {
+    if (battleType === 'qa') {
+      const sessionName = `Arena Q&A [${identity}]: ${prompt.slice(0, 30)}`;
+      const [session] = await sql<{ id: string }[]>`
+        INSERT INTO sessions (project_id, name, model, agent_id, status)
+        VALUES (${projectId}, ${sessionName}, ${model}, ${identity}, 'open')
+        RETURNING id
+      `;
+      const [task] = await sql<{ id: string }[]>`
+        INSERT INTO tasks (project_id, input, model, session_id)
+        VALUES (${projectId}, ${prompt}, ${model}, ${session!.id})
+        RETURNING id
+      `;
+      return { taskId: task!.id, sessionId: session!.id };
+    }
+    // Coding: boocode = native inference (no external agent); any other identity
+    // is an external agent name (claude, opencode, qwen, goose) that maps to
+    // available_agents and gets its own per-task worktree via runExternalAgent.
+    // Session is created lazily by the dispatcher, so sessionId is unknown here.
+    const agentName = identity === 'boocode' ? null : identity;
+    const [task] = await sql<{ id: string }[]>`
+      INSERT INTO tasks (project_id, input, agent, model)
+      VALUES (${projectId}, ${prompt}, ${agentName}, ${model})
+      RETURNING id
+    `;
+    return { taskId: task!.id, sessionId: null };
+  };
+
+  // Arena analyzer: two-stage digest→judge (v1). Pluggable seam — a v2 Han
+  // Orchestrator flow can replace this without schema changes.
+  const analyzer = createAnalyzer({
+    sql,
+    broker,
+    log: app.log,
+    config,
+    localModels,
+  });
+
+  // Arena battle-runner: notified on the same onTaskTerminal hook as the flow-runner.
+  const battleRunner = createBattleRunner({
+    sql,
+    broker,
+    log: app.log,
+    dispatch: dispatchContestant,
+    onBattleComplete: (battleId) => {
+      void analyzer.analyze(battleId);
+    },
+    onCrossExamStart: ({ battleId, crossExamId, identity, model }) => {
+      void analyzer.crossExamine(battleId, crossExamId, { identity, model });
+    },
+    localModels,
+  });
+
+  // Compose onTaskTerminal: both flow-runner and battle-runner are notified.
+  // Each ignores tasks it doesn't own (flow-runner checks flow_steps.task_id;
+  // battle-runner checks contestants.task_id).
+  const onTaskTerminal = (taskId: string, state: string): void => {
+    flowRunner.handleTaskTerminal(taskId, state);
+    battleRunner.handleTaskTerminal(taskId, state);
+  };
+
+  // Phase 4: dispatcher — polls tasks table and runs inference. The composed
+  // onTaskTerminal hook notifies both the flow-runner and the battle-runner when
+  // any task settles.
  const dispatcher = createDispatcher({
    sql,
    inference: inferenceApi,
    broker,
    log: app.log,
    config,
-    onTaskTerminal: flowRunner.handleTaskTerminal,
+    onTaskTerminal,
  });
  dispatcher.start();

-  // Phase 5: re-advance any flow_runs that were 'running' when the service last
-  // stopped (D-9). Runs AFTER dispatcher.start() so re-dispatched 'pending' tasks
-  // are picked up by the dispatcher's startup poll.
+  // Re-advance in-flight flow_runs and battles after a coder restart. Both run
+  // AFTER dispatcher.start() so re-dispatched 'pending' tasks are picked up.
  void flowRunner.initResume().catch((err) => {
    app.log.error(
      { err: err instanceof Error ? err.message : String(err) },
      'flow-runner: initResume failed',
    );
  });
+  void battleRunner.initResume().catch((err) => {
+    app.log.error(
+      { err: err instanceof Error ? err.message : String(err) },
+      'arena: initResume failed',
+    );
+  });

  // v2.6 Phase 3: configure + start the agent-pool lifecycle sweep (idle-TTL +
  // LRU-cap eviction of warm backends, plus each backend's proactive health probe)
@@ -281,8 +372,8 @@ async function main() {
  registerTaskRoutes(app, sql, inferenceApi, dispatcher.cancelExternalTask);
  registerInboxRoutes(app, sql);
  registerStatsRoutes(app, sql);
-  registerArenaRoutes(app, sql);
  registerRunsRoutes(app, sql, flowRunner, dispatcher.cancelExternalTask);
+  registerArenaRoutes(app, sql, battleRunner, dispatcher.cancelExternalTask, config);
  registerProviderRoutes(app, sql, config);
  registerWorktreeSafetyRoutes(app, sql);
  registerLifecycleRoutes(app, sql);
--- a/apps/coder/src/routes/arena.ts
+++ b/apps/coder/src/routes/arena.ts
@@ -1,136 +1,412 @@
 /**
- * v2.0.5: Arena routes — competitive dispatch of the same task to multiple agents.
+ * Arena routes — HTTP surface for the Battle UI.
 *
- * POST /api/arena        — create an arena with 2-5 contestants
- * GET  /api/arena/:id    — get all tasks in an arena
- * POST /api/arena/:id/select/:task_id — mark a task as the arena winner
+ * POST /api/battles                         — launch a battle
+ * GET  /api/battles?project_id=             — list battles for a project
+ * GET  /api/battles/:id                     — one battle + contestants + cross-exams
+ * POST /api/battles/:id/stop                — cancel a running battle
+ * POST /api/battles/:id/analyze             — trigger analysis (Phase 5 fills the logic)
+ * POST /api/battles/:id/cross-examine       — start a cross-examination (Phase 5 fills the logic)
+ *
+ * Mirrors the shape of runs.ts (Orchestrator routes). Battle creation delegates to
+ * the battle-runner; cancellation calls cancelBattle then aborts in-flight tasks
+ * via the dispatcher's cancelExternalTask.
 */
 import type { FastifyInstance } from 'fastify';
 import { z } from 'zod';
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
 import type { Sql } from '../db.js';
+import type { Config } from '../config.js';
+import type { BattleRunner } from '../services/arena-runner.js';
+import type { ExternalCancelFn } from './tasks.js';
+import { arenaModelCall } from '../services/arena-model-call.js';

-const ContestantSchema = z.object({
-  agent: z.string().max(100).optional(),
-  model: z.string().max(200).optional(),
-  mode_id: z.string().max(200).optional(),
-  thinking_option_id: z.string().max(200).optional(),
+// ─── Validation schemas ───────────────────────────────────────────────────────
+
+const UuidParam = z.string().uuid();
+
+const ContestantInput = z.object({
+  identity: z.string().min(1).max(200),
+  model: z.string().min(1).max(200),
 });

-const CreateArenaBody = z.object({
+const CreateBattleBody = z.object({
  project_id: z.string().uuid(),
-  input: z.string().min(1).max(64_000),
-  contestants: z.array(ContestantSchema).min(2).max(5),
+  battle_type: z.enum(['coding', 'qa']),
+  prompt: z.string().min(1).max(64_000),
+  contestants: z
+    .array(ContestantInput)
+    .min(2, 'at least 2 contestants required')
+    .max(6, 'at most 6 contestants allowed'),
 });

-interface TaskRow {
-  id: string;
-  agent: string | null;
-  model: string | null;
-  mode_id: string | null;
-  thinking_option_id: string | null;
-  state: string;
-}
+const ListBattlesQuery = z.object({
+  project_id: z.string().uuid(),
+});

-export function registerArenaRoutes(app: FastifyInstance, sql: Sql): void {
-  // POST /api/arena — create a new arena
-  app.post('/api/arena', async (req, reply) => {
-    const parsed = CreateArenaBody.safeParse(req.body);
+const CrossExamineBody = z.object({
+  identity: z.string().min(1).max(200),
+  model: z.string().min(1).max(200),
+});
+
+const SetWinnerBody = z.object({
+  winner_contestant_id: z.string().uuid().nullable(),
+});
+
+// ─── Route registration ───────────────────────────────────────────────────────
+
+const GeneratePromptBody = z.object({
+  description: z.string().min(1).max(2_000),
+});
+
+export function registerArenaRoutes(
+  app: FastifyInstance,
+  sql: Sql,
+  battleRunner: BattleRunner,
+  cancelExternal: ExternalCancelFn,
+  config: Config,
+): void {
+
+  // POST /api/battles/generate-prompt — draft a fuller battle prompt from a
+  // short description using the default BooChat model. One-shot, non-streaming.
+  // Must be registered BEFORE /api/battles/:id so the literal 'generate-prompt'
+  // path is not mistaken for a UUID param.
+  app.post('/api/battles/generate-prompt', async (req, reply) => {
+    const parsed = GeneratePromptBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

-    const { project_id, input, contestants } = parsed.data;
-    const arenaId = crypto.randomUUID();
+    const { description } = parsed.data;

-    const tasks: TaskRow[] = [];
-    for (const contestant of contestants) {
-      const [task] = await sql<TaskRow[]>`
-        INSERT INTO tasks (project_id, input, agent, model, mode_id, thinking_option_id, arena_id)
-        VALUES (
-          ${project_id},
-          ${input},
-          ${contestant.agent ?? null},
-          ${contestant.model ?? null},
-          ${contestant.mode_id ?? null},
-          ${contestant.thinking_option_id ?? null},
-          ${arenaId}
-        )
-        RETURNING id, agent, model, mode_id, thinking_option_id, state
-      `;
-      tasks.push(task!);
+    try {
+      const prompt = await arenaModelCall({
+        config,
+        model: config.DEFAULT_MODEL,
+        system: [
+          'You are a battle-prompt writer for an AI Arena.',
+          'The user gives you a short description of a coding or Q&A challenge.',
+          'Expand it into a clear, self-contained prompt (2–6 sentences) that any AI model can act on.',
+          'Include specific acceptance criteria where helpful.',
+          'Output ONLY the prompt — no preamble, no labels, no meta-commentary.',
+        ].join(' '),
+        user: description,
+        maxTokens: 400,
+        temperature: 0.6,
+      });
+      return { prompt };
+    } catch (err) {
+      app.log.warn(
+        { err: err instanceof Error ? err.message : String(err) },
+        'arena generate-prompt: model call failed',
+      );
+      reply.code(502);
+      return { error: 'model call failed' };
    }
+  });
+
+  // POST /api/battles — launch a battle
+  app.post('/api/battles', async (req, reply) => {
+    const parsed = CreateBattleBody.safeParse(req.body);
+    if (!parsed.success) {
+      reply.code(400);
+      return { error: 'invalid body', details: parsed.error.flatten() };
+    }
+
+    const { project_id, battle_type, prompt, contestants } = parsed.data;
+
+    // Reject duplicate (identity, model) pairs up front — the schema UNIQUE
+    // constraint would catch it too, but an early 422 is friendlier.
+    const seen = new Set<string>();
+    for (const c of contestants) {
+      const key = `${c.identity}::${c.model}`;
+      if (seen.has(key)) {
+        reply.code(422);
+        return {
+          error: 'duplicate_contestant',
+          message: `duplicate contestant: identity="${c.identity}" model="${c.model}"`,
+        };
+      }
+      seen.add(key);
+    }
+
+    // Verify project exists
+    const [proj] = await sql<{ id: string }[]>`SELECT id FROM projects WHERE id = ${project_id}`;
+    if (!proj) {
+      reply.code(404);
+      return { error: 'project not found' };
+    }
+
+    const { battleId } = await battleRunner.startBattle({
+      projectId: project_id,
+      battleType: battle_type,
+      prompt,
+      contestants,
+    });

    reply.code(201);
-    return {
-      arena_id: arenaId,
-      tasks: tasks.map((t) => ({
-        id: t.id,
-        agent: t.agent,
-        model: t.model,
-        mode_id: t.mode_id,
-        thinking_option_id: t.thinking_option_id,
-        state: t.state,
-      })),
-    };
+    return { battle_id: battleId };
  });

-  // GET /api/arena/:arena_id — list all tasks in an arena
-  app.get<{ Params: { arena_id: string } }>('/api/arena/:arena_id', async (req, reply) => {
-    const { arena_id } = req.params;
-
-    // Validate UUID format
-    const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-    if (!uuidRegex.test(arena_id)) {
+  // GET /api/battles?project_id= — list battles, most-recent-first
+  app.get('/api/battles', async (req, reply) => {
+    const parsed = ListBattlesQuery.safeParse(req.query);
+    if (!parsed.success) {
      reply.code(400);
-      return { error: 'invalid arena_id format' };
+      return { error: 'invalid query', details: parsed.error.flatten() };
    }

-    const tasks = await sql`
-      SELECT id, project_id, state, input, output_summary, agent, model, mode_id, thinking_option_id, execution_path, session_id, started_at, ended_at, created_at, arena_id
-      FROM tasks
-      WHERE arena_id = ${arena_id}
-      ORDER BY created_at
+    const battles = await sql`
+      SELECT id, project_id, battle_type, prompt, status,
+             winner_contestant_id, results_path, error,
+             created_at, updated_at
+      FROM battles
+      WHERE project_id = ${parsed.data.project_id}
+      ORDER BY created_at DESC
+      LIMIT 100
    `;

-    if (tasks.length === 0) {
-      reply.code(404);
-      return { error: 'arena not found' };
-    }
-
-    return { arena_id, tasks };
+    return { battles };
  });

-  // POST /api/arena/:arena_id/select/:task_id — mark the winner
-  app.post<{ Params: { arena_id: string; task_id: string } }>(
-    '/api/arena/:arena_id/select/:task_id',
-    async (req, reply) => {
-      const { arena_id, task_id } = req.params;
-
-      // Verify the task belongs to this arena
-      const rows = await sql<{ id: string; state: string; arena_id: string | null }[]>`
-        SELECT id, state, arena_id FROM tasks WHERE id = ${task_id}
-      `;
-
-      if (rows.length === 0) {
-        reply.code(404);
-        return { error: 'task not found' };
-      }
-
-      const task = rows[0]!;
-      if (task.arena_id !== arena_id) {
-        reply.code(409);
-        return { error: 'task does not belong to this arena' };
-      }
-
-      // Mark as selected via output_summary prefix (lightweight — no schema change)
-      await sql`
-        UPDATE tasks
-        SET output_summary = COALESCE('[SELECTED] ' || output_summary, '[SELECTED]')
-        WHERE id = ${task_id}
-      `;
-
-      return { selected: true, task_id, arena_id };
+  // GET /api/battles/:id — one battle + its contestants + cross-examinations
+  app.get<{ Params: { id: string } }>('/api/battles/:id', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
    }
-  );
+    const id = parsedId.data;
+
+    const [battle] = await sql<{
+      id: string;
+      project_id: string;
+      battle_type: string;
+      prompt: string;
+      status: string;
+      winner_contestant_id: string | null;
+      results_path: string | null;
+      error: string | null;
+      created_at: unknown;
+      updated_at: unknown;
+    }[]>`
+      SELECT id, project_id, battle_type, prompt, status,
+             winner_contestant_id, results_path, error,
+             created_at, updated_at
+      FROM battles WHERE id = ${id}
+    `;
+
+    if (!battle) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+
+    const contestants = await sql`
+      SELECT id, battle_id, identity, model, lane, task_id, worktree_id,
+             status, duration_ms, tokens_per_sec, cost_tokens, result_path, error,
+             created_at, updated_at
+      FROM contestants
+      WHERE battle_id = ${id}
+      ORDER BY created_at ASC
+    `;
+
+    const crossExaminations = await sql`
+      SELECT id, battle_id, identity, model, verdict, created_at
+      FROM cross_examinations
+      WHERE battle_id = ${id}
+      ORDER BY created_at ASC
+    `;
+
+    return { battle, contestants, cross_examinations: crossExaminations };
+  });
+
+  // POST /api/battles/:id/stop — cancel a running battle
+  app.post<{ Params: { id: string } }>('/api/battles/:id/stop', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+    const id = parsedId.data;
+
+    const [row] = await sql<{ id: string; status: string }[]>`
+      SELECT id, status FROM battles WHERE id = ${id}
+    `;
+    if (!row) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+    if (row.status !== 'running') {
+      reply.code(409);
+      return { error: `cannot stop battle in status '${row.status}'` };
+    }
+
+    const { cancelled, taskIds } = await battleRunner.cancelBattle(id);
+    if (!cancelled) {
+      reply.code(409);
+      return { error: 'battle is no longer running' };
+    }
+
+    // Abort any in-flight dispatcher tasks (cloud contestants running externally).
+    for (const taskId of taskIds) {
+      cancelExternal(taskId);
+    }
+
+    return { cancelled: true };
+  });
+
+  // GET /api/battles/:id/analysis — read analysis.md from the battle's results_path
+  app.get<{ Params: { id: string } }>('/api/battles/:id/analysis', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+    const id = parsedId.data;
+
+    const [row] = await sql<{ results_path: string | null }[]>`
+      SELECT results_path FROM battles WHERE id = ${id}
+    `;
+    if (!row) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+    if (!row.results_path) {
+      reply.code(404);
+      return { error: 'analysis not ready' };
+    }
+
+    try {
+      const text = await readFile(join(row.results_path, 'analysis.md'), 'utf8');
+      return { text };
+    } catch {
+      reply.code(404);
+      return { error: 'analysis not ready' };
+    }
+  });
+
+  // POST /api/battles/:id/analyze — trigger or re-trigger analysis
+  app.post<{ Params: { id: string } }>('/api/battles/:id/analyze', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+    const id = parsedId.data;
+
+    const [row] = await sql<{ id: string; status: string }[]>`
+      SELECT id, status FROM battles WHERE id = ${id}
+    `;
+    if (!row) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+    if (row.status === 'running') {
+      reply.code(409);
+      return { error: 'battle is still running — wait for all contestants to finish' };
+    }
+
+    const result = await battleRunner.triggerAnalysis(id);
+    if (!result.triggered) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+
+    reply.code(202);
+    return { triggered: true };
+  });
+
+  // PATCH /api/battles/:id/winner — manually set or clear the winner.
+  // Validates the contestant belongs to the battle; publishes battle_updated so
+  // the pane badge reflects the override immediately. Human is authoritative.
+  app.patch<{ Params: { id: string } }>('/api/battles/:id/winner', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+
+    const parsed = SetWinnerBody.safeParse(req.body);
+    if (!parsed.success) {
+      reply.code(400);
+      return { error: 'invalid body', details: parsed.error.flatten() };
+    }
+
+    const result = await battleRunner.setWinner(parsedId.data, parsed.data.winner_contestant_id);
+    if (!result.ok) {
+      if (result.notFound) { reply.code(404); return { error: 'battle not found' }; }
+      if (result.invalidContestant) { reply.code(422); return { error: 'contestant not found in this battle' }; }
+      reply.code(500); return { error: 'unknown error' };
+    }
+    return { ok: true };
+  });
+
+  // GET /api/battles/:id/contestants/:cid/diff — read the diff.patch for a coding contestant.
+  app.get<{ Params: { id: string; cid: string } }>('/api/battles/:id/contestants/:cid/diff', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    const parsedCid = UuidParam.safeParse(req.params.cid);
+    if (!parsedId.success || !parsedCid.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+
+    const [contestant] = await sql<{ result_path: string | null }[]>`
+      SELECT result_path FROM contestants
+      WHERE id = ${parsedCid.data} AND battle_id = ${parsedId.data}
+    `;
+    if (!contestant) {
+      reply.code(404);
+      return { error: 'contestant not found' };
+    }
+    if (!contestant.result_path) {
+      reply.code(404);
+      return { error: 'diff not available' };
+    }
+
+    try {
+      const text = await readFile(join(contestant.result_path, 'diff.patch'), 'utf8');
+      return { diff: text };
+    } catch {
+      reply.code(404);
+      return { error: 'diff not available' };
+    }
+  });
+
+  // POST /api/battles/:id/cross-examine — start a cross-examination
+  app.post<{ Params: { id: string } }>('/api/battles/:id/cross-examine', async (req, reply) => {
+    const parsedId = UuidParam.safeParse(req.params.id);
+    if (!parsedId.success) {
+      reply.code(400);
+      return { error: 'invalid id' };
+    }
+    const id = parsedId.data;
+
+    const parsed = CrossExamineBody.safeParse(req.body);
+    if (!parsed.success) {
+      reply.code(400);
+      return { error: 'invalid body', details: parsed.error.flatten() };
+    }
+
+    const [row] = await sql<{ id: string; status: string }[]>`
+      SELECT id, status FROM battles WHERE id = ${id}
+    `;
+    if (!row) {
+      reply.code(404);
+      return { error: 'battle not found' };
+    }
+    if (row.status === 'running') {
+      reply.code(409);
+      return { error: 'battle is still running — cross-examine after all contestants finish' };
+    }
+
+    const { crossExamId } = await battleRunner.startCrossExam(id, {
+      identity: parsed.data.identity,
+      model: parsed.data.model,
+    });
+
+    reply.code(202);
+    return { cross_exam_id: crossExamId };
+  });
 }
--- a/apps/coder/src/schema.sql
+++ b/apps/coder/src/schema.sql
@@ -54,9 +54,6 @@ DO $$ BEGIN
  END IF;
 END $$;

-- v2.0.5: arena support — group tasks into competitive arenas.
-ALTER TABLE tasks ADD COLUMN IF NOT EXISTS arena_id UUID;
-
 -- Human inbox: tasks needing attention
 CREATE OR REPLACE VIEW human_inbox AS
  SELECT * FROM tasks WHERE state IN ('blocked', 'failed');
@@ -81,6 +78,7 @@ ALTER TABLE tasks ADD COLUMN IF NOT EXISTS thinking_option_id TEXT;
 DROP VIEW IF EXISTS human_inbox;
 ALTER TABLE tasks DROP COLUMN IF EXISTS feature_values;
 ALTER TABLE tasks DROP COLUMN IF EXISTS worktree_path;
+ALTER TABLE tasks DROP COLUMN IF EXISTS arena_id;
 CREATE OR REPLACE VIEW human_inbox AS
  SELECT * FROM tasks WHERE state IN ('blocked', 'failed');

@@ -157,7 +155,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS worktrees_active_path_uidx ON worktrees(path)
 DROP TABLE IF EXISTS session_worktrees;

 -- Dispatch hint: which chat (tab) a task belongs to. The coder message route and
-- skills route set it from the frontend tab; session-less creators (arena, MCP,
+-- skills route set it from the frontend tab; session-less creators (MCP,
 -- new_task, generic /api/tasks) leave it NULL and the dispatcher creates a chat.
 ALTER TABLE tasks ADD COLUMN IF NOT EXISTS chat_id UUID REFERENCES chats(id) ON DELETE SET NULL;

@@ -271,7 +269,7 @@ ALTER TABLE agent_sessions ADD CONSTRAINT agent_sessions_backend_chk
  CHECK (backend IN ('opencode_server', 'acp_warm', 'claude_sdk'));

 -- LISTEN/NOTIFY fast path: every tasks INSERT (from any call site — routes,
-- new_task tool, arena, MCP server) fires pg_notify('tasks_new') in the same
+-- new_task tool, MCP server) fires pg_notify('tasks_new') in the same
 -- transaction, so the dispatcher reacts immediately instead of waiting for the
 -- fallback poll. Postgres holds the notification until COMMIT, so the listener
 -- always sees the committed row. A trigger covers all insert paths with no
@@ -357,3 +355,71 @@ DO $$ BEGIN
      CHECK (status IN ('pending', 'running', 'completed', 'failed', 'skipped', 'cancelled'));
  END IF;
 END $$;
+
+-- Arena: battles + contestants + cross_examinations.
+-- project_id carries no FK (matches tasks.project_id + flow_runs.project_id convention).
+-- winner_contestant_id FK is deferred (forward reference): added via guarded ALTER below.
+CREATE TABLE IF NOT EXISTS battles (
+  id                   UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  project_id           UUID NOT NULL,
+  battle_type          TEXT NOT NULL,
+  prompt               TEXT NOT NULL,
+  status               TEXT NOT NULL DEFAULT 'pending',
+  winner_contestant_id UUID,
+  results_path         TEXT,
+  error                TEXT,
+  created_at           TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
+  updated_at           TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
+  CONSTRAINT battles_type_chk   CHECK (battle_type IN ('coding', 'qa')),
+  CONSTRAINT battles_status_chk CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled'))
+);
+
+CREATE TABLE IF NOT EXISTS contestants (
+  id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  battle_id      UUID NOT NULL REFERENCES battles(id) ON DELETE CASCADE,
+  identity       TEXT NOT NULL,
+  model          TEXT NOT NULL,
+  lane           TEXT NOT NULL,
+  task_id        UUID REFERENCES tasks(id) ON DELETE SET NULL,
+  worktree_id    UUID REFERENCES worktrees(id) ON DELETE SET NULL,
+  status         TEXT NOT NULL DEFAULT 'queued',
+  duration_ms    INTEGER,
+  tokens_per_sec DOUBLE PRECISION,
+  cost_tokens    INTEGER,
+  result_path    TEXT,
+  error          TEXT,
+  created_at     TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
+  updated_at     TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
+  CONSTRAINT contestants_lane_chk   CHECK (lane   IN ('local', 'cloud')),
+  CONSTRAINT contestants_status_chk CHECK (status IN ('queued', 'running', 'done', 'error')),
+  UNIQUE (battle_id, identity, model)
+);
+
+CREATE TABLE IF NOT EXISTS cross_examinations (
+  id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  battle_id  UUID NOT NULL REFERENCES battles(id) ON DELETE CASCADE,
+  identity   TEXT NOT NULL,
+  model      TEXT NOT NULL,
+  verdict    TEXT,
+  created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
+);
+
+-- Add the winner FK now that contestants exists.
+DO $$ BEGIN
+  IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'battles_winner_contestant_id_fkey') THEN
+    ALTER TABLE battles ADD CONSTRAINT battles_winner_contestant_id_fkey
+      FOREIGN KEY (winner_contestant_id) REFERENCES contestants(id) ON DELETE SET NULL;
+  END IF;
+END $$;
+
+-- battles query (GET /api/battles?project_id=).
+CREATE INDEX IF NOT EXISTS battles_project_created_idx ON battles(project_id, created_at DESC);
+
+-- Lane-scheduler advance scans (contestants WHERE battle_id = ? AND status = ?).
+CREATE INDEX IF NOT EXISTS contestants_battle_status_idx ON contestants(battle_id, status);
+
+-- onTaskTerminal callback: look up the contestant owning a completed task.
+CREATE INDEX IF NOT EXISTS contestants_task_id_idx ON contestants(task_id);
+
+-- Cross-examination listing per battle.
+CREATE INDEX IF NOT EXISTS cross_examinations_battle_idx ON cross_examinations(battle_id);
--- a/apps/coder/src/services/tests/arena-analyzer-helpers.test.ts
+++ b/apps/coder/src/services/tests/arena-analyzer-helpers.test.ts
@@ -0,0 +1,254 @@
+import { describe, it, expect } from 'vitest';
+import {
+  buildDigestPrompt,
+  buildJudgePrompt,
+  buildCrossExamPrompt,
+  extractWinner,
+  shouldNameWinner,
+  type ContestantDigest,
+  type ContestantDigestInput,
+} from '../arena-analyzer-helpers.js';
+
+// ─── shouldNameWinner ─────────────────────────────────────────────────────────
+
+describe('shouldNameWinner', () => {
+  it('returns false with 0 succeeded contestants', () => {
+    expect(shouldNameWinner(0)).toBe(false);
+  });
+
+  it('returns false with exactly 1 succeeded contestant', () => {
+    expect(shouldNameWinner(1)).toBe(false);
+  });
+
+  it('returns true with exactly 2 succeeded contestants', () => {
+    expect(shouldNameWinner(2)).toBe(true);
+  });
+
+  it('returns true with more than 2 succeeded contestants', () => {
+    expect(shouldNameWinner(3)).toBe(true);
+    expect(shouldNameWinner(6)).toBe(true);
+  });
+});
+
+// ─── extractWinner ────────────────────────────────────────────────────────────
+
+describe('extractWinner', () => {
+  it('extracts identity and model from a WINNER: line', () => {
+    const output = 'Some analysis\n\nWINNER: claude/opus-4-5\n\nMore text.';
+    expect(extractWinner(output)).toEqual({ identity: 'claude', model: 'opus-4-5' });
+  });
+
+  it('is case-insensitive for the WINNER keyword', () => {
+    expect(extractWinner('winner: boocode/qwen3.6-35b')).toEqual({
+      identity: 'boocode',
+      model: 'qwen3.6-35b',
+    });
+    expect(extractWinner('Winner: opencode/some-model')).toEqual({
+      identity: 'opencode',
+      model: 'some-model',
+    });
+  });
+
+  it('returns null when NO_WINNER is declared', () => {
+    expect(extractWinner('WINNER: NO_WINNER')).toBeNull();
+    expect(extractWinner('winner: no_winner')).toBeNull();
+  });
+
+  it('returns null when no WINNER line is present', () => {
+    expect(extractWinner('Just some analysis text with no verdict.')).toBeNull();
+    expect(extractWinner('')).toBeNull();
+  });
+
+  it('returns null when the WINNER line has no slash separator', () => {
+    expect(extractWinner('WINNER: justidentity')).toBeNull();
+  });
+
+  it('returns null when the WINNER line is empty after the colon', () => {
+    expect(extractWinner('WINNER:')).toBeNull();
+    expect(extractWinner('WINNER:   ')).toBeNull();
+  });
+
+  it('handles leading and trailing whitespace around the slash parts', () => {
+    const result = extractWinner('WINNER:  claude / opus-4-5 ');
+    expect(result).toEqual({ identity: 'claude', model: 'opus-4-5' });
+  });
+
+  it('picks the first WINNER line when multiple are present', () => {
+    const output = 'WINNER: claude/opus-4-5\nWINNER: opencode/other-model';
+    expect(extractWinner(output)).toEqual({ identity: 'claude', model: 'opus-4-5' });
+  });
+
+  it('handles model names that contain slashes by splitting at the first slash only', () => {
+    // edge case: model name with a slash — should still split at first slash
+    // identity = 'native', model = 'llama-swap/qwen3.6'
+    const result = extractWinner('WINNER: native/llama-swap/qwen3.6');
+    expect(result).toEqual({ identity: 'native', model: 'llama-swap/qwen3.6' });
+  });
+});
+
+// ─── buildDigestPrompt ────────────────────────────────────────────────────────
+
+describe('buildDigestPrompt', () => {
+  const base: ContestantDigestInput = {
+    identity: 'claude',
+    model: 'opus-4-5',
+    resultMd: '# Output\n\nSome result content.',
+    benchmarkLine: '12000ms',
+  };
+
+  it('returns an object with non-empty system and user strings', () => {
+    const { system, user } = buildDigestPrompt(base);
+    expect(system.length).toBeGreaterThan(0);
+    expect(user.length).toBeGreaterThan(0);
+  });
+
+  it('includes the contestant identity and model in the user prompt', () => {
+    const { user } = buildDigestPrompt(base);
+    expect(user).toContain('claude');
+    expect(user).toContain('opus-4-5');
+  });
+
+  it('includes the benchmark line in the user prompt', () => {
+    const { user } = buildDigestPrompt(base);
+    expect(user).toContain('12000ms');
+  });
+
+  it('includes the result.md content in the user prompt', () => {
+    const { user } = buildDigestPrompt(base);
+    expect(user).toContain('Some result content.');
+  });
+
+  it('includes the diff.patch when provided', () => {
+    const input: ContestantDigestInput = { ...base, diffPatch: '--- a/foo.ts\n+++ b/foo.ts\n+added' };
+    const { user } = buildDigestPrompt(input);
+    expect(user).toContain('added');
+    expect(user).toContain('```diff');
+  });
+
+  it('omits the diff section when diffPatch is undefined', () => {
+    const { user } = buildDigestPrompt(base);
+    expect(user).not.toContain('```diff');
+  });
+
+  it('truncates resultMd longer than 8000 characters', () => {
+    const longResult = 'x'.repeat(10_000);
+    const { user } = buildDigestPrompt({ ...base, resultMd: longResult });
+    // The truncated content must not exceed 8000 chars in the sliced section.
+    // We just check the total user string doesn't balloon unreasonably.
+    expect(user.length).toBeLessThan(15_000);
+  });
+
+  it('truncates diffPatch longer than 5000 characters', () => {
+    const longDiff = '+' + 'x'.repeat(10_000);
+    const { user } = buildDigestPrompt({ ...base, diffPatch: longDiff });
+    expect(user.length).toBeLessThan(16_000);
+  });
+});
+
+// ─── buildJudgePrompt ─────────────────────────────────────────────────────────
+
+describe('buildJudgePrompt', () => {
+  const digests: ContestantDigest[] = [
+    { identity: 'claude', model: 'opus-4-5', digest: 'Good result.', benchmarkLine: '5000ms' },
+    { identity: 'opencode', model: 'qwen3.6', digest: 'Decent result.', benchmarkLine: '8000ms' },
+  ];
+
+  it('includes the original prompt in the user section', () => {
+    const { user } = buildJudgePrompt('Write a sorting algorithm', digests);
+    expect(user).toContain('Write a sorting algorithm');
+  });
+
+  it('includes each contestant heading in the user section', () => {
+    const { user } = buildJudgePrompt('prompt', digests);
+    expect(user).toContain('claude');
+    expect(user).toContain('opus-4-5');
+    expect(user).toContain('opencode');
+    expect(user).toContain('qwen3.6');
+  });
+
+  it('includes each contestant digest text', () => {
+    const { user } = buildJudgePrompt('prompt', digests);
+    expect(user).toContain('Good result.');
+    expect(user).toContain('Decent result.');
+  });
+
+  it('instructs the model to name a WINNER when 2+ digests are provided', () => {
+    const { system } = buildJudgePrompt('prompt', digests);
+    expect(system).toContain('WINNER:');
+  });
+
+  it('instructs the model NOT to name a winner when fewer than 2 digests are provided', () => {
+    const oneDigest = digests.slice(0, 1);
+    const { system } = buildJudgePrompt('prompt', oneDigest);
+    expect(system).toContain('NO_WINNER');
+    expect(system).not.toContain('WINNER: <identity>');
+  });
+
+  it('instructs NO_WINNER when digests list is empty', () => {
+    const { system } = buildJudgePrompt('prompt', []);
+    expect(system).toContain('NO_WINNER');
+  });
+
+  it('truncates originalPrompt longer than 2000 characters', () => {
+    const longPrompt = 'p'.repeat(5_000);
+    const { user } = buildJudgePrompt(longPrompt, digests);
+    // Should not contain more than 2000 chars of the prompt.
+    const promptSection = user.split('# Contestant Digests')[0] ?? '';
+    expect(promptSection.length).toBeLessThan(3_000);
+  });
+});
+
+// ─── buildCrossExamPrompt ─────────────────────────────────────────────────────
+
+describe('buildCrossExamPrompt', () => {
+  const digests: ContestantDigest[] = [
+    { identity: 'claude', model: 'opus-4-5', digest: 'Strong result.', benchmarkLine: '5000ms' },
+    { identity: 'boocode', model: 'qwen3.6-35b', digest: 'Decent result.', benchmarkLine: '12000ms' },
+  ];
+
+  const baseOpts = {
+    originalPrompt: 'Write a sorting algorithm.',
+    digests,
+    analysisContent: '# Arena Analysis\n\nClaude did better.\n\nWINNER: claude/opus-4-5',
+    proposedWinner: 'claude/opus-4-5',
+    examinerIdentity: 'goose',
+    examinerModel: 'gpt-4o',
+  };
+
+  it('includes the examiner identity and model in the system prompt', () => {
+    const { system } = buildCrossExamPrompt(baseOpts);
+    expect(system).toContain('goose');
+    expect(system).toContain('gpt-4o');
+  });
+
+  it('includes the original prompt in the user section', () => {
+    const { user } = buildCrossExamPrompt(baseOpts);
+    expect(user).toContain('Write a sorting algorithm.');
+  });
+
+  it('includes each contestant digest', () => {
+    const { user } = buildCrossExamPrompt(baseOpts);
+    expect(user).toContain('Strong result.');
+    expect(user).toContain('Decent result.');
+  });
+
+  it('includes the proposed analysis content', () => {
+    const { user } = buildCrossExamPrompt(baseOpts);
+    expect(user).toContain('Claude did better.');
+  });
+
+  it('includes the proposed winner when set', () => {
+    const { user } = buildCrossExamPrompt(baseOpts);
+    expect(user).toContain('claude/opus-4-5');
+  });
+
+  it('notes that no winner was proposed when proposedWinner is null', () => {
+    const { user } = buildCrossExamPrompt({ ...baseOpts, proposedWinner: null });
+    expect(user).toContain('No winner was proposed');
+  });
+
+  it('instructs the examiner to provide a VERDICT line', () => {
+    const { system } = buildCrossExamPrompt(baseOpts);
+    expect(system).toContain('VERDICT:');
+  });
+});
--- a/apps/coder/src/services/tests/arena-decisions.test.ts
+++ b/apps/coder/src/services/tests/arena-decisions.test.ts
@@ -0,0 +1,332 @@
+import { describe, it, expect } from 'vitest';
+import {
+  classifyLane,
+  nextLocalContestant,
+  isBattleComplete,
+  computeBenchmark,
+  sanitizeSlug,
+  buildBattleSlug,
+  buildContestantDir,
+  reconcileContestantResume,
+  reconcileContestants,
+  type ContestantSlot,
+} from '../arena-decisions.js';
+
+// Local models = what the llama-swap server actually serves.
+const LOCAL_MODELS: ReadonlySet<string> = new Set([
+  'qwen3.6-35b-a3b-mxfp4',
+  'qwen2.5-coder-7b',
+]);
+
+// ─── classifyLane ────────────────────────────────────────────────────────────
+
+describe('classifyLane', () => {
+  it('classifies qa battles as local regardless of identity or model', () => {
+    expect(classifyLane('qa', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
+    expect(classifyLane('qa', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('local');
+    expect(classifyLane('qa', 'Debugger', 'cloud-model', new Set())).toBe('local');
+    expect(classifyLane('qa', 'opencode', 'any-model', LOCAL_MODELS)).toBe('local');
+  });
+
+  it('classifies coding contestants as local when model is in localModels', () => {
+    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
+    expect(classifyLane('coding', 'opencode', 'qwen3.6-35b-a3b-mxfp4', LOCAL_MODELS)).toBe('local');
+    expect(classifyLane('coding', 'qwen', 'qwen2.5-coder-7b', LOCAL_MODELS)).toBe('local');
+  });
+
+  it('classifies coding contestants as cloud when model is not in localModels', () => {
+    expect(classifyLane('coding', 'claude', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
+    expect(classifyLane('coding', 'opencode', 'claude-opus-4-5', LOCAL_MODELS)).toBe('cloud');
+    expect(classifyLane('coding', 'goose', 'gpt-4o', LOCAL_MODELS)).toBe('cloud');
+    expect(classifyLane('coding', 'qwen', 'unknown-remote-model', LOCAL_MODELS)).toBe('cloud');
+  });
+
+  it('uses the injected localModels set, not a hardcoded list', () => {
+    const custom = new Set(['my-local-model']);
+    expect(classifyLane('coding', 'any-agent', 'my-local-model', custom)).toBe('local');
+    expect(classifyLane('coding', 'boocode', 'other-model', custom)).toBe('cloud');
+  });
+
+  it('defaults to cloud for an empty localModels set', () => {
+    expect(classifyLane('coding', 'boocode', 'qwen3.6-35b-a3b-mxfp4', new Set())).toBe('cloud');
+    expect(classifyLane('coding', 'native', 'any-local-model', new Set())).toBe('cloud');
+  });
+});
+
+// ─── nextLocalContestant ─────────────────────────────────────────────────────
+
+describe('nextLocalContestant', () => {
+  it('returns null for an empty list', () => {
+    expect(nextLocalContestant([])).toBeNull();
+  });
+
+  it('returns null when no local contestants are queued', () => {
+    const slots: ContestantSlot[] = [
+      { id: 'c1', lane: 'local', status: 'running' },
+      { id: 'c2', lane: 'cloud', status: 'queued' },
+    ];
+    expect(nextLocalContestant(slots)).toBeNull();
+  });
+
+  it('returns the first queued local contestant in order', () => {
+    const slots: ContestantSlot[] = [
+      { id: 'c1', lane: 'local', status: 'done' },
+      { id: 'c2', lane: 'local', status: 'queued' },
+      { id: 'c3', lane: 'local', status: 'queued' },
+    ];
+    expect(nextLocalContestant(slots)).toBe('c2');
+  });
+
+  it('skips done/error local contestants and cloud contestants', () => {
+    const slots: ContestantSlot[] = [
+      { id: 'c1', lane: 'cloud', status: 'queued' },
+      { id: 'c2', lane: 'local', status: 'error' },
+      { id: 'c3', lane: 'local', status: 'queued' },
+    ];
+    expect(nextLocalContestant(slots)).toBe('c3');
+  });
+
+  it('returns null when all local contestants are done or error', () => {
+    const slots: ContestantSlot[] = [
+      { id: 'c1', lane: 'local', status: 'done' },
+      { id: 'c2', lane: 'local', status: 'error' },
+    ];
+    expect(nextLocalContestant(slots)).toBeNull();
+  });
+});
+
+// ─── isBattleComplete ────────────────────────────────────────────────────────
+
+describe('isBattleComplete', () => {
+  it('returns false for an empty list', () => {
+    expect(isBattleComplete([])).toBe(false);
+  });
+
+  it('returns true when all contestants are done', () => {
+    expect(isBattleComplete([{ status: 'done' }, { status: 'done' }])).toBe(true);
+  });
+
+  it('returns true when all contestants are error', () => {
+    expect(isBattleComplete([{ status: 'error' }, { status: 'error' }])).toBe(true);
+  });
+
+  it('returns true for a mixed done/error result', () => {
+    expect(isBattleComplete([{ status: 'done' }, { status: 'error' }, { status: 'done' }])).toBe(true);
+  });
+
+  it('returns false while any contestant is still running', () => {
+    expect(isBattleComplete([{ status: 'done' }, { status: 'running' }])).toBe(false);
+  });
+
+  it('returns false while any contestant is still queued', () => {
+    expect(isBattleComplete([{ status: 'done' }, { status: 'queued' }])).toBe(false);
+  });
+});
+
+// ─── computeBenchmark ────────────────────────────────────────────────────────
+
+describe('computeBenchmark', () => {
+  const t0 = new Date('2026-06-06T10:00:00.000Z');
+  const t1 = new Date('2026-06-06T10:00:05.000Z'); // +5 000ms
+
+  it('computes duration in ms for both lanes', () => {
+    const local = computeBenchmark(t0, t1, 100, 'local');
+    expect(local.durationMs).toBe(5000);
+    const cloud = computeBenchmark(t0, t1, null, 'cloud');
+    expect(cloud.durationMs).toBe(5000);
+  });
+
+  it('computes tokens/sec for local lane when costTokens is known', () => {
+    const bench = computeBenchmark(t0, t1, 500, 'local');
+    expect(bench.tokensPerSec).toBeCloseTo(100, 5); // 500 / 5 = 100 tok/s
+  });
+
+  it('omits tokens/sec for cloud lane regardless of costTokens', () => {
+    const bench = computeBenchmark(t0, t1, 500, 'cloud');
+    expect(bench.tokensPerSec).toBeNull();
+  });
+
+  it('omits tokens/sec for local lane when costTokens is null', () => {
+    const bench = computeBenchmark(t0, t1, null, 'local');
+    expect(bench.tokensPerSec).toBeNull();
+  });
+
+  it('returns durationMs = 0 and null tokensPerSec when timestamps are equal', () => {
+    const bench = computeBenchmark(t0, t0, 100, 'local');
+    expect(bench.durationMs).toBe(0);
+    expect(bench.tokensPerSec).toBeNull();
+  });
+
+  it('clamps negative duration to 0 (clock skew)', () => {
+    const bench = computeBenchmark(t1, t0, 50, 'local');
+    expect(bench.durationMs).toBe(0);
+    expect(bench.tokensPerSec).toBeNull();
+  });
+});
+
+// ─── sanitizeSlug ────────────────────────────────────────────────────────────
+
+describe('sanitizeSlug', () => {
+  it('lowercases and preserves alphanumeric + hyphens', () => {
+    expect(sanitizeSlug('claude')).toBe('claude');
+    expect(sanitizeSlug('claude-opus-4-5')).toBe('claude-opus-4-5');
+  });
+
+  it('replaces spaces and special characters with hyphens', () => {
+    expect(sanitizeSlug('Code Reviewer')).toBe('code-reviewer');
+    expect(sanitizeSlug('native/boocode')).toBe('native-boocode');
+    expect(sanitizeSlug('qwen2.5-coder-35b')).toBe('qwen2-5-coder-35b');
+  });
+
+  it('collapses consecutive non-alphanumeric runs to a single hyphen', () => {
+    expect(sanitizeSlug('foo  bar---baz')).toBe('foo-bar-baz');
+  });
+
+  it('strips leading and trailing hyphens', () => {
+    expect(sanitizeSlug('---foo---')).toBe('foo');
+  });
+
+  it('truncates to 64 characters', () => {
+    const long = 'a'.repeat(100);
+    expect(sanitizeSlug(long).length).toBe(64);
+  });
+});
+
+// ─── buildBattleSlug ─────────────────────────────────────────────────────────
+
+describe('buildBattleSlug', () => {
+  it('builds a deterministic dated slug from id, type, and createdAt', () => {
+    const id = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890';
+    const createdAt = new Date('2026-06-06T12:00:00.000Z');
+    const slug = buildBattleSlug(id, 'coding', createdAt);
+    expect(slug).toBe('2026-06-06-coding-a1b2c3d4');
+  });
+
+  it('includes the battle type in the slug', () => {
+    const id = 'aaaaaaaa-0000-0000-0000-000000000000';
+    const createdAt = new Date('2026-01-01T00:00:00.000Z');
+    expect(buildBattleSlug(id, 'qa', createdAt)).toContain('-qa-');
+    expect(buildBattleSlug(id, 'coding', createdAt)).toContain('-coding-');
+  });
+
+  it('uses the first 8 hex chars of the uuid (dashes stripped)', () => {
+    const id = 'deadbeef-0000-0000-0000-000000000000';
+    const slug = buildBattleSlug(id, 'coding', new Date('2026-06-06T00:00:00Z'));
+    expect(slug.endsWith('-deadbeef')).toBe(true);
+  });
+});
+
+// ─── buildContestantDir ──────────────────────────────────────────────────────
+
+describe('buildContestantDir', () => {
+  it('joins sanitized identity and model with a hyphen', () => {
+    expect(buildContestantDir('claude', 'claude-opus-4-5')).toBe('claude-claude-opus-4-5');
+  });
+
+  it('sanitizes both parts independently', () => {
+    expect(buildContestantDir('Code Reviewer', 'qwen2.5-35b')).toBe('code-reviewer-qwen2-5-35b');
+  });
+});
+
+// ─── reconcileContestantResume ───────────────────────────────────────────────
+
+describe('reconcileContestantResume', () => {
+  it('keeps non-running contestants regardless of task state', () => {
+    for (const status of ['queued', 'done', 'error']) {
+      expect(reconcileContestantResume(status, 'tid', 'completed')).toBe('keep');
+      expect(reconcileContestantResume(status, null, null)).toBe('keep');
+    }
+  });
+
+  it('re-dispatches a running contestant with no task_id', () => {
+    expect(reconcileContestantResume('running', null, null)).toBe('re-dispatch');
+  });
+
+  it('re-dispatches a running contestant whose task row is absent', () => {
+    expect(reconcileContestantResume('running', 'tid', null)).toBe('re-dispatch');
+  });
+
+  it('marks done when the task completed before the terminal callback ran', () => {
+    expect(reconcileContestantResume('running', 'tid', 'completed')).toBe('mark-done');
+  });
+
+  it('marks error when the task failed', () => {
+    expect(reconcileContestantResume('running', 'tid', 'failed')).toBe('mark-error');
+  });
+
+  it('marks cancelled when the task was cancelled', () => {
+    expect(reconcileContestantResume('running', 'tid', 'cancelled')).toBe('mark-cancelled');
+  });
+
+  it('keeps a running contestant whose task is pending (dispatcher handles it)', () => {
+    expect(reconcileContestantResume('running', 'tid', 'pending')).toBe('keep');
+  });
+
+  it('re-dispatches when the task is stuck running (process died)', () => {
+    expect(reconcileContestantResume('running', 'tid', 'running')).toBe('re-dispatch');
+  });
+
+  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
+    expect(reconcileContestantResume('running', 'tid', 'blocked')).toBe('re-dispatch');
+  });
+});
+
+// ─── reconcileContestants ────────────────────────────────────────────────────
+
+describe('reconcileContestants', () => {
+  it('returns one decision per contestant', () => {
+    const contestants = [
+      { contestantId: 'c1', taskId: null, status: 'done' },
+      { contestantId: 'c2', taskId: 't1', status: 'running' },
+      { contestantId: 'c3', taskId: 't2', status: 'running' },
+    ];
+    const taskStates = new Map([['t1', 'completed'], ['t2', 'running']]);
+    const decisions = reconcileContestants(contestants, taskStates);
+    expect(decisions).toHaveLength(3);
+    expect(decisions[0]).toEqual({ contestantId: 'c1', action: 'keep' });
+    expect(decisions[1]).toEqual({ contestantId: 'c2', action: 'mark-done' });
+    expect(decisions[2]).toEqual({ contestantId: 'c3', action: 're-dispatch' });
+  });
+
+  it('re-dispatches a running contestant whose taskId is absent from taskStates', () => {
+    const contestants = [{ contestantId: 'c1', taskId: 'orphan', status: 'running' }];
+    const decisions = reconcileContestants(contestants, new Map());
+    expect(decisions[0]?.action).toBe('re-dispatch');
+  });
+
+  it('re-dispatches a running contestant with null taskId', () => {
+    const contestants = [{ contestantId: 'c1', taskId: null, status: 'running' }];
+    const decisions = reconcileContestants(contestants, new Map());
+    expect(decisions[0]?.action).toBe('re-dispatch');
+  });
+
+  it('returns empty array for no contestants', () => {
+    expect(reconcileContestants([], new Map())).toEqual([]);
+  });
+
+  it('keeps a running contestant whose task is pending', () => {
+    const contestants = [{ contestantId: 'c1', taskId: 't1', status: 'running' }];
+    const taskStates = new Map([['t1', 'pending']]);
+    const decisions = reconcileContestants(contestants, taskStates);
+    expect(decisions[0]?.action).toBe('keep');
+  });
+
+  it('handles a mixed battle: done/queued kept, stale running re-dispatched', () => {
+    const contestants = [
+      { contestantId: 'c1', taskId: 't1', status: 'done' },
+      { contestantId: 'c2', taskId: null, status: 'queued' },
+      { contestantId: 'c3', taskId: 't2', status: 'running' },
+      { contestantId: 'c4', taskId: 't3', status: 'running' },
+    ];
+    const taskStates = new Map([
+      ['t1', 'completed'],
+      ['t2', 'running'],  // stuck — process dead
+      ['t3', 'pending'],  // dispatcher will handle
+    ]);
+    const decisions = reconcileContestants(contestants, taskStates);
+    expect(decisions.find((d) => d.contestantId === 'c1')?.action).toBe('keep');
+    expect(decisions.find((d) => d.contestantId === 'c2')?.action).toBe('keep');
+    expect(decisions.find((d) => d.contestantId === 'c3')?.action).toBe('re-dispatch');
+    expect(decisions.find((d) => d.contestantId === 'c4')?.action).toBe('keep');
+  });
+});
--- a/apps/coder/src/services/arena-analyzer-helpers.ts
+++ b/apps/coder/src/services/arena-analyzer-helpers.ts
@@ -0,0 +1,191 @@
+/**
+ * Pure, side-effect-free helpers for the Arena analyzer.
+ * No DB, no IO, no network — safe to unit-test directly.
+ *
+ * Covers: digest-prompt assembly, judge-prompt assembly, winner extraction
+ * from the judge output, the <2-survivors no-winner rule, and the
+ * cross-examination prompt.
+ */
+
+// ─── Shared types ─────────────────────────────────────────────────────────────
+
+export interface ContestantDigestInput {
+  identity: string;
+  model: string;
+  resultMd: string;
+  diffPatch?: string;
+  benchmarkLine: string;
+}
+
+export interface ContestantDigest {
+  identity: string;
+  model: string;
+  digest: string;
+  benchmarkLine: string;
+}
+
+// ─── Digest stage ─────────────────────────────────────────────────────────────
+
+/**
+ * Build the system + user prompts for the per-contestant digest call.
+ * The digest is a short structured summary; it keeps each call's context small
+ * so the downstream judge only sees digests (not raw diffs).
+ */
+export function buildDigestPrompt(input: ContestantDigestInput): { system: string; user: string } {
+  const system =
+    'You are an expert technical analyst evaluating the output of an AI coding or Q&A battle. ' +
+    'Produce a concise structured digest (under 300 words, Markdown bullet points) covering: ' +
+    '(1) correctness and quality, (2) completeness, (3) notable strengths, (4) notable weaknesses or issues. ' +
+    'Do not reference the battle or other contestants — focus only on this submission.';
+
+  const parts: string[] = [
+    `# Contestant: ${input.identity} / ${input.model}`,
+    `\nBenchmark: ${input.benchmarkLine}`,
+    '\n## Result\n',
+    input.resultMd.slice(0, 8_000),
+  ];
+
+  if (input.diffPatch) {
+    parts.push('\n## Code Changes (diff)\n```diff');
+    parts.push(input.diffPatch.slice(0, 5_000));
+    parts.push('```');
+  }
+
+  return { system, user: parts.join('\n') };
+}
+
+// ─── Judge stage ──────────────────────────────────────────────────────────────
+
+/**
+ * Build the system + user prompts for the comparative judge call.
+ * Receives contestant digests (NOT raw diffs) to keep context bounded.
+ *
+ * The judge output must contain a line starting with WINNER: or NO_WINNER.
+ * The caller extracts it with extractWinner().
+ */
+export function buildJudgePrompt(
+  originalPrompt: string,
+  digests: ContestantDigest[],
+): { system: string; user: string } {
+  const canName = shouldNameWinner(digests.length);
+
+  const winnerInstruction = canName
+    ? 'After your comparative analysis, name the best submission on its own line in this exact format:\n' +
+      'WINNER: <identity>/<model>\n' +
+      'where <identity> and <model> exactly match the heading above. No other text on that line.'
+    : 'Fewer than 2 contestants succeeded. Do NOT name a winner. Write the following on its own line:\nNO_WINNER';
+
+  const system =
+    'You are an expert judge for an AI battle. You have received digest summaries of each ' +
+    "contestant's work on the same task. Write a comparative analysis, then follow these instructions:\n" +
+    winnerInstruction;
+
+  const parts: string[] = [
+    '# Original Task Prompt\n',
+    originalPrompt.slice(0, 2_000),
+    '\n# Contestant Digests\n',
+  ];
+
+  for (const d of digests) {
+    parts.push(`\n## ${d.identity} / ${d.model}`);
+    parts.push(`Benchmark: ${d.benchmarkLine}`);
+    parts.push(d.digest);
+  }
+
+  parts.push(
+    '\n# Instructions\nCompare the contestants and follow the winner-naming instructions above.',
+  );
+
+  return { system, user: parts.join('\n') };
+}
+
+// ─── No-winner rule ───────────────────────────────────────────────────────────
+
+/**
+ * Returns true when enough contestants succeeded to name a winner.
+ * Rule: at least 2 must have produced a result. With 0 or 1 success the
+ * analysis must NOT name a winner (no meaningful comparison possible).
+ */
+export function shouldNameWinner(succeededCount: number): boolean {
+  return succeededCount >= 2;
+}
+
+// ─── Winner extraction ────────────────────────────────────────────────────────
+
+/**
+ * Parse the judge's text output and extract the declared winner.
+ * Looks for a line matching: WINNER: <identity>/<model>
+ * Returns null when no valid winner line is found, or when the line contains
+ * NO_WINNER.
+ *
+ * The parse is lenient on surrounding whitespace and case for the keyword.
+ */
+export function extractWinner(judgeOutput: string): { identity: string; model: string } | null {
+  for (const line of judgeOutput.split('\n')) {
+    const trimmed = line.trim();
+    if (!trimmed.toUpperCase().startsWith('WINNER:')) continue;
+
+    const rest = trimmed.slice('WINNER:'.length).trim();
+    if (rest.toUpperCase() === 'NO_WINNER' || rest === '') return null;
+
+    const slashIdx = rest.indexOf('/');
+    if (slashIdx === -1) return null;
+
+    const identity = rest.slice(0, slashIdx).trim();
+    const model = rest.slice(slashIdx + 1).trim();
+    if (identity && model) return { identity, model };
+  }
+  return null;
+}
+
+// ─── Cross-examination stage ──────────────────────────────────────────────────
+
+/**
+ * Build the system + user prompts for a cross-examination call.
+ * The cross-examiner sees the original prompt, contestant digests, and the
+ * proposed analysis, and is asked to challenge the result.
+ */
+export function buildCrossExamPrompt(opts: {
+  originalPrompt: string;
+  digests: ContestantDigest[];
+  analysisContent: string;
+  proposedWinner: string | null;
+  examinerIdentity: string;
+  examinerModel: string;
+}): { system: string; user: string } {
+  const system =
+    `You are ${opts.examinerIdentity} (model: ${opts.examinerModel}), acting as an independent ` +
+    'cross-examiner in an AI battle. Your role is to critically challenge the proposed analysis ' +
+    'and winner, then give your own verdict. Be rigorous but fair. ' +
+    'End your response with your verdict on its own line:\n' +
+    'VERDICT: <identity>/<model>  — if you agree or disagree with the proposed winner but can name one\n' +
+    'VERDICT: NO_WINNER  — if no clear winner exists';
+
+  const parts: string[] = [
+    '# Original Task Prompt\n',
+    opts.originalPrompt.slice(0, 2_000),
+    '\n# Contestant Digests\n',
+  ];
+
+  for (const d of opts.digests) {
+    parts.push(`\n## ${d.identity} / ${d.model}`);
+    parts.push(`Benchmark: ${d.benchmarkLine}`);
+    parts.push(d.digest);
+  }
+
+  parts.push('\n# Proposed Analysis\n');
+  parts.push(opts.analysisContent.slice(0, 5_000));
+
+  if (opts.proposedWinner) {
+    parts.push(`\n*(Proposed winner: ${opts.proposedWinner})*`);
+  } else {
+    parts.push('\n*(No winner was proposed — fewer than 2 contestants succeeded.)*');
+  }
+
+  parts.push(
+    '\n# Your Cross-Examination\n' +
+      'Challenge the analysis above, then give your independent verdict (VERDICT: … on its own line).',
+  );
+
+  return { system, user: parts.join('\n') };
+}
--- a/apps/coder/src/services/arena-analyzer.ts
+++ b/apps/coder/src/services/arena-analyzer.ts
@@ -0,0 +1,496 @@
+/**
+ * Arena Analyzer — pluggable seam for battle analysis and cross-examination.
+ *
+ * The Analyzer interface is the plug point: a v2 Han Orchestrator flow can
+ * replace the v1 two-stage digest→judge implementation without a schema change.
+ *
+ * v1 implementation uses DEFAULT_MODEL via direct llama-swap calls (arenaModelCall):
+ *   Digest stage  — one call per succeeded contestant, concurrent; produces a
+ *                   bounded summary of each result (result.md + diff.patch for
+ *                   coding, result.md for Q&A).
+ *   Judge stage   — one call with all digests + the original prompt; writes
+ *                   analysis.md, names a winner (unless < 2 succeeded), and
+ *                   updates battles.winner_contestant_id.
+ *
+ * Cross-examination:
+ *   Local model   — direct arenaModelCall to llama-swap with the chosen model.
+ *   Cloud model   — inserts a tasks row (triggers the dispatcher via pg_notify);
+ *                   polls for completion; reads output_summary as the verdict.
+ *   In both cases the verdict is written to cross_examinations.verdict, appended
+ *   to <resultsPath>/cross-exam.md, and a battle_updated frame is published.
+ *
+ * Never throws — all errors are caught, logged, and swallowed so the caller
+ * (arena-runner's onBattleComplete / onCrossExamStart) is never wedged.
+ */
+
+import { readFile, writeFile, mkdir } from 'node:fs/promises';
+import { join } from 'node:path';
+import type { Sql } from '../db.js';
+import type { Broker } from '@boocode/server/broker';
+import type { WsFrame } from '@boocode/contracts/ws-frames';
+import type { FastifyBaseLogger } from 'fastify';
+import type { Config } from '../config.js';
+import type { BattleType } from '@boocode/contracts/arena';
+import { arenaModelCall } from './arena-model-call.js';
+import {
+  buildDigestPrompt,
+  buildJudgePrompt,
+  buildCrossExamPrompt,
+  extractWinner,
+  shouldNameWinner,
+  type ContestantDigest,
+} from './arena-analyzer-helpers.js';
+
+// ─── Public interface ─────────────────────────────────────────────────────────
+
+/** Pluggable analysis seam — swap to a Han Orchestrator flow in v2. */
+export interface Analyzer {
+  /** Run the two-stage digest→judge analysis for a completed battle. */
+  analyze(battleId: string): Promise<void>;
+  /**
+   * Run a cross-examination for an already-inserted cross_examinations row.
+   * The result is written back to that row and a battle_updated frame is published.
+   */
+  crossExamine(
+    battleId: string,
+    crossExamId: string,
+    opts: { identity: string; model: string },
+  ): Promise<void>;
+}
+
+// ─── Internal DB row types ────────────────────────────────────────────────────
+
+interface BattleRow {
+  id: string;
+  project_id: string;
+  battle_type: BattleType;
+  prompt: string;
+  status: string;
+  results_path: string | null;
+  winner_contestant_id: string | null;
+}
+
+interface ContestantRow {
+  id: string;
+  identity: string;
+  model: string;
+  lane: string;
+  status: string;
+  result_path: string | null;
+  duration_ms: number | null;
+  tokens_per_sec: number | null;
+}
+
+// ─── Factory ──────────────────────────────────────────────────────────────────
+
+interface AnalyzerDeps {
+  sql: Sql;
+  broker: Broker;
+  log: FastifyBaseLogger;
+  config: Pick<Config, 'LLAMA_SWAP_URL' | 'DEFAULT_MODEL'>;
+  /** Model IDs served by local llama-swap — cross-exam routing uses this. */
+  localModels: ReadonlySet<string>;
+}
+
+export function createAnalyzer(deps: AnalyzerDeps): Analyzer {
+  const { sql, broker, log, config, localModels } = deps;
+
+  // ─── analyze ──────────────────────────────────────────────────────────────
+
+  async function analyze(battleId: string): Promise<void> {
+    try {
+      await runAnalysis(battleId);
+    } catch (err) {
+      log.error(
+        { err: errMsg(err), battleId },
+        'arena-analyzer: analysis failed',
+      );
+    }
+  }
+
+  async function runAnalysis(battleId: string): Promise<void> {
+    const battle = await loadBattle(battleId);
+    if (!battle) {
+      log.warn({ battleId }, 'arena-analyzer: battle not found');
+      return;
+    }
+
+    const contestants = await loadContestants(battleId);
+    const succeeded = contestants.filter((c) => c.status === 'done' && c.result_path);
+
+    log.info(
+      { battleId, total: contestants.length, succeeded: succeeded.length },
+      'arena-analyzer: starting analysis',
+    );
+
+    // Digest stage — concurrent, one call per succeeded contestant.
+    const digests = (
+      await Promise.all(succeeded.map((c) => digestContestant(battle, c)))
+    ).filter((d): d is ContestantDigest => d !== null);
+
+    // Failed contestants are noted in the analysis even if they produced no digest.
+    const failedNotes = contestants
+      .filter((c) => c.status === 'error')
+      .map((c) => `- **${c.identity} / ${c.model}**: failed (no result)\n`);
+
+    // Judge stage — single call with all digests.
+    const { analysisText, winner } = await judgeContestants(battle, digests, failedNotes);
+
+    // Write analysis.md to the battle results folder.
+    const resultsPath = battle.results_path;
+    if (resultsPath) {
+      await mkdir(resultsPath, { recursive: true });
+      await writeFile(join(resultsPath, 'analysis.md'), analysisText, 'utf8');
+    }
+
+    // Resolve the winner to a contestant id and update the battle row.
+    let winnerId: string | null = null;
+    if (winner && shouldNameWinner(succeeded.length)) {
+      const winnerContestant = contestants.find(
+        (c) => c.identity === winner.identity && c.model === winner.model,
+      );
+      if (winnerContestant) {
+        winnerId = winnerContestant.id;
+        await sql`
+          UPDATE battles
+          SET winner_contestant_id = ${winnerId}, updated_at = clock_timestamp()
+          WHERE id = ${battleId}
+        `;
+        log.info({ battleId, winnerId, identity: winner.identity, model: winner.model }, 'arena-analyzer: winner set');
+      } else {
+        log.warn({ battleId, winner }, 'arena-analyzer: judge named a winner not found in contestants');
+      }
+    }
+
+    publishUser({
+      type: 'battle_updated',
+      battle_id: battleId,
+      winner_contestant_id: winnerId,
+      analysis_ready: true,
+    });
+
+    log.info({ battleId }, 'arena-analyzer: analysis complete');
+  }
+
+  // ─── crossExamine ─────────────────────────────────────────────────────────
+
+  async function crossExamine(
+    battleId: string,
+    crossExamId: string,
+    opts: { identity: string; model: string },
+  ): Promise<void> {
+    try {
+      await runCrossExam(battleId, crossExamId, opts);
+    } catch (err) {
+      log.error(
+        { err: errMsg(err), battleId, crossExamId },
+        'arena-analyzer: cross-exam failed',
+      );
+    }
+  }
+
+  async function runCrossExam(
+    battleId: string,
+    crossExamId: string,
+    opts: { identity: string; model: string },
+  ): Promise<void> {
+    const battle = await loadBattle(battleId);
+    if (!battle) {
+      log.warn({ battleId }, 'arena-analyzer: battle not found for cross-exam');
+      return;
+    }
+
+    const contestants = await loadContestants(battleId);
+
+    // Re-read the digests (if contestants have results) for context.
+    const succeeded = contestants.filter((c) => c.status === 'done' && c.result_path);
+    const digests = (
+      await Promise.all(succeeded.map((c) => digestContestant(battle, c)))
+    ).filter((d): d is ContestantDigest => d !== null);
+
+    // Read analysis.md for the proposed analysis content.
+    let analysisContent = '';
+    if (battle.results_path) {
+      analysisContent = await readFile(
+        join(battle.results_path, 'analysis.md'), 'utf8',
+      ).catch(() => '');
+    }
+
+    // Resolve proposed winner label.
+    let proposedWinner: string | null = null;
+    if (battle.winner_contestant_id) {
+      const w = contestants.find((c) => c.id === battle.winner_contestant_id);
+      if (w) proposedWinner = `${w.identity}/${w.model}`;
+    }
+
+    const { system, user } = buildCrossExamPrompt({
+      originalPrompt: battle.prompt,
+      digests,
+      analysisContent,
+      proposedWinner,
+      examinerIdentity: opts.identity,
+      examinerModel: opts.model,
+    });
+
+    log.info({ battleId, crossExamId, identity: opts.identity, model: opts.model }, 'arena-analyzer: running cross-exam');
+
+    const verdict = await executeModelCall({
+      battleId,
+      projectId: battle.project_id,
+      identity: opts.identity,
+      model: opts.model,
+      system,
+      user,
+    });
+
+    // Persist verdict and append to cross-exam.md.
+    await sql`
+      UPDATE cross_examinations
+      SET verdict = ${verdict}
+      WHERE id = ${crossExamId}
+    `;
+
+    if (battle.results_path) {
+      const crossExamPath = join(battle.results_path, 'cross-exam.md');
+      const section =
+        `\n---\n\n# Cross-Examination by ${opts.identity} / ${opts.model}\n\n` +
+        `${verdict}\n`;
+      await writeFile(crossExamPath, section, { flag: 'a', encoding: 'utf8' });
+    }
+
+    publishUser({
+      type: 'battle_updated',
+      battle_id: battleId,
+      cross_exam_id: crossExamId,
+    });
+
+    log.info({ battleId, crossExamId }, 'arena-analyzer: cross-exam complete');
+  }
+
+  // ─── Model call routing ───────────────────────────────────────────────────
+
+  /**
+   * Route a one-shot model call to llama-swap (local) or the task dispatcher
+   * (cloud). Cloud dispatch inserts a tasks row and polls for completion.
+   */
+  async function executeModelCall(opts: {
+    battleId: string;
+    projectId: string;
+    identity: string;
+    model: string;
+    system: string;
+    user: string;
+  }): Promise<string> {
+    const isLocal = localModels.has(opts.model) || localModels.has(`llama-swap/${opts.model}`);
+
+    if (isLocal) {
+      return arenaModelCall({
+        config,
+        model: opts.model,
+        system: opts.system,
+        user: opts.user,
+        maxTokens: 2_000,
+        temperature: 0.3,
+      });
+    }
+
+    // Cloud path: dispatch through the task system and poll for completion.
+    return executeCloudModelCall(opts);
+  }
+
+  async function executeCloudModelCall(opts: {
+    projectId: string;
+    identity: string;
+    model: string;
+    system: string;
+    user: string;
+  }): Promise<string> {
+    // The cross-exam prompt is the full input to the external agent. We embed
+    // the system prompt as a preamble in the user message (external agents don't
+    // take a separate system arg through the tasks dispatcher).
+    const input = `${opts.system}\n\n${opts.user}`;
+
+    // For well-known external agents, stamp the agent name so the dispatcher
+    // routes via PTY/ACP. For unknown identities fall back to native inference
+    // (agent = null → DEFAULT_MODEL text generation).
+    const knownAgents = new Set(['claude', 'opencode', 'qwen', 'goose']);
+    const agentName = knownAgents.has(opts.identity) ? opts.identity : null;
+
+    const [task] = await sql<{ id: string }[]>`
+      INSERT INTO tasks (project_id, input, agent, model)
+      VALUES (${opts.projectId}, ${input}, ${agentName}, ${opts.model})
+      RETURNING id
+    `;
+    const taskId = task!.id;
+
+    log.info({ taskId, identity: opts.identity, model: opts.model }, 'arena-analyzer: cloud cross-exam task dispatched');
+
+    // Poll until terminal (up to 5 minutes).
+    const timeoutMs = 5 * 60 * 1_000;
+    const pollMs = 2_000;
+    const deadline = Date.now() + timeoutMs;
+
+    while (Date.now() < deadline) {
+      await sleep(pollMs);
+      const [row] = await sql<{ state: string; output_summary: string | null }[]>`
+        SELECT state, output_summary FROM tasks WHERE id = ${taskId}
+      `;
+      if (!row) break;
+      if (row.state === 'completed') return row.output_summary ?? '';
+      if (row.state === 'failed' || row.state === 'cancelled') {
+        throw new Error(`cross-exam task ${row.state}: ${row.output_summary ?? ''}`);
+      }
+    }
+
+    throw new Error(`cloud cross-exam task timed out after ${timeoutMs / 1000}s`);
+  }
+
+  // ─── Digest helper ────────────────────────────────────────────────────────
+
+  async function digestContestant(
+    battle: BattleRow,
+    c: ContestantRow,
+  ): Promise<ContestantDigest | null> {
+    if (!c.result_path) return null;
+
+    const resultMd = await readFile(join(c.result_path, 'result.md'), 'utf8').catch(() => '');
+
+    let diffPatch: string | undefined;
+    if (battle.battle_type === 'coding') {
+      diffPatch = await readFile(join(c.result_path, 'diff.patch'), 'utf8').catch(
+        () => undefined,
+      );
+    }
+
+    const benchmarkLine = formatBenchmarkLine(c);
+    const { system, user } = buildDigestPrompt({
+      identity: c.identity,
+      model: c.model,
+      resultMd,
+      diffPatch,
+      benchmarkLine,
+    });
+
+    let digest: string;
+    try {
+      digest = await arenaModelCall({
+        config,
+        model: config.DEFAULT_MODEL,
+        system,
+        user,
+        maxTokens: 500,
+        temperature: 0.3,
+      });
+    } catch (err) {
+      log.warn(
+        { err: errMsg(err), identity: c.identity, model: c.model },
+        'arena-analyzer: digest call failed — skipping contestant',
+      );
+      return null;
+    }
+
+    return { identity: c.identity, model: c.model, digest, benchmarkLine };
+  }
+
+  // ─── Judge helper ─────────────────────────────────────────────────────────
+
+  async function judgeContestants(
+    battle: BattleRow,
+    digests: ContestantDigest[],
+    failedNotes: string[],
+  ): Promise<{ analysisText: string; winner: { identity: string; model: string } | null }> {
+    const { system, user } = buildJudgePrompt(battle.prompt, digests);
+
+    let judgeOutput = '';
+    try {
+      judgeOutput = await arenaModelCall({
+        config,
+        model: config.DEFAULT_MODEL,
+        system,
+        user,
+        maxTokens: 2_000,
+        temperature: 0.3,
+      });
+    } catch (err) {
+      log.error({ err: errMsg(err), battleId: battle.id }, 'arena-analyzer: judge call failed');
+      judgeOutput = '*(Judge call failed — no comparison produced.)*';
+    }
+
+    const winner = shouldNameWinner(digests.length) ? extractWinner(judgeOutput) : null;
+
+    const sections: string[] = [
+      `# Arena Analysis`,
+      `\n**Battle type:** ${battle.battle_type}`,
+    ];
+
+    if (failedNotes.length > 0) {
+      sections.push('\n## Failed Contestants\n');
+      sections.push(...failedNotes);
+    }
+
+    if (digests.length > 0) {
+      sections.push('\n## Contestant Digests\n');
+      for (const d of digests) {
+        sections.push(`### ${d.identity} / ${d.model}`);
+        sections.push(`*Benchmark: ${d.benchmarkLine}*\n`);
+        sections.push(d.digest);
+      }
+    }
+
+    sections.push("\n## Judge's Verdict\n");
+    sections.push(judgeOutput);
+
+    if (winner) {
+      sections.push(`\n## Winner\n**${winner.identity} / ${winner.model}**`);
+    } else {
+      const reason =
+        digests.length < 2
+          ? 'fewer than 2 contestants produced results'
+          : 'no clear winner identified';
+      sections.push(`\n## Winner\n*No winner named (${reason}).*`);
+    }
+
+    return { analysisText: sections.join('\n'), winner };
+  }
+
+  // ─── DB helpers ───────────────────────────────────────────────────────────
+
+  async function loadBattle(battleId: string): Promise<BattleRow | null> {
+    const [b] = await sql<BattleRow[]>`
+      SELECT id, project_id, battle_type, prompt, status, results_path, winner_contestant_id
+      FROM battles WHERE id = ${battleId}
+    `;
+    return b ?? null;
+  }
+
+  async function loadContestants(battleId: string): Promise<ContestantRow[]> {
+    return sql<ContestantRow[]>`
+      SELECT id, identity, model, lane, status, result_path, duration_ms, tokens_per_sec
+      FROM contestants WHERE battle_id = ${battleId}
+      ORDER BY created_at ASC
+    `;
+  }
+
+  // ─── Misc helpers ─────────────────────────────────────────────────────────
+
+  function formatBenchmarkLine(c: ContestantRow): string {
+    const parts: string[] = [];
+    if (c.duration_ms !== null) parts.push(`${c.duration_ms}ms`);
+    if (c.tokens_per_sec !== null) parts.push(`${c.tokens_per_sec.toFixed(1)} tok/s`);
+    return parts.length > 0 ? parts.join(', ') : 'no benchmark';
+  }
+
+  function publishUser(frame: Record<string, unknown>): void {
+    broker.publishUserFrame('default', frame as unknown as WsFrame);
+  }
+
+  function sleep(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  return { analyze, crossExamine };
+}
+
+function errMsg(e: unknown): string {
+  return e instanceof Error ? e.message : String(e);
+}
--- a/apps/coder/src/services/arena-decisions.ts
+++ b/apps/coder/src/services/arena-decisions.ts
@@ -0,0 +1,186 @@
+/**
+ * Pure scheduling and classification decisions for the Arena battle-runner.
+ * No database, no IO. Mirrors the pattern of flow-runner-decisions.ts.
+ *
+ * Vocabulary:
+ *   local lane  — llama-swap-backed contestants, run strictly one at a time
+ *   cloud lane  — cloud-backed contestants, run all in parallel
+ *
+ * A contestant's status lifecycle:
+ *   queued → running → done | error
+ */
+import type { BattleType, ContestantLane } from '@boocode/contracts/arena';
+
+// ─── Lane classification ──────────────────────────────────────────────────────
+
+/**
+ * Classify a contestant into a lane.
+ *
+ * Q&A contestants always run on the native (llama-swap) backend → local.
+ * Coding contestants: their MODEL is checked against the localModels set
+ * (all model IDs served by the local llama-swap server). This means an
+ * opencode or qwen contestant pointed at a local model counts as local,
+ * which correctly captures GPU-contention and fair benchmarking (ADR 0001).
+ *
+ * @param battleType  'coding' | 'qa'
+ * @param identity    backend name (coding) or persona name (qa) — not used for lane logic
+ * @param model       the contestant's model id
+ * @param localModels set of model IDs served by the local llama-swap server
+ */
+export function classifyLane(
+  battleType: BattleType,
+  _identity: string,
+  model: string,
+  localModels: ReadonlySet<string>,
+): ContestantLane {
+  if (battleType === 'qa') return 'local';
+  return localModels.has(model) ? 'local' : 'cloud';
+}
+
+// ─── Local-lane queue ─────────────────────────────────────────────────────────
+
+export interface ContestantSlot {
+  id: string;
+  lane: ContestantLane;
+  status: string;
+}
+
+/**
+ * The next queued local contestant to dispatch — the first 'queued' contestant
+ * in the local lane, in creation order (caller must supply rows in created_at ASC).
+ * Returns null when the local queue is empty or all local slots are non-queued.
+ */
+export function nextLocalContestant(contestants: readonly ContestantSlot[]): string | null {
+  for (const c of contestants) {
+    if (c.lane === 'local' && c.status === 'queued') return c.id;
+  }
+  return null;
+}
+
+// ─── Battle completion ────────────────────────────────────────────────────────
+
+/**
+ * True when every contestant has reached a terminal state (done | error).
+ * Returns false for an empty list — a battle with no contestants never completes.
+ */
+export function isBattleComplete(contestants: readonly { status: string }[]): boolean {
+  if (contestants.length === 0) return false;
+  return contestants.every((c) => c.status === 'done' || c.status === 'error');
+}
+
+// ─── Benchmark ────────────────────────────────────────────────────────────────
+
+export interface Benchmark {
+  durationMs: number;
+  tokensPerSec: number | null;
+}
+
+/**
+ * Compute the benchmark for a contestant.
+ * Wall-clock duration is captured for every contestant; tokens/sec is only
+ * meaningful for local (llama-swap) contestants where the model has sole
+ * access to the GPU and the measurement is fair.
+ */
+export function computeBenchmark(
+  startedAt: Date,
+  endedAt: Date,
+  costTokens: number | null,
+  lane: ContestantLane,
+): Benchmark {
+  const durationMs = Math.max(0, endedAt.getTime() - startedAt.getTime());
+  const tokensPerSec =
+    lane === 'local' && costTokens !== null && durationMs > 0
+      ? (costTokens / durationMs) * 1000
+      : null;
+  return { durationMs, tokensPerSec };
+}
+
+// ─── Slug / path helpers ──────────────────────────────────────────────────────
+
+/**
+ * Sanitize a string for use as a directory name component.
+ * Lowercases, replaces non-alphanumeric runs with '-', trims leading/trailing
+ * dashes, and caps at 64 characters.
+ */
+export function sanitizeSlug(s: string): string {
+  return s
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .slice(0, 64);
+}
+
+/**
+ * Build the dated battle slug used as the Arena results folder name.
+ * Format: YYYY-MM-DD-<battleType>-<first-8-hex-of-uuid>
+ * Deterministic: callers can rebuild it from (id, type, created_at) on resume.
+ */
+export function buildBattleSlug(battleId: string, battleType: BattleType, createdAt: Date): string {
+  const date = createdAt.toISOString().slice(0, 10);
+  const shortId = battleId.replace(/-/g, '').slice(0, 8);
+  return `${date}-${battleType}-${shortId}`;
+}
+
+/**
+ * Build the per-contestant results directory name within a battle folder.
+ * Format: <sanitized-identity>-<sanitized-model>
+ */
+export function buildContestantDir(identity: string, model: string): string {
+  return `${sanitizeSlug(identity)}-${sanitizeSlug(model)}`;
+}
+
+// ─── Resume reconciliation ────────────────────────────────────────────────────
+
+export type ContestantResumeAction =
+  | 'keep'
+  | 're-dispatch'
+  | 'mark-done'
+  | 'mark-error'
+  | 'mark-cancelled';
+
+export interface ContestantResumeDecision {
+  contestantId: string;
+  action: ContestantResumeAction;
+}
+
+/**
+ * Decide what to do with ONE contestant during startup resume.
+ * Mirrors reconcileResumeStep from flow-runner-decisions.ts.
+ *
+ * @param status    contestants.status
+ * @param taskId    contestants.task_id (null when not yet dispatched)
+ * @param taskState tasks.state for taskId, or null if the task row is absent
+ */
+export function reconcileContestantResume(
+  status: string,
+  taskId: string | null,
+  taskState: string | null,
+): ContestantResumeAction {
+  if (status !== 'running') return 'keep';
+  if (!taskId || taskState === null) return 're-dispatch';
+  switch (taskState) {
+    case 'completed': return 'mark-done';
+    case 'failed':    return 'mark-error';
+    case 'cancelled': return 'mark-cancelled';
+    case 'pending':   return 'keep'; // dispatcher startup poll will run it normally
+    default:          return 're-dispatch'; // 'running'/'blocked' — process is dead
+  }
+}
+
+/**
+ * Reconcile every contestant of an in-flight battle for startup resume.
+ * Returns one decision per contestant. Pure — no IO.
+ */
+export function reconcileContestants(
+  contestants: ReadonlyArray<{ contestantId: string; taskId: string | null; status: string }>,
+  taskStates: ReadonlyMap<string, string>,
+): ContestantResumeDecision[] {
+  return contestants.map((c) => ({
+    contestantId: c.contestantId,
+    action: reconcileContestantResume(
+      c.status,
+      c.taskId,
+      c.taskId ? (taskStates.get(c.taskId) ?? null) : null,
+    ),
+  }));
+}
--- a/apps/coder/src/services/arena-model-call.ts
+++ b/apps/coder/src/services/arena-model-call.ts
@@ -0,0 +1,70 @@
+/**
+ * One-shot model completion for the Arena analyzer.
+ *
+ * Calls the local llama-swap server directly for a single non-streaming
+ * completion. Used for the digest and judge stages (always DEFAULT_MODEL)
+ * and for local-model cross-examinations (any local model).
+ *
+ * Mirrors apps/server/src/services/task-model.ts but targets the coder's
+ * config shape and uses a longer timeout appropriate for analysis calls.
+ */
+
+import type { Config } from '../config.js';
+
+const TIMEOUT_MS = 120_000;
+
+export async function arenaModelCall(opts: {
+  config: Pick<Config, 'LLAMA_SWAP_URL'>;
+  model: string;
+  system: string;
+  user: string;
+  maxTokens?: number;
+  temperature?: number;
+}): Promise<string> {
+  const { config, model, system, user } = opts;
+  const maxTokens = opts.maxTokens ?? 2_000;
+  const temperature = opts.temperature ?? 0.3;
+
+  const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/chat/completions`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      model,
+      messages: [
+        { role: 'system', content: system },
+        { role: 'user', content: user },
+      ],
+      max_tokens: maxTokens,
+      temperature,
+      stream: false,
+      chat_template_kwargs: { enable_thinking: false },
+    }),
+    signal: AbortSignal.timeout(TIMEOUT_MS),
+  });
+
+  if (!res.ok) {
+    const text = await res.text().catch(() => '');
+    throw new Error(`llama-swap responded ${res.status}: ${text.slice(0, 200)}`);
+  }
+
+  const data = (await res.json()) as {
+    choices?: Array<{
+      message?: { content?: string; reasoning_content?: string };
+    }>;
+  };
+
+  const choice = data.choices?.[0]?.message;
+  if (!choice) return '';
+
+  const content = (choice.content ?? '').trim();
+  if (content.length > 0) return content;
+
+  // For thinking-mode models the answer sometimes only lands in reasoning_content.
+  const reasoning = (choice.reasoning_content ?? '').trim();
+  if (reasoning.length > 0) {
+    const lines = reasoning.split('\n').filter((l) => l.trim().length > 0);
+    return lines[lines.length - 1] ?? '';
+  }
+
+  return '';
+}
--- a/apps/coder/src/services/arena-runner.ts
+++ b/apps/coder/src/services/arena-runner.ts
@@ -0,0 +1,895 @@
+/**
+ * Arena battle-runner — DB-backed execution engine for Arena battles.
+ *
+ * Mirrors flow-runner.ts but implements the Arena's two-lane scheduler instead
+ * of the Orchestrator's wave scheduler. Persists to battles/contestants tables
+ * (not flow_runs/flow_steps). Each contestant is dispatched as a real tasks row
+ * via an injected DispatchContestantFn (Phase 4 wires this to the dispatcher).
+ * Advances on the dispatcher's onTaskTerminal hook.
+ *
+ * Scheduling:
+ *   - Cloud lane: all contestants start immediately, in parallel.
+ *   - Local lane: contestants run strictly one at a time (serial queue). Only
+ *     the first local contestant runs at start; the next is dispatched when the
+ *     current one terminates. Both lanes run concurrently with each other.
+ *
+ * Results:
+ *   Written to <projectRoot>/Arena/<battleSlug>/<identity>-<model>/
+ *   Coding: result.md + diff.patch (from the contestant's worktree).
+ *   Q&A:    result.md with the text answer.
+ *
+ * Analyzer seam:
+ *   onBattleComplete is called when all contestants are terminal. Phase 5 wires
+ *   this to the two-stage digest→judge analyzer. A failed contestant does NOT
+ *   abort the battle — others continue and the analyzer judges survivors.
+ */
+import type { Sql } from '../db.js';
+import type { Broker } from '@boocode/server/broker';
+import type { WsFrame } from '@boocode/contracts/ws-frames';
+import type { FastifyBaseLogger } from 'fastify';
+import type { BattleType, ContestantLane } from '@boocode/contracts/arena';
+import { mkdir, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { diffWorktree } from './worktrees.js';
+import {
+  buildBattleSlug,
+  buildContestantDir,
+  classifyLane,
+  computeBenchmark,
+  isBattleComplete,
+  nextLocalContestant,
+  reconcileContestants,
+  type ContestantResumeAction,
+  type ContestantSlot,
+} from './arena-decisions.js';
+
+// ─── Public types ─────────────────────────────────────────────────────────────
+
+export interface ContestantSpec {
+  /** Backend name (coding) or persona name (qa). */
+  identity: string;
+  model: string;
+}
+
+export interface BattleStartOpts {
+  projectId: string;
+  battleType: BattleType;
+  prompt: string;
+  /** 2–6 contestants. Duplicate (identity, model) pairs are rejected by the schema UNIQUE constraint. */
+  contestants: ContestantSpec[];
+}
+
+/**
+ * Injected dispatch function — Phase 4 wires this to the real task inserter.
+ * Must INSERT a tasks row and return its id. The arena-runner sets the
+ * contestant's task_id and status after this call.
+ * `sessionId` is returned when already known (Q&A pre-creates the session);
+ * null for coding contestants whose session is created lazily by the dispatcher.
+ */
+export type DispatchContestantFn = (opts: {
+  projectId: string;
+  contestantId: string;
+  prompt: string;
+  identity: string;
+  model: string;
+  battleType: BattleType;
+}) => Promise<{ taskId: string; sessionId: string | null }>;
+
+/**
+ * Called once when every contestant in a battle has reached a terminal state.
+ * Phase 5 wires this to the two-stage digest→judge analyzer.
+ * Must never throw — the caller swallows errors.
+ */
+export type OnBattleComplete = (battleId: string) => void;
+
+/**
+ * Called after a cross_examinations row has been inserted, with its id.
+ * Phase 5 wires this to the analyzer's cross-examination runner.
+ * Must never throw — the caller swallows errors.
+ */
+export type OnCrossExamStart = (opts: {
+  battleId: string;
+  crossExamId: string;
+  identity: string;
+  model: string;
+}) => void;
+
+export interface BattleRunner {
+  /** Start a battle: persist it + its contestants, classify lanes, dispatch initial wave. */
+  startBattle(opts: BattleStartOpts): Promise<{ battleId: string }>;
+  /**
+   * Wire to createDispatcher({ onTaskTerminal }). Fires when ANY task settles;
+   * the runner ignores tasks it doesn't own. Never throws.
+   */
+  handleTaskTerminal(taskId: string, state: string): void;
+  /**
+   * Re-advance any battles still marked 'running' after a coder restart.
+   * Mirrors flow-runner's initResume (D-9). Never throws.
+   */
+  initResume(): Promise<void>;
+  /**
+   * Cancel a running battle. Marks it and all non-terminal contestants cancelled,
+   * publishes frames, and returns the task_ids of in-flight contestants so the
+   * route can abort them via the dispatcher's cancelExternalTask.
+   */
+  cancelBattle(battleId: string): Promise<{ cancelled: boolean; taskIds: string[] }>;
+  /**
+   * Trigger analysis for a completed (or manually re-analyzed) battle.
+   * Phase 5 wires this to the two-stage digest→judge analyzer. For now, calls
+   * the injected onBattleComplete seam directly.
+   */
+  triggerAnalysis(battleId: string): Promise<{ triggered: boolean }>;
+  /**
+   * Start a cross-examination on a battle. Inserts a cross_examinations row and
+   * invokes the analyzer seam. Phase 5 fills the actual verdict logic.
+   */
+  startCrossExam(
+    battleId: string,
+    opts: { identity: string; model: string },
+  ): Promise<{ crossExamId: string }>;
+  /**
+   * Manually set (or clear) the winner. Validates the contestant belongs to the
+   * battle, updates battles.winner_contestant_id, and publishes a battle_updated
+   * frame so the pane reflects the override immediately.
+   */
+  setWinner(battleId: string, winnerId: string | null): Promise<{
+    ok: boolean;
+    notFound?: boolean;
+    invalidContestant?: boolean;
+  }>;
+}
+
+// ─── Internal row shapes ──────────────────────────────────────────────────────
+
+interface ContestantRow {
+  id: string;
+  battle_id: string;
+  identity: string;
+  model: string;
+  lane: ContestantLane;
+  task_id: string | null;
+  worktree_id: string | null;
+  status: string;
+}
+
+interface BattleRow {
+  id: string;
+  project_id: string;
+  battle_type: BattleType;
+  prompt: string;
+  status: string;
+  results_path: string | null;
+  created_at: Date;
+}
+
+// ─── Deps / factory ───────────────────────────────────────────────────────────
+
+interface Deps {
+  sql: Sql;
+  broker: Broker;
+  log: FastifyBaseLogger;
+  dispatch: DispatchContestantFn;
+  onBattleComplete: OnBattleComplete;
+  /**
+   * Called after a cross_examinations row is inserted. Phase 5 wires this to
+   * the analyzer's cross-examination runner. Optional: absent → no cross-exam
+   * logic runs (stub behaviour for tests).
+   */
+  onCrossExamStart?: OnCrossExamStart;
+  /**
+   * Model IDs served by the local llama-swap server. Used for lane classification:
+   * a contestant whose model is in this set runs in the local lane (serial, GPU-fair).
+   * Q&A contestants are always local regardless of this set.
+   * Defaults to an empty set → all coding contestants go to the cloud lane.
+   */
+  localModels?: ReadonlySet<string>;
+}
+
+const DEFAULT_LOCAL_MODELS: ReadonlySet<string> = new Set();
+
+export function createBattleRunner(deps: Deps): BattleRunner {
+  const { sql, broker, log, dispatch, onBattleComplete, onCrossExamStart } = deps;
+  const localModels = deps.localModels ?? DEFAULT_LOCAL_MODELS;
+
+  // Serialize local-lane advance per battle so two near-simultaneous terminal
+  // callbacks don't double-dispatch the next local contestant.
+  const advanceChain = new Map<string, Promise<void>>();
+
+  // Delta bridge: per-contestant broker unsubscribe functions.
+  // 'terminated' sentinel prevents a late-arriving setupDeltaBridge from
+  // registering a subscription that would never be cleaned up.
+  const deltaUnsubs = new Map<string, (() => void) | 'terminated'>();
+
+  function publishUser(frame: Record<string, unknown>): void {
+    broker.publishUserFrame('default', frame as unknown as WsFrame);
+  }
+
+  /**
+   * Subscribe to the contestant's inference session and forward delta frames
+   * to the user channel as contestant_updated{delta}. Polls for session_id
+   * when not immediately known (coding contestants whose session is created
+   * lazily by the dispatcher). Unsubscribes on termination or max retries.
+   */
+  async function setupDeltaBridge(
+    battleId: string,
+    contestantId: string,
+    taskId: string,
+    knownSessionId: string | null,
+  ): Promise<void> {
+    let sessionId = knownSessionId;
+    if (!sessionId) {
+      // Coding contestant: session_id is written by the dispatcher just before
+      // inference starts. Poll until it appears or the contestant terminates.
+      for (let i = 0; i < 50; i++) {
+        if (deltaUnsubs.get(contestantId) === 'terminated') return;
+        const [row] = await sql<{ session_id: string | null }[]>`
+          SELECT session_id FROM tasks WHERE id = ${taskId}
+        `.catch(() => []);
+        if (row?.session_id) { sessionId = row.session_id; break; }
+        await new Promise((r) => setTimeout(r, 200));
+      }
+    }
+    if (!sessionId) return;
+    if (deltaUnsubs.get(contestantId) === 'terminated') return;
+
+    const unsub = broker.subscribe(sessionId, (frame) => {
+      if (frame.type === 'delta') {
+        const deltaContent = (frame as unknown as { content?: unknown }).content;
+        if (typeof deltaContent === 'string') {
+          publishUser({
+            type: 'contestant_updated',
+            battle_id: battleId,
+            contestant_id: contestantId,
+            delta: deltaContent,
+          });
+        }
+      }
+    });
+
+    const existing = deltaUnsubs.get(contestantId);
+    if (existing === 'terminated') {
+      unsub();
+    } else {
+      deltaUnsubs.set(contestantId, unsub);
+    }
+  }
+
+  function teardownDeltaBridge(contestantId: string): void {
+    const entry = deltaUnsubs.get(contestantId);
+    if (typeof entry === 'function') {
+      entry();
+      deltaUnsubs.delete(contestantId);
+    } else {
+      deltaUnsubs.set(contestantId, 'terminated');
+    }
+  }
+
+  // ─── startBattle ────────────────────────────────────────────────────────────
+
+  async function startBattle(opts: BattleStartOpts): Promise<{ battleId: string }> {
+    if (opts.contestants.length < 2 || opts.contestants.length > 6) {
+      throw new Error(`battle requires 2–6 contestants; got ${opts.contestants.length}`);
+    }
+
+    const [proj] = await sql<{ path: string }[]>`SELECT path FROM projects WHERE id = ${opts.projectId}`;
+    if (!proj) throw new Error(`project not found: ${opts.projectId}`);
+
+    // Insert the battle row as 'running'; update results_path once we have the id.
+    const [battle] = await sql<{ id: string; created_at: Date }[]>`
+      INSERT INTO battles (project_id, battle_type, prompt, status)
+      VALUES (${opts.projectId}, ${opts.battleType}, ${opts.prompt}, 'running')
+      RETURNING id, created_at
+    `;
+    const battleId = battle!.id;
+    const battleSlug = buildBattleSlug(battleId, opts.battleType, battle!.created_at);
+    const resultsPath = join(proj.path, 'Arena', battleSlug);
+
+    await sql`
+      UPDATE battles SET results_path = ${resultsPath}, updated_at = clock_timestamp()
+      WHERE id = ${battleId}
+    `;
+
+    // Insert all contestant rows with lane classification.
+    const contestantRows: Array<{ id: string; identity: string; model: string; lane: ContestantLane }> = [];
+    for (const spec of opts.contestants) {
+      const lane = classifyLane(opts.battleType, spec.identity, spec.model, localModels);
+      const [row] = await sql<{ id: string }[]>`
+        INSERT INTO contestants (battle_id, identity, model, lane, status)
+        VALUES (${battleId}, ${spec.identity}, ${spec.model}, ${lane}, 'queued')
+        RETURNING id
+      `;
+      contestantRows.push({ id: row!.id, identity: spec.identity, model: spec.model, lane });
+    }
+
+    // Write initial manifest so the results folder is always populated.
+    await writeManifest(
+      battleId, resultsPath, opts.battleType, opts.prompt, battle!.created_at,
+      contestantRows.map((c) => ({ identity: c.identity, model: c.model, lane: c.lane })),
+      null,
+    ).catch((err) => {
+      log.warn({ err: errMsg(err), battleId }, 'arena-runner: initial manifest write failed');
+    });
+
+    publishUser({
+      type: 'battle_started',
+      battle_id: battleId,
+      battle_type: opts.battleType,
+      prompt: opts.prompt,
+      contestants: contestantRows.map((c) => ({
+        id: c.id,
+        identity: c.identity,
+        model: c.model,
+        lane: c.lane,
+      })),
+    });
+
+    // Dispatch: cloud lane starts all contestants in parallel; local lane starts
+    // only the first queued contestant (serial queue).
+    let localStarted = false;
+    for (const c of contestantRows) {
+      if (c.lane === 'cloud') {
+        await dispatchContestant(battleId, opts.projectId, opts.battleType, opts.prompt, c);
+      } else if (!localStarted) {
+        await dispatchContestant(battleId, opts.projectId, opts.battleType, opts.prompt, c);
+        localStarted = true;
+        // remaining local contestants stay 'queued' until this one finishes
+      }
+    }
+
+    return { battleId };
+  }
+
+  async function dispatchContestant(
+    battleId: string,
+    projectId: string,
+    battleType: BattleType,
+    prompt: string,
+    c: { id: string; identity: string; model: string; lane: ContestantLane },
+  ): Promise<void> {
+    const { taskId, sessionId } = await dispatch({
+      projectId,
+      contestantId: c.id,
+      prompt,
+      identity: c.identity,
+      model: c.model,
+      battleType,
+    });
+    await sql`
+      UPDATE contestants
+      SET task_id = ${taskId}, status = 'running', updated_at = clock_timestamp()
+      WHERE id = ${c.id}
+    `;
+    publishContestantFrame(battleId, c.id, { status: 'running' });
+    // Start the delta bridge in the background; unsubscribe when the contestant
+    // terminates (teardownDeltaBridge called in handleTaskTerminal).
+    void setupDeltaBridge(battleId, c.id, taskId, sessionId ?? null);
+  }
+
+  // ─── local-lane advance (serialized per battle) ───────────────────────────
+
+  function advanceLocalLane(battleId: string): Promise<void> {
+    const prev = advanceChain.get(battleId) ?? Promise.resolve();
+    const next = prev
+      .catch(() => {})
+      .then(() =>
+        advanceLocalLaneInner(battleId).catch((err) => {
+          log.error({ err: errMsg(err), battleId }, 'arena-runner: advanceLocalLane failed');
+        }),
+      );
+    advanceChain.set(battleId, next);
+    void next.finally(() => {
+      if (advanceChain.get(battleId) === next) advanceChain.delete(battleId);
+    });
+    return next;
+  }
+
+  async function advanceLocalLaneInner(battleId: string): Promise<void> {
+    const battle = await loadBattle(battleId);
+    if (!battle || battle.status !== 'running') return;
+
+    const contestants = await loadContestants(battleId);
+    const slots: ContestantSlot[] = contestants.map((c) => ({
+      id: c.id,
+      lane: c.lane,
+      status: c.status,
+    }));
+
+    // Nothing to do if the local lane is still busy.
+    const localRunning = slots.some((c) => c.lane === 'local' && c.status === 'running');
+    if (localRunning) return;
+
+    const nextId = nextLocalContestant(slots);
+    if (!nextId) return; // local queue is exhausted
+
+    const next = contestants.find((c) => c.id === nextId)!;
+    await dispatchContestant(battleId, battle.project_id, battle.battle_type, battle.prompt, {
+      id: next.id,
+      identity: next.identity,
+      model: next.model,
+      lane: next.lane,
+    });
+  }
+
+  // ─── handleTaskTerminal ───────────────────────────────────────────────────
+
+  function handleTaskTerminal(taskId: string, state: string): void {
+    void (async () => {
+      // Look up which contestant owns this task (contestants_task_id_idx).
+      const [row] = await sql<ContestantRow[]>`
+        SELECT id, battle_id, identity, model, lane, task_id, worktree_id, status
+        FROM contestants WHERE task_id = ${taskId}
+      `;
+      if (!row) return; // not an arena task — ignore
+      if (row.status !== 'running') return; // already settled (idempotent)
+
+      const battle = await loadBattle(row.battle_id);
+
+      // Pull the task row for benchmark + output.
+      const [task] = await sql<{
+        chat_id: string | null;
+        started_at: Date | null;
+        ended_at: Date | null;
+        cost_tokens: number | null;
+      }[]>`SELECT chat_id, started_at, ended_at, cost_tokens FROM tasks WHERE id = ${taskId}`;
+
+      const endedAt = task?.ended_at ?? new Date();
+
+      if (state === 'completed') {
+        const startedAt = task?.started_at ?? endedAt;
+        const bench = computeBenchmark(startedAt, endedAt, task?.cost_tokens ?? null, row.lane);
+
+        const output = task?.chat_id ? await readChatOutput(task.chat_id) : '';
+
+        const resultPath = battle
+          ? await writeContestantResults(battle, row, output, bench).catch((err) => {
+              log.warn({ err: errMsg(err), contestantId: row.id }, 'arena-runner: result write failed');
+              return null;
+            })
+          : null;
+
+        await sql`
+          UPDATE contestants
+          SET status      = 'done',
+              duration_ms = ${Math.round(bench.durationMs)},
+              tokens_per_sec = ${bench.tokensPerSec},
+              cost_tokens = ${task?.cost_tokens ?? null},
+              result_path = ${resultPath},
+              updated_at  = clock_timestamp()
+          WHERE id = ${row.id} AND status = 'running'
+        `;
+        teardownDeltaBridge(row.id);
+
+        // Check if this was the last contestant.
+        const allContestants = await loadContestants(row.battle_id);
+        const battleDone = isBattleComplete(allContestants);
+
+        publishContestantFrame(row.battle_id, row.id, {
+          status: 'done',
+          duration_ms: Math.round(bench.durationMs),
+          ...(bench.tokensPerSec !== null ? { tokens_per_sec: bench.tokensPerSec } : {}),
+          ...(battleDone ? { battle_status: 'completed' } : {}),
+        });
+
+        if (battleDone) {
+          await completeBattle(row.battle_id);
+        } else if (row.lane === 'local') {
+          void advanceLocalLane(row.battle_id);
+        }
+      } else {
+        // failed or cancelled — the contest continues; this contestant is error.
+        const errorMsg = state === 'cancelled' ? 'cancelled' : `task ${state}`;
+        await sql`
+          UPDATE contestants
+          SET status = 'error', error = ${errorMsg}, updated_at = clock_timestamp()
+          WHERE id = ${row.id} AND status = 'running'
+        `;
+        teardownDeltaBridge(row.id);
+
+        const allContestants = await loadContestants(row.battle_id);
+        const battleDone = isBattleComplete(allContestants);
+
+        publishContestantFrame(row.battle_id, row.id, {
+          status: 'error',
+          error: errorMsg,
+          ...(battleDone ? { battle_status: 'completed' } : {}),
+        });
+
+        if (battleDone) {
+          await completeBattle(row.battle_id);
+        } else if (row.lane === 'local') {
+          void advanceLocalLane(row.battle_id);
+        }
+      }
+    })().catch((err) => {
+      log.error({ err: errMsg(err), taskId }, 'arena-runner: handleTaskTerminal failed');
+    });
+  }
+
+  // ─── battle finalization ──────────────────────────────────────────────────
+
+  async function completeBattle(battleId: string): Promise<void> {
+    const updated = await sql`
+      UPDATE battles SET status = 'completed', updated_at = clock_timestamp()
+      WHERE id = ${battleId} AND status = 'running'
+    `;
+    if (updated.count === 0) return; // already terminal (race guard)
+    log.info({ battleId }, 'arena-runner: battle completed');
+
+    // Update manifest with finished_at timestamp.
+    const completedBattle = await loadBattle(battleId);
+    if (completedBattle?.results_path) {
+      const contestants = await loadContestants(battleId);
+      await writeManifest(
+        battleId,
+        completedBattle.results_path,
+        completedBattle.battle_type,
+        completedBattle.prompt,
+        completedBattle.created_at,
+        contestants.map((c) => ({ identity: c.identity, model: c.model, lane: c.lane })),
+        new Date(),
+      ).catch((err) => {
+        log.warn({ err: errMsg(err), battleId }, 'arena-runner: manifest update failed');
+      });
+    }
+
+    onBattleComplete(battleId);
+  }
+
+  // ─── manifest writer ─────────────────────────────────────────────────────
+
+  async function writeManifest(
+    battleId: string,
+    resultsPath: string,
+    battleType: BattleType,
+    prompt: string,
+    createdAt: Date,
+    contestants: Array<{ identity: string; model: string; lane: ContestantLane }>,
+    finishedAt: Date | null,
+  ): Promise<void> {
+    await mkdir(resultsPath, { recursive: true });
+    const manifest = {
+      id: battleId,
+      battle_type: battleType,
+      prompt,
+      contestants,
+      created_at: createdAt.toISOString(),
+      finished_at: finishedAt?.toISOString() ?? null,
+    };
+    await writeFile(join(resultsPath, 'manifest.json'), JSON.stringify(manifest, null, 2), 'utf8');
+  }
+
+  // ─── results writer ───────────────────────────────────────────────────────
+
+  async function writeContestantResults(
+    battle: BattleRow,
+    contestant: { identity: string; model: string; lane: ContestantLane; worktree_id: string | null },
+    output: string,
+    bench: { durationMs: number; tokensPerSec: number | null },
+  ): Promise<string> {
+    const resultsPath = await getOrBuildResultsPath(battle);
+    if (!resultsPath) throw new Error('cannot resolve results path for battle ' + battle.id);
+
+    const contestantDir = buildContestantDir(contestant.identity, contestant.model);
+    const dir = join(resultsPath, contestantDir);
+    await mkdir(dir, { recursive: true });
+
+    const benchLines = [
+      `duration: ${bench.durationMs}ms`,
+      bench.tokensPerSec != null ? `tokens/sec: ${bench.tokensPerSec.toFixed(1)}` : null,
+    ]
+      .filter(Boolean)
+      .join('\n');
+
+    const resultMd =
+      `# ${contestant.identity} / ${contestant.model}\n\n` +
+      `## Benchmark\n\n${benchLines}\n\n` +
+      `## Output\n\n${output}\n`;
+    await writeFile(join(dir, 'result.md'), resultMd, 'utf8');
+
+    if (battle.battle_type === 'coding' && contestant.worktree_id) {
+      const [wt] = await sql<{ path: string; base_commit: string | null }[]>`
+        SELECT path, base_commit FROM worktrees WHERE id = ${contestant.worktree_id}
+      `;
+      if (wt) {
+        const [proj] = await sql<{ path: string }[]>`
+          SELECT path FROM projects WHERE id = ${battle.project_id}
+        `;
+        if (proj) {
+          const diff = await diffWorktree(wt.path, proj.path, {
+            baseRef: wt.base_commit ?? undefined,
+          }).catch(() => '');
+          await writeFile(join(dir, 'diff.patch'), diff, 'utf8');
+        }
+      }
+    }
+
+    return dir;
+  }
+
+  /** Resolve or rebuild results_path for a battle (handles crash-before-UPDATE). */
+  async function getOrBuildResultsPath(battle: BattleRow): Promise<string | null> {
+    if (battle.results_path) return battle.results_path;
+    const [proj] = await sql<{ path: string }[]>`SELECT path FROM projects WHERE id = ${battle.project_id}`;
+    if (!proj) return null;
+    const slug = buildBattleSlug(battle.id, battle.battle_type, battle.created_at);
+    const resultsPath = join(proj.path, 'Arena', slug);
+    await sql`
+      UPDATE battles SET results_path = ${resultsPath}, updated_at = clock_timestamp()
+      WHERE id = ${battle.id}
+    `;
+    return resultsPath;
+  }
+
+  // ─── helpers ──────────────────────────────────────────────────────────────
+
+  async function readChatOutput(chatId: string): Promise<string> {
+    const [m] = await sql<{ content: string | null }[]>`
+      SELECT content FROM messages
+      WHERE chat_id = ${chatId} AND role = 'assistant'
+      ORDER BY created_at DESC LIMIT 1
+    `;
+    return m?.content ?? '';
+  }
+
+  async function loadBattle(battleId: string): Promise<BattleRow | null> {
+    const [b] = await sql<BattleRow[]>`
+      SELECT id, project_id, battle_type, prompt, status, results_path, created_at
+      FROM battles WHERE id = ${battleId}
+    `;
+    return b ?? null;
+  }
+
+  async function loadContestants(battleId: string): Promise<ContestantRow[]> {
+    return sql<ContestantRow[]>`
+      SELECT id, battle_id, identity, model, lane, task_id, worktree_id, status
+      FROM contestants WHERE battle_id = ${battleId}
+      ORDER BY created_at ASC
+    `;
+  }
+
+  function publishContestantFrame(
+    battleId: string,
+    contestantId: string,
+    extra: Record<string, unknown>,
+  ): void {
+    publishUser({
+      type: 'contestant_updated',
+      battle_id: battleId,
+      contestant_id: contestantId,
+      ...extra,
+    });
+  }
+
+  // ─── initResume ───────────────────────────────────────────────────────────
+
+  async function initResume(): Promise<void> {
+    const battles = await sql<BattleRow[]>`
+      SELECT id, project_id, battle_type, prompt, status, results_path, created_at
+      FROM battles WHERE status = 'running'
+    `;
+    if (battles.length === 0) return;
+    log.info({ count: battles.length }, 'arena-runner: resuming in-flight battles on startup');
+    for (const battle of battles) {
+      await resumeBattle(battle).catch((err) => {
+        log.error({ err: errMsg(err), battleId: battle.id }, 'arena-runner: initResume failed for battle');
+      });
+    }
+  }
+
+  async function resumeBattle(battle: BattleRow): Promise<void> {
+    const contestants = await loadContestants(battle.id);
+
+    const taskIds = contestants.map((c) => c.task_id).filter((id): id is string => id !== null);
+    const taskStates = new Map<string, string>();
+    if (taskIds.length > 0) {
+      const tasks = await sql<{ id: string; state: string }[]>`
+        SELECT id, state FROM tasks WHERE id = ANY(${taskIds})
+      `;
+      for (const t of tasks) taskStates.set(t.id, t.state);
+    }
+
+    const decisions = reconcileContestants(
+      contestants.map((c) => ({ contestantId: c.id, taskId: c.task_id, status: c.status })),
+      taskStates,
+    );
+
+    for (const decision of decisions) {
+      if (decision.action === 'keep') continue;
+      const contestant = contestants.find((c) => c.id === decision.contestantId)!;
+      await applyResumeDecision(battle, contestant, decision.action);
+    }
+
+    // Re-check completion after applying decisions.
+    const updated = await loadContestants(battle.id);
+    if (isBattleComplete(updated)) {
+      await completeBattle(battle.id);
+    } else {
+      // Advance local lane in case a slot opened up.
+      void advanceLocalLane(battle.id);
+    }
+
+    log.info({ battleId: battle.id }, 'arena-runner: battle resumed');
+  }
+
+  async function applyResumeDecision(
+    battle: BattleRow,
+    contestant: ContestantRow,
+    action: ContestantResumeAction,
+  ): Promise<void> {
+    switch (action) {
+      case 'keep': break;
+
+      case 'mark-done': {
+        const taskRow = contestant.task_id
+          ? (await sql<{ started_at: Date | null; ended_at: Date | null; cost_tokens: number | null; chat_id: string | null }[]>`
+              SELECT started_at, ended_at, cost_tokens, chat_id FROM tasks WHERE id = ${contestant.task_id}`)[0]
+          : null;
+        const endedAt = taskRow?.ended_at ?? new Date();
+        const startedAt = taskRow?.started_at ?? endedAt;
+        const bench = computeBenchmark(startedAt, endedAt, taskRow?.cost_tokens ?? null, contestant.lane);
+        const output = taskRow?.chat_id ? await readChatOutput(taskRow.chat_id) : '';
+        const resultPath = battle
+          ? await writeContestantResults(battle, contestant, output, bench).catch((err) => {
+              log.warn({ err: errMsg(err), contestantId: contestant.id }, 'arena-runner: resume result write failed');
+              return null;
+            })
+          : null;
+        await sql`
+          UPDATE contestants
+          SET status = 'done',
+              duration_ms = ${Math.round(bench.durationMs)},
+              tokens_per_sec = ${bench.tokensPerSec},
+              result_path = ${resultPath},
+              updated_at = clock_timestamp()
+          WHERE id = ${contestant.id}
+        `;
+        break;
+      }
+
+      case 'mark-error':
+        await sql`
+          UPDATE contestants
+          SET status = 'error', error = 'task failed before callback',
+              updated_at = clock_timestamp()
+          WHERE id = ${contestant.id}
+        `;
+        break;
+
+      case 'mark-cancelled':
+        await sql`
+          UPDATE contestants
+          SET status = 'error', error = 'cancelled before callback',
+              updated_at = clock_timestamp()
+          WHERE id = ${contestant.id}
+        `;
+        break;
+
+      case 're-dispatch': {
+        const { taskId } = await dispatch({
+          projectId: battle.project_id,
+          contestantId: contestant.id,
+          prompt: battle.prompt,
+          identity: contestant.identity,
+          model: contestant.model,
+          battleType: battle.battle_type,
+        });
+        await sql`
+          UPDATE contestants
+          SET task_id = ${taskId}, updated_at = clock_timestamp()
+          WHERE id = ${contestant.id}
+        `;
+        log.info(
+          { battleId: battle.id, contestantId: contestant.id, taskId },
+          'arena-runner: contestant re-dispatched on resume',
+        );
+        break;
+      }
+    }
+  }
+
+  // ─── cancelBattle ─────────────────────────────────────────────────────────
+
+  async function cancelBattle(battleId: string): Promise<{ cancelled: boolean; taskIds: string[] }> {
+    const updated = await sql`
+      UPDATE battles SET status = 'cancelled', updated_at = clock_timestamp()
+      WHERE id = ${battleId} AND status = 'running'
+    `;
+    if (updated.count === 0) return { cancelled: false, taskIds: [] };
+
+    // Mark all non-terminal contestants cancelled and collect in-flight task_ids.
+    const contestants = await sql<{ id: string; task_id: string | null; status: string }[]>`
+      SELECT id, task_id, status FROM contestants
+      WHERE battle_id = ${battleId} AND status NOT IN ('done', 'error')
+    `;
+
+    if (contestants.length > 0) {
+      await sql`
+        UPDATE contestants
+        SET status = 'error', error = 'battle cancelled', updated_at = clock_timestamp()
+        WHERE battle_id = ${battleId} AND status NOT IN ('done', 'error')
+      `;
+      for (const c of contestants) {
+        publishContestantFrame(battleId, c.id, {
+          status: 'error',
+          error: 'battle cancelled',
+          battle_status: 'cancelled',
+        });
+      }
+    }
+
+    const taskIds = contestants
+      .filter(
+        (c): c is typeof c & { task_id: string } =>
+          c.task_id !== null && c.status === 'running',
+      )
+      .map((c) => c.task_id);
+
+    log.info({ battleId }, 'arena-runner: battle cancelled by request');
+    return { cancelled: true, taskIds };
+  }
+
+  // ─── triggerAnalysis (Phase 5 seam) ──────────────────────────────────────
+
+  async function triggerAnalysis(battleId: string): Promise<{ triggered: boolean }> {
+    const battle = await loadBattle(battleId);
+    if (!battle) return { triggered: false };
+    log.info({ battleId }, 'arena-runner: triggerAnalysis requested');
+    // Calls the injected onBattleComplete seam — Phase 5 replaces this with the
+    // real two-stage digest→judge analyzer (see ADR 0002 + plan Phase 5).
+    onBattleComplete(battleId);
+    return { triggered: true };
+  }
+
+  // ─── startCrossExam (Phase 5 seam) ───────────────────────────────────────
+
+  async function startCrossExam(
+    battleId: string,
+    opts: { identity: string; model: string },
+  ): Promise<{ crossExamId: string }> {
+    const [row] = await sql<{ id: string }[]>`
+      INSERT INTO cross_examinations (battle_id, identity, model)
+      VALUES (${battleId}, ${opts.identity}, ${opts.model})
+      RETURNING id
+    `;
+    const crossExamId = row!.id;
+    log.info({ battleId, crossExamId, ...opts }, 'arena-runner: cross-exam inserted, triggering analyzer');
+    if (onCrossExamStart) {
+      try {
+        onCrossExamStart({ battleId, crossExamId, identity: opts.identity, model: opts.model });
+      } catch (err) {
+        log.error({ err: err instanceof Error ? err.message : String(err), battleId, crossExamId }, 'arena-runner: onCrossExamStart threw');
+      }
+    }
+    return { crossExamId };
+  }
+
+  // ─── setWinner (user override) ────────────────────────────────────────────
+
+  async function setWinner(
+    battleId: string,
+    winnerId: string | null,
+  ): Promise<{ ok: boolean; notFound?: boolean; invalidContestant?: boolean }> {
+    const [row] = await sql<{ id: string }[]>`SELECT id FROM battles WHERE id = ${battleId}`;
+    if (!row) return { ok: false, notFound: true };
+
+    if (winnerId !== null) {
+      const [c] = await sql<{ id: string }[]>`
+        SELECT id FROM contestants WHERE id = ${winnerId} AND battle_id = ${battleId}
+      `;
+      if (!c) return { ok: false, invalidContestant: true };
+    }
+
+    await sql`
+      UPDATE battles SET winner_contestant_id = ${winnerId}, updated_at = clock_timestamp()
+      WHERE id = ${battleId}
+    `;
+    publishUser({ type: 'battle_updated', battle_id: battleId, winner_contestant_id: winnerId });
+    return { ok: true };
+  }
+
+  return { startBattle, handleTaskTerminal, initResume, cancelBattle, triggerAnalysis, startCrossExam, setWinner };
+}
+
+function errMsg(e: unknown): string {
+  return e instanceof Error ? e.message : String(e);
+}
--- a/apps/coder/src/services/dispatcher.ts
+++ b/apps/coder/src/services/dispatcher.ts
@@ -310,6 +310,9 @@ export function createDispatcher(deps: Deps): {
    const taskId = task.id;
    log.info({ taskId }, 'dispatcher: starting task (path A — native)');

+    // Declared before try so the catch block can write it back on the task row.
+    let chatId: string | null = null;
+
    try {
      // Mark running
      await sql`
@@ -318,26 +321,29 @@ export function createDispatcher(deps: Deps): {
        WHERE id = ${taskId}
      `;

-      // Create session + chat for this task
+      // Session setup: reuse a pre-created session (e.g. Q&A arena contestants
+      // whose persona is stamped on the session via agent_id) or create a fresh one.
      const model = task.model ?? config.DEFAULT_MODEL;
-      const sessionName = 'Task: ' + task.input.slice(0, 40);
-
-      const [session] = await sql<{ id: string }[]>`
-        INSERT INTO sessions (project_id, name, model, status)
-        VALUES (${task.project_id}, ${sessionName}, ${model}, 'open')
-        RETURNING id
-      `;
-      const sessionId = session!.id;
+      let sessionId: string;
+      if (task.session_id) {
+        sessionId = task.session_id;
+      } else {
+        const sessionName = 'Task: ' + task.input.slice(0, 40);
+        const [session] = await sql<{ id: string }[]>`
+          INSERT INTO sessions (project_id, name, model, status)
+          VALUES (${task.project_id}, ${sessionName}, ${model}, 'open')
+          RETURNING id
+        `;
+        sessionId = session!.id;
+        await sql`UPDATE tasks SET session_id = ${sessionId} WHERE id = ${taskId}`;
+      }

      const [chat] = await sql<{ id: string }[]>`
        INSERT INTO chats (session_id, name, status)
        VALUES (${sessionId}, 'Task execution', 'open')
        RETURNING id
      `;
-      const chatId = chat!.id;
-
-      // Link task to session
-      await sql`UPDATE tasks SET session_id = ${sessionId} WHERE id = ${taskId}`;
+      chatId = chat!.id;

      // Create user message + streaming assistant
      await sql<{ id: string }[]>`
@@ -382,7 +388,7 @@ export function createDispatcher(deps: Deps): {
        const summary = (msg?.content ?? '').slice(0, 500);
        await sql`
          UPDATE tasks
-          SET state = 'completed', ended_at = clock_timestamp(), output_summary = ${summary}, cost_tokens = ${costTokens}
+          SET state = 'completed', ended_at = clock_timestamp(), output_summary = ${summary}, cost_tokens = ${costTokens}, chat_id = ${chatId}
          WHERE id = ${taskId}
        `;
        log.info({ taskId, costTokens }, 'dispatcher: task completed (native)');
@@ -409,7 +415,7 @@ export function createDispatcher(deps: Deps): {
        const summary = (msg?.content ?? 'Inference failed').slice(0, 500);
        await sql`
          UPDATE tasks
-          SET state = 'failed', ended_at = clock_timestamp(), output_summary = ${summary}, cost_tokens = ${costTokens}
+          SET state = 'failed', ended_at = clock_timestamp(), output_summary = ${summary}, cost_tokens = ${costTokens}, chat_id = ${chatId}
          WHERE id = ${taskId}
        `;
        log.warn({ taskId, finalStatus }, 'dispatcher: task failed (native)');
@@ -419,7 +425,7 @@ export function createDispatcher(deps: Deps): {
      log.error({ taskId, err: errMsg }, 'dispatcher: task error (native)');
      await sql`
        UPDATE tasks
-        SET state = 'failed', ended_at = clock_timestamp(), output_summary = ${errMsg.slice(0, 500)}
+        SET state = 'failed', ended_at = clock_timestamp(), output_summary = ${errMsg.slice(0, 500)}, chat_id = ${chatId}
        WHERE id = ${taskId}
      `.catch(() => {});
    }