feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt

Arena is a new pane kind for competitive AI evaluation. A Battle runs
the same prompt against 2-6 Contestants across two concurrent lanes:
local lane (llama-swap models, serial) and cloud lane (parallel).

The new battle_* frames are added to all three frame registries:
@boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results
  writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore
This commit is contained in:
2026-06-06 23:25:29 +00:00
parent 84a024a5a4
commit 3474be4865
34 changed files with 4581 additions and 146 deletions

View File

@@ -3,6 +3,7 @@ import type { DragEvent } from 'react';
import { toast } from 'sonner';
import { api } from '@/api/client';
import type {
ArenaState,
ClosedPaneEntry,
HtmlArtifactState,
MarkdownArtifactState,
@@ -187,6 +188,16 @@ function orchestratorPane(state: OrchestratorState): WorkspacePane {
};
}
/**
 * Builds a new workspace pane of kind 'arena' that carries the given battle state.
 *
 * The pane gets a freshly generated id and starts with no chat tabs
 * (empty `chatIds`, `activeChatIdx` of -1).
 *
 * @param state Arena state to attach to the pane as `arena_state`.
 * @returns The newly constructed pane; nothing is added to the workspace here.
 */
function arenaPane(state: ArenaState): WorkspacePane {
  const pane: WorkspacePane = {
    id: generateId(),
    kind: 'arena',
    chatIds: [],
    activeChatIdx: -1,
    arena_state: state,
  };
  return pane;
}
// v1.9: settings panes are ephemeral. Filter them out before persisting so a
// page reload always returns to a clean workspace; the user re-opens via the
// sidebar Settings button when needed.
@@ -290,6 +301,8 @@ export interface UseWorkspacePanesResult {
createTab: (paneIdx: number, kind: WorkspaceTabKind) => Promise<void>;
/** Open an orchestrator run pane (or focus an existing one for the same run_id). */
addOrchestratorPane: (state: OrchestratorState) => string | null;
/** Open an arena battle pane (or focus an existing one for the same battle_id). */
addArenaPane: (state: ArenaState) => string | null;
/** Back-compat alias for createTab(paneIdx, 'coder'). */
createCoderTab: (paneIdx: number) => Promise<void>;
// Open-on-first-click, close-on-second-click. Singleton — settings panes
@@ -877,6 +890,38 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
});
}, [addOrchestratorPane]);
/**
 * Opens an arena pane for the given battle, or focuses the pane that already
 * shows the same `battle_id`.
 *
 * @param state Arena state for the battle to show.
 * @returns The id of the focused or newly opened pane, or null when no pane
 *   was opened (the non-settings pane count is already at MAX_PANES).
 *
 * NOTE(review): `openedId` is assigned inside the `setPanes` updater, so the
 * return value relies on the updater having run before this function returns
 * — confirm against how callers use the result.
 */
const addArenaPane = useCallback((state: ArenaState): string | null => {
  // Build the candidate pane (and its id) outside the updater so that a
  // repeated invocation of the updater (e.g. strict-mode double-invoke)
  // agrees on the same id instead of minting a different one per call.
  const candidate = arenaPane(state);
  let openedId: string | null = null;
  setPanes((prev) => {
    const existingIdx = prev.findIndex(
      (p) => p.kind === 'arena' && p.arena_state?.battle_id === state.battle_id,
    );
    if (existingIdx >= 0) {
      // Same battle is already open: focus it and leave the pane list untouched.
      setActivePaneIdx(existingIdx);
      openedId = prev[existingIdx]!.id;
      return prev;
    }
    if (nonSettingsCount(prev) >= MAX_PANES) {
      // NOTE(review): this toast is a side effect inside the updater; it stays
      // here because the limit check needs `prev`.
      toast.error(`Maximum ${MAX_PANES} panes`);
      return prev;
    }
    openedId = candidate.id;
    const next = [...prev, candidate];
    // The new pane is appended last, so its index is the final one.
    setActivePaneIdx(next.length - 1);
    return next;
  });
  return openedId;
}, []);
// Arena pane: listen on sessionEvents for 'open_arena_pane' (fired by the
// launcher) and open/focus the pane for the event's arena state. Whatever
// subscribe() returns is handed back to React as the effect cleanup.
useEffect(() => {
  const unsubscribe = sessionEvents.subscribe((ev) => {
    if (ev.type === 'open_arena_pane') {
      addArenaPane(ev.state);
    }
  });
  return unsubscribe;
}, [addArenaPane]);
// Returns the new settings pane id when one is OPENED (so mobile callers can
// push ?pane= atomically — see addPaneAndSwitch), or null when it was closed.
// Id generated outside the updater so a strict-mode double-invoke agrees.
@@ -1121,6 +1166,7 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
addSplitPane,
createTab,
addOrchestratorPane,
addArenaPane,
createCoderTab,
toggleSettingsPane,
removePane,