feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: local lane (llama-swap models, serial) and cloud lane (parallel). Added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore
This commit is contained in:
@@ -3,6 +3,7 @@ import type { DragEvent } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
import { api } from '@/api/client';
|
||||
import type {
|
||||
ArenaState,
|
||||
ClosedPaneEntry,
|
||||
HtmlArtifactState,
|
||||
MarkdownArtifactState,
|
||||
@@ -187,6 +188,16 @@ function orchestratorPane(state: OrchestratorState): WorkspacePane {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a workspace pane of kind 'arena' wrapping the given battle state.
 *
 * The pane gets a freshly generated id and carries no chat tabs: `chatIds`
 * is empty and `activeChatIdx` is -1.
 *
 * @param state Arena state stored on the pane as `arena_state`.
 * @returns The new arena pane.
 */
function arenaPane(state: ArenaState): WorkspacePane {
  const pane: WorkspacePane = {
    id: generateId(),
    kind: 'arena',
    chatIds: [],
    activeChatIdx: -1,
    arena_state: state,
  };
  return pane;
}
|
||||
|
||||
// v1.9: settings panes are ephemeral. Filter them out before persisting so a
|
||||
// page reload always returns to a clean workspace; the user re-opens via the
|
||||
// sidebar Settings button when needed.
|
||||
@@ -290,6 +301,8 @@ export interface UseWorkspacePanesResult {
|
||||
createTab: (paneIdx: number, kind: WorkspaceTabKind) => Promise<void>;
|
||||
/** Open an orchestrator run pane (or focus an existing one for the same run_id). */
|
||||
addOrchestratorPane: (state: OrchestratorState) => string | null;
|
||||
/** Open an arena battle pane (or focus an existing one for the same battle_id). */
|
||||
addArenaPane: (state: ArenaState) => string | null;
|
||||
/** Back-compat alias for createTab(paneIdx, 'coder'). */
|
||||
createCoderTab: (paneIdx: number) => Promise<void>;
|
||||
// Open-on-first-click, close-on-second-click. Singleton — settings panes
|
||||
@@ -877,6 +890,38 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
||||
});
|
||||
}, [addOrchestratorPane]);
|
||||
|
||||
/**
 * Opens an arena pane for `state`, or focuses the arena pane that already
 * shows the same `battle_id`.
 *
 * @param state Arena state for the battle to show.
 * @returns The id of the focused or newly added pane, or null when the
 *   non-settings pane count has already reached MAX_PANES (an error toast is
 *   shown in that case).
 */
const addArenaPane = useCallback((state: ArenaState): string | null => {
  // Build the pane (and therefore its id) outside the updater so that a
  // strict-mode double-invoke of the updater reuses one id instead of minting
  // a different id on each pass.
  const candidate = arenaPane(state);
  // NOTE(review): openedId is assigned inside the updater, so the value
  // returned below is only populated if the updater has run by the time
  // setPanes returns — confirm callers tolerate a null here.
  let openedId: string | null = null;
  setPanes((prev) => {
    const existingIdx = prev.findIndex(
      (p) => p.kind === 'arena' && p.arena_state?.battle_id === state.battle_id,
    );
    const existing = existingIdx >= 0 ? prev[existingIdx] : undefined;
    if (existing) {
      // Same battle is already open: focus it and leave the list untouched.
      setActivePaneIdx(existingIdx);
      openedId = existing.id;
      return prev;
    }
    if (nonSettingsCount(prev) >= MAX_PANES) {
      toast.error(`Maximum ${MAX_PANES} panes`);
      return prev;
    }
    openedId = candidate.id;
    const next = [...prev, candidate];
    // The new pane is appended last, so it becomes the active pane.
    setActivePaneIdx(next.length - 1);
    return next;
  });
  return openedId;
}, []);
|
||||
|
||||
// Listen on sessionEvents for 'open_arena_pane' and open (or focus) the
// matching arena pane. Every other event type is ignored. The value returned
// by subscribe is handed back to React as the effect cleanup.
useEffect(() => {
  const unsubscribe = sessionEvents.subscribe((event) => {
    if (event.type === 'open_arena_pane') {
      addArenaPane(event.state);
    }
  });
  return unsubscribe;
}, [addArenaPane]);
|
||||
|
||||
// Returns the new settings pane id when one is OPENED (so mobile callers can
|
||||
// push ?pane= atomically — see addPaneAndSwitch), or null when it was closed.
|
||||
// Id generated outside the updater so a strict-mode double-invoke agrees.
|
||||
@@ -1121,6 +1166,7 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
||||
addSplitPane,
|
||||
createTab,
|
||||
addOrchestratorPane,
|
||||
addArenaPane,
|
||||
createCoderTab,
|
||||
toggleSettingsPane,
|
||||
removePane,
|
||||
|
||||
Reference in New Issue
Block a user