feat(web,coder): arena pane — compare 2-6 AI competitors on the same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: a local lane (llama-swap models, run serially) and a cloud lane (run in parallel). The battle_* frames are added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore
This commit is contained in:
@@ -3,7 +3,11 @@
|
||||
// also refresh the sidebar's session list).
|
||||
|
||||
import type {
|
||||
ArenaState,
|
||||
BattleShape,
|
||||
Chat,
|
||||
ContestantShape,
|
||||
CrossExaminationShape,
|
||||
ErrorReason,
|
||||
HtmlArtifactState,
|
||||
MarkdownArtifactState,
|
||||
@@ -231,6 +235,53 @@ export interface FlowRunStepUpdatedEvent {
|
||||
report?: string;
|
||||
}
|
||||
|
||||
/**
 * Arena: emitted by "New Arena" menu items to request the launcher dialog.
 */
export interface OpenArenaLauncherEvent {
  /** Discriminant for the session-event union. */
  type: 'open_arena_launcher';
  /** Id of the project the request is made for. */
  project_id: string;
  /** Optional placement hint — presumably "new pane" vs. "split"; confirm against the subscriber. */
  placement?: 'new' | 'split';
}
|
||||
|
||||
/**
 * Arena: emitted after a battle is created to open/focus the arena pane.
 */
export interface OpenArenaPaneEvent {
  /** Discriminant for the session-event union. */
  type: 'open_arena_pane';
  /** Arena state carried to the pane that is opened or focused. */
  state: ArenaState;
  /** Optional placement hint — presumably "new pane" vs. "split"; confirm against the subscriber. */
  placement?: 'new' | 'split';
}
|
||||
|
||||
// Arena: battle lifecycle frames forwarded from the coder user channel.

/**
 * Announces a battle together with its prompt and full contestant roster.
 */
export interface BattleStartedEvent {
  type: 'battle_started';
  battle_id: string;
  /** Kind of contest being run. */
  battle_type: 'coding' | 'qa';
  /** The prompt shared by every contestant in the battle. */
  prompt: string;
  /** Roster entries; `lane` records whether a contestant is on the local or the cloud lane. */
  contestants: Array<{
    id: string;
    identity: string;
    model: string;
    lane: 'local' | 'cloud';
  }>;
}
|
||||
|
||||
/**
 * Incremental update for one contestant of a battle.
 *
 * Only the two ids are required; every other field is optional, so a frame
 * may carry any subset of them.
 */
export interface ContestantUpdatedEvent {
  type: 'contestant_updated';
  battle_id: string;
  contestant_id: string;
  /** Contestant status, when the frame reports one. */
  status?: 'queued' | 'running' | 'done' | 'error';
  duration_ms?: number;
  tokens_per_sec?: number;
  /** Status of the enclosing battle, when the frame reports one. */
  battle_status?: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
  /** Presumably a chunk of streamed contestant output — confirm against the producer. */
  delta?: string;
  /** Error text, when the frame reports one. */
  error?: string;
}
|
||||
|
||||
/**
 * Battle-level update frame (status, winner, analysis and cross-exam fields).
 *
 * Only `battle_id` is required; every other field is optional.
 */
export interface BattleUpdatedEvent {
  type: 'battle_updated';
  battle_id: string;
  /** Battle status, when the frame reports one. */
  status?: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
  /** Winning contestant id; `null` is allowed — presumably "no winner"; confirm against the producer. */
  winner_contestant_id?: string | null;
  analysis_ready?: boolean;
  cross_exam_id?: string;
}
|
||||
|
||||
// Re-export arena API shapes for consumers that need the full battle data.
|
||||
export type { BattleShape, ContestantShape, CrossExaminationShape };
|
||||
|
||||
export type SessionEvent =
|
||||
| SessionRenamedEvent
|
||||
| ProjectCreatedEvent
|
||||
@@ -262,7 +313,12 @@ export type SessionEvent =
|
||||
| OpenOrchestratorPaneEvent
|
||||
| FlowRunStartedEvent
|
||||
| FlowRunStepUpdatedEvent
|
||||
| OpenFlowLauncherEvent;
|
||||
| OpenFlowLauncherEvent
|
||||
| OpenArenaLauncherEvent
|
||||
| OpenArenaPaneEvent
|
||||
| BattleStartedEvent
|
||||
| ContestantUpdatedEvent
|
||||
| BattleUpdatedEvent;
|
||||
/** Callback signature: receives a single session event and returns nothing. */
type Listener = (event: SessionEvent) => void;

/** Module-level set of listener callbacks; it starts empty. */
const listeners: Set<Listener> = new Set();
|
||||
|
||||
@@ -8,7 +8,13 @@
|
||||
import { useEffect } from 'react';
|
||||
import { WsFrameSchema } from '@boocode/contracts/ws-frames';
|
||||
import { sessionEvents } from './sessionEvents';
|
||||
import type { FlowRunStartedEvent, FlowRunStepUpdatedEvent } from './sessionEvents';
|
||||
import type {
|
||||
BattleStartedEvent,
|
||||
BattleUpdatedEvent,
|
||||
ContestantUpdatedEvent,
|
||||
FlowRunStartedEvent,
|
||||
FlowRunStepUpdatedEvent,
|
||||
} from './sessionEvents';
|
||||
|
||||
// Reconnect timing bounds in milliseconds — presumably the initial delay and the
// cap of a backoff schedule; the code that uses them is outside this view.
const RECONNECT_INITIAL_MS = 1_000;
const RECONNECT_MAX_MS = 30_000;
|
||||
@@ -49,6 +55,12 @@ export function useCoderUserEvents(): void {
|
||||
sessionEvents.emit(frame as unknown as FlowRunStartedEvent);
|
||||
} else if (frame.type === 'flow_run_step_updated') {
|
||||
sessionEvents.emit(frame as unknown as FlowRunStepUpdatedEvent);
|
||||
} else if (frame.type === 'battle_started') {
|
||||
sessionEvents.emit(frame as unknown as BattleStartedEvent);
|
||||
} else if (frame.type === 'contestant_updated') {
|
||||
sessionEvents.emit(frame as unknown as ContestantUpdatedEvent);
|
||||
} else if (frame.type === 'battle_updated') {
|
||||
sessionEvents.emit(frame as unknown as BattleUpdatedEvent);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -204,6 +204,13 @@ function applyFrame(state: State, frame: WsFrame): State {
|
||||
// No-op here to keep TS exhaustiveness satisfied.
|
||||
return state;
|
||||
}
|
||||
case 'battle_started':
|
||||
case 'contestant_updated':
|
||||
case 'battle_updated': {
|
||||
// Arena frames consumed by ArenaPane's own subscription.
|
||||
// No-op here to keep TS exhaustiveness satisfied.
|
||||
return state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -195,6 +195,13 @@ function applyEvent(prev: SidebarResponse, event: import('./sessionEvents').Sess
|
||||
case 'flow_run_step_updated':
|
||||
// Consumed by useWorkspacePanes / OrchestratorPane / FlowLauncherDialog; sidebar has no stake.
|
||||
return prev;
|
||||
case 'open_arena_launcher':
|
||||
case 'open_arena_pane':
|
||||
case 'battle_started':
|
||||
case 'contestant_updated':
|
||||
case 'battle_updated':
|
||||
// Consumed by useWorkspacePanes / ArenaPane / ArenaLauncherDialog; sidebar has no stake.
|
||||
return prev;
|
||||
case 'project_archived': {
|
||||
const next = prev.projects.filter((p) => p.id !== event.project_id);
|
||||
if (next.length === prev.projects.length) return prev;
|
||||
|
||||
@@ -3,6 +3,7 @@ import type { DragEvent } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
import { api } from '@/api/client';
|
||||
import type {
|
||||
ArenaState,
|
||||
ClosedPaneEntry,
|
||||
HtmlArtifactState,
|
||||
MarkdownArtifactState,
|
||||
@@ -187,6 +188,16 @@ function orchestratorPane(state: OrchestratorState): WorkspacePane {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a workspace pane of kind `'arena'` around the given arena state.
 *
 * The pane gets a freshly generated id, has no chat tabs (`chatIds` is empty
 * and `activeChatIdx` is -1), and stores `state` under `arena_state`.
 */
function arenaPane(state: ArenaState): WorkspacePane {
  const pane: WorkspacePane = {
    id: generateId(),
    kind: 'arena',
    chatIds: [],
    activeChatIdx: -1,
    arena_state: state,
  };
  return pane;
}
|
||||
|
||||
// v1.9: settings panes are ephemeral. Filter them out before persisting so a
|
||||
// page reload always returns to a clean workspace; the user re-opens via the
|
||||
// sidebar Settings button when needed.
|
||||
@@ -290,6 +301,8 @@ export interface UseWorkspacePanesResult {
|
||||
createTab: (paneIdx: number, kind: WorkspaceTabKind) => Promise<void>;
|
||||
/** Open an orchestrator run pane (or focus an existing one for the same run_id). */
|
||||
addOrchestratorPane: (state: OrchestratorState) => string | null;
|
||||
/** Open an arena battle pane (or focus an existing one for the same battle_id). */
|
||||
addArenaPane: (state: ArenaState) => string | null;
|
||||
/** Back-compat alias for createTab(paneIdx, 'coder'). */
|
||||
createCoderTab: (paneIdx: number) => Promise<void>;
|
||||
// Open-on-first-click, close-on-second-click. Singleton — settings panes
|
||||
@@ -877,6 +890,38 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
||||
});
|
||||
}, [addOrchestratorPane]);
|
||||
|
||||
/**
 * Opens an arena pane for `state.battle_id`, or focuses the pane that already
 * shows that battle.
 *
 * Returns the id of the focused or newly opened pane, or `null` when the pane
 * limit was reached. The id is captured from inside the `setPanes` updater, so
 * it is only non-null if the updater has already run by the time this callback
 * returns — NOTE(review): confirm callers tolerate `null` in that case.
 */
const addArenaPane = useCallback((state: ArenaState): string | null => {
  // Build the candidate pane (and its generated id) outside the updater so every
  // invocation of the updater sees the same id; a strict-mode double-invoke would
  // otherwise mint two different ids and `openedId` could name a discarded pane.
  const candidate = arenaPane(state);
  let openedId: string | null = null;

  setPanes((prev) => {
    const existingIdx = prev.findIndex(
      (p) => p.kind === 'arena' && p.arena_state?.battle_id === state.battle_id,
    );
    const existing = existingIdx >= 0 ? prev[existingIdx] : undefined;
    if (existing) {
      // A pane for this battle is already open: focus it instead of duplicating.
      setActivePaneIdx(existingIdx);
      openedId = existing.id;
      return prev;
    }

    if (nonSettingsCount(prev) >= MAX_PANES) {
      toast.error(`Maximum ${MAX_PANES} panes`);
      return prev;
    }

    openedId = candidate.id;
    const next = [...prev, candidate];
    setActivePaneIdx(next.length - 1);
    return next;
  });

  return openedId;
}, []);
|
||||
|
||||
// Arena pane: open via sessionEvents (fired by the launcher).
// The effect's cleanup is whatever `sessionEvents.subscribe` returns.
useEffect(() => {
  const unsubscribe = sessionEvents.subscribe((ev) => {
    if (ev.type === 'open_arena_pane') {
      addArenaPane(ev.state);
    }
  });
  return unsubscribe;
}, [addArenaPane]);
|
||||
|
||||
// Returns the new settings pane id when one is OPENED (so mobile callers can
|
||||
// push ?pane= atomically — see addPaneAndSwitch), or null when it was closed.
|
||||
// Id generated outside the updater so a strict-mode double-invoke agrees.
|
||||
@@ -1121,6 +1166,7 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
||||
addSplitPane,
|
||||
createTab,
|
||||
addOrchestratorPane,
|
||||
addArenaPane,
|
||||
createCoderTab,
|
||||
toggleSettingsPane,
|
||||
removePane,
|
||||
|
||||
Reference in New Issue
Block a user