feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: a local lane (llama-swap models, run serially) and a cloud lane (run in parallel). The arena frames are added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -358,6 +358,53 @@ export const FlowRunStepUpdatedFrame = z.object({
|
||||
report: z.string().optional(),
|
||||
});
|
||||
|
||||
// ---- arena frames ----------------------------------------------------------
|
||||
|
||||
// One row of the contestant roster that BattleStartedFrame carries in its
// `contestants` array. Module-private: it is only used as a building block
// for the exported frame schemas.
const ContestantManifestEntry = z.object({
  // Identifier of the contestant.
  id: Uuid,
  // Non-empty identity string.
  // NOTE(review): presumably the display label shown in the roster — confirm.
  identity: z.string().min(1),
  // Non-empty model name.
  model: z.string().min(1),
  // Lane the contestant is assigned to: 'local' or 'cloud'.
  lane: z.enum([
    'local',
    'cloud',
  ]),
});
|
||||
|
||||
// Emitted a single time, at the moment a battle starts. The full contestant
// roster rides along so the ArenaPane can lay out its grid right away.
export const BattleStartedFrame = z.object({
  // Frame discriminator used by the WsFrameSchema union.
  type: z.literal('battle_started'),
  // Identifier of the battle that just started.
  battle_id: Uuid,
  // Kind of battle: 'coding' or 'qa'.
  battle_type: z.enum([
    'coding',
    'qa',
  ]),
  // The prompt for this battle (any string, including empty, is accepted).
  prompt: z.string(),
  // Roster of contestants taking part in the battle.
  contestants: z.array(ContestantManifestEntry),
});
|
||||
|
||||
// Emitted whenever a contestant changes status or produces more streaming
// output. Apart from the discriminator and the two ids, every field is
// optional:
//   - `delta` holds the most recent chunk of streamed output while
//     status='running'.
//   - `battle_status` appears only on the final transition, the one that
//     closes the battle.
export const ContestantUpdatedFrame = z.object({
  // Frame discriminator used by the WsFrameSchema union.
  type: z.literal('contestant_updated'),
  // Battle the contestant belongs to.
  battle_id: Uuid,
  // Contestant this update is about.
  contestant_id: Uuid,
  // Contestant lifecycle state.
  status: z
    .enum([
      'queued',
      'running',
      'done',
      'error',
    ])
    .optional(),
  // Duration in milliseconds; must be a non-negative integer.
  duration_ms: z.number().int().nonnegative().optional(),
  // Throughput in tokens per second; must be non-negative.
  tokens_per_sec: z.number().nonnegative().optional(),
  // Overall battle state; sent only with the battle-closing transition.
  battle_status: z
    .enum([
      'pending',
      'running',
      'completed',
      'failed',
      'cancelled',
    ])
    .optional(),
  // Latest chunk of streaming output.
  delta: z.string().optional(),
  // Error text.
  // NOTE(review): presumably set when status is 'error' — confirm with publisher.
  error: z.string().optional(),
});
|
||||
|
||||
// Emitted for battle-level changes that are not attached to a contestant
// update: the analysis finished, a winner was set, or a cross-exam verdict
// became available. The pane applies these to its analysis panel and winner
// badge directly, so no refetch is needed.
// Every field besides `type` and `battle_id` is optional — a publisher sends
// only the fields that changed.
export const BattleUpdatedFrame = z.object({
  // Frame discriminator used by the WsFrameSchema union.
  type: z.literal('battle_updated'),
  // Battle this update is about.
  battle_id: Uuid,
  // Overall battle state.
  status: z
    .enum([
      'pending',
      'running',
      'completed',
      'failed',
      'cancelled',
    ])
    .optional(),
  // Winning contestant; may be a Uuid, null, or omitted.
  // NOTE(review): null presumably means "winner cleared" — confirm.
  winner_contestant_id: Uuid.nullish(),
  // Flag for the analysis result.
  // NOTE(review): presumably true once the analysis can be shown — confirm.
  analysis_ready: z.boolean().optional(),
  // Identifier of a cross-examination.
  cross_exam_id: Uuid.optional(),
});
|
||||
|
||||
// ---- discriminated union ---------------------------------------------------
|
||||
|
||||
export const WsFrameSchema = z.discriminatedUnion('type', [
|
||||
@@ -381,6 +428,10 @@ export const WsFrameSchema = z.discriminatedUnion('type', [
|
||||
// orchestrator
|
||||
FlowRunStartedFrame,
|
||||
FlowRunStepUpdatedFrame,
|
||||
// arena
|
||||
BattleStartedFrame,
|
||||
ContestantUpdatedFrame,
|
||||
BattleUpdatedFrame,
|
||||
// per-user
|
||||
ChatStatusFrame,
|
||||
SessionUpdatedFrame,
|
||||
@@ -425,6 +476,9 @@ export const KNOWN_FRAME_TYPES: readonly WsFrame['type'][] = [
|
||||
'agent_status_updated',
|
||||
'flow_run_started',
|
||||
'flow_run_step_updated',
|
||||
'battle_started',
|
||||
'contestant_updated',
|
||||
'battle_updated',
|
||||
'chat_status',
|
||||
'session_updated',
|
||||
'session_renamed',
|
||||
|
||||
Reference in New Issue
Block a user