feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs the
same prompt against 2-6 Contestants across two concurrent lanes: a local
lane (llama-swap models, run serially) and a cloud lane (run in parallel).
The new arena frames are added to all three frame registries:
@boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -372,13 +372,12 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES mess
|
||||
ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
|
||||
|
||||
-- tasks table (provider dispatch, arena)
|
||||
-- tasks table (provider dispatch)
|
||||
CREATE TABLE IF NOT EXISTS tasks (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
||||
session_id UUID REFERENCES sessions(id) ON DELETE CASCADE,
|
||||
parent_task_id UUID REFERENCES tasks(id),
|
||||
arena_id UUID,
|
||||
state TEXT NOT NULL DEFAULT 'pending'
|
||||
CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')),
|
||||
input TEXT NOT NULL,
|
||||
@@ -405,3 +404,6 @@ DO $$ BEGIN
|
||||
FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- Remove the v2.0.5 arena_id column (replaced by the new Arena feature).
|
||||
ALTER TABLE tasks DROP COLUMN IF EXISTS arena_id;
|
||||
|
||||
@@ -44,7 +44,11 @@ export interface InferenceFrame {
|
||||
| 'chat_renamed'
|
||||
| 'error'
|
||||
| 'flow_run_started'
|
||||
| 'flow_run_step_updated';
|
||||
| 'flow_run_step_updated'
|
||||
// arena frames
|
||||
| 'battle_started'
|
||||
| 'contestant_updated'
|
||||
| 'battle_updated';
|
||||
message_id?: string;
|
||||
message_ids?: string[];
|
||||
chat_id?: string;
|
||||
@@ -84,6 +88,19 @@ export interface InferenceFrame {
|
||||
status?: string;
|
||||
run_status?: 'running' | 'completed' | 'failed' | 'cancelled';
|
||||
report?: string;
|
||||
// arena frames
|
||||
battle_id?: string;
|
||||
battle_type?: 'coding' | 'qa';
|
||||
prompt?: string;
|
||||
contestants?: Array<{ id: string; identity: string; model: string; lane: 'local' | 'cloud' }>;
|
||||
contestant_id?: string;
|
||||
battle_status?: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
||||
duration_ms?: number;
|
||||
tokens_per_sec?: number;
|
||||
winner_contestant_id?: string | null;
|
||||
analysis_ready?: boolean;
|
||||
cross_exam_id?: string;
|
||||
delta?: string;
|
||||
}
|
||||
|
||||
export type FramePublisher = (sessionId: string, frame: InferenceFrame) => void;
|
||||
|
||||
Reference in New Issue
Block a user