feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: local lane (llama-swap models, serial) and cloud lane (parallel). Added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,9 @@ import type {
|
||||
WorkspaceState,
|
||||
FlowRunRow,
|
||||
FlowStepRow,
|
||||
BattleShape,
|
||||
ContestantShape,
|
||||
CrossExaminationShape,
|
||||
} from './types';
|
||||
|
||||
// v2.6 Phase 1-UX §9b: chat-scoped agent-session rows. Returned by
|
||||
@@ -518,6 +521,63 @@ export const api = {
|
||||
request<AgentsResponse>(`/api/projects/${projectId}/agents`),
|
||||
},
|
||||
|
||||
// Arena battle API — proxied to boocoder at /api/coder/battles/*.
|
||||
battles: {
    /**
     * POST /api/coder/battles with the JSON-serialised battle definition.
     * The response is typed as `{ battle_id }`.
     */
    create: (body: {
      project_id: string;
      battle_type: 'coding' | 'qa';
      prompt: string;
      contestants: Array<{ identity: string; model: string }>;
    }) => {
      return request<{ battle_id: string }>('/api/coder/battles', {
        method: 'POST',
        body: JSON.stringify(body),
      });
    },

    /**
     * GET /api/coder/battles filtered by `project_id` (URI-encoded into the
     * query string). The response is typed as `{ battles }`.
     */
    list: (projectId: string) => {
      const url = `/api/coder/battles?project_id=${encodeURIComponent(projectId)}`;
      return request<{ battles: BattleShape[] }>(url);
    },

    /**
     * GET /api/coder/battles/:battleId. The response is typed as the battle
     * together with its contestants and cross-examinations.
     */
    get: (battleId: string) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}`;
      return request<{
        battle: BattleShape;
        contestants: ContestantShape[];
        cross_examinations: CrossExaminationShape[];
      }>(url);
    },

    /** POST /api/coder/battles/:battleId/stop; response typed as `{ cancelled }`. */
    stop: (battleId: string) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/stop`;
      return request<{ cancelled: boolean }>(url, { method: 'POST' });
    },

    /** POST /api/coder/battles/:battleId/analyze; response typed as `{ triggered }`. */
    analyze: (battleId: string) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/analyze`;
      return request<{ triggered: boolean }>(url, { method: 'POST' });
    },

    /**
     * POST /api/coder/battles/:battleId/cross-examine with the examiner's
     * `identity` and `model` as the JSON body; response typed as `{ cross_exam_id }`.
     */
    crossExamine: (battleId: string, body: { identity: string; model: string }) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/cross-examine`;
      return request<{ cross_exam_id: string }>(url, { method: 'POST', body: JSON.stringify(body) });
    },

    /** GET /api/coder/battles/:battleId/analysis; response typed as `{ text }`. */
    getAnalysis: (battleId: string) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/analysis`;
      return request<{ text: string }>(url);
    },

    /**
     * POST /api/coder/battles/generate-prompt with `{ description }` as the
     * JSON body; response typed as `{ prompt }`.
     */
    generatePrompt: (description: string) => {
      return request<{ prompt: string }>('/api/coder/battles/generate-prompt', {
        method: 'POST',
        body: JSON.stringify({ description }),
      });
    },

    /**
     * PATCH /api/coder/battles/:battleId/winner with `winner_contestant_id`
     * (a contestant id or `null`) as the JSON body; response typed as `{ ok }`.
     */
    setWinner: (battleId: string, body: { winner_contestant_id: string | null }) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/winner`;
      return request<{ ok: boolean }>(url, { method: 'PATCH', body: JSON.stringify(body) });
    },

    /**
     * GET /api/coder/battles/:battleId/contestants/:contestantId/diff;
     * response typed as `{ diff }`. Both ids are URI-encoded.
     */
    getDiff: (battleId: string, contestantId: string) => {
      const url = `/api/coder/battles/${encodeURIComponent(battleId)}/contestants/${encodeURIComponent(contestantId)}/diff`;
      return request<{ diff: string }>(url);
    },
  },
|
||||
|
||||
skills: {
    /** GET /api/skills; the response is typed as `{ skills }`. */
    list: () => {
      return request<{ skills: Skill[] }>('/api/skills');
    },
  },
|
||||
|
||||
Reference in New Issue
Block a user