feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt

Arena is a new pane kind for competitive AI evaluation. A Battle runs
the same prompt against 2-6 Contestants across two concurrent lanes:
local lane (llama-swap models, serial) and cloud lane (parallel).

Added to all three registries: @boocode/contracts WsFrameSchema,
server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results
  writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore
This commit is contained in:
2026-06-06 23:25:29 +00:00
parent 84a024a5a4
commit 3474be4865
34 changed files with 4581 additions and 146 deletions

View File

@@ -0,0 +1,410 @@
// ArenaLauncherDialog — mirrors FlowLauncherDialog.
// Opens via sessionEvents 'open_arena_launcher'.
// Flow: pick Battle Type → write/generate prompt → add 2–6 contestants → Start.
import { useCallback, useEffect, useRef, useState } from 'react';
import { Loader2, Minus, Plus, Swords, TriangleAlert, X } from 'lucide-react';
import { toast } from 'sonner';
import {
Dialog,
DialogContent,
DialogFooter,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog';
import { Button } from '@/components/ui/button';
import { Label } from '@/components/ui/label';
import { api } from '@/api/client';
import type { Agent, ProviderSnapshotEntry } from '@/api/types';
import { sessionEvents } from '@/hooks/sessionEvents';
import { useProviderSnapshot } from '@/hooks/useProviderSnapshot';
import { cn } from '@/lib/utils';
// ─── types ────────────────────────────────────────────────────────────────────
// Kind of battle being configured. In this file it decides which identity
// options a row offers: provider names for 'coding', agents for 'qa'.
type BattleType = 'coding' | 'qa';
// One editable roster row in the launcher form.
interface Contestant {
key: string; // local unique key for React
// Provider name (coding) or agent id (qa); '' until the user picks one.
identity: string;
// Selected model id; '' until the user picks one.
model: string;
}
// ─── helpers ─────────────────────────────────────────────────────────────────
/**
 * Creates a blank roster row.
 *
 * @returns A contestant with a freshly generated UUID as its React key and
 *   empty identity/model selections.
 */
function newContestant(): Contestant {
  const key = crypto.randomUUID();
  return { key, identity: '', model: '' };
}
/**
 * Reports whether another row in the roster has the same identity + model
 * pair as `c`.
 *
 * Only fully specified rows can be duplicates: a row whose identity or model
 * is still unselected ('') is never flagged. Without the model check, two rows
 * that merely share a backend/persona would be reported as duplicates while
 * the user is still choosing their models.
 *
 * @param contestants The full roster, including `c` itself.
 * @param c The row to check; it is excluded from the comparison by `key`.
 * @returns `true` when a different row has the same non-empty identity and
 *   the same non-empty model.
 */
function isDuplicate(contestants: Contestant[], c: Contestant): boolean {
  // An incomplete row is not a real identity + model pair yet.
  if (c.identity === '' || c.model === '') return false;
  return contestants.some(
    (other) => other.key !== c.key && other.identity === c.identity && other.model === c.model,
  );
}
/**
 * Reports whether any row in the roster is flagged by `isDuplicate`.
 *
 * @param contestants The full roster.
 * @returns `true` as soon as one row is found to be a duplicate of another.
 */
function hasDuplicatePair(contestants: Contestant[]): boolean {
  for (const candidate of contestants) {
    if (isDuplicate(contestants, candidate)) {
      return true;
    }
  }
  return false;
}
/**
 * Counts the contestants this dialog treats as "local" for the serial-run warning.
 *
 * - Q&A battles: every row with an identity selected is counted.
 * - Coding battles: a row is counted when its model id appears in the
 *   `boocode` snapshot entry's model list, either verbatim or after removing a
 *   leading `llama-swap/` prefix.
 *
 * @param battleType The battle type currently selected.
 * @param contestants The roster rows.
 * @param snapshot Provider snapshot, or `null` when none is available.
 * @returns The number of rows counted as local.
 */
function localCount(battleType: BattleType, contestants: Contestant[], snapshot: ProviderSnapshotEntry[] | null): number {
  if (battleType === 'qa') {
    let chosen = 0;
    for (const row of contestants) {
      if (row.identity !== '') chosen += 1;
    }
    return chosen;
  }

  // Collect the model ids of the first snapshot entry named 'boocode'.
  const knownLocal = new Set<string>();
  for (const entry of snapshot ?? []) {
    if (entry.name === 'boocode') {
      for (const m of entry.models) knownLocal.add(m.id);
      break;
    }
  }

  let total = 0;
  for (const row of contestants) {
    // Accept bare ids as well as the llama-swap/-prefixed form of the same id.
    const bareId = row.model.replace(/^llama-swap\//, '');
    if (knownLocal.has(row.model) || knownLocal.has(bareId)) total += 1;
  }
  return total;
}
// ─── ContestantRow ────────────────────────────────────────────────────────────
/**
 * One roster row: an identity <select>, a model <select>, a duplicate marker
 * and an optional remove button.
 *
 * Identity options are the installed + enabled providers for Coding battles
 * and the agents list for Q&A battles. Model options come from the selected
 * provider (Coding) or from the `boocode` snapshot entry (Q&A).
 */
function ContestantRow({
  contestant,
  battleType,
  snapshot,
  agents,
  allContestants,
  onUpdate,
  onRemove,
  removable,
}: {
  contestant: Contestant;
  battleType: BattleType;
  snapshot: ProviderSnapshotEntry[] | null;
  agents: Agent[];
  allContestants: Contestant[];
  onUpdate: (patch: Partial<Contestant>) => void;
  onRemove: () => void;
  removable: boolean;
}) {
  const isCoding = battleType === 'coding';
  const providers = snapshot ?? [];
  const flaggedDuplicate = isDuplicate(allContestants, contestant);

  // Coding rows choose a backend (provider); Q&A rows choose a persona (agent).
  let identityOptions: { value: string; label: string }[];
  if (isCoding) {
    identityOptions = providers
      .filter((p) => p.installed && p.enabled)
      .map((p) => ({ value: p.name, label: p.label }));
  } else {
    identityOptions = agents.map((agent) => ({ value: agent.id, label: agent.name }));
  }

  // Coding reads models from the chosen provider; Q&A always reads them from boocode.
  const modelSourceName = isCoding ? contestant.identity : 'boocode';
  const modelSource = providers.find((p) => p.name === modelSourceName);
  const modelOptions: { value: string; label: string }[] = (modelSource?.models ?? []).map(
    (m) => ({ value: m.id, label: m.label }),
  );

  const selectClass =
    'flex-1 min-w-0 text-xs border border-border rounded bg-background px-2 py-1.5 text-foreground focus:outline-none focus:ring-1 focus:ring-ring';

  return (
    <div className={cn('flex items-center gap-2', flaggedDuplicate && 'opacity-60')}>
      <select
        value={contestant.identity}
        // Picking a different identity clears the model so a stale one cannot persist.
        onChange={(event) => onUpdate({ identity: event.target.value, model: '' })}
        className={selectClass}
        aria-label={isCoding ? 'Backend' : 'Persona'}
      >
        <option value="">{isCoding ? 'Backend…' : 'Persona…'}</option>
        {identityOptions.map((opt) => (
          <option key={opt.value} value={opt.value}>{opt.label}</option>
        ))}
      </select>
      <select
        value={contestant.model}
        onChange={(event) => onUpdate({ model: event.target.value })}
        disabled={!contestant.identity}
        className={`${selectClass} disabled:opacity-50`}
        aria-label="Model"
      >
        <option value="">Model</option>
        {modelOptions.map((opt) => (
          <option key={opt.value} value={opt.value}>{opt.label}</option>
        ))}
      </select>
      {flaggedDuplicate && (
        <span title="Duplicate contestant" className="shrink-0 text-destructive">
          <TriangleAlert size={12} />
        </span>
      )}
      {removable && (
        <button
          type="button"
          onClick={onRemove}
          className="shrink-0 inline-flex items-center justify-center p-1 rounded text-muted-foreground hover:bg-muted hover:text-foreground"
          aria-label="Remove contestant"
        >
          <Minus size={12} />
        </button>
      )}
    </div>
  );
}
// ─── ArenaLauncherDialog ──────────────────────────────────────────────────────
/**
 * Modal dialog that configures and launches an Arena battle.
 *
 * It opens when a `open_arena_launcher` session event arrives, which also
 * resets the whole form. The user picks a battle type, writes (or generates)
 * a prompt and fills a roster of 2–6 contestants. On Start the battle is
 * created through `api.battles.create` and an `open_arena_pane` session event
 * is emitted with the new battle id and the requested placement.
 */
export function ArenaLauncherDialog() {
  // Roster size limits, shared by the guards, the counter label and the UI.
  const MIN_CONTESTANTS = 2;
  const MAX_CONTESTANTS = 6;

  const [open, setOpen] = useState(false);
  const [projectId, setProjectId] = useState('');
  const [placement, setPlacement] = useState<'new' | 'split'>('new');
  const [battleType, setBattleType] = useState<BattleType>('coding');
  const [prompt, setPrompt] = useState('');
  const [contestants, setContestants] = useState<Contestant[]>(() => [
    newContestant(),
    newContestant(),
  ]);
  const [generating, setGenerating] = useState(false);
  const [starting, setStarting] = useState(false);
  const [agents, setAgents] = useState<Agent[]>([]);
  const promptRef = useRef<HTMLTextAreaElement>(null);
  const snapshot = useProviderSnapshot();

  // Open (and fully reset) the dialog whenever a launcher event arrives.
  useEffect(() => {
    return sessionEvents.subscribe((ev) => {
      if (ev.type !== 'open_arena_launcher') return;
      setProjectId(ev.project_id);
      setPlacement(ev.placement ?? 'new');
      setBattleType('coding');
      setPrompt('');
      setContestants([newContestant(), newContestant()]);
      setGenerating(false);
      setStarting(false);
      setOpen(true);
    });
  }, []);

  // Load agents list when dialog opens (for Q&A mode).
  useEffect(() => {
    if (!open || !projectId) return;
    // Ignore a response that arrives after the dialog closed or the project
    // changed, so a slow request cannot overwrite newer state.
    let cancelled = false;
    api.agents
      .list(projectId)
      .then((r) => {
        if (!cancelled) setAgents(r.agents);
      })
      .catch(() => {
        // Best-effort: on failure the persona list simply keeps its previous value.
      });
    return () => {
      cancelled = true;
    };
  }, [open, projectId]);

  // Expands the current prompt text into a fuller prompt via the API.
  const handleGeneratePrompt = useCallback(async () => {
    const description = prompt.trim();
    if (!description || generating) return;
    setGenerating(true);
    try {
      const { prompt: generated } = await api.battles.generatePrompt(description);
      setPrompt(generated);
      promptRef.current?.focus();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Generate failed');
    } finally {
      setGenerating(false);
    }
  }, [prompt, generating]);

  function updateContestant(key: string, patch: Partial<Contestant>) {
    setContestants((prev) => prev.map((c) => (c.key === key ? { ...c, ...patch } : c)));
  }

  function removeContestant(key: string) {
    // Enforce the minimum inside the updater so it is checked against the
    // latest roster rather than the one captured by this render.
    setContestants((prev) =>
      prev.length <= MIN_CONTESTANTS ? prev : prev.filter((c) => c.key !== key),
    );
  }

  function addContestant() {
    // Same reasoning as removeContestant: the cap is checked on the latest roster.
    setContestants((prev) =>
      prev.length >= MAX_CONTESTANTS ? prev : [...prev, newContestant()],
    );
  }

  function selectBattleType(next: BattleType) {
    // Re-clicking the active type must not wipe the roster the user has built.
    if (next === battleType) return;
    setBattleType(next);
    // Identity options differ per battle type, so existing rows cannot carry over.
    setContestants([newContestant(), newContestant()]);
  }

  const trimmedPrompt = prompt.trim();
  const hasDuplicates = hasDuplicatePair(contestants);
  const canStart =
    !starting &&
    trimmedPrompt.length > 0 &&
    contestants.length >= MIN_CONTESTANTS &&
    contestants.every((c) => c.identity !== '' && c.model !== '') &&
    !hasDuplicates;
  const localLaneCount = localCount(battleType, contestants, snapshot);
  const showLocalWarning = localLaneCount >= 3;

  // Creates the battle, then asks the workspace to open its pane.
  async function handleStart() {
    if (!canStart) return;
    setStarting(true);
    try {
      const { battle_id } = await api.battles.create({
        project_id: projectId,
        battle_type: battleType,
        prompt: trimmedPrompt,
        contestants: contestants.map((c) => ({ identity: c.identity, model: c.model })),
      });
      sessionEvents.emit({
        type: 'open_arena_pane',
        state: { battle_id, battle_type: battleType, prompt: trimmedPrompt },
        placement,
      });
      setOpen(false);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Failed to start battle');
    } finally {
      setStarting(false);
    }
  }

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      <DialogContent
        className="flex flex-col gap-0 p-0 max-h-[85vh] sm:max-w-lg overflow-hidden"
        showCloseButton={false}
      >
        <DialogHeader className="gap-1.5 px-4 pt-4 pb-3 border-b shrink-0">
          <div className="flex items-center gap-2">
            <Swords size={14} className="text-muted-foreground shrink-0" />
            <DialogTitle className="text-sm font-medium">New Arena Battle</DialogTitle>
          </div>
          <p className="text-xs text-muted-foreground">
            Run the same prompt against multiple AI competitors and pick the best result.
          </p>
        </DialogHeader>
        <div className="flex flex-col gap-4 overflow-y-auto overscroll-contain px-4 py-3">
          {/* Battle type */}
          <div className="flex flex-col gap-1.5">
            <Label className="text-xs text-muted-foreground">Battle type</Label>
            <div className="flex gap-1.5">
              {(['coding', 'qa'] as const).map((t) => (
                <button
                  key={t}
                  type="button"
                  onClick={() => selectBattleType(t)}
                  aria-pressed={battleType === t}
                  className={cn(
                    'flex-1 rounded-lg border py-1.5 text-xs transition-colors capitalize',
                    battleType === t
                      ? 'border-primary bg-primary/10 text-primary font-medium'
                      : 'border-border text-muted-foreground hover:bg-muted hover:text-foreground',
                  )}
                >
                  {t === 'coding' ? 'Coding' : 'Q&A'}
                </button>
              ))}
            </div>
            <p className="text-xs text-muted-foreground">
              {battleType === 'coding'
                ? 'Each contestant works in its own isolated worktree. Results include a diff.'
                : 'Contestants answer the prompt as text. No code changes.'}
            </p>
          </div>
          {/* Prompt */}
          <div className="flex flex-col gap-1.5">
            <div className="flex items-center justify-between">
              <Label htmlFor="arena-prompt" className="text-xs text-muted-foreground">
                Prompt
              </Label>
              <button
                type="button"
                onClick={() => void handleGeneratePrompt()}
                disabled={generating || trimmedPrompt.length === 0}
                className="text-xs text-primary hover:text-primary/80 disabled:opacity-40 disabled:cursor-default flex items-center gap-1"
                title="Expand your description into a fuller battle prompt"
              >
                {generating && <Loader2 size={10} className="animate-spin" />}
                Generate prompt
              </button>
            </div>
            <textarea
              id="arena-prompt"
              ref={promptRef}
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              placeholder={
                battleType === 'coding'
                  ? 'Describe a coding task, or enter a short description and click Generate prompt…'
                  : 'Ask a question or describe a topic, or enter a short description and click Generate prompt…'
              }
              rows={4}
              className="w-full text-sm border border-border rounded bg-background px-3 py-2 text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-1 focus:ring-ring resize-none"
            />
          </div>
          {/* Contestants */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <Label className="text-xs text-muted-foreground">
                Contestants ({contestants.length}/{MAX_CONTESTANTS})
              </Label>
              <span className="text-xs text-muted-foreground">
                {battleType === 'coding' ? 'Backend + Model' : 'Persona + Model'}
              </span>
            </div>
            <div className="flex flex-col gap-1.5">
              {contestants.map((c) => (
                <ContestantRow
                  key={c.key}
                  contestant={c}
                  battleType={battleType}
                  snapshot={snapshot}
                  agents={agents}
                  allContestants={contestants}
                  onUpdate={(patch) => updateContestant(c.key, patch)}
                  onRemove={() => removeContestant(c.key)}
                  removable={contestants.length > MIN_CONTESTANTS}
                />
              ))}
            </div>
            {contestants.length < MAX_CONTESTANTS && (
              <button
                type="button"
                onClick={addContestant}
                className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground py-1"
              >
                <Plus size={12} /> Add contestant
              </button>
            )}
            {hasDuplicates && (
              <div className="flex items-center gap-1.5 text-xs text-destructive">
                <TriangleAlert size={12} />
                Duplicate contestants (same identity + model) are not allowed.
              </div>
            )}
            {showLocalWarning && (
              <div className="flex items-center gap-1.5 text-xs text-amber-600 dark:text-amber-400">
                <TriangleAlert size={12} />
                {localLaneCount} local contestants will run serially (one GPU load at a time). This battle will take a while.
              </div>
            )}
          </div>
        </div>
        <DialogFooter className="px-4 py-3 border-t shrink-0 flex items-center justify-between">
          <button
            type="button"
            onClick={() => setOpen(false)}
            className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground"
          >
            <X size={12} /> Cancel
          </button>
          <Button
            type="button"
            size="sm"
            onClick={() => void handleStart()}
            disabled={!canStart}
          >
            {starting ? <Loader2 className="animate-spin" /> : <Swords size={14} />}
            Start battle
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}