Files
boocode/apps/web/src/components/ArenaLauncherDialog.tsx
indifferentketchup 3474be4865 feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs
the same prompt against 2-6 Contestants across two concurrent lanes:
local lane (llama-swap models, serial) and cloud lane (parallel).

Added to all three registries: @boocode/contracts WsFrameSchema,
server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results
  writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore
2026-06-06 23:25:29 +00:00

411 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// ArenaLauncherDialog — mirrors FlowLauncherDialog.
// Opens via sessionEvents 'open_arena_launcher'.
// Flow: pick Battle Type → write/generate prompt → add 2-6 contestants → Start.
import { useCallback, useEffect, useRef, useState } from 'react';
import { Loader2, Minus, Plus, Swords, TriangleAlert, X } from 'lucide-react';
import { toast } from 'sonner';
import {
Dialog,
DialogContent,
DialogFooter,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog';
import { Button } from '@/components/ui/button';
import { Label } from '@/components/ui/label';
import { api } from '@/api/client';
import type { Agent, ProviderSnapshotEntry } from '@/api/types';
import { sessionEvents } from '@/hooks/sessionEvents';
import { useProviderSnapshot } from '@/hooks/useProviderSnapshot';
import { cn } from '@/lib/utils';
// ─── types ────────────────────────────────────────────────────────────────────
// Kind of battle being configured. The dialog uses it to choose which identity
// options a row offers (providers for 'coding', agents for 'qa').
type BattleType = 'coding' | 'qa';
// One editable roster row in the launcher. Empty strings mean "not chosen yet".
interface Contestant {
// Generated client-side with crypto.randomUUID(); used as the React list key
// and to address the row when updating or removing it.
key: string; // local unique key for React
// Coding battles: a provider name from the snapshot. Q&A battles: an agent id.
identity: string;
// Id of the selected model; reset to '' whenever the identity changes.
model: string;
}
// ─── helpers ─────────────────────────────────────────────────────────────────
/**
 * Builds a blank roster row.
 *
 * @returns A contestant with a freshly generated unique `key` and with
 *   `identity` and `model` both left empty (nothing selected yet).
 */
function newContestant(): Contestant {
  const key = crypto.randomUUID();
  return { key, identity: '', model: '' };
}
/**
 * Reports whether `c` repeats another row's identity + model pair.
 *
 * Only fully specified rows can be duplicates: a row whose identity or model
 * is still empty is never flagged. Without the model check, two rows that had
 * just been given the same backend (model still unset) were reported as
 * duplicates before the user had a chance to pick different models.
 *
 * @param contestants The full roster, which may include `c` itself.
 * @param c The row being checked; rows sharing its `key` are ignored.
 * @returns `true` when a row with a different key has the same non-empty
 *   identity and the same non-empty model.
 */
function isDuplicate(contestants: Contestant[], c: Contestant): boolean {
  // Incomplete rows cannot collide with anything yet.
  if (c.identity === '' || c.model === '') return false;
  return contestants.some(
    (other) => other.key !== c.key && other.identity === c.identity && other.model === c.model,
  );
}
/**
 * Reports whether the roster contains at least one duplicated row.
 *
 * @param contestants The full roster.
 * @returns `true` as soon as any row is judged a duplicate by `isDuplicate`;
 *   `false` for an empty roster or when no row is.
 */
function hasDuplicatePair(contestants: Contestant[]): boolean {
  for (const candidate of contestants) {
    if (isDuplicate(contestants, candidate)) return true;
  }
  return false;
}
/**
 * Counts the contestants that are treated as local.
 *
 * - Q&A battles: every row with a chosen identity counts
 *   (presumably because Q&A rows only offer boocode models — confirm with the runner).
 * - Coding battles: a row counts when its model id is one of the models listed
 *   by the `boocode` snapshot entry, either as-is or after stripping a leading
 *   `llama-swap/` prefix.
 *
 * @param battleType Which kind of battle is being configured.
 * @param contestants The current roster.
 * @param snapshot Provider snapshot, or `null` when it is not available; with
 *   no snapshot (or no `boocode` entry) a coding battle counts zero rows.
 * @returns Number of rows considered local.
 */
function localCount(battleType: BattleType, contestants: Contestant[], snapshot: ProviderSnapshotEntry[] | null): number {
  if (battleType === 'qa') {
    let named = 0;
    for (const { identity } of contestants) {
      if (identity !== '') named += 1;
    }
    return named;
  }

  const boocodeEntry = snapshot?.find((entry) => entry.name === 'boocode');
  const localIds = new Set(boocodeEntry?.models.map((m) => m.id) ?? []);

  let local = 0;
  for (const { model } of contestants) {
    // External agents (e.g. opencode) pointed at the local llama-swap server
    // report ids with a llama-swap/ prefix; boocode/native uses the bare id.
    const bareId = model.replace(/^llama-swap\//, '');
    if (localIds.has(model) || localIds.has(bareId)) local += 1;
  }
  return local;
}
// ─── ContestantRow ────────────────────────────────────────────────────────────
/**
 * A single roster row: an identity picker, a model picker, a duplicate marker
 * and (optionally) a remove button.
 *
 * - Coding battles list installed + enabled providers as identities and offer
 *   the chosen provider's models.
 * - Q&A battles list agents as identities and always offer the models of the
 *   `boocode` snapshot entry.
 *
 * Changing the identity also clears the model so a stale model id from the
 * previous identity cannot survive. The model picker stays disabled until an
 * identity has been chosen.
 */
function ContestantRow({
  contestant,
  battleType,
  snapshot,
  agents,
  allContestants,
  onUpdate,
  onRemove,
  removable,
}: {
  contestant: Contestant;
  battleType: BattleType;
  snapshot: ProviderSnapshotEntry[] | null;
  agents: Agent[];
  allContestants: Contestant[];
  onUpdate: (patch: Partial<Contestant>) => void;
  onRemove: () => void;
  removable: boolean;
}) {
  const isCoding = battleType === 'coding';
  const providers = snapshot ?? [];
  const flaggedDuplicate = isDuplicate(allContestants, contestant);

  // Identities: provider names when coding, agent ids for Q&A.
  const identityOptions = isCoding
    ? providers
        .filter((p) => p.installed && p.enabled)
        .map((p) => ({ value: p.name, label: p.label }))
    : agents.map((agent) => ({ value: agent.id, label: agent.name }));

  // Models come from the selected provider when coding; Q&A is native-only,
  // so it always reads the boocode entry.
  const modelSourceName = isCoding ? contestant.identity : 'boocode';
  const modelSource = providers.find((p) => p.name === modelSourceName);
  const modelOptions: { value: string; label: string }[] = (modelSource?.models ?? []).map(
    (m) => ({ value: m.id, label: m.label }),
  );

  return (
    <div className={cn('flex items-center gap-2', flaggedDuplicate && 'opacity-60')}>
      <select
        value={contestant.identity}
        onChange={(e) => onUpdate({ identity: e.target.value, model: '' })}
        className="flex-1 min-w-0 text-xs border border-border rounded bg-background px-2 py-1.5 text-foreground focus:outline-none focus:ring-1 focus:ring-ring"
        aria-label={isCoding ? 'Backend' : 'Persona'}
      >
        <option value="">{isCoding ? 'Backend…' : 'Persona…'}</option>
        {identityOptions.map((opt) => (
          <option key={opt.value} value={opt.value}>{opt.label}</option>
        ))}
      </select>
      <select
        value={contestant.model}
        onChange={(e) => onUpdate({ model: e.target.value })}
        disabled={!contestant.identity}
        className="flex-1 min-w-0 text-xs border border-border rounded bg-background px-2 py-1.5 text-foreground focus:outline-none focus:ring-1 focus:ring-ring disabled:opacity-50"
        aria-label="Model"
      >
        <option value="">Model</option>
        {modelOptions.map((opt) => (
          <option key={opt.value} value={opt.value}>{opt.label}</option>
        ))}
      </select>
      {flaggedDuplicate && (
        <span title="Duplicate contestant" className="shrink-0 text-destructive">
          <TriangleAlert size={12} />
        </span>
      )}
      {removable && (
        <button
          type="button"
          onClick={onRemove}
          className="shrink-0 inline-flex items-center justify-center p-1 rounded text-muted-foreground hover:bg-muted hover:text-foreground"
          aria-label="Remove contestant"
        >
          <Minus size={12} />
        </button>
      )}
    </div>
  );
}
// ─── ArenaLauncherDialog ──────────────────────────────────────────────────────
/** Fewest contestants a battle can start with; rows cannot be removed below this. */
const MIN_CONTESTANTS = 2;
/** Most contestants a battle can hold; the "Add contestant" control hides at this size. */
const MAX_CONTESTANTS = 6;
/** Number of local contestants at which the "runs serially" warning is shown. */
const LOCAL_WARNING_THRESHOLD = 3;

/** Builds the starting roster: MIN_CONTESTANTS blank rows, each with a fresh key. */
function initialContestants(): Contestant[] {
  return Array.from({ length: MIN_CONTESTANTS }, () => newContestant());
}

/**
 * Modal for configuring and launching an Arena battle.
 *
 * Lifecycle:
 * - Opens when a `open_arena_launcher` session event arrives; the event's
 *   `project_id` and optional `placement` (default `'new'`) are captured and
 *   the whole form is reset.
 * - While open with a project id, the project's agents are fetched for the
 *   Q&A identity picker.
 * - "Generate prompt" sends the current prompt text to
 *   `api.battles.generatePrompt` and replaces the textarea content with the result.
 * - "Start battle" calls `api.battles.create`, then emits `open_arena_pane`
 *   with the new battle id, battle type, trimmed prompt and placement, and
 *   closes the dialog. Failures are surfaced through a toast and leave the
 *   dialog open.
 *
 * Start is enabled only when the prompt is non-blank, there are at least
 * MIN_CONTESTANTS rows, every row has both identity and model, and no two
 * rows are duplicates.
 */
export function ArenaLauncherDialog() {
  const [open, setOpen] = useState(false);
  const [projectId, setProjectId] = useState('');
  const [placement, setPlacement] = useState<'new' | 'split'>('new');
  const [battleType, setBattleType] = useState<BattleType>('coding');
  const [prompt, setPrompt] = useState('');
  const [contestants, setContestants] = useState<Contestant[]>(initialContestants);
  const [generating, setGenerating] = useState(false);
  const [starting, setStarting] = useState(false);
  const [agents, setAgents] = useState<Agent[]>([]);
  const promptRef = useRef<HTMLTextAreaElement>(null);
  const snapshot = useProviderSnapshot();

  // Open (and fully reset) the dialog whenever the launcher event fires.
  useEffect(() => {
    return sessionEvents.subscribe((ev) => {
      if (ev.type !== 'open_arena_launcher') return;
      setProjectId(ev.project_id);
      setPlacement(ev.placement ?? 'new');
      setBattleType('coding');
      setPrompt('');
      setContestants(initialContestants());
      setGenerating(false);
      setStarting(false);
      setOpen(true);
    });
  }, []);

  // Load agents list when dialog opens (for Q&A mode).
  useEffect(() => {
    if (!open || !projectId) return;
    // Guard against out-of-order responses: if the dialog closes or switches
    // project before this request settles, its result must not overwrite the
    // agent list belonging to the current project.
    let cancelled = false;
    api.agents
      .list(projectId)
      .then((r) => {
        if (!cancelled) setAgents(r.agents);
      })
      .catch(() => {
        // Best-effort: on failure the persona picker keeps whatever list it had.
      });
    return () => {
      cancelled = true;
    };
  }, [open, projectId]);

  const handleGeneratePrompt = useCallback(async () => {
    const description = prompt.trim();
    if (!description || generating) return;
    setGenerating(true);
    try {
      const { prompt: generated } = await api.battles.generatePrompt(description);
      setPrompt(generated);
      promptRef.current?.focus();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Generate failed');
    } finally {
      setGenerating(false);
    }
  }, [prompt, generating]);

  /** Switches battle type and resets the roster, since identities differ per type. */
  function selectBattleType(next: BattleType) {
    // Re-clicking the active type must not discard the rows already filled in.
    if (next === battleType) return;
    setBattleType(next);
    setContestants(initialContestants());
  }

  function updateContestant(key: string, patch: Partial<Contestant>) {
    setContestants((prev) => prev.map((c) => (c.key === key ? { ...c, ...patch } : c)));
  }

  function removeContestant(key: string) {
    // Enforce the floor against the latest state, not the render-time closure.
    setContestants((prev) =>
      prev.length <= MIN_CONTESTANTS ? prev : prev.filter((c) => c.key !== key),
    );
  }

  function addContestant() {
    // Enforce the cap against the latest state, not the render-time closure.
    setContestants((prev) =>
      prev.length >= MAX_CONTESTANTS ? prev : [...prev, newContestant()],
    );
  }

  // Computed once per render; feeds both the Start gate and the warning banner.
  const hasDuplicates = hasDuplicatePair(contestants);
  const canStart =
    !starting &&
    prompt.trim().length > 0 &&
    contestants.length >= MIN_CONTESTANTS &&
    contestants.every((c) => c.identity !== '' && c.model !== '') &&
    !hasDuplicates;
  const localLaneCount = localCount(battleType, contestants, snapshot);
  const showLocalWarning = localLaneCount >= LOCAL_WARNING_THRESHOLD;

  async function handleStart() {
    if (!canStart) return;
    const trimmedPrompt = prompt.trim();
    setStarting(true);
    try {
      const { battle_id } = await api.battles.create({
        project_id: projectId,
        battle_type: battleType,
        prompt: trimmedPrompt,
        contestants: contestants.map((c) => ({ identity: c.identity, model: c.model })),
      });
      sessionEvents.emit({
        type: 'open_arena_pane',
        state: { battle_id, battle_type: battleType, prompt: trimmedPrompt },
        placement,
      });
      setOpen(false);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Failed to start battle');
    } finally {
      setStarting(false);
    }
  }

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      <DialogContent
        className="flex flex-col gap-0 p-0 max-h-[85vh] sm:max-w-lg overflow-hidden"
        showCloseButton={false}
      >
        <DialogHeader className="gap-1.5 px-4 pt-4 pb-3 border-b shrink-0">
          <div className="flex items-center gap-2">
            <Swords size={14} className="text-muted-foreground shrink-0" />
            <DialogTitle className="text-sm font-medium">New Arena Battle</DialogTitle>
          </div>
          <p className="text-xs text-muted-foreground">
            Run the same prompt against multiple AI competitors and pick the best result.
          </p>
        </DialogHeader>
        <div className="flex flex-col gap-4 overflow-y-auto overscroll-contain px-4 py-3">
          {/* Battle type */}
          <div className="flex flex-col gap-1.5">
            <Label className="text-xs text-muted-foreground">Battle type</Label>
            <div className="flex gap-1.5">
              {(['coding', 'qa'] as const).map((t) => (
                <button
                  key={t}
                  type="button"
                  onClick={() => selectBattleType(t)}
                  aria-pressed={battleType === t}
                  className={cn(
                    'flex-1 rounded-lg border py-1.5 text-xs transition-colors capitalize',
                    battleType === t
                      ? 'border-primary bg-primary/10 text-primary font-medium'
                      : 'border-border text-muted-foreground hover:bg-muted hover:text-foreground',
                  )}
                >
                  {t === 'coding' ? 'Coding' : 'Q&A'}
                </button>
              ))}
            </div>
            <p className="text-xs text-muted-foreground">
              {battleType === 'coding'
                ? 'Each contestant works in its own isolated worktree. Results include a diff.'
                : 'Contestants answer the prompt as text. No code changes.'}
            </p>
          </div>
          {/* Prompt */}
          <div className="flex flex-col gap-1.5">
            <div className="flex items-center justify-between">
              <Label htmlFor="arena-prompt" className="text-xs text-muted-foreground">
                Prompt
              </Label>
              <button
                type="button"
                onClick={() => void handleGeneratePrompt()}
                disabled={generating || prompt.trim().length === 0}
                className="text-xs text-primary hover:text-primary/80 disabled:opacity-40 disabled:cursor-default flex items-center gap-1"
                title="Expand your description into a fuller battle prompt"
              >
                {generating && <Loader2 size={10} className="animate-spin" />}
                Generate prompt
              </button>
            </div>
            <textarea
              id="arena-prompt"
              ref={promptRef}
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              placeholder={
                battleType === 'coding'
                  ? 'Describe a coding task, or enter a short description and click Generate prompt…'
                  : 'Ask a question or describe a topic, or enter a short description and click Generate prompt…'
              }
              rows={4}
              className="w-full text-sm border border-border rounded bg-background px-3 py-2 text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-1 focus:ring-ring resize-none"
            />
          </div>
          {/* Contestants */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <Label className="text-xs text-muted-foreground">
                Contestants ({contestants.length}/{MAX_CONTESTANTS})
              </Label>
              <span className="text-xs text-muted-foreground">
                {battleType === 'coding' ? 'Backend + Model' : 'Persona + Model'}
              </span>
            </div>
            <div className="flex flex-col gap-1.5">
              {contestants.map((c) => (
                <ContestantRow
                  key={c.key}
                  contestant={c}
                  battleType={battleType}
                  snapshot={snapshot}
                  agents={agents}
                  allContestants={contestants}
                  onUpdate={(patch) => updateContestant(c.key, patch)}
                  onRemove={() => removeContestant(c.key)}
                  removable={contestants.length > MIN_CONTESTANTS}
                />
              ))}
            </div>
            {contestants.length < MAX_CONTESTANTS && (
              <button
                type="button"
                onClick={addContestant}
                className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground py-1"
              >
                <Plus size={12} /> Add contestant
              </button>
            )}
            {hasDuplicates && (
              <div className="flex items-center gap-1.5 text-xs text-destructive">
                <TriangleAlert size={12} />
                Duplicate contestants (same identity + model) are not allowed.
              </div>
            )}
            {showLocalWarning && (
              <div className="flex items-center gap-1.5 text-xs text-amber-600 dark:text-amber-400">
                <TriangleAlert size={12} />
                {localLaneCount} local contestants will run serially (one GPU load at a time). This battle will take a while.
              </div>
            )}
          </div>
        </div>
        <DialogFooter className="px-4 py-3 border-t shrink-0 flex items-center justify-between">
          <button
            type="button"
            onClick={() => setOpen(false)}
            className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground"
          >
            <X size={12} /> Cancel
          </button>
          <Button
            type="button"
            size="sm"
            onClick={() => void handleStart()}
            disabled={!canStart}
          >
            {starting ? <Loader2 className="animate-spin" /> : <Swords size={14} />}
            Start battle
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}