/*
 * Arena is a new pane kind for competitive AI evaluation. A Battle runs the
 * same prompt against 2-6 Contestants across two concurrent lanes: local lane
 * (llama-swap models, serial) and cloud lane (parallel). Added to all three
 * registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and
 * web WsFrame.
 *
 * Backend (apps/coder):
 *   - arena-runner: battle scheduler, lane classifier, benchmark, results
 *     writer, resume, user winner override
 *   - arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
 *   - arena-decisions: status transitions and resume logic (unit-tested)
 *   - arena-analyzer-helpers: pure helper functions (unit-tested)
 *   - arena-model-call: model call utility for analysis
 *   - arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
 *   - schema: battles, contestants, cross_examinations tables (idempotent)
 *   - remove old /api/arena* routes and tasks.arena_id column
 *
 * Frontend (apps/web):
 *   - ArenaLauncherDialog: battle type, prompt, contestant selection
 *   - ArenaPane: live roster, streaming output, analysis, cross-exam
 *   - DiffView: unified diff with line-by-line color for coding contests
 *   - Winner override per-row dropdown (Trophy icon)
 *   - battle_updated WS handler for live winner/analysis updates
 *   - arena pane kind in Workspace, ChatTabBar, useSidebar
 *
 * Cross-app:
 *   - ArenaState and ArenaContestantShape/WsFrame types (contracts)
 *   - battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
 *   - manifest.json written per battle results folder
 *   - /Arena added to .gitignore
 */
411 lines
16 KiB
TypeScript
411 lines
16 KiB
TypeScript
// ArenaLauncherDialog — mirrors FlowLauncherDialog.
|
||
// Opens via sessionEvents 'open_arena_launcher'.
|
||
// Flow: pick Battle Type → write/generate prompt → add 2–6 contestants → Start.
|
||
|
||
import { useCallback, useEffect, useRef, useState } from 'react';
|
||
import { Loader2, Minus, Plus, Swords, TriangleAlert, X } from 'lucide-react';
|
||
import { toast } from 'sonner';
|
||
import {
|
||
Dialog,
|
||
DialogContent,
|
||
DialogFooter,
|
||
DialogHeader,
|
||
DialogTitle,
|
||
} from '@/components/ui/dialog';
|
||
import { Button } from '@/components/ui/button';
|
||
import { Label } from '@/components/ui/label';
|
||
import { api } from '@/api/client';
|
||
import type { Agent, ProviderSnapshotEntry } from '@/api/types';
|
||
import { sessionEvents } from '@/hooks/sessionEvents';
|
||
import { useProviderSnapshot } from '@/hooks/useProviderSnapshot';
|
||
import { cn } from '@/lib/utils';
|
||
|
||
// ─── types ────────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Battle modes offered by the launcher. In this file, `'coding'` rows pair a
 * backend (provider) with a model, while `'qa'` rows pair a persona (agent)
 * with a model.
 */
type BattleType = 'coding' | 'qa';
|
||
|
||
/** One editable contestant row in the launcher form. */
interface Contestant {
  /**
   * Local unique key: serves as the React list key and as the handle used to
   * update or remove the row. It is not part of the create-battle payload.
   */
  key: string;
  /** Provider name (coding) or agent id (Q&A); `''` until one is chosen. */
  identity: string;
  /** Selected model id; `''` until one is chosen. */
  model: string;
}
|
||
|
||
// ─── helpers ─────────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Creates a blank contestant row (no identity, no model) with a fresh key.
 *
 * `crypto.randomUUID` is only exposed in secure contexts (HTTPS or localhost).
 * On a plain-HTTP origin it is undefined and calling it would throw, so it is
 * feature-detected here. The key only has to be unique among the rows of this
 * form, so a timestamp + random suffix is an adequate fallback.
 *
 * @returns A new row with `identity` and `model` set to `''`.
 */
function newContestant(): Contestant {
  const key =
    typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function'
      ? crypto.randomUUID()
      : `row-${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
  return { key, identity: '', model: '' };
}
|
||
|
||
/**
 * Reports whether some other row in `contestants` has the same
 * identity + model pair as `c`.
 *
 * Only fully specified rows are compared: a row whose identity or model is
 * still `''` is never a duplicate. Without the model check, two rows that
 * merely share a backend/persona (model not picked yet) would be flagged with
 * a "same identity + model" warning before the user has finished choosing.
 *
 * @param contestants All rows currently in the form (may include `c`).
 * @param c The row to test; rows are told apart by `key`.
 * @returns `true` when a different row has the same non-empty identity and model.
 */
function isDuplicate(contestants: Contestant[], c: Contestant): boolean {
  if (c.identity === '' || c.model === '') return false;
  return contestants.some(
    (other) => other.key !== c.key && other.identity === c.identity && other.model === c.model,
  );
}
|
||
|
||
/**
 * Reports whether the list contains at least one row that `isDuplicate`
 * flags against the rest of the list.
 */
function hasDuplicatePair(contestants: Contestant[]): boolean {
  for (const candidate of contestants) {
    if (isDuplicate(contestants, candidate)) {
      return true;
    }
  }
  return false;
}
|
||
|
||
/**
 * Counts the contestants treated as "local" for the serial-run warning.
 *
 * - Q&A battles: every row with an identity chosen is counted.
 * - Coding battles: a row is counted when its model id appears in the
 *   `boocode` provider's model list, either as-is or after removing a leading
 *   `llama-swap/` prefix.
 *
 * @param battleType Selected battle mode.
 * @param contestants Rows currently in the form.
 * @param snapshot Provider snapshot, or `null` when not available.
 * @returns Number of rows counted as local.
 */
function localCount(battleType: BattleType, contestants: Contestant[], snapshot: ProviderSnapshotEntry[] | null): number {
  if (battleType === 'qa') {
    let chosen = 0;
    for (const row of contestants) {
      if (row.identity !== '') chosen += 1;
    }
    return chosen;
  }

  const boocodeEntry = snapshot?.find((entry) => entry.name === 'boocode');
  const knownLocalIds = new Set(boocodeEntry?.models.map((m) => m.id) ?? []);

  return contestants.reduce((total, row) => {
    // Bare ids come from boocode/native; the llama-swap/ prefix is used by
    // opencode and other external agents pointing at the local llama-swap server.
    const unprefixed = row.model.replace(/^llama-swap\//, '');
    const isLocal = knownLocalIds.has(row.model) || knownLocalIds.has(unprefixed);
    return isLocal ? total + 1 : total;
  }, 0);
}
|
||
|
||
// ─── ContestantRow ────────────────────────────────────────────────────────────
|
||
|
||
/**
 * A single contestant row: an identity select, a model select, a duplicate
 * marker, and (when allowed) a remove button.
 *
 * Identity choices are the installed + enabled providers for coding battles
 * and the project's agents for Q&A battles. Model choices come from the
 * selected provider for coding battles and from the `boocode` provider for
 * Q&A battles. Picking a new identity clears the row's model.
 */
function ContestantRow(props: {
  contestant: Contestant;
  battleType: BattleType;
  snapshot: ProviderSnapshotEntry[] | null;
  agents: Agent[];
  allContestants: Contestant[];
  onUpdate: (patch: Partial<Contestant>) => void;
  onRemove: () => void;
  removable: boolean;
}) {
  const {
    contestant,
    battleType,
    snapshot,
    agents,
    allContestants,
    onUpdate,
    onRemove,
    removable,
  } = props;

  const isCoding = battleType === 'coding';
  const providers = snapshot ?? [];
  const dup = isDuplicate(allContestants, contestant);

  // Coding → installed/enabled provider names; Q&A → agents keyed by id.
  const identityOptions = (() => {
    if (!isCoding) {
      return agents.map((a) => ({ value: a.id, label: a.name }));
    }
    return providers
      .filter((e) => e.installed && e.enabled)
      .map((e) => ({ value: e.name, label: e.label }));
  })();

  // Coding reads models from the chosen provider; Q&A is native-only and
  // therefore always reads the boocode provider's models.
  const modelSourceName = isCoding ? contestant.identity : 'boocode';
  const modelSource = providers.find((e) => e.name === modelSourceName);
  const modelOptions: { value: string; label: string }[] = (modelSource?.models ?? []).map(
    (m) => ({ value: m.id, label: m.label }),
  );

  const handleIdentityChange = (value: string) => {
    // Clear the model too, so a model from the previous identity cannot linger.
    onUpdate({ identity: value, model: '' });
  };

  const handleModelChange = (value: string) => {
    onUpdate({ model: value });
  };

  return (
    <div className={cn('flex items-center gap-2', dup && 'opacity-60')}>
      <select
        value={contestant.identity}
        onChange={(e) => handleIdentityChange(e.target.value)}
        className="flex-1 min-w-0 text-xs border border-border rounded bg-background px-2 py-1.5 text-foreground focus:outline-none focus:ring-1 focus:ring-ring"
        aria-label={isCoding ? 'Backend' : 'Persona'}
      >
        <option value="">{isCoding ? 'Backend…' : 'Persona…'}</option>
        {identityOptions.map((o) => (
          <option key={o.value} value={o.value}>{o.label}</option>
        ))}
      </select>
      <select
        value={contestant.model}
        onChange={(e) => handleModelChange(e.target.value)}
        disabled={!contestant.identity}
        className="flex-1 min-w-0 text-xs border border-border rounded bg-background px-2 py-1.5 text-foreground focus:outline-none focus:ring-1 focus:ring-ring disabled:opacity-50"
        aria-label="Model"
      >
        <option value="">Model…</option>
        {modelOptions.map((o) => (
          <option key={o.value} value={o.value}>{o.label}</option>
        ))}
      </select>
      {dup && (
        <span title="Duplicate contestant" className="shrink-0 text-destructive">
          <TriangleAlert size={12} />
        </span>
      )}
      {removable && (
        <button
          type="button"
          onClick={onRemove}
          className="shrink-0 inline-flex items-center justify-center p-1 rounded text-muted-foreground hover:bg-muted hover:text-foreground"
          aria-label="Remove contestant"
        >
          <Minus size={12} />
        </button>
      )}
    </div>
  );
}
|
||
|
||
// ─── ArenaLauncherDialog ──────────────────────────────────────────────────────
|
||
|
||
/** Fewest contestant rows the form keeps; rows are removable only above this. */
const MIN_CONTESTANTS = 2;
/** Most contestant rows the form allows. */
const MAX_CONTESTANTS = 6;

/**
 * Launcher dialog for a new Arena battle.
 *
 * Opens when a `open_arena_launcher` session event arrives, resetting the form
 * for that event's project and placement. The user picks a battle type, writes
 * (or generates) a prompt, and fills 2–6 contestant rows. Start creates the
 * battle via `api.battles.create`, emits `open_arena_pane` with the new battle
 * id, and closes the dialog.
 *
 * Async results are tied to the dialog "session" in which they were requested
 * (one session per `open_arena_launcher` event), so a response that arrives
 * after the launcher has been reopened cannot overwrite the fresh form.
 */
export function ArenaLauncherDialog() {
  const [open, setOpen] = useState(false);
  const [projectId, setProjectId] = useState('');
  const [placement, setPlacement] = useState<'new' | 'split'>('new');
  const [battleType, setBattleType] = useState<BattleType>('coding');
  const [prompt, setPrompt] = useState('');
  const [contestants, setContestants] = useState<Contestant[]>(() => [
    newContestant(),
    newContestant(),
  ]);
  const [generating, setGenerating] = useState(false);
  const [starting, setStarting] = useState(false);
  const [agents, setAgents] = useState<Agent[]>([]);
  const promptRef = useRef<HTMLTextAreaElement>(null);
  // Incremented on every open event; async handlers compare against it to
  // detect that the form they were started from has since been reset.
  const sessionRef = useRef(0);

  const snapshot = useProviderSnapshot();

  useEffect(() => {
    return sessionEvents.subscribe((ev) => {
      if (ev.type !== 'open_arena_launcher') return;
      sessionRef.current += 1;
      setProjectId(ev.project_id);
      setPlacement(ev.placement ?? 'new');
      setBattleType('coding');
      setPrompt('');
      setContestants([newContestant(), newContestant()]);
      setGenerating(false);
      setStarting(false);
      setOpen(true);
    });
  }, []);

  // Load agents list when dialog opens (for Q&A mode).
  useEffect(() => {
    if (!open || !projectId) return undefined;
    // Ignore the response if the dialog closed or the project changed before
    // it arrived; otherwise a slow earlier request could overwrite newer data.
    let cancelled = false;
    api.agents.list(projectId)
      .then((r) => {
        if (!cancelled) setAgents(r.agents);
      })
      .catch(() => {
        // Best-effort: a failed load leaves the current persona list in place.
      });
    return () => {
      cancelled = true;
    };
  }, [open, projectId]);

  const handleGeneratePrompt = useCallback(async () => {
    const description = prompt.trim();
    if (!description || generating) return;
    const session = sessionRef.current;
    setGenerating(true);
    try {
      const { prompt: generated } = await api.battles.generatePrompt(description);
      // The launcher was reopened meanwhile: do not clobber the new prompt.
      if (sessionRef.current !== session) return;
      setPrompt(generated);
      promptRef.current?.focus();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Generate failed');
    } finally {
      // Only clear the flag for the session that set it.
      if (sessionRef.current === session) setGenerating(false);
    }
  }, [prompt, generating]);

  function updateContestant(key: string, patch: Partial<Contestant>) {
    setContestants((prev) => prev.map((c) => (c.key === key ? { ...c, ...patch } : c)));
  }

  function removeContestant(key: string) {
    // Enforce the floor against the latest state, not the rendered snapshot.
    setContestants((prev) =>
      prev.length <= MIN_CONTESTANTS ? prev : prev.filter((c) => c.key !== key),
    );
  }

  function addContestant() {
    // Enforce the cap inside the updater so queued updates cannot exceed it.
    setContestants((prev) =>
      prev.length >= MAX_CONTESTANTS ? prev : [...prev, newContestant()],
    );
  }

  function selectBattleType(next: BattleType) {
    // Re-clicking the active type must not wipe the rows already filled in.
    if (next === battleType) return;
    setBattleType(next);
    // Identities differ per type (backends vs personas), so start over.
    setContestants([newContestant(), newContestant()]);
  }

  const hasDuplicates = hasDuplicatePair(contestants);

  const canStart =
    !starting &&
    prompt.trim().length > 0 &&
    contestants.length >= MIN_CONTESTANTS &&
    contestants.every((c) => c.identity !== '' && c.model !== '') &&
    !hasDuplicates;

  const localLaneCount = localCount(battleType, contestants, snapshot);
  const showLocalWarning = localLaneCount >= 3;

  async function handleStart() {
    if (!canStart) return;
    const session = sessionRef.current;
    const trimmedPrompt = prompt.trim();
    setStarting(true);
    try {
      const { battle_id } = await api.battles.create({
        project_id: projectId,
        battle_type: battleType,
        prompt: trimmedPrompt,
        contestants: contestants.map((c) => ({ identity: c.identity, model: c.model })),
      });
      sessionEvents.emit({
        type: 'open_arena_pane',
        state: { battle_id, battle_type: battleType, prompt: trimmedPrompt },
        placement,
      });
      // Do not close a launcher that was reopened while the request ran.
      if (sessionRef.current === session) setOpen(false);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'Failed to start battle');
    } finally {
      if (sessionRef.current === session) setStarting(false);
    }
  }

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      <DialogContent
        className="flex flex-col gap-0 p-0 max-h-[85vh] sm:max-w-lg overflow-hidden"
        showCloseButton={false}
      >
        <DialogHeader className="gap-1.5 px-4 pt-4 pb-3 border-b shrink-0">
          <div className="flex items-center gap-2">
            <Swords size={14} className="text-muted-foreground shrink-0" />
            <DialogTitle className="text-sm font-medium">New Arena Battle</DialogTitle>
          </div>
          <p className="text-xs text-muted-foreground">
            Run the same prompt against multiple AI competitors and pick the best result.
          </p>
        </DialogHeader>

        <div className="flex flex-col gap-4 overflow-y-auto overscroll-contain px-4 py-3">
          {/* Battle type */}
          <div className="flex flex-col gap-1.5">
            <Label className="text-xs text-muted-foreground">Battle type</Label>
            <div className="flex gap-1.5">
              {(['coding', 'qa'] as const).map((t) => (
                <button
                  key={t}
                  type="button"
                  onClick={() => selectBattleType(t)}
                  aria-pressed={battleType === t}
                  className={cn(
                    'flex-1 rounded-lg border py-1.5 text-xs transition-colors capitalize',
                    battleType === t
                      ? 'border-primary bg-primary/10 text-primary font-medium'
                      : 'border-border text-muted-foreground hover:bg-muted hover:text-foreground',
                  )}
                >
                  {t === 'coding' ? 'Coding' : 'Q&A'}
                </button>
              ))}
            </div>
            <p className="text-xs text-muted-foreground">
              {battleType === 'coding'
                ? 'Each contestant works in its own isolated worktree. Results include a diff.'
                : 'Contestants answer the prompt as text. No code changes.'}
            </p>
          </div>

          {/* Prompt */}
          <div className="flex flex-col gap-1.5">
            <div className="flex items-center justify-between">
              <Label htmlFor="arena-prompt" className="text-xs text-muted-foreground">
                Prompt
              </Label>
              <button
                type="button"
                onClick={() => void handleGeneratePrompt()}
                disabled={generating || prompt.trim().length === 0}
                className="text-xs text-primary hover:text-primary/80 disabled:opacity-40 disabled:cursor-default flex items-center gap-1"
                title="Expand your description into a fuller battle prompt"
              >
                {generating && <Loader2 size={10} className="animate-spin" />}
                Generate prompt
              </button>
            </div>
            <textarea
              id="arena-prompt"
              ref={promptRef}
              value={prompt}
              onChange={(e) => setPrompt(e.target.value)}
              placeholder={
                battleType === 'coding'
                  ? 'Describe a coding task, or enter a short description and click Generate prompt…'
                  : 'Ask a question or describe a topic, or enter a short description and click Generate prompt…'
              }
              rows={4}
              className="w-full text-sm border border-border rounded bg-background px-3 py-2 text-foreground placeholder:text-muted-foreground focus:outline-none focus:ring-1 focus:ring-ring resize-none"
            />
          </div>

          {/* Contestants */}
          <div className="flex flex-col gap-2">
            <div className="flex items-center justify-between">
              <Label className="text-xs text-muted-foreground">
                Contestants ({contestants.length}/{MAX_CONTESTANTS})
              </Label>
              <span className="text-xs text-muted-foreground">
                {battleType === 'coding' ? 'Backend + Model' : 'Persona + Model'}
              </span>
            </div>

            <div className="flex flex-col gap-1.5">
              {contestants.map((c) => (
                <ContestantRow
                  key={c.key}
                  contestant={c}
                  battleType={battleType}
                  snapshot={snapshot}
                  agents={agents}
                  allContestants={contestants}
                  onUpdate={(patch) => updateContestant(c.key, patch)}
                  onRemove={() => removeContestant(c.key)}
                  removable={contestants.length > MIN_CONTESTANTS}
                />
              ))}
            </div>

            {contestants.length < MAX_CONTESTANTS && (
              <button
                type="button"
                onClick={addContestant}
                className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground py-1"
              >
                <Plus size={12} /> Add contestant
              </button>
            )}

            {hasDuplicates && (
              <div className="flex items-center gap-1.5 text-xs text-destructive">
                <TriangleAlert size={12} />
                Duplicate contestants (same identity + model) are not allowed.
              </div>
            )}

            {showLocalWarning && (
              <div className="flex items-center gap-1.5 text-xs text-amber-600 dark:text-amber-400">
                <TriangleAlert size={12} />
                {localLaneCount} local contestants will run serially (one GPU load at a time). This battle will take a while.
              </div>
            )}
          </div>
        </div>

        <DialogFooter className="px-4 py-3 border-t shrink-0 flex items-center justify-between">
          <button
            type="button"
            onClick={() => setOpen(false)}
            className="flex items-center gap-1.5 text-xs text-muted-foreground hover:text-foreground"
          >
            <X size={12} /> Cancel
          </button>
          <Button
            type="button"
            size="sm"
            onClick={() => void handleStart()}
            disabled={!canStart}
          >
            {starting ? <Loader2 className="animate-spin" /> : <Swords size={14} />}
            Start battle
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
|