Files
boocode/apps/coder/src/routes/arena.ts
indifferentketchup f436021bf9 feat: deferred items — arena token API + UI, ToolShim docs
- Arena API: token_breakdown selected in contestant query
- ArenaPane: token category breakdown bar (s/u/a/t/r) in expanded contestant view
- apps/server/CLAUDE.md: document tool-shim and loop-detectors
2026-06-07 18:41:26 +00:00

413 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * Arena routes — HTTP surface for the Battle UI.
 *
 * POST  /api/battles/generate-prompt — draft a battle prompt from a short description
 * POST  /api/battles — launch a battle
 * GET   /api/battles?project_id= — list battles for a project
 * GET   /api/battles/:id — one battle + contestants + cross-exams
 * POST  /api/battles/:id/stop — cancel a running battle
 * GET   /api/battles/:id/analysis — read analysis.md for a battle
 * POST  /api/battles/:id/analyze — trigger analysis (Phase 5 fills the logic)
 * PATCH /api/battles/:id/winner — manually set or clear the winner
 * GET   /api/battles/:id/contestants/:cid/diff — read a contestant's diff.patch
 * POST  /api/battles/:id/cross-examine — start a cross-examination (Phase 5 fills the logic)
 *
 * Mirrors the shape of runs.ts (Orchestrator routes). Battle creation delegates to
 * the battle-runner; cancellation calls cancelBattle then aborts in-flight tasks
 * via the dispatcher's cancelExternalTask.
 */
import type { FastifyInstance } from 'fastify';
import { z } from 'zod';
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { Sql } from '../db.js';
import type { Config } from '../config.js';
import type { BattleRunner } from '../services/arena-runner.js';
import type { ExternalCancelFn } from './tasks.js';
import { arenaModelCall } from '../services/arena-model-call.js';
// ─── Validation schemas ───────────────────────────────────────────────────────

/** UUID string; validates route params as well as id fields in bodies and queries. */
const UuidParam = z.string().uuid();

/** Non-empty string of at most 200 characters (identity and model names). */
const ShortLabel = z.string().min(1).max(200);

/** One contestant entry inside a battle-creation request. */
const ContestantInput = z.object({
  identity: ShortLabel,
  model: ShortLabel,
});

/** Body of POST /api/battles: 2 to 6 contestants competing on one prompt. */
const CreateBattleBody = z.object({
  project_id: UuidParam,
  battle_type: z.enum(['coding', 'qa']),
  prompt: z.string().min(1).max(64_000),
  contestants: z
    .array(ContestantInput)
    .min(2, 'at least 2 contestants required')
    .max(6, 'at most 6 contestants allowed'),
});

/** Query string of GET /api/battles. */
const ListBattlesQuery = z.object({
  project_id: UuidParam,
});

/** Body of POST /api/battles/:id/cross-examine. */
const CrossExamineBody = z.object({
  identity: ShortLabel,
  model: ShortLabel,
});

/** Body of PATCH /api/battles/:id/winner; `null` is accepted for the winner id. */
const SetWinnerBody = z.object({
  winner_contestant_id: UuidParam.nullable(),
});

/** Body of POST /api/battles/generate-prompt. */
const GeneratePromptBody = z.object({
  description: z.string().min(1).max(2_000),
});

// ─── Route registration ───────────────────────────────────────────────────────
/**
 * Registers every Arena (battle) HTTP route on the given Fastify instance.
 *
 * Handlers validate input with the module-level zod schemas, answer 400 on
 * malformed input, and delegate all state changes to `battleRunner`; this file
 * itself only issues read queries through `sql`.
 *
 * @param app - Fastify instance the routes are attached to; its logger is used for warnings.
 * @param sql - Tagged-template SQL client used for the lookups below.
 * @param battleRunner - Service that starts, cancels, analyzes, cross-examines and sets winners.
 * @param cancelExternal - Invoked once per task id returned by `battleRunner.cancelBattle`.
 * @param config - Passed to `arenaModelCall`; `DEFAULT_MODEL` selects the prompt-drafting model.
 */
export function registerArenaRoutes(
  app: FastifyInstance,
  sql: Sql,
  battleRunner: BattleRunner,
  cancelExternal: ExternalCancelFn,
  config: Config,
): void {
  // The artifact reads below answer 404 on any failure. A missing file (ENOENT)
  // is the expected "not ready yet" case; anything else is logged so permission
  // or I/O problems are not silently hidden behind that 404.
  const warnUnlessMissing = (err: unknown, message: string): void => {
    const code = (err as { code?: unknown } | null | undefined)?.code;
    if (code === 'ENOENT') return;
    app.log.warn({ err: err instanceof Error ? err.message : String(err) }, message);
  };

  // POST /api/battles/generate-prompt — draft a fuller battle prompt from a
  // short description using the default BooChat model. One-shot, non-streaming.
  // Must be registered BEFORE /api/battles/:id so the literal 'generate-prompt'
  // path is not mistaken for a UUID param.
  app.post('/api/battles/generate-prompt', async (req, reply) => {
    const parsed = GeneratePromptBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }
    const { description } = parsed.data;
    try {
      const prompt = await arenaModelCall({
        config,
        model: config.DEFAULT_MODEL,
        system: [
          'You are a battle-prompt writer for an AI Arena.',
          'The user gives you a short description of a coding or Q&A challenge.',
          // ASCII hyphen on purpose: the range previously read "26 sentences",
          // which contradicts the 400-token budget below.
          'Expand it into a clear, self-contained prompt (2-6 sentences) that any AI model can act on.',
          'Include specific acceptance criteria where helpful.',
          'Output ONLY the prompt — no preamble, no labels, no meta-commentary.',
        ].join(' '),
        user: description,
        maxTokens: 400,
        temperature: 0.6,
      });
      return { prompt };
    } catch (err) {
      app.log.warn(
        { err: err instanceof Error ? err.message : String(err) },
        'arena generate-prompt: model call failed',
      );
      reply.code(502);
      return { error: 'model call failed' };
    }
  });

  // POST /api/battles — launch a battle
  app.post('/api/battles', async (req, reply) => {
    const parsed = CreateBattleBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }
    const { project_id, battle_type, prompt, contestants } = parsed.data;

    // Reject duplicate (identity, model) pairs up front — the schema UNIQUE
    // constraint would catch it too, but an early 422 is friendlier.
    // The key is a JSON-encoded tuple so the pair boundary is unambiguous; a
    // plain "identity::model" join would make ("a::b", "c") and ("a", "b::c")
    // look like the same contestant.
    const seen = new Set<string>();
    for (const c of contestants) {
      const key = JSON.stringify([c.identity, c.model]);
      if (seen.has(key)) {
        reply.code(422);
        return {
          error: 'duplicate_contestant',
          message: `duplicate contestant: identity="${c.identity}" model="${c.model}"`,
        };
      }
      seen.add(key);
    }

    // Verify project exists
    const [proj] = await sql<{ id: string }[]>`SELECT id FROM projects WHERE id = ${project_id}`;
    if (!proj) {
      reply.code(404);
      return { error: 'project not found' };
    }

    const { battleId } = await battleRunner.startBattle({
      projectId: project_id,
      battleType: battle_type,
      prompt,
      contestants,
    });
    reply.code(201);
    return { battle_id: battleId };
  });

  // GET /api/battles?project_id= — list battles, most-recent-first (capped at 100)
  app.get('/api/battles', async (req, reply) => {
    const parsed = ListBattlesQuery.safeParse(req.query);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid query', details: parsed.error.flatten() };
    }
    const battles = await sql`
      SELECT id, project_id, battle_type, prompt, status,
             winner_contestant_id, results_path, error,
             created_at, updated_at
      FROM battles
      WHERE project_id = ${parsed.data.project_id}
      ORDER BY created_at DESC
      LIMIT 100
    `;
    return { battles };
  });

  // GET /api/battles/:id — one battle + its contestants + cross-examinations
  app.get<{ Params: { id: string } }>('/api/battles/:id', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;
    const [battle] = await sql<{
      id: string;
      project_id: string;
      battle_type: string;
      prompt: string;
      status: string;
      winner_contestant_id: string | null;
      results_path: string | null;
      error: string | null;
      created_at: unknown;
      updated_at: unknown;
    }[]>`
      SELECT id, project_id, battle_type, prompt, status,
             winner_contestant_id, results_path, error,
             created_at, updated_at
      FROM battles WHERE id = ${id}
    `;
    if (!battle) {
      reply.code(404);
      return { error: 'battle not found' };
    }

    // The two child lookups do not depend on each other, so issue them together.
    const [contestants, crossExaminations] = await Promise.all([
      sql`
        SELECT id, battle_id, identity, model, lane, task_id, worktree_id,
               status, duration_ms, tokens_per_sec, cost_tokens, token_breakdown, result_path, error,
               created_at, updated_at
        FROM contestants
        WHERE battle_id = ${id}
        ORDER BY created_at ASC
      `,
      sql`
        SELECT id, battle_id, identity, model, verdict, created_at
        FROM cross_examinations
        WHERE battle_id = ${id}
        ORDER BY created_at ASC
      `,
    ]);
    return { battle, contestants, cross_examinations: crossExaminations };
  });

  // POST /api/battles/:id/stop — cancel a running battle
  app.post<{ Params: { id: string } }>('/api/battles/:id/stop', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;
    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status !== 'running') {
      reply.code(409);
      return { error: `cannot stop battle in status '${row.status}'` };
    }
    // The status may have changed between the SELECT above and this call, so
    // the runner's own answer is checked as well.
    const { cancelled, taskIds } = await battleRunner.cancelBattle(id);
    if (!cancelled) {
      reply.code(409);
      return { error: 'battle is no longer running' };
    }
    // Abort any in-flight dispatcher tasks (cloud contestants running externally).
    for (const taskId of taskIds) {
      cancelExternal(taskId);
    }
    return { cancelled: true };
  });

  // GET /api/battles/:id/analysis — read analysis.md from the battle's results_path
  app.get<{ Params: { id: string } }>('/api/battles/:id/analysis', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;
    const [row] = await sql<{ results_path: string | null }[]>`
      SELECT results_path FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (!row.results_path) {
      reply.code(404);
      return { error: 'analysis not ready' };
    }
    try {
      const text = await readFile(join(row.results_path, 'analysis.md'), 'utf8');
      return { text };
    } catch (err) {
      warnUnlessMissing(err, 'arena analysis: read failed');
      reply.code(404);
      return { error: 'analysis not ready' };
    }
  });

  // POST /api/battles/:id/analyze — trigger or re-trigger analysis
  app.post<{ Params: { id: string } }>('/api/battles/:id/analyze', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;
    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status === 'running') {
      reply.code(409);
      return { error: 'battle is still running — wait for all contestants to finish' };
    }
    const result = await battleRunner.triggerAnalysis(id);
    if (!result.triggered) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    reply.code(202);
    return { triggered: true };
  });

  // PATCH /api/battles/:id/winner — manually set or clear the winner.
  // Validates the contestant belongs to the battle; publishes battle_updated so
  // the pane badge reflects the override immediately. Human is authoritative.
  app.patch<{ Params: { id: string } }>('/api/battles/:id/winner', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const parsed = SetWinnerBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }
    const result = await battleRunner.setWinner(parsedId.data, parsed.data.winner_contestant_id);
    if (!result.ok) {
      if (result.notFound) {
        reply.code(404);
        return { error: 'battle not found' };
      }
      if (result.invalidContestant) {
        reply.code(422);
        return { error: 'contestant not found in this battle' };
      }
      // Failure without a recognised reason flag.
      reply.code(500);
      return { error: 'unknown error' };
    }
    return { ok: true };
  });

  // GET /api/battles/:id/contestants/:cid/diff — read the diff.patch for a coding contestant.
  app.get<{ Params: { id: string; cid: string } }>(
    '/api/battles/:id/contestants/:cid/diff',
    async (req, reply) => {
      const parsedId = UuidParam.safeParse(req.params.id);
      const parsedCid = UuidParam.safeParse(req.params.cid);
      if (!parsedId.success || !parsedCid.success) {
        reply.code(400);
        return { error: 'invalid id' };
      }
      // Matching on both ids ensures the contestant belongs to the battle in the URL.
      const [contestant] = await sql<{ result_path: string | null }[]>`
        SELECT result_path FROM contestants
        WHERE id = ${parsedCid.data} AND battle_id = ${parsedId.data}
      `;
      if (!contestant) {
        reply.code(404);
        return { error: 'contestant not found' };
      }
      if (!contestant.result_path) {
        reply.code(404);
        return { error: 'diff not available' };
      }
      try {
        const text = await readFile(join(contestant.result_path, 'diff.patch'), 'utf8');
        return { diff: text };
      } catch (err) {
        warnUnlessMissing(err, 'arena diff: read failed');
        reply.code(404);
        return { error: 'diff not available' };
      }
    },
  );

  // POST /api/battles/:id/cross-examine — start a cross-examination
  app.post<{ Params: { id: string } }>('/api/battles/:id/cross-examine', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;
    const parsed = CrossExamineBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }
    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status === 'running') {
      reply.code(409);
      return { error: 'battle is still running — cross-examine after all contestants finish' };
    }
    const { crossExamId } = await battleRunner.startCrossExam(id, {
      identity: parsed.data.identity,
      model: parsed.data.model,
    });
    reply.code(202);
    return { cross_exam_id: crossExamId };
  });
}