- Arena API: token_breakdown selected in contestant query - ArenaPane: token category breakdown bar (s/u/a/t/r) in expanded contestant view - apps/server/CLAUDE.md: document tool-shim and loop-detectors
413 lines
13 KiB
TypeScript
413 lines
13 KiB
TypeScript
/**
 * Arena routes — HTTP surface for the Battle UI.
 *
 *   POST  /api/battles/generate-prompt           — draft a battle prompt from a short description
 *   POST  /api/battles                           — launch a battle
 *   GET   /api/battles?project_id=               — list battles for a project
 *   GET   /api/battles/:id                       — one battle + contestants + cross-exams
 *   POST  /api/battles/:id/stop                  — cancel a running battle
 *   GET   /api/battles/:id/analysis              — read analysis.md from the battle's results_path
 *   POST  /api/battles/:id/analyze               — trigger analysis (Phase 5 fills the logic)
 *   PATCH /api/battles/:id/winner                — manually set or clear the winner
 *   GET   /api/battles/:id/contestants/:cid/diff — read diff.patch for a coding contestant
 *   POST  /api/battles/:id/cross-examine         — start a cross-examination (Phase 5 fills the logic)
 *
 * Mirrors the shape of runs.ts (Orchestrator routes). Battle creation delegates to
 * the battle-runner; cancellation calls cancelBattle and then aborts in-flight tasks
 * via the dispatcher's cancelExternalTask.
 */
|
||
import type { FastifyInstance } from 'fastify';
|
||
import { z } from 'zod';
|
||
import { readFile } from 'node:fs/promises';
|
||
import { join } from 'node:path';
|
||
import type { Sql } from '../db.js';
|
||
import type { Config } from '../config.js';
|
||
import type { BattleRunner } from '../services/arena-runner.js';
|
||
import type { ExternalCancelFn } from './tasks.js';
|
||
import { arenaModelCall } from '../services/arena-model-call.js';
|
||
|
||
// ─── Validation schemas ───────────────────────────────────────────────────────
|
||
|
||
/** Route parameters that must be UUIDs (battle ids and contestant ids). */
const UuidParam = z.string().uuid();

/** Non-empty label capped at 200 characters; used for identity and model names. */
const ShortLabel = z.string().min(1).max(200);

/** One contestant entry inside a create-battle request. */
const ContestantInput = z.object({ identity: ShortLabel, model: ShortLabel });

/** Allowed contestant list: between 2 and 6 entries. */
const ContestantList = z
  .array(ContestantInput)
  .min(2, 'at least 2 contestants required')
  .max(6, 'at most 6 contestants allowed');

/** Body of POST /api/battles. */
const CreateBattleBody = z.object({
  project_id: z.string().uuid(),
  battle_type: z.enum(['coding', 'qa']),
  prompt: z.string().min(1).max(64_000),
  contestants: ContestantList,
});

/** Query string of GET /api/battles. */
const ListBattlesQuery = z.object({ project_id: z.string().uuid() });

/** Body of POST /api/battles/:id/cross-examine. */
const CrossExamineBody = z.object({ identity: ShortLabel, model: ShortLabel });

/** Body of PATCH /api/battles/:id/winner; null clears the winner. */
const SetWinnerBody = z.object({
  winner_contestant_id: z.string().uuid().nullable(),
});

/** Body of POST /api/battles/generate-prompt. */
const GeneratePromptBody = z.object({
  description: z.string().min(1).max(2_000),
});

// ─── Route registration ───────────────────────────────────────────────────────
|
||
|
||
/**
 * Registers every Arena (battle) HTTP route on the given Fastify instance.
 *
 * Handlers validate input with the zod schemas declared above, answer 400 on
 * malformed ids/bodies/queries, 404 when the battle (or contestant/artifact)
 * is missing, and 409 when the battle's status does not allow the operation.
 *
 * @param app            Fastify instance the routes are attached to; its logger is used for warnings.
 * @param sql            Tagged-template SQL client used for direct reads of projects, battles,
 *                       contestants and cross-examinations.
 * @param battleRunner   Service that starts and cancels battles, triggers analysis, sets the
 *                       winner and starts cross-examinations.
 * @param cancelExternal Invoked once per task id returned by `cancelBattle` to abort in-flight tasks.
 * @param config         Server configuration; supplies `DEFAULT_MODEL` and is forwarded to `arenaModelCall`.
 */
export function registerArenaRoutes(
  app: FastifyInstance,
  sql: Sql,
  battleRunner: BattleRunner,
  cancelExternal: ExternalCancelFn,
  config: Config,
): void {
  /**
   * Reads a UTF-8 text artifact from a results directory.
   *
   * Returns null when the file cannot be read so the caller can answer 404.
   * A missing file (ENOENT) is the expected "not produced yet" case and stays
   * silent; any other I/O failure is logged so it is not hidden behind the 404.
   */
  const readArtifact = async (dir: string, fileName: string): Promise<string | null> => {
    const path = join(dir, fileName);
    try {
      return await readFile(path, 'utf8');
    } catch (err) {
      const code =
        typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;
      if (code !== 'ENOENT') {
        app.log.warn(
          { err: err instanceof Error ? err.message : String(err), path },
          'arena: failed to read result artifact',
        );
      }
      return null;
    }
  };

  // POST /api/battles/generate-prompt — draft a fuller battle prompt from a
  // short description using the default model. One-shot, non-streaming.
  // Registered ahead of the /api/battles/:id routes so the literal
  // 'generate-prompt' segment is never treated as an :id value.
  app.post('/api/battles/generate-prompt', async (req, reply) => {
    const parsed = GeneratePromptBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

    const { description } = parsed.data;

    try {
      const prompt = await arenaModelCall({
        config,
        model: config.DEFAULT_MODEL,
        system: [
          'You are a battle-prompt writer for an AI Arena.',
          'The user gives you a short description of a coding or Q&A challenge.',
          'Expand it into a clear, self-contained prompt (2–6 sentences) that any AI model can act on.',
          'Include specific acceptance criteria where helpful.',
          'Output ONLY the prompt — no preamble, no labels, no meta-commentary.',
        ].join(' '),
        user: description,
        maxTokens: 400,
        temperature: 0.6,
      });
      return { prompt };
    } catch (err) {
      // Upstream model failure: log the reason, surface a generic 502 to the client.
      app.log.warn(
        { err: err instanceof Error ? err.message : String(err) },
        'arena generate-prompt: model call failed',
      );
      reply.code(502);
      return { error: 'model call failed' };
    }
  });

  // POST /api/battles — launch a battle
  app.post('/api/battles', async (req, reply) => {
    const parsed = CreateBattleBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

    const { project_id, battle_type, prompt, contestants } = parsed.data;

    // Reject duplicate (identity, model) pairs up front — the schema UNIQUE
    // constraint would catch it too, but an early 422 is friendlier.
    // The key is a JSON-encoded tuple so that a delimiter appearing inside
    // either field cannot make two distinct pairs look identical.
    const seen = new Set<string>();
    for (const c of contestants) {
      const key = JSON.stringify([c.identity, c.model]);
      if (seen.has(key)) {
        reply.code(422);
        return {
          error: 'duplicate_contestant',
          message: `duplicate contestant: identity="${c.identity}" model="${c.model}"`,
        };
      }
      seen.add(key);
    }

    // Verify project exists
    const [proj] = await sql<{ id: string }[]>`SELECT id FROM projects WHERE id = ${project_id}`;
    if (!proj) {
      reply.code(404);
      return { error: 'project not found' };
    }

    const { battleId } = await battleRunner.startBattle({
      projectId: project_id,
      battleType: battle_type,
      prompt,
      contestants,
    });

    reply.code(201);
    return { battle_id: battleId };
  });

  // GET /api/battles?project_id= — list battles, most-recent-first (capped at 100)
  app.get('/api/battles', async (req, reply) => {
    const parsed = ListBattlesQuery.safeParse(req.query);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid query', details: parsed.error.flatten() };
    }

    const battles = await sql`
      SELECT id, project_id, battle_type, prompt, status,
             winner_contestant_id, results_path, error,
             created_at, updated_at
      FROM battles
      WHERE project_id = ${parsed.data.project_id}
      ORDER BY created_at DESC
      LIMIT 100
    `;

    return { battles };
  });

  // GET /api/battles/:id — one battle + its contestants + cross-examinations
  app.get<{ Params: { id: string } }>('/api/battles/:id', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;

    const [battle] = await sql<{
      id: string;
      project_id: string;
      battle_type: string;
      prompt: string;
      status: string;
      winner_contestant_id: string | null;
      results_path: string | null;
      error: string | null;
      created_at: unknown;
      updated_at: unknown;
    }[]>`
      SELECT id, project_id, battle_type, prompt, status,
             winner_contestant_id, results_path, error,
             created_at, updated_at
      FROM battles WHERE id = ${id}
    `;

    if (!battle) {
      reply.code(404);
      return { error: 'battle not found' };
    }

    // The two child lookups are independent of each other, so run them concurrently.
    const [contestants, crossExaminations] = await Promise.all([
      sql`
        SELECT id, battle_id, identity, model, lane, task_id, worktree_id,
               status, duration_ms, tokens_per_sec, cost_tokens, token_breakdown, result_path, error,
               created_at, updated_at
        FROM contestants
        WHERE battle_id = ${id}
        ORDER BY created_at ASC
      `,
      sql`
        SELECT id, battle_id, identity, model, verdict, created_at
        FROM cross_examinations
        WHERE battle_id = ${id}
        ORDER BY created_at ASC
      `,
    ]);

    return { battle, contestants, cross_examinations: crossExaminations };
  });

  // POST /api/battles/:id/stop — cancel a running battle
  app.post<{ Params: { id: string } }>('/api/battles/:id/stop', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;

    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status !== 'running') {
      reply.code(409);
      return { error: `cannot stop battle in status '${row.status}'` };
    }

    // The status may have changed between the read above and this call;
    // cancelBattle reports that through `cancelled`.
    const { cancelled, taskIds } = await battleRunner.cancelBattle(id);
    if (!cancelled) {
      reply.code(409);
      return { error: 'battle is no longer running' };
    }

    // Abort any in-flight dispatcher tasks (cloud contestants running externally).
    for (const taskId of taskIds) {
      cancelExternal(taskId);
    }

    return { cancelled: true };
  });

  // GET /api/battles/:id/analysis — read analysis.md from the battle's results_path
  app.get<{ Params: { id: string } }>('/api/battles/:id/analysis', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;

    const [row] = await sql<{ results_path: string | null }[]>`
      SELECT results_path FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (!row.results_path) {
      reply.code(404);
      return { error: 'analysis not ready' };
    }

    const text = await readArtifact(row.results_path, 'analysis.md');
    if (text === null) {
      reply.code(404);
      return { error: 'analysis not ready' };
    }
    return { text };
  });

  // POST /api/battles/:id/analyze — trigger or re-trigger analysis
  app.post<{ Params: { id: string } }>('/api/battles/:id/analyze', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;

    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status === 'running') {
      reply.code(409);
      return { error: 'battle is still running — wait for all contestants to finish' };
    }

    const result = await battleRunner.triggerAnalysis(id);
    if (!result.triggered) {
      reply.code(404);
      return { error: 'battle not found' };
    }

    // 202: the request was accepted; this handler does not wait for the analysis output.
    reply.code(202);
    return { triggered: true };
  });

  // PATCH /api/battles/:id/winner — manually set or clear the winner.
  // Validates the contestant belongs to the battle; publishes battle_updated so
  // the pane badge reflects the override immediately. Human is authoritative.
  app.patch<{ Params: { id: string } }>('/api/battles/:id/winner', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }

    const parsed = SetWinnerBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

    const result = await battleRunner.setWinner(parsedId.data, parsed.data.winner_contestant_id);
    if (!result.ok) {
      if (result.notFound) {
        reply.code(404);
        return { error: 'battle not found' };
      }
      if (result.invalidContestant) {
        reply.code(422);
        return { error: 'contestant not found in this battle' };
      }
      reply.code(500);
      return { error: 'unknown error' };
    }
    return { ok: true };
  });

  // GET /api/battles/:id/contestants/:cid/diff — read the diff.patch for a coding contestant.
  app.get<{ Params: { id: string; cid: string } }>(
    '/api/battles/:id/contestants/:cid/diff',
    async (req, reply) => {
      const parsedId = UuidParam.safeParse(req.params.id);
      const parsedCid = UuidParam.safeParse(req.params.cid);
      if (!parsedId.success || !parsedCid.success) {
        reply.code(400);
        return { error: 'invalid id' };
      }

      // Matching on both ids ensures the contestant belongs to the battle in the URL.
      const [contestant] = await sql<{ result_path: string | null }[]>`
        SELECT result_path FROM contestants
        WHERE id = ${parsedCid.data} AND battle_id = ${parsedId.data}
      `;
      if (!contestant) {
        reply.code(404);
        return { error: 'contestant not found' };
      }
      if (!contestant.result_path) {
        reply.code(404);
        return { error: 'diff not available' };
      }

      const text = await readArtifact(contestant.result_path, 'diff.patch');
      if (text === null) {
        reply.code(404);
        return { error: 'diff not available' };
      }
      return { diff: text };
    },
  );

  // POST /api/battles/:id/cross-examine — start a cross-examination
  app.post<{ Params: { id: string } }>('/api/battles/:id/cross-examine', async (req, reply) => {
    const parsedId = UuidParam.safeParse(req.params.id);
    if (!parsedId.success) {
      reply.code(400);
      return { error: 'invalid id' };
    }
    const id = parsedId.data;

    const parsed = CrossExamineBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

    const [row] = await sql<{ id: string; status: string }[]>`
      SELECT id, status FROM battles WHERE id = ${id}
    `;
    if (!row) {
      reply.code(404);
      return { error: 'battle not found' };
    }
    if (row.status === 'running') {
      reply.code(409);
      return { error: 'battle is still running — cross-examine after all contestants finish' };
    }

    const { crossExamId } = await battleRunner.startCrossExam(id, {
      identity: parsed.data.identity,
      model: parsed.data.model,
    });

    reply.code(202);
    return { cross_exam_id: crossExamId };
  });
}
|