import { randomUUID } from 'node:crypto';
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { acquireHostAccess } from '../services/host-access.js';
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
import { runBenchSuite } from '../services/bench-engine.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';

/**
 * Logger used by the fire-and-forget bench runner, which executes outside of
 * any request scope. Stays undefined (logging is skipped) until setBenchApp()
 * has been called.
 */
let benchLogger: FastifyBaseLogger | undefined;

/** Row shape selected from `bench_suites`. */
interface BenchSuiteRow {
  id: string;
  name: string;
  provider_id: string;
  model: string;
  prompt_tokens: string;
  gen_tokens: string;
  concurrency: string;
  repetitions: number;
  metadata: string | null;
  created_at: string;
}

/** Row shape selected from `bench_runs`. */
interface BenchRunRow {
  id: string;
  suite_id: string;
  job_type: string;
  status: string;
  started_at: string | null;
  finished_at: string | null;
  total_samples: number;
  completed_samples: number;
  concurrent_foreign_requests: number;
  regression_flag: string | null;
  aggregate: string | null;
  error: string | null;
  created_at: string;
}

/** Row shape selected from `bench_samples`. */
interface BenchSampleRow {
  id: number;
  prompt_tokens: number;
  gen_tokens: number;
  concurrency: number;
  repetition: number;
  ttft_ms: number | null;
  total_ms: number | null;
  prompt_tps: number | null;
  gen_tps: number | null;
  cache_n: number | null;
  error: string | null;
}

/** Row shape selected from `bench_baselines`. */
interface BenchBaselineRow {
  provider_id: string;
  model: string;
  run_id: string;
  aggregate: string;
  created_at: string;
}

/** True when `value` is an array with at least one element, all finite numbers. */
function isNonEmptyNumberArray(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((v) => typeof v === 'number' && Number.isFinite(v))
  );
}

/** Extract a printable message from an arbitrary thrown value without throwing. */
function errorMessage(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null) {
    const { message } = err as { message?: unknown };
    if (typeof message === 'string') {
      return message;
    }
  }
  return String(err);
}

/** Convert a `bench_suites` row into the camelCase BenchSuite shape. */
function toBenchSuite(row: Omit<BenchSuiteRow, 'created_at'>): BenchSuite {
  return {
    id: row.id,
    name: row.name,
    providerId: row.provider_id,
    model: row.model,
    promptTokens: jsonbNumberArray(row.prompt_tokens),
    genTokens: jsonbNumberArray(row.gen_tokens),
    concurrency: jsonbNumberArray(row.concurrency),
    repetitions: row.repetitions,
    metadata: jsonbObject(row.metadata) ?? undefined,
  };
}

/** Convert a `bench_suites` row into the API response shape (suite + createdAt). */
function toSuiteResponse(row: BenchSuiteRow) {
  return { ...toBenchSuite(row), createdAt: row.created_at };
}

/** Convert a `bench_runs` row into the API response shape. */
function toRunResponse(row: BenchRunRow) {
  return {
    id: row.id,
    suiteId: row.suite_id,
    jobType: row.job_type,
    status: row.status,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    totalSamples: row.total_samples,
    completedSamples: row.completed_samples,
    concurrentForeignRequests: row.concurrent_foreign_requests,
    regressionFlag: row.regression_flag,
    aggregate: jsonbObject(row.aggregate),
    error: row.error,
    createdAt: row.created_at,
  };
}

/**
 * Register bench routes.
 *
 * POST /api/bench/suite       — create (or upsert by id) a suite definition
 * GET  /api/bench/suites      — list suites
 * GET  /api/bench/suites/:id  — get suite
 * POST /api/bench/run         — start a bench run (gated through acquireHostAccess)
 * GET  /api/bench/runs        — list runs
 * GET  /api/bench/runs/:id    — get run + samples
 * GET  /api/bench/baselines   — get baselines per (provider_id, model)
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     Tagged-template SQL client used for all queries.
 * @param fleet   Fleet state; read for the target host's inflight counts and seq.
 * @param emitter Delta emitter handed to the async bench runner.
 */
export function registerBenchRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  // ─── suite CRUD ──────────────────────────────────────────────────────────

  app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    // req.body is undefined when the request carries no parsed payload;
    // default to an empty object so validation answers 400 instead of a 500.
    const body = (req.body ?? {}) as Record<string, unknown>;
    const suiteId = body.id as string | undefined;
    const name = body.name as string | undefined;
    const providerId = body.providerId as string | undefined;
    const model = body.model as string | undefined;
    const promptTokens = body.promptTokens;
    const genTokens = body.genTokens;
    const concurrency = body.concurrency;
    const repetitions = (body.repetitions as number | undefined) ?? 1;
    const metadata = body.metadata as Record<string, unknown> | undefined;

    if (!name || !providerId || !model) {
      return reply.status(400).send({ error: 'name, providerId, and model are required' });
    }
    // A bare `.length` check would also accept strings and arrays of
    // non-numbers; require real non-empty numeric arrays.
    if (
      !isNonEmptyNumberArray(promptTokens) ||
      !isNonEmptyNumberArray(genTokens) ||
      !isNonEmptyNumberArray(concurrency)
    ) {
      return reply.status(400).send({
        error: 'promptTokens, genTokens, and concurrency must each have at least one value',
      });
    }

    const id = suiteId ?? randomUUID();

    // Upsert: re-posting an existing id overwrites every column of that suite.
    await sql`
      INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
      VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`})
      ON CONFLICT (id) DO UPDATE SET
        name = EXCLUDED.name,
        provider_id = EXCLUDED.provider_id,
        model = EXCLUDED.model,
        prompt_tokens = EXCLUDED.prompt_tokens,
        gen_tokens = EXCLUDED.gen_tokens,
        concurrency = EXCLUDED.concurrency,
        repetitions = EXCLUDED.repetitions,
        metadata = EXCLUDED.metadata
    `;

    return reply.status(201).send({ id });
  });

  app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      ORDER BY created_at DESC
    `;

    return reply.send({ suites: suites.map(toSuiteResponse) });
  });

  app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };

    const rows = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      WHERE id = ${id}
    `;

    const row = rows[0];
    if (!row) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    return reply.send(toSuiteResponse(row));
  });

  // ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────

  app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body ?? {}) as Record<string, unknown>;
    const suiteId = body.suiteId as string | undefined;
    const temperature = (body.temperature as number | undefined) ?? 0.7;
    const topP = (body.topP as number | undefined) ?? 0.9;

    if (!suiteId) {
      return reply.status(400).send({ error: 'suiteId is required' });
    }

    // Load suite.
    const suiteRows = await sql<Omit<BenchSuiteRow, 'created_at'>[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
      FROM bench_suites
      WHERE id = ${suiteId}
    `;

    const suiteRow = suiteRows[0];
    if (!suiteRow) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    const suite = toBenchSuite(suiteRow);

    // P3.3: Safety check — check recent traffic on the target host.
    const hostState = fleet.hosts.get(suite.providerId);
    const recentTraffic = checkRecentTraffic(hostState);

    // Resolve base URL from registry *before* asking for host access, so a
    // request that can never run does not acquire a grant first.
    const baseUrl = resolveBaseUrl(suite.providerId);
    if (!baseUrl) {
      return reply
        .status(400)
        .send({ error: `no base URL configured for provider ${suite.providerId}` });
    }

    // P3.4: Gate through acquireHostAccess seam.
    // NOTE(review): the grant is never explicitly released in this file —
    // confirm acquireHostAccess does not require a matching release call.
    const grant = await acquireHostAccess(suite.providerId, 'bench');
    if (!grant.ok) {
      return reply.status(409).send({
        error: 'host access denied',
        reason: grant.reason,
      });
    }

    // Get seq for the host.
    const seq = hostState?.seq ?? 0;

    // Run the bench suite asynchronously (non-blocking HTTP response).
    // runBenchAsync handles its own errors and never rejects.
    void runBenchAsync(
      { suite, baseUrl, temperature, topP },
      sql,
      emitter,
      seq,
      suite.providerId,
    );

    return reply.status(202).send({
      status: 'queued',
      suiteId: suite.id,
      recentTraffic,
    });
  });

  // ─── runs listing ────────────────────────────────────────────────────────

  app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const suiteId = query.suiteId;

    // Filtered listing returns every run of the suite; the unfiltered listing
    // is capped at the 100 most recent runs.
    const runs = suiteId
      ? await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          WHERE suite_id = ${suiteId}
          ORDER BY created_at DESC
        `
      : await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          ORDER BY created_at DESC
          LIMIT 100
        `;

    return reply.send({ runs: runs.map(toRunResponse) });
  });

  app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };

    const runRows = await sql<BenchRunRow[]>`
      SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
      FROM bench_runs
      WHERE id = ${id}
    `;

    const run = runRows[0];
    if (!run) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const samples = await sql<BenchSampleRow[]>`
      SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
      FROM bench_samples
      WHERE run_id = ${id}
      ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
    `;

    return reply.send({
      run: toRunResponse(run),
      samples: samples.map((s) => ({
        id: s.id,
        promptTokens: s.prompt_tokens,
        genTokens: s.gen_tokens,
        concurrency: s.concurrency,
        repetition: s.repetition,
        ttftMs: s.ttft_ms,
        totalMs: s.total_ms,
        promptTps: s.prompt_tps,
        genTps: s.gen_tps,
        cacheN: s.cache_n,
        error: s.error,
      })),
    });
  });

  // ─── baselines ───────────────────────────────────────────────────────────

  app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<BenchBaselineRow[]>`
      SELECT provider_id, model, run_id, aggregate, created_at
      FROM bench_baselines
      ORDER BY provider_id, model
    `;

    return reply.send({
      baselines: rows.map((r) => ({
        providerId: r.provider_id,
        model: r.model,
        runId: r.run_id,
        aggregate: jsonbObject(r.aggregate),
        createdAt: r.created_at,
      })),
    });
  });
}

/**
 * P3.3: Check if the target host has recent traffic (for takeover confirmation).
 *
 * Sums `inflight` over every model entry of the host.
 *
 * @param hostState Host entry from the fleet state, or undefined when the host is unknown.
 * @returns `inflightCount` is the summed inflight value; `hasRecentTraffic` is true when it is > 0.
 *          An unknown host reports no traffic.
 */
function checkRecentTraffic(
  // NOTE(review): key type assumed to be string (model name) — confirm against FleetState.
  hostState: { models: ReadonlyMap<string, { inflight: number }> } | undefined,
): { hasRecentTraffic: boolean; inflightCount: number } {
  if (!hostState) {
    return { hasRecentTraffic: false, inflightCount: 0 };
  }

  let total = 0;
  for (const m of hostState.models.values()) {
    total += m.inflight;
  }

  return {
    hasRecentTraffic: total > 0,
    inflightCount: total,
  };
}

/**
 * Resolve the base URL for a provider from the loaded registry.
 * baseUrl comes from LlamaProvider.baseUrl, never from ssh_host.
 *
 * @returns The provider's base URL, or null when none is configured.
 */
function resolveBaseUrl(providerId: string): string | null {
  return resolveProviderBaseUrl(providerId);
}

/**
 * Async bench runner: fire-and-forget, records concurrent_foreign_requests.
 * A6: sources from activity stream during [started_at, finished_at] window,
 * minus the bench's own samples count.
 *
 * Callers invoke this with `void`, so it must never reject: every failure is
 * logged and, when a run id is known, recorded on the run row and published
 * as a failed control_job event.
 *
 * @param params     Suite plus connection/sampling settings forwarded to runBenchSuite.
 * @param sql        SQL client.
 * @param emitter    Emitter used for the failure event (and by runBenchSuite).
 * @param seq        Host sequence number attached to emitted events.
 * @param providerId Provider whose control_requests are counted as foreign traffic.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;
  let runId: string | undefined;

  try {
    // Find the latest running run for this suite.
    // NOTE(review): this lookup happens before runBenchSuite is invoked, so it
    // only succeeds if a 'running' bench_runs row for the suite already exists
    // at this point — confirm where that row is created.
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;

    const latest = latestRun[0];
    if (!latest) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    const currentRunId = latest.id;
    runId = currentRunId;

    const progressHandler = (_progress: BenchRunProgress) => {
      // Progress is published via emitter in runBenchSuite.
    };

    await runBenchSuite(params, sql, emitter, seq, progressHandler);

    // A6: Record concurrent_foreign_requests from activity stream during run window.
    // Count control_requests for this provider in [started_at, finished_at],
    // minus the bench's own sample count.
    const runData = await sql<{
      started_at: string | null;
      finished_at: string | null;
      completed_samples: number;
    }[]>`
      SELECT started_at, finished_at, completed_samples
      FROM bench_runs WHERE id = ${currentRunId}
    `;

    // The row can be missing (e.g. deleted mid-run); that must not turn a
    // completed run into a reported failure.
    const rd = runData[0];
    if (rd?.started_at && rd.finished_at) {
      const foreignCount = await sql<{ count: number }[]>`
        SELECT COUNT(*)::INT AS count
        FROM control_requests
        WHERE provider_id = ${providerId}
          AND ts >= ${rd.started_at}::timestamptz
          AND ts <= ${rd.finished_at}::timestamptz
      `;
      const totalForeign = (foreignCount[0]?.count ?? 0) - rd.completed_samples;

      // Clamp at zero: the bench's own samples may outnumber the logged requests.
      await sql`
        UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
        WHERE id = ${currentRunId}
      `;
    }
  } catch (err: unknown) {
    const msg = errorMessage(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');

    // The failure happened before a run id was known; nothing to mark.
    if (runId === undefined) {
      return;
    }

    // Persisting and publishing the failure are each best-effort: a failure
    // in one must not prevent the other or escape as an unhandled rejection.
    try {
      await sql`
        UPDATE bench_runs SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
        WHERE id = ${runId}
      `;
    } catch (dbErr: unknown) {
      benchLogger?.error?.({ err: errorMessage(dbErr) }, 'bench: failed to record run failure');
    }

    try {
      emitter.publish({
        type: 'control_job' as const,
        seq,
        jobType: 'bench' as const,
        jobId: runId,
        status: 'failed' as const,
        detail: { error: msg },
      });
    } catch (pubErr: unknown) {
      benchLogger?.error?.({ err: errorMessage(pubErr) }, 'bench: failed to publish run failure');
    }
  }
}

/**
 * Set the Fastify logger for the async bench runner.
 *
 * @param logger Logger used for errors raised outside request scope.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
  benchLogger = logger;
}