Files
boocode/apps/control/src/routes/bench.ts
indifferentketchup b18de2a331 chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean).

wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes.

openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
2026-06-14 12:48:47 +00:00

493 lines
16 KiB
TypeScript

import { randomUUID } from 'node:crypto';
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { acquireHostAccess } from '../services/host-access.js';
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
import { runBenchSuite } from '../services/bench-engine.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
/**
 * Register bench routes.
 *
 * POST /api/bench/suite      — create (or upsert by id) a suite definition
 * GET  /api/bench/suites     — list suites
 * GET  /api/bench/suites/:id — get suite
 * POST /api/bench/run        — start a bench run (gated through acquireHostAccess)
 * GET  /api/bench/runs       — list runs (optionally filtered by ?suiteId=)
 * GET  /api/bench/runs/:id   — get run + samples
 * GET  /api/bench/baselines  — get baselines per (provider_id, model)
 *
 * Request bodies are validated at runtime before anything is written to the
 * database: a missing body or wrongly-typed field yields a 400 response
 * instead of an unhandled TypeError or a malformed row.
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     Tagged-template SQL client used for every query.
 * @param fleet   Fleet state; `fleet.hosts` is read to inspect the target host.
 * @param emitter Delta emitter handed through to the async bench runner.
 */
export function registerBenchRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  // ─── suite CRUD ──────────────────────────────────────────────────────────
  app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    // req.body may be undefined (no payload) or a non-object; treat both as empty.
    const body = asBenchRecord(req.body);

    const { name, providerId, model } = body;
    if (!isNonEmptyString(name) || !isNonEmptyString(providerId) || !isNonEmptyString(model)) {
      return reply.status(400).send({ error: 'name, providerId, and model are required' });
    }

    const { promptTokens, genTokens, concurrency } = body;
    if (
      !isNonEmptyNumberArray(promptTokens) ||
      !isNonEmptyNumberArray(genTokens) ||
      !isNonEmptyNumberArray(concurrency)
    ) {
      return reply.status(400).send({ error: 'promptTokens, genTokens, and concurrency must each have at least one value' });
    }

    const repetitions = body.repetitions ?? 1;
    if (!isPositiveInteger(repetitions)) {
      return reply.status(400).send({ error: 'repetitions must be a positive integer' });
    }

    const rawMetadata = body.metadata;
    if (rawMetadata != null && !isPlainBenchRecord(rawMetadata)) {
      return reply.status(400).send({ error: 'metadata must be an object' });
    }
    const metadata = isPlainBenchRecord(rawMetadata) ? rawMetadata : undefined;

    // A caller-supplied id turns the insert into an upsert; otherwise mint one.
    const rawId = body.id;
    let id: string;
    if (rawId == null) {
      id = randomUUID();
    } else if (isNonEmptyString(rawId)) {
      id = rawId;
    } else {
      return reply.status(400).send({ error: 'id must be a non-empty string' });
    }

    await sql`
      INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
      VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`})
      ON CONFLICT (id) DO UPDATE SET
        name = EXCLUDED.name,
        provider_id = EXCLUDED.provider_id,
        model = EXCLUDED.model,
        prompt_tokens = EXCLUDED.prompt_tokens,
        gen_tokens = EXCLUDED.gen_tokens,
        concurrency = EXCLUDED.concurrency,
        repetitions = EXCLUDED.repetitions,
        metadata = EXCLUDED.metadata
    `;
    return reply.status(201).send({ id });
  });

  app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      ORDER BY created_at DESC
    `;
    return reply.send({ suites: suites.map(toBenchSuiteDto) });
  });

  app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const rows = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites WHERE id = ${id}
    `;
    const row = rows[0];
    if (!row) {
      return reply.status(404).send({ error: 'suite not found' });
    }
    return reply.send(toBenchSuiteDto(row));
  });

  // ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────
  app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = asBenchRecord(req.body);

    const { suiteId } = body;
    if (!isNonEmptyString(suiteId)) {
      return reply.status(400).send({ error: 'suiteId is required' });
    }

    // Sampling parameters fall back to the defaults only when omitted.
    const temperature = body.temperature ?? 0.7;
    const topP = body.topP ?? 0.9;
    if (!isFiniteNumber(temperature) || !isFiniteNumber(topP)) {
      return reply.status(400).send({ error: 'temperature and topP must be numbers' });
    }

    // Load suite.
    const suiteRows = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
      FROM bench_suites WHERE id = ${suiteId}
    `;
    const suiteRow = suiteRows[0];
    if (!suiteRow) {
      return reply.status(404).send({ error: 'suite not found' });
    }
    const suite: BenchSuite = toBenchSuite(suiteRow);

    // P3.3: Safety check — check recent traffic on the target host.
    const hostState = fleet.hosts.get(suite.providerId);
    const recentTraffic = checkRecentTraffic(hostState);

    // Resolve base URL from registry BEFORE asking for host access, so a
    // misconfigured provider is rejected without ever taking a grant.
    const baseUrl = resolveBaseUrl(suite.providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
    }

    // P3.4: Gate through acquireHostAccess seam.
    const grant = await acquireHostAccess(suite.providerId, 'bench');
    if (!grant.ok) {
      return reply.status(409).send({
        error: 'host access denied',
        reason: grant.reason,
      });
    }

    // Get seq for the host.
    const seq = hostState?.seq ?? 0;

    // Run the bench suite asynchronously (non-blocking HTTP response).
    void runBenchAsync(
      { suite, baseUrl, temperature, topP },
      sql,
      emitter,
      seq,
      suite.providerId,
    );

    return reply.status(202).send({
      status: 'queued',
      suiteId: suite.id,
      recentTraffic,
    });
  });

  // ─── runs listing ────────────────────────────────────────────────────────
  app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const { suiteId } = req.query as Record<string, string | undefined>;
    // Filtered listing returns every run of the suite; the unfiltered listing
    // is capped at the 100 most recent runs.
    const runs = suiteId
      ? await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs WHERE suite_id = ${suiteId}
          ORDER BY created_at DESC
        `
      : await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          ORDER BY created_at DESC
          LIMIT 100
        `;
    return reply.send({ runs: runs.map(toBenchRunDto) });
  });

  app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const runRows = await sql<BenchRunRow[]>`
      SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
      FROM bench_runs WHERE id = ${id}
    `;
    const runRow = runRows[0];
    if (!runRow) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const samples = await sql<{
      id: number;
      prompt_tokens: number;
      gen_tokens: number;
      concurrency: number;
      repetition: number;
      ttft_ms: number | null;
      total_ms: number | null;
      prompt_tps: number | null;
      gen_tps: number | null;
      cache_n: number | null;
      error: string | null;
    }[]>`
      SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
      FROM bench_samples WHERE run_id = ${id}
      ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
    `;

    return reply.send({
      run: toBenchRunDto(runRow),
      samples: samples.map((s) => ({
        id: s.id,
        promptTokens: s.prompt_tokens,
        genTokens: s.gen_tokens,
        concurrency: s.concurrency,
        repetition: s.repetition,
        ttftMs: s.ttft_ms,
        totalMs: s.total_ms,
        promptTps: s.prompt_tps,
        genTps: s.gen_tps,
        cacheN: s.cache_n,
        error: s.error,
      })),
    });
  });

  // ─── baselines ───────────────────────────────────────────────────────────
  app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      provider_id: string;
      model: string;
      run_id: string;
      aggregate: string;
      created_at: string;
    }[]>`
      SELECT provider_id, model, run_id, aggregate, created_at
      FROM bench_baselines
      ORDER BY provider_id, model
    `;
    return reply.send({
      baselines: rows.map((r) => ({
        providerId: r.provider_id,
        model: r.model,
        runId: r.run_id,
        aggregate: jsonbObject(r.aggregate),
        createdAt: r.created_at,
      })),
    });
  });
}

/** Columns of `bench_suites` needed to build a BenchSuite. */
interface BenchSuiteRow {
  id: string;
  name: string;
  provider_id: string;
  model: string;
  prompt_tokens: string;
  gen_tokens: string;
  concurrency: string;
  repetitions: number;
  metadata: string | null;
}

/** Suite row as selected by the listing/detail endpoints (adds created_at). */
interface BenchSuiteListRow extends BenchSuiteRow {
  created_at: string;
}

/** Columns of `bench_runs` returned by the run endpoints. */
interface BenchRunRow {
  id: string;
  suite_id: string;
  job_type: string;
  status: string;
  started_at: string | null;
  finished_at: string | null;
  total_samples: number;
  completed_samples: number;
  concurrent_foreign_requests: number;
  regression_flag: string | null;
  aggregate: string | null;
  error: string | null;
  created_at: string;
}

/** Convert a snake_case suite row into the camelCase BenchSuite shape. */
function toBenchSuite(row: BenchSuiteRow): BenchSuite {
  return {
    id: row.id,
    name: row.name,
    providerId: row.provider_id,
    model: row.model,
    promptTokens: jsonbNumberArray(row.prompt_tokens),
    genTokens: jsonbNumberArray(row.gen_tokens),
    concurrency: jsonbNumberArray(row.concurrency),
    repetitions: row.repetitions,
    metadata: jsonbObject(row.metadata) ?? undefined,
  };
}

/** Suite payload for API responses: the BenchSuite fields plus createdAt. */
function toBenchSuiteDto(row: BenchSuiteListRow) {
  return { ...toBenchSuite(row), createdAt: row.created_at };
}

/** Run payload for API responses (camelCase keys, aggregate decoded). */
function toBenchRunDto(row: BenchRunRow) {
  return {
    id: row.id,
    suiteId: row.suite_id,
    jobType: row.job_type,
    status: row.status,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    totalSamples: row.total_samples,
    completedSamples: row.completed_samples,
    concurrentForeignRequests: row.concurrent_foreign_requests,
    regressionFlag: row.regression_flag,
    aggregate: jsonbObject(row.aggregate),
    error: row.error,
    createdAt: row.created_at,
  };
}

/** True for non-null, non-array objects. */
function isPlainBenchRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Coerce an arbitrary request body into a record; anything else becomes `{}`. */
function asBenchRecord(value: unknown): Record<string, unknown> {
  return isPlainBenchRecord(value) ? value : {};
}

/** True for strings with at least one character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0;
}

/** True for finite numbers (rejects NaN, ±Infinity and non-numbers). */
function isFiniteNumber(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value);
}

/** True for integers >= 1. */
function isPositiveInteger(value: unknown): value is number {
  return typeof value === 'number' && Number.isInteger(value) && value >= 1;
}

/** True for arrays holding at least one element, all of them finite numbers. */
function isNonEmptyNumberArray(value: unknown): value is number[] {
  return Array.isArray(value) && value.length > 0 && value.every(isFiniteNumber);
}
/**
 * P3.3: Summarise in-flight load on the target host (for takeover confirmation).
 *
 * Adds up the `inflight` counter of every model on the host. An unknown host
 * (`undefined`) is reported as idle.
 *
 * @param hostState Host entry exposing a `models` map, or `undefined`.
 * @returns The summed in-flight count and whether it is greater than zero.
 */
function checkRecentTraffic(hostState: { models: Map<string, { inflight: number }> } | undefined): { hasRecentTraffic: boolean; inflightCount: number } {
  const inflightCount = hostState
    ? Array.from(hostState.models.values()).reduce((sum, model) => sum + model.inflight, 0)
    : 0;
  return { hasRecentTraffic: inflightCount > 0, inflightCount };
}
/**
 * Look up the base URL for a provider by delegating to resolveProviderBaseUrl.
 *
 * Per the original author's note, the value comes from LlamaProvider.baseUrl
 * and never from ssh_host (not verifiable from this file alone).
 *
 * @param providerId Provider identifier to resolve.
 * @returns The configured base URL, or `null` when none is available.
 */
function resolveBaseUrl(providerId: string): string | null {
  const baseUrl = resolveProviderBaseUrl(providerId);
  return baseUrl;
}
/**
 * Async bench runner: fire-and-forget, records concurrent_foreign_requests.
 *
 * The caller launches this with `void` and never awaits it, so this function
 * must not reject: every failure path is caught and logged here. Steps:
 *   1. Look up the most recent `running` row in bench_runs for the suite.
 *   2. Execute the suite via runBenchSuite; on failure mark the run `failed`
 *      and publish a failed `control_job` event.
 *   3. A6: record concurrent_foreign_requests from the activity stream during
 *      the [started_at, finished_at] window, minus the bench's own samples.
 *      A failure in this bookkeeping step is logged only; it does not flip a
 *      finished run to `failed`.
 *
 * NOTE(review): step 1 runs before runBenchSuite is invoked, so it assumes a
 * `running` row already exists for the suite — confirm where that row is created.
 *
 * @param params     Suite plus connection/sampling options passed to runBenchSuite.
 * @param sql        Tagged-template SQL client.
 * @param emitter    Delta emitter used to publish the failure event.
 * @param seq        Sequence number attached to the published event.
 * @param providerId Provider whose control_requests are counted as foreign traffic.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;

  // Find the latest running run for this suite.
  let runId: string;
  try {
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;
    const latest = latestRun[0];
    if (!latest) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    runId = latest.id;
  } catch (err: unknown) {
    benchLogger?.error?.({ err: describeBenchError(err) }, 'bench: run lookup failed');
    return;
  }

  const progressHandler = (_progress: BenchRunProgress) => {
    // Progress is published via emitter in runBenchSuite.
  };

  try {
    await runBenchSuite(params, sql, emitter, seq, progressHandler);
  } catch (err: unknown) {
    const msg = describeBenchError(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');
    await markBenchRunFailed(sql, emitter, seq, runId, msg);
    return;
  }

  try {
    await recordConcurrentForeignRequests(sql, runId, providerId);
  } catch (err: unknown) {
    benchLogger?.error?.(
      { err: describeBenchError(err) },
      'bench: failed to record concurrent_foreign_requests',
    );
  }
}

/** Extract a printable message from any thrown value, including null/undefined. */
function describeBenchError(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null && 'message' in err) {
    const { message } = err as { message: unknown };
    if (typeof message === 'string') {
      return message;
    }
  }
  return String(err);
}

/** Persist the failed status and publish the failure event; never rejects. */
async function markBenchRunFailed(
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  runId: string,
  msg: string,
): Promise<void> {
  try {
    await sql`
      UPDATE bench_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `;
  } catch (err: unknown) {
    benchLogger?.error?.({ err: describeBenchError(err) }, 'bench: failed to persist failed status');
  }
  try {
    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'bench' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  } catch (err: unknown) {
    benchLogger?.error?.({ err: describeBenchError(err) }, 'bench: failed to publish failed status');
  }
}

/**
 * A6: count control_requests for the provider inside the run's
 * [started_at, finished_at] window, subtract the run's completed_samples,
 * clamp at zero and store the result on the run. Does nothing when the run
 * row is missing or either timestamp is unset.
 */
async function recordConcurrentForeignRequests(
  sql: Sql,
  runId: string,
  providerId: string,
): Promise<void> {
  const runData = await sql<{ started_at: string | null; finished_at: string | null; completed_samples: number }[]>`
    SELECT started_at, finished_at, completed_samples FROM bench_runs WHERE id = ${runId}
  `;
  const rd = runData[0];
  if (!rd || !rd.started_at || !rd.finished_at) {
    return;
  }
  const foreignCount = await sql<{ count: number }[]>`
    SELECT COUNT(*)::INT AS count FROM control_requests
    WHERE provider_id = ${providerId}
    AND ts >= ${rd.started_at}::timestamptz
    AND ts <= ${rd.finished_at}::timestamptz
  `;
  const totalForeign = (foreignCount[0]?.count ?? 0) - rd.completed_samples;
  await sql`
    UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
    WHERE id = ${runId}
  `;
}
// Logger used by the async bench runner; stays undefined until setBenchApp is called.
let benchLogger: FastifyBaseLogger | undefined;

/**
 * Set the Fastify logger for the async bench runner.
 *
 * @param logger Logger instance stored in the module-level `benchLogger`.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
  benchLogger = logger;
}