feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
493 lines
16 KiB
TypeScript
493 lines
16 KiB
TypeScript
import { randomUUID } from 'node:crypto';
|
|
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
|
import type { Sql } from '../db.js';
|
|
import type { FleetState } from '../services/fleet-state.js';
|
|
import type { DeltaEmitter } from '../index.js';
|
|
import { acquireHostAccess } from '../services/host-access.js';
|
|
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
|
|
import { runBenchSuite } from '../services/bench-engine.js';
|
|
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
|
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
|
|
|
|
/** Columns of `bench_suites` read by the suite queries in this module. */
type BenchSuiteRow = {
  id: string;
  name: string;
  provider_id: string;
  model: string;
  prompt_tokens: string;
  gen_tokens: string;
  concurrency: string;
  repetitions: number;
  metadata: string | null;
};

/** A `bench_suites` row that also carries its creation timestamp. */
type BenchSuiteListRow = BenchSuiteRow & { created_at: string };

/** Columns of `bench_runs` read by the run queries in this module. */
type BenchRunRow = {
  id: string;
  suite_id: string;
  job_type: string;
  status: string;
  started_at: string | null;
  finished_at: string | null;
  total_samples: number;
  completed_samples: number;
  concurrent_foreign_requests: number;
  regression_flag: string | null;
  aggregate: string | null;
  error: string | null;
  created_at: string;
};

/** True when `value` is a string with at least one character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0;
}

/** True when `value` is an array holding at least one element, all finite numbers. */
function isNonEmptyNumberArray(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((v) => typeof v === 'number' && Number.isFinite(v))
  );
}

/** Map a `bench_suites` row onto the camelCase suite shape used by the engine and the API. */
function toBenchSuite(row: BenchSuiteRow): BenchSuite {
  return {
    id: row.id,
    name: row.name,
    providerId: row.provider_id,
    model: row.model,
    promptTokens: jsonbNumberArray(row.prompt_tokens),
    genTokens: jsonbNumberArray(row.gen_tokens),
    concurrency: jsonbNumberArray(row.concurrency),
    repetitions: row.repetitions,
    metadata: jsonbObject(row.metadata) ?? undefined,
  };
}

/** Suite payload returned by the GET suite endpoints (suite fields plus `createdAt`). */
function toSuiteResponse(row: BenchSuiteListRow) {
  return { ...toBenchSuite(row), createdAt: row.created_at };
}

/** Run payload returned by the GET run endpoints. */
function toRunResponse(row: BenchRunRow) {
  return {
    id: row.id,
    suiteId: row.suite_id,
    jobType: row.job_type,
    status: row.status,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    totalSamples: row.total_samples,
    completedSamples: row.completed_samples,
    concurrentForeignRequests: row.concurrent_foreign_requests,
    regressionFlag: row.regression_flag,
    aggregate: jsonbObject(row.aggregate),
    error: row.error,
    createdAt: row.created_at,
  };
}

/**
 * Register bench routes.
 *
 * POST /api/bench/suite      — create a suite definition, or update it when the id already exists
 * GET  /api/bench/suites     — list suites, newest first
 * GET  /api/bench/suites/:id — get one suite
 * POST /api/bench/run        — start a bench run (gated through acquireHostAccess); replies 202 immediately
 * GET  /api/bench/runs       — list runs (optional `?suiteId=` filter; the unfiltered list is capped at 100)
 * GET  /api/bench/runs/:id   — get one run plus its samples
 * GET  /api/bench/baselines  — get baselines per (provider_id, model)
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     Tagged-template SQL client used for every query.
 * @param fleet   Fleet state; `fleet.hosts` is read to inspect the target host before a run.
 * @param emitter Delta emitter handed to the async bench runner.
 */
export function registerBenchRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  // ─── suite CRUD ──────────────────────────────────────────────────────────

  app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    // A request without a JSON body leaves req.body undefined; treat it as an
    // empty object so validation answers 400 instead of throwing.
    const body = (req.body ?? {}) as Record<string, unknown>;
    const { name, providerId, model, promptTokens, genTokens, concurrency } = body;
    const requestedId = body.id;
    const repetitions = body.repetitions ?? 1;
    const metadata = body.metadata as Record<string, unknown> | undefined;

    if (!isNonEmptyString(name) || !isNonEmptyString(providerId) || !isNonEmptyString(model)) {
      return reply.status(400).send({ error: 'name, providerId, and model are required' });
    }
    if (
      !isNonEmptyNumberArray(promptTokens) ||
      !isNonEmptyNumberArray(genTokens) ||
      !isNonEmptyNumberArray(concurrency)
    ) {
      return reply.status(400).send({ error: 'promptTokens, genTokens, and concurrency must each have at least one value' });
    }
    if (requestedId != null && !isNonEmptyString(requestedId)) {
      return reply.status(400).send({ error: 'id must be a non-empty string when provided' });
    }
    if (typeof repetitions !== 'number' || !Number.isInteger(repetitions) || repetitions < 1) {
      return reply.status(400).send({ error: 'repetitions must be a positive integer' });
    }

    // A caller-supplied id makes this an upsert; otherwise a fresh UUID is minted.
    const id = isNonEmptyString(requestedId) ? requestedId : randomUUID();
    await sql`
      INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
      VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`})
      ON CONFLICT (id) DO UPDATE SET
        name = EXCLUDED.name,
        provider_id = EXCLUDED.provider_id,
        model = EXCLUDED.model,
        prompt_tokens = EXCLUDED.prompt_tokens,
        gen_tokens = EXCLUDED.gen_tokens,
        concurrency = EXCLUDED.concurrency,
        repetitions = EXCLUDED.repetitions,
        metadata = EXCLUDED.metadata
    `;

    return reply.status(201).send({ id });
  });

  app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      ORDER BY created_at DESC
    `;

    return reply.send({ suites: suites.map(toSuiteResponse) });
  });

  app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const rows = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites WHERE id = ${id}
    `;

    const row = rows[0];
    if (!row) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    return reply.send(toSuiteResponse(row));
  });

  // ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────

  app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body ?? {}) as Record<string, unknown>;
    const suiteId = body.suiteId;
    const temperature = (body.temperature as number | null | undefined) ?? 0.7;
    const topP = (body.topP as number | null | undefined) ?? 0.9;

    if (!isNonEmptyString(suiteId)) {
      return reply.status(400).send({ error: 'suiteId is required' });
    }

    // Load suite.
    const suiteRows = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
      FROM bench_suites WHERE id = ${suiteId}
    `;

    const suiteRow = suiteRows[0];
    if (!suiteRow) {
      return reply.status(404).send({ error: 'suite not found' });
    }
    const suite = toBenchSuite(suiteRow);

    // P3.3: Safety check — report in-flight traffic on the target host to the caller.
    const hostState = fleet.hosts.get(suite.providerId);
    const recentTraffic = checkRecentTraffic(hostState);

    // Resolve the base URL BEFORE asking for host access: a provider without a
    // configured URL can never run, so it should not take a grant first.
    const baseUrl = resolveBaseUrl(suite.providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
    }

    // P3.4: Gate through acquireHostAccess seam.
    // NOTE(review): nothing in this file releases the grant — confirm that
    // acquireHostAccess needs no explicit release.
    const grant = await acquireHostAccess(suite.providerId, 'bench');
    if (!grant.ok) {
      return reply.status(409).send({
        error: 'host access denied',
        reason: grant.reason,
      });
    }

    // Sequence number of the host at launch time (0 when the host is unknown).
    const seq = hostState?.seq ?? 0;

    // Run the bench suite asynchronously (non-blocking HTTP response).
    void runBenchAsync({ suite, baseUrl, temperature, topP }, sql, emitter, seq, suite.providerId);

    return reply.status(202).send({
      status: 'queued',
      suiteId: suite.id,
      recentTraffic,
    });
  });

  // ─── runs listing ────────────────────────────────────────────────────────

  app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const suiteId = query.suiteId;

    // Filtered listing returns every run of the suite; the global listing is capped at 100.
    const runs = suiteId
      ? await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs WHERE suite_id = ${suiteId}
          ORDER BY created_at DESC
        `
      : await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          ORDER BY created_at DESC
          LIMIT 100
        `;

    return reply.send({ runs: runs.map(toRunResponse) });
  });

  app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };

    const runRows = await sql<BenchRunRow[]>`
      SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
      FROM bench_runs WHERE id = ${id}
    `;

    const run = runRows[0];
    if (!run) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const samples = await sql<{
      id: number;
      prompt_tokens: number;
      gen_tokens: number;
      concurrency: number;
      repetition: number;
      ttft_ms: number | null;
      total_ms: number | null;
      prompt_tps: number | null;
      gen_tps: number | null;
      cache_n: number | null;
      error: string | null;
    }[]>`
      SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
      FROM bench_samples WHERE run_id = ${id}
      ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
    `;

    return reply.send({
      run: toRunResponse(run),
      samples: samples.map((s) => ({
        id: s.id,
        promptTokens: s.prompt_tokens,
        genTokens: s.gen_tokens,
        concurrency: s.concurrency,
        repetition: s.repetition,
        ttftMs: s.ttft_ms,
        totalMs: s.total_ms,
        promptTps: s.prompt_tps,
        genTps: s.gen_tps,
        cacheN: s.cache_n,
        error: s.error,
      })),
    });
  });

  // ─── baselines ───────────────────────────────────────────────────────────

  app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      provider_id: string;
      model: string;
      run_id: string;
      aggregate: string;
      created_at: string;
    }[]>`
      SELECT provider_id, model, run_id, aggregate, created_at
      FROM bench_baselines
      ORDER BY provider_id, model
    `;

    return reply.send({
      baselines: rows.map((r) => ({
        providerId: r.provider_id,
        model: r.model,
        runId: r.run_id,
        aggregate: jsonbObject(r.aggregate),
        createdAt: r.created_at,
      })),
    });
  });
}
|
|
|
|
/**
 * P3.3: Summarise current load on the target host (for takeover confirmation).
 *
 * "Recent traffic" is measured as the sum of `inflight` over every model the
 * host currently tracks; no time window is involved.
 *
 * @param hostState Host entry from the fleet state, or `undefined` when the host is unknown.
 * @returns `inflightCount` — total in-flight requests across all models (0 for an
 *          unknown host); `hasRecentTraffic` — true when that total is positive.
 */
function checkRecentTraffic(hostState: { models: Map<string, { inflight: number }> } | undefined): { hasRecentTraffic: boolean; inflightCount: number } {
  let inflightCount = 0;
  // An unknown host contributes nothing: iterate an empty list instead.
  for (const model of hostState?.models.values() ?? []) {
    inflightCount += model.inflight;
  }
  return { hasRecentTraffic: inflightCount > 0, inflightCount };
}
|
|
|
|
/**
 * Resolve the base URL for a provider.
 *
 * Thin wrapper that delegates to `resolveProviderBaseUrl` from
 * `services/llama-providers`.
 *
 * NOTE(review): the original comment states that the URL comes from
 * `LlamaProvider.baseUrl` and never from `ssh_host`; that is decided inside the
 * delegate and cannot be confirmed from this file.
 *
 * @param providerId Identifier of the provider to look up.
 * @returns The provider's base URL, or `null` when the delegate has none.
 */
function resolveBaseUrl(providerId: string): string | null {
  return resolveProviderBaseUrl(providerId);
}
|
|
|
|
/** Extract a printable message from an arbitrary thrown value without throwing itself. */
function benchErrorMessage(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  const message = (err as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' ? message : String(err);
}

/**
 * A6: store `concurrent_foreign_requests` for a finished run.
 *
 * Counts `control_requests` rows for the provider whose `ts` lies inside the
 * run's [started_at, finished_at] window, subtracts the run's own
 * `completed_samples`, clamps at zero and writes the result to `bench_runs`.
 * Does nothing when the run row is missing or its window is incomplete.
 */
async function recordConcurrentForeignRequests(sql: Sql, runId: string, providerId: string): Promise<void> {
  const runData = await sql<{ started_at: string | null; finished_at: string | null; completed_samples: number }[]>`
    SELECT started_at, finished_at, completed_samples FROM bench_runs WHERE id = ${runId}
  `;
  const run = runData[0];
  if (!run || !run.started_at || !run.finished_at) {
    return;
  }

  const foreignCount = await sql<{ count: number }[]>`
    SELECT COUNT(*)::INT AS count FROM control_requests
    WHERE provider_id = ${providerId}
      AND ts >= ${run.started_at}::timestamptz
      AND ts <= ${run.finished_at}::timestamptz
  `;
  const totalForeign = (foreignCount[0]?.count ?? 0) - run.completed_samples;
  await sql`
    UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
    WHERE id = ${runId}
  `;
}

/**
 * Persist a run failure and publish the matching `control_job` event.
 * Never rejects: a failure while reporting is logged instead.
 */
async function markBenchRunFailed(
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  runId: string,
  msg: string,
): Promise<void> {
  try {
    await sql`
      UPDATE bench_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `;

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'bench' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  } catch (reportErr) {
    benchLogger?.error?.({ err: benchErrorMessage(reportErr) }, 'bench: failed to report run failure');
  }
}

/**
 * Async bench runner: fire-and-forget, records concurrent_foreign_requests.
 *
 * The caller launches this with `void` and never awaits it, so the returned
 * promise must never reject — every awaited step is guarded and failures are
 * logged through `benchLogger`.
 *
 * Steps:
 *  1. Look up the newest `bench_runs` row with status 'running' for the suite.
 *  2. Execute the suite through `runBenchSuite`.
 *  3. A6: record `concurrent_foreign_requests` for the run window.
 *
 * A failure in step 1 or 2 marks the run as failed (when a run id is known) and
 * publishes a failed `control_job` event. A failure in step 3 is only logged:
 * the benchmark itself already completed.
 *
 * @param params     Suite, provider base URL and sampling settings, forwarded to `runBenchSuite`.
 * @param sql        SQL client.
 * @param emitter    Delta emitter, forwarded to `runBenchSuite` and used for the failure event.
 * @param seq        Sequence number stamped on the failure event.
 * @param providerId Provider whose `control_requests` are counted in step 3.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;
  let runId: string | undefined;

  try {
    // Find the latest running run for this suite.
    // NOTE(review): this lookup happens BEFORE runBenchSuite is invoked and
    // nothing in this file inserts a bench_runs row — confirm the 'running'
    // row is created elsewhere, otherwise the bench never starts.
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;

    const current = latestRun[0];
    if (!current) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    runId = current.id;

    // Progress is published via emitter in runBenchSuite; the callback is a no-op.
    const progressHandler = (_progress: BenchRunProgress): void => {};

    await runBenchSuite(params, sql, emitter, seq, progressHandler);
  } catch (err) {
    const msg = benchErrorMessage(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');
    if (runId !== undefined) {
      await markBenchRunFailed(sql, emitter, seq, runId, msg);
    }
    return;
  }

  try {
    await recordConcurrentForeignRequests(sql, runId, providerId);
  } catch (err) {
    // Bookkeeping only: do not overturn a completed run because the count failed.
    benchLogger?.error?.(
      { err: benchErrorMessage(err) },
      'bench: failed to record concurrent_foreign_requests',
    );
  }
}
|
|
|
|
/**
 * Logger used by the async bench runner. Stays `undefined` until
 * `setBenchApp` is called, so every use goes through optional chaining.
 */
let benchLogger: FastifyBaseLogger | undefined;

/**
 * Set the Fastify logger for the async bench runner.
 *
 * @param logger Logger that subsequent bench-runner errors are written to.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
  benchLogger = logger;
}
|