chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
492
apps/control/src/routes/bench.ts
Normal file
492
apps/control/src/routes/bench.ts
Normal file
@@ -0,0 +1,492 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { acquireHostAccess } from '../services/host-access.js';
|
||||
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
|
||||
import { runBenchSuite } from '../services/bench-engine.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
|
||||
|
||||
/** Columns read from `bench_suites`. JSONB columns are decoded with the jsonb helpers. */
interface BenchSuiteRow {
  id: string;
  name: string;
  provider_id: string;
  model: string;
  prompt_tokens: string;
  gen_tokens: string;
  concurrency: string;
  repetitions: number;
  metadata: string | null;
}

/** Columns read from `bench_runs` by the run listing and run detail routes. */
interface BenchRunRow {
  id: string;
  suite_id: string;
  job_type: string;
  status: string;
  started_at: string | null;
  finished_at: string | null;
  total_samples: number;
  completed_samples: number;
  concurrent_foreign_requests: number;
  regression_flag: string | null;
  aggregate: string | null;
  error: string | null;
  created_at: string;
}

/** True for non-null, non-array objects (the only JSON body / metadata shape accepted here). */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True for arrays that hold at least one element and only finite numbers. */
function isNonEmptyNumberArray(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((v) => typeof v === 'number' && Number.isFinite(v))
  );
}

/** Map a `bench_suites` row to the camelCase suite shape shared by the API and the bench engine. */
function suiteFromRow(row: BenchSuiteRow) {
  return {
    id: row.id,
    name: row.name,
    providerId: row.provider_id,
    model: row.model,
    promptTokens: jsonbNumberArray(row.prompt_tokens),
    genTokens: jsonbNumberArray(row.gen_tokens),
    concurrency: jsonbNumberArray(row.concurrency),
    repetitions: row.repetitions,
    metadata: jsonbObject(row.metadata) ?? undefined,
  };
}

/** Map a `bench_runs` row to the camelCase run shape returned by the API. */
function runFromRow(row: BenchRunRow) {
  return {
    id: row.id,
    suiteId: row.suite_id,
    jobType: row.job_type,
    status: row.status,
    startedAt: row.started_at,
    finishedAt: row.finished_at,
    totalSamples: row.total_samples,
    completedSamples: row.completed_samples,
    concurrentForeignRequests: row.concurrent_foreign_requests,
    regressionFlag: row.regression_flag,
    aggregate: jsonbObject(row.aggregate),
    error: row.error,
    createdAt: row.created_at,
  };
}

/**
 * Register bench routes.
 *
 * POST /api/bench/suite       — create (or upsert by id) a suite definition
 * GET  /api/bench/suites      — list suites
 * GET  /api/bench/suites/:id  — get suite
 * POST /api/bench/run         — start a bench run (gated through acquireHostAccess)
 * GET  /api/bench/runs        — list runs (optionally filtered by ?suiteId=)
 * GET  /api/bench/runs/:id    — get run + samples
 * GET  /api/bench/baselines   — get baselines per (provider_id, model)
 *
 * Request bodies are treated as untrusted: malformed payloads are answered
 * with 400 instead of being cast and written to the database.
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     Tagged-template SQL client used for all queries.
 * @param fleet   Fleet state; `fleet.hosts` is looked up by provider id.
 * @param emitter Delta emitter handed to the background bench runner.
 */
export function registerBenchRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  // ─── suite CRUD ──────────────────────────────────────────────────────────

  app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    // A missing or non-object body is handled as an empty payload so it fails
    // the required-field checks below with a 400 rather than throwing.
    const body = isPlainObject(req.body) ? req.body : {};
    const { name, providerId, model, promptTokens, genTokens, concurrency, metadata } = body;
    const suiteId = body.id;
    const repetitions = body.repetitions ?? 1;

    if (
      typeof name !== 'string' || name === '' ||
      typeof providerId !== 'string' || providerId === '' ||
      typeof model !== 'string' || model === ''
    ) {
      return reply.status(400).send({ error: 'name, providerId, and model are required' });
    }
    if (
      !isNonEmptyNumberArray(promptTokens) ||
      !isNonEmptyNumberArray(genTokens) ||
      !isNonEmptyNumberArray(concurrency)
    ) {
      return reply.status(400).send({
        error: 'promptTokens, genTokens, and concurrency must each be a non-empty array of numbers',
      });
    }
    if (typeof repetitions !== 'number' || !Number.isInteger(repetitions) || repetitions < 1) {
      return reply.status(400).send({ error: 'repetitions must be a positive integer' });
    }
    if (suiteId != null && (typeof suiteId !== 'string' || suiteId === '')) {
      return reply.status(400).send({ error: 'id must be a non-empty string when provided' });
    }
    if (metadata != null && !isPlainObject(metadata)) {
      return reply.status(400).send({ error: 'metadata must be a JSON object when provided' });
    }

    // Caller-supplied id upserts an existing suite; otherwise a fresh UUID is generated.
    const id = typeof suiteId === 'string' ? suiteId : randomUUID();
    const metadataValue = isPlainObject(metadata) ? sql.json(metadata as never) : sql`NULL::jsonb`;

    await sql`
      INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
      VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadataValue})
      ON CONFLICT (id) DO UPDATE SET
        name = EXCLUDED.name,
        provider_id = EXCLUDED.provider_id,
        model = EXCLUDED.model,
        prompt_tokens = EXCLUDED.prompt_tokens,
        gen_tokens = EXCLUDED.gen_tokens,
        concurrency = EXCLUDED.concurrency,
        repetitions = EXCLUDED.repetitions,
        metadata = EXCLUDED.metadata
    `;

    return reply.status(201).send({ id });
  });

  app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await sql<(BenchSuiteRow & { created_at: string })[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      ORDER BY created_at DESC
    `;

    return reply.send({
      suites: suites.map((s) => ({ ...suiteFromRow(s), createdAt: s.created_at })),
    });
  });

  app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const [row] = await sql<(BenchSuiteRow & { created_at: string })[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites WHERE id = ${id}
    `;

    if (!row) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    return reply.send({ ...suiteFromRow(row), createdAt: row.created_at });
  });

  // ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────

  app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = isPlainObject(req.body) ? req.body : {};
    const suiteId = body.suiteId;
    const temperature = body.temperature ?? 0.7;
    const topP = body.topP ?? 0.9;

    if (typeof suiteId !== 'string' || suiteId === '') {
      return reply.status(400).send({ error: 'suiteId is required' });
    }
    if (typeof temperature !== 'number' || !Number.isFinite(temperature)) {
      return reply.status(400).send({ error: 'temperature must be a finite number' });
    }
    if (typeof topP !== 'number' || !Number.isFinite(topP)) {
      return reply.status(400).send({ error: 'topP must be a finite number' });
    }

    // Load suite.
    const [suiteRow] = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
      FROM bench_suites WHERE id = ${suiteId}
    `;

    if (!suiteRow) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    const suite: BenchSuite = suiteFromRow(suiteRow);

    // Resolve the base URL before requesting host access, so a provider
    // without a configured URL is rejected without a grant being taken.
    const baseUrl = resolveBaseUrl(suite.providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
    }

    // P3.3: Safety check — in-flight traffic on the target host. The result is
    // reported to the caller in the 202 response; it does not block the run.
    const hostState = fleet.hosts.get(suite.providerId);
    const recentTraffic = checkRecentTraffic(hostState);

    // P3.4: Gate through acquireHostAccess seam.
    // NOTE(review): nothing here releases the grant after the run; confirm
    // whether acquireHostAccess expects an explicit release.
    const grant = await acquireHostAccess(suite.providerId, 'bench');
    if (!grant.ok) {
      return reply.status(409).send({
        error: 'host access denied',
        reason: grant.reason,
      });
    }

    // Sequence number of the host at launch time (0 when the host is unknown).
    const seq = hostState?.seq ?? 0;

    // Run the bench suite in the background (non-blocking HTTP response).
    // The promise is deliberately not awaited, so a rejection handler is
    // attached to keep a failure from surfacing as an unhandled rejection.
    void runBenchAsync(
      { suite, baseUrl, temperature, topP },
      sql,
      emitter,
      seq,
      suite.providerId,
    ).catch((err: unknown) => {
      app.log.error({ err }, 'bench: background run rejected');
    });

    return reply.status(202).send({
      status: 'queued',
      suiteId: suite.id,
      recentTraffic,
    });
  });

  // ─── runs listing ────────────────────────────────────────────────────────

  app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const { suiteId } = req.query as Record<string, string | undefined>;

    // Filtered listing returns every run of the suite; the unfiltered listing
    // is capped at the 100 most recent runs.
    const runs = suiteId
      ? await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs WHERE suite_id = ${suiteId}
          ORDER BY created_at DESC
        `
      : await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          ORDER BY created_at DESC
          LIMIT 100
        `;

    return reply.send({ runs: runs.map(runFromRow) });
  });

  app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };

    const [runRow] = await sql<BenchRunRow[]>`
      SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
      FROM bench_runs WHERE id = ${id}
    `;

    if (!runRow) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const samples = await sql<{
      id: number;
      prompt_tokens: number;
      gen_tokens: number;
      concurrency: number;
      repetition: number;
      ttft_ms: number | null;
      total_ms: number | null;
      prompt_tps: number | null;
      gen_tps: number | null;
      cache_n: number | null;
      error: string | null;
    }[]>`
      SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
      FROM bench_samples WHERE run_id = ${id}
      ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
    `;

    return reply.send({
      run: runFromRow(runRow),
      samples: samples.map((s) => ({
        id: s.id,
        promptTokens: s.prompt_tokens,
        genTokens: s.gen_tokens,
        concurrency: s.concurrency,
        repetition: s.repetition,
        ttftMs: s.ttft_ms,
        totalMs: s.total_ms,
        promptTps: s.prompt_tps,
        genTps: s.gen_tps,
        cacheN: s.cache_n,
        error: s.error,
      })),
    });
  });

  // ─── baselines ───────────────────────────────────────────────────────────

  app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      provider_id: string;
      model: string;
      run_id: string;
      aggregate: string;
      created_at: string;
    }[]>`
      SELECT provider_id, model, run_id, aggregate, created_at
      FROM bench_baselines
      ORDER BY provider_id, model
    `;

    return reply.send({
      baselines: rows.map((r) => ({
        providerId: r.provider_id,
        model: r.model,
        runId: r.run_id,
        aggregate: jsonbObject(r.aggregate),
        createdAt: r.created_at,
      })),
    });
  });
}
|
||||
|
||||
/**
 * P3.3: Summarise in-flight traffic on the target host (for takeover confirmation).
 *
 * Sums the `inflight` counter of every model on the host. An unknown host
 * (`undefined`) is reported as having no traffic.
 *
 * @param hostState Host entry holding a map of per-model inflight counters, or `undefined`.
 * @returns The summed inflight count and whether it is greater than zero.
 */
function checkRecentTraffic(hostState: { models: Map<string, { inflight: number }> } | undefined): { hasRecentTraffic: boolean; inflightCount: number } {
  const inflightCount = hostState
    ? Array.from(hostState.models.values()).reduce((sum, model) => sum + model.inflight, 0)
    : 0;

  return { hasRecentTraffic: inflightCount > 0, inflightCount };
}
|
||||
|
||||
/**
 * Look up the base URL for a provider by delegating to `resolveProviderBaseUrl`.
 * The URL is the provider's own `baseUrl` (LlamaProvider.baseUrl), never its ssh_host.
 *
 * @param providerId Provider identifier to resolve.
 * @returns The provider's base URL, or `null` when the lookup yields none.
 */
function resolveBaseUrl(providerId: string): string | null {
  const registeredUrl = resolveProviderBaseUrl(providerId);
  return registeredUrl;
}
|
||||
|
||||
/** Best-effort human-readable message for an arbitrary thrown value (never throws itself). */
function describeBenchError(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null && 'message' in err && typeof err.message === 'string') {
    return err.message;
  }
  return String(err);
}

/**
 * Async bench runner: fire-and-forget, records concurrent_foreign_requests.
 * A6: sources from activity stream during [started_at, finished_at] window,
 * minus the bench's own samples count.
 *
 * Callers do not await this function, so database failures in the lookup and
 * in the failure-status update are logged here instead of being rethrown.
 *
 * @param params     Suite, resolved base URL and sampling options, forwarded to `runBenchSuite`.
 * @param sql        Tagged-template SQL client.
 * @param emitter    Delta emitter; receives a `control_job` event with status `failed` on error.
 * @param seq        Sequence number forwarded to `runBenchSuite` and used in the failure event.
 * @param providerId Provider whose `control_requests` rows are counted as foreign traffic.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;

  // Find the latest running run for this suite.
  // NOTE(review): this lookup happens BEFORE runBenchSuite is invoked, and no
  // code visible in this file inserts a 'running' bench_runs row. Confirm who
  // creates that row; if runBenchSuite does, this returns early every time.
  let runId: string;
  try {
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;

    const latest = latestRun[0];
    if (!latest) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    runId = latest.id;
  } catch (err: unknown) {
    // No run id is known yet, so there is no row to mark as failed.
    benchLogger?.error?.({ err: describeBenchError(err) }, 'bench: failed to look up running run');
    return;
  }

  // Progress is published via emitter in runBenchSuite; this handler is a no-op.
  const progressHandler = (_progress: BenchRunProgress) => {};

  try {
    await runBenchSuite(params, sql, emitter, seq, progressHandler);

    // A6: Record concurrent_foreign_requests from activity stream during run window.
    // Count control_requests for this provider in [started_at, finished_at],
    // minus the bench's own sample count.
    const runData = await sql<{ started_at: string | null; finished_at: string | null; completed_samples: number }[]>`
      SELECT started_at, finished_at, completed_samples FROM bench_runs WHERE id = ${runId}
    `;
    const rd = runData[0];

    // Skip the accounting when the row is gone or the window is incomplete.
    if (rd?.started_at && rd.finished_at) {
      const foreignCount = await sql<{ count: number }[]>`
        SELECT COUNT(*)::INT AS count FROM control_requests
        WHERE provider_id = ${providerId}
          AND ts >= ${rd.started_at}::timestamptz
          AND ts <= ${rd.finished_at}::timestamptz
      `;
      const totalForeign = (foreignCount[0]?.count ?? 0) - rd.completed_samples;
      // Clamp at zero: the subtraction can go negative.
      await sql`
        UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
        WHERE id = ${runId}
      `;
    }
  } catch (err: unknown) {
    const msg = describeBenchError(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');

    try {
      await sql`
        UPDATE bench_runs
        SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
        WHERE id = ${runId}
      `;
    } catch (updateErr: unknown) {
      // Log and continue so the failure event below is still published.
      benchLogger?.error?.(
        { err: describeBenchError(updateErr), runId },
        'bench: failed to record run failure',
      );
    }

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'bench' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  }
}
|
||||
|
||||
/**
 * Logger used by the async bench runner (`runBenchAsync`), which runs outside
 * a request handler. Stays `undefined` until `setBenchApp` is called; the
 * runner logs through optional chaining, so logging is skipped until then.
 */
let benchLogger: FastifyBaseLogger | undefined;

/**
 * Set the Fastify logger for the async bench runner.
 *
 * @param logger Logger stored in the module-level `benchLogger`; a later call replaces it.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
  benchLogger = logger;
}
|
||||
Reference in New Issue
Block a user