chore: snapshot working tree - pty_exited notifications + in-flight inference WIP

feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean).

wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes.

openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
2026-06-14 12:48:47 +00:00
parent 0ed506f1da
commit b18de2a331
204 changed files with 25344 additions and 867 deletions

View File

@@ -0,0 +1,29 @@
import { z } from 'zod';
/**
 * Environment schema for the control service.
 *
 * Every value arrives as a string from `process.env`; numeric settings are
 * coerced with `z.coerce.number()` and then range-checked so that an obviously
 * invalid value (negative retention window, fractional or out-of-range port)
 * is rejected at startup instead of surfacing later as a runtime failure.
 */
const schema = z.object({
  NODE_ENV: z.enum(['development', 'production']).default('production'),
  // Must be an integer in the TCP port range; 0 stays valid (OS-assigned port).
  PORT: z.coerce.number().int().min(0).max(65535).default(9503),
  HOST: z.string().default('100.114.205.53'),
  DATABASE_URL: z.string(),
  LOG_LEVEL: z.enum(['fatal', 'error', 'warn', 'info', 'debug', 'trace']).default('info'),
  // Retention windows and capture budgets can never be meaningfully negative.
  RETENTION_RAW_HOURS: z.coerce.number().nonnegative().default(48),
  RETENTION_ROLLUP_DAYS: z.coerce.number().nonnegative().default(90),
  CAPTURE_SIZE_KB: z.coerce.number().nonnegative().default(256),
  CAPTURE_BUDGET_MB: z.coerce.number().nonnegative().default(50),
  LLAMA_PROVIDERS_PATH: z.string().optional(),
  LLAMA_SWAP_URL: z.string().default('http://localhost:8080'),
  // P9.1: path to the llama-swap config-schema.json (fork). Defaults to the
  // copy bundled under dist/data; override to point at the live fork schema.
  LLAMA_CONFIG_SCHEMA_PATH: z.string().optional(),
});
/** Fully parsed and defaulted configuration object produced by {@link loadConfig}. */
export type Config = z.infer<typeof schema>;
/**
 * Validate `process.env` against the schema and return the parsed config.
 *
 * On validation failure the error message is printed to stderr and the
 * process exits with status 1; this function never returns an invalid config.
 */
export function loadConfig(): Config {
  const parsed = schema.safeParse(process.env);
  if (parsed.success) {
    return parsed.data;
  }
  console.error('Invalid env:', parsed.error.message);
  process.exit(1);
}

67
apps/control/src/db.ts Normal file
View File

@@ -0,0 +1,67 @@
import postgres from 'postgres';
import { readFile } from 'node:fs/promises';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';
import type { Config } from './config.js';
// Recreate the CommonJS-style __filename/__dirname from import.meta.url so
// applySchema() can resolve schema.sql relative to this module.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/** Client type produced by calling the `postgres(...)` factory. */
export type Sql = ReturnType<typeof postgres>;
// Module-level cached client: assigned by getSql(), reset to null by closeDb().
let sqlInstance: Sql | null = null;
/**
 * Return the shared database client, creating it on first use.
 *
 * The client is cached at module level, so `config` is only consulted by the
 * call that actually creates the client; later calls return the cached one.
 */
export function getSql(config: Config): Sql {
  if (sqlInstance === null) {
    const poolOptions = {
      max: 10,
      idle_timeout: 30,
      connect_timeout: 10,
      // Discard server NOTICE messages instead of printing them.
      onnotice: () => {},
    };
    sqlInstance = postgres(config.DATABASE_URL, poolOptions);
  }
  return sqlInstance;
}
/**
 * Block until `tableName` shows up in the `public` schema.
 *
 * Polls information_schema.tables, pausing between polls with a delay that
 * starts at 100 ms, doubles for every full second already spent waiting, and
 * is capped at 2 s.
 *
 * @param sql       Database client used for the lookup query.
 * @param tableName Name of the table to wait for.
 * @param timeoutMs Give up once this many milliseconds have elapsed.
 * @throws Error when the table is still missing after `timeoutMs`, so that
 *         systemd Restart=on-failure retries the whole service.
 */
export async function waitForTable(sql: Sql, tableName: string, timeoutMs: number): Promise<void> {
  const INITIAL_DELAY_MS = 100;
  const MAX_DELAY_MS = 2000;
  const startedAt = Date.now();
  for (;;) {
    const found = await sql<{ table_name: string }[]>`
      SELECT table_name FROM information_schema.tables
      WHERE table_schema = 'public' AND table_name = ${tableName}
    `;
    if (found.length > 0) {
      return;
    }
    if (Date.now() - startedAt >= timeoutMs) {
      throw new Error(`timeout waiting for table '${tableName}' after ${timeoutMs}ms`);
    }
    // One doubling per whole second elapsed since the first poll.
    const doublings = Math.floor((Date.now() - startedAt) / 1000);
    const pauseMs = Math.min(MAX_DELAY_MS, INITIAL_DELAY_MS * 2 ** doublings);
    await new Promise((wake) => setTimeout(wake, pauseMs));
  }
}
/**
 * Read `schema.sql` from this module's directory and execute it verbatim
 * through `sql.unsafe`.
 */
export async function applySchema(sql: Sql): Promise<void> {
  const ddl = await readFile(resolve(__dirname, 'schema.sql'), 'utf8');
  await sql.unsafe(ddl);
}
/**
 * Liveness probe for the database: runs `SELECT 1`.
 *
 * @returns `true` when the query succeeds, `false` on any error (never throws).
 */
export async function pingDb(sql: Sql): Promise<boolean> {
  let reachable = false;
  try {
    await sql`SELECT 1`;
    reachable = true;
  } catch {
    // Any failure means "not reachable"; the flag stays false.
  }
  return reachable;
}
/**
 * Close the cached database client, if one exists, allowing up to 5 seconds
 * for it to finish, then clear the cache so the next getSql() call creates a
 * fresh client. Does nothing when no client has been created.
 */
export async function closeDb(): Promise<void> {
  if (!sqlInstance) {
    return;
  }
  await sqlInstance.end({ timeout: 5 });
  sqlInstance = null;
}

624
apps/control/src/index.ts Normal file
View File

@@ -0,0 +1,624 @@
import Fastify from 'fastify';
import fastifyWebsocket from '@fastify/websocket';
import { loadConfig } from './config.js';
import { getSql, applySchema, pingDb, waitForTable } from './db.js';
import type { FleetState, HostState } from './services/fleet-state.js';
import { createFleetState, ensureHostState, stampLastSeen, incrementSeq } from './services/fleet-state.js';
import { registerControlWebSocket } from './routes/ws.js';
import type { LlamaSweepSSEEvent, MetricsEntry } from './services/fleet-connector.js';
import { startFleetConnector } from './services/fleet-connector.js';
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents, trimCapture, parseCaptureJson } from './services/retention.js';
import { detectGap } from './services/reconcile.js';
import { jsonbObject } from './services/jsonb.js';
import { ActionQueue } from './services/action-queue.js';
import { LogRelay } from './services/log-relay.js';
import { registerActionRoutes } from './routes/actions.js';
import { registerCaptureRoutes } from './routes/captures.js';
import { registerBenchRoutes, setBenchApp } from './routes/bench.js';
import { registerPlaygroundRoutes } from './routes/playground.js';
import { registerEvalRoutes } from './routes/evals.js';
import { registerRoutingRoutes } from './routes/routing.js';
import { registerReportRoutes, startReportScheduler } from './routes/reports.js';
import { registerGatewayRoutes } from './routes/gateway.js';
import { registerPolicyRoutes } from './routes/policies.js';
import { registerSshConfigRoutes } from './routes/ssh-config.js';
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from './services/llama-providers.js';
// ─── delta emitter (B3 fix) ─────────────────────────────────────────────────
/** Listener invoked with every published delta. */
export type DeltaCallback = (delta: unknown) => void;
/** Minimal in-process pub/sub used to fan deltas out to subscribers. */
export type DeltaEmitter = {
  /** Register a listener; the returned function removes it again. */
  subscribe(cb: DeltaCallback): () => void;
  /** Deliver `delta` to every currently registered listener. */
  publish(delta: unknown): void;
};
/**
 * Create an independent emitter backed by a Set of listeners.
 *
 * A listener that throws is ignored so it cannot prevent delivery to the
 * remaining listeners or propagate into the publisher.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const subscribers = new Set<DeltaCallback>();

  const subscribe = (cb: DeltaCallback): (() => void) => {
    subscribers.add(cb);
    return () => {
      subscribers.delete(cb);
    };
  };

  const publish = (delta: unknown): void => {
    subscribers.forEach((notify) => {
      try {
        notify(delta);
      } catch {
        /* ignore emitter errors */
      }
    });
  };

  return { subscribe, publish };
}
// ─── metrics entry field-name mapper ─────────────────────────────────────────
// Real /api/metrics shape has nested tokens and different field names:
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
// Map to the column names used in control_requests.
/** A metrics entry flattened to the column names used by control_requests. */
interface MappedMetricsEntry {
  id: number;
  ts: string;
  model: string;
  req_path: string;
  status_code: number;
  duration_ms: number;
  cache_tokens: number;
  input_tokens: number;
  output_tokens: number;
  prompt_tps: number;
  gen_tps: number;
  has_capture: boolean;
  /** P4: NULL for ring data — ActivityLogEntry does not carry request headers. */
  source: string | null;
}
/**
 * Flatten one /api/metrics entry into control_requests column names:
 * `timestamp` becomes `ts`, `resp_status_code` becomes `status_code`, and the
 * nested `tokens.*` counters are lifted to top-level fields.
 */
function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;
  const mapped: MappedMetricsEntry = {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    // P4: always NULL here — ActivityLogEntry does not carry request headers.
    source: null,
  };
  return mapped;
}
// ─── SSE event handlers (B5 fix: await onEvent; B2 fix: incrementSeq) ───────
/**
 * Apply one SSE event from a provider to the in-memory fleet state, persist
 * what needs persisting, and publish the resulting delta to subscribers.
 *
 * @param fleet      In-memory fleet state; the provider's host entry is created on demand.
 * @param sql        Database client.
 * @param config     Service config (CAPTURE_SIZE_KB bounds stored captures).
 * @param providerId Provider the event came from.
 * @param emitter    Delta emitter feeding WS subscribers.
 * @param event      The decoded SSE event.
 * @param logRelay   Optional relay that buffers log text; skipped when null.
 */
export async function handleLlamaSweepEvent(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  event: LlamaSweepSSEEvent,
  logRelay: LogRelay | null = null,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);
  switch (event.type) {
    case 'modelStatus': {
      // Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
      // Derive transitions by diffing against current state; persist only changes.
      state.liveness = 'connected';
      const changed: Array<{ model: string; state: string }> = [];
      for (const m of event.data) {
        const prev = state.models.get(m.id);
        if (!prev || prev.state !== m.state) {
          changed.push({ model: m.id, state: m.state });
        }
        state.models.set(m.id, {
          model: m.id,
          state: m.state,
          ts: new Date(),
          ttlDeadline: prev?.ttlDeadline ?? null,
          inflight: prev?.inflight ?? 0,
        });
      }
      // Nothing transitioned: no rows to write and no delta to publish.
      if (changed.length === 0) break;
      const seq = incrementSeq(state);
      for (const c of changed) {
        await sql`
          INSERT INTO control_model_events (provider_id, model, state, ts, detail)
          VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
          ON CONFLICT (provider_id, model, state, ts) DO NOTHING
        `;
      }
      // Publish delta to WS subscribers (B3 fix).
      emitter.publish({
        type: 'control_fleet' as const,
        seq,
        hosts: [{
          providerId: state.providerId,
          liveness: state.liveness,
          lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
          seq: state.seq,
          models: Array.from(state.models.values()).map((m) => ({
            model: m.model,
            state: m.state,
            ts: m.ts.toISOString(),
            ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
            inflight: m.inflight,
          })),
        }],
      });
      break;
    }
    case 'logData': {
      // Logs are relay-only; no persistence by default.
      const source = event.data.source as 'proxy' | 'upstream' | 'model';
      // Real payload field is 'data' (fork sendLogData), may contain multiple lines.
      const text = event.data.data;
      if (logRelay) {
        logRelay.append(providerId, source, text);
      }
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_log' as const,
        seq,
        providerId,
        source,
        line: text,
      });
      break;
    }
    case 'metrics': {
      // Real payload: BARE array of ActivityLogEntry (fork sendMetrics).
      const entries = event.data;
      incrementSeq(state);
      // B5 fix: await the reconcile pass. It runs gap detection and inserts
      // every entry WITHOUT its capture payload.
      await handleReconcile(fleet, sql, config, providerId, emitter, event.data).catch((err: unknown) => {
        // A1: log the error instead of swallowing silently.
        const msg = err instanceof Error ? err.message : String(err);
        console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
      });
      // Persist captures and publish activity deltas.
      for (const entry of entries) {
        const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
        const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
        // Map real field names: resp_status_code -> status_code, tokens.* nested, timestamp -> ts.
        const mapped = mapMetricsEntry(entry);
        // The reconcile pass above has normally already inserted this row with
        // no capture, so a plain DO NOTHING here would discard every capture.
        // Back-fill the capture column instead, but only when the stored row
        // has none and this entry actually carries one.
        await sql`
          INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
          VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
          ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
            SET capture = EXCLUDED.capture
            WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
        `;
        emitter.publish({
          type: 'control_activity' as const,
          seq: state.seq,
          providerId,
          entry: {
            id: mapped.id,
            ts: mapped.ts,
            model: mapped.model,
            reqPath: mapped.req_path,
            statusCode: mapped.status_code,
            durationMs: mapped.duration_ms,
          },
        });
      }
      break;
    }
    case 'inflight': {
      // Real payload: {total} -- host-level total (fork sendInFlight); the fork
      // does not publish per-model inflight over SSE.
      state.inflightTotal = event.data.total;
      break;
    }
  }
}
// ─── reconcile handler (B7 fix: called from metrics event) ───────────────────
/**
 * Reconcile a batch of metrics entries against what is already persisted.
 *
 * Marks the host as seen and connected, records a `gap_suspected` marker
 * event when `detectGap` flags the last entry of the batch against the newest
 * persisted request, then inserts every entry into control_requests
 * (duplicates are dropped by the ON CONFLICT clause).
 *
 * @returns Always `true` once the batch has been processed.
 */
async function handleReconcile(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  metrics: MetricsEntry[],
): Promise<boolean> {
  const host = ensureHostState(fleet, providerId);
  stampLastSeen(host);
  host.liveness = 'connected';

  const batch = metrics ?? [];
  // Detect gap: if oldest reconcile entry is newer than newest persisted entry
  // for that provider, the ring wrapped past our tail. The last element of the
  // batch is taken as the oldest entry.
  const oldestReconcileTs = batch.length > 0
    ? batch[batch.length - 1]!.timestamp
    : null;
  const newestPersisted = oldestReconcileTs
    ? await sql<{ ts: string }[]>`
        SELECT ts FROM control_requests
        WHERE provider_id = ${providerId}
        ORDER BY ts DESC LIMIT 1
      `
    : [];
  const newestRow = newestPersisted[0];
  if (oldestReconcileTs && newestRow && detectGap(oldestReconcileTs, newestRow.ts)) {
    await sql`
      INSERT INTO control_model_events (provider_id, model, state, ts, detail)
      VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json({
        oldestReconcile: oldestReconcileTs,
        newestPersisted: newestRow.ts,
      } as never)})
      ON CONFLICT (provider_id, model, state, ts) DO NOTHING
    `;
  }

  // Ingest reconcile entries (dedup via UNIQUE constraint).
  for (const item of batch) {
    const row = mapMetricsEntry(item);
    await sql`
      INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
      VALUES (${providerId}, ${row.id}, ${row.ts}, ${row.model}, ${row.req_path}, ${row.status_code}, ${row.duration_ms}, ${row.cache_tokens}, ${row.input_tokens}, ${row.output_tokens}, ${row.prompt_tps}, ${row.gen_tps}, ${row.has_capture}, ${row.source})
      ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
    `;
  }
  return true;
}
// ─── perf poller (A7 fix: add timeout; A8 fix: log errors) ───────────────────
/**
 * Fetch new performance samples from one provider and persist/publish them.
 *
 * The resume point is MAX(ts) of the provider's stored samples, passed to
 * llama-swap as `?after=<ISO timestamp>`. GPU and system stats are paired by
 * timestamp, inserted into control_perf_samples, and each sample is published
 * as a `control_perf` delta.
 *
 * Never rejects: database, network and HTTP failures are logged and the poll
 * is abandoned until the next tick. This matters because the caller is a
 * `setInterval` callback with nothing to catch a rejection.
 */
async function pollPerformance(
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  baseUrl: string,
  fleet: FleetState,
  emitter: DeltaEmitter,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  try {
    // Recover watermark from MAX(ts) per provider. Kept inside the try block so
    // a transient database error is logged rather than escaping the poll.
    const watermark = await sql<{ ts: string | null }[]>`
      SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
    `;
    // porsager returns timestamptz as a Date object; interpolating it raw yields
    // Date.toString() ("Thu Jun 12 2026 ...") which llama-swap rejects with 400.
    const afterParam = watermark[0]?.ts
      ? `?after=${encodeURIComponent(new Date(watermark[0].ts).toISOString())}`
      : '';
    const url = `${baseUrl}/api/performance${afterParam}`;
    // A7 fix: bound the request with a 10 s timeout.
    const res = await fetch(url, { signal: AbortSignal.timeout(10_000) });
    if (!res.ok) {
      console.warn({ providerId, status: res.status }, 'fleet: perf poll returned non-OK status');
      return;
    }
    // Real shape: { gpu_stats: GpuStat[], sys_stats: SysStat[] }
    const data = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
    if (!data) return;
    // Index samples by their timestamp; samples without one are dropped.
    const indexByTimestamp = (samples: unknown[] | undefined): Map<string, unknown> => {
      const byTs = new Map<string, unknown>();
      for (const sample of samples ?? []) {
        const stamp = (sample as { timestamp?: string }).timestamp;
        if (stamp) {
          byTs.set(stamp, sample);
        }
      }
      return byTs;
    };
    // Pair gpu_stats and sys_stats by timestamp.
    const gpuMap = indexByTimestamp(data.gpu_stats);
    const sysMap = indexByTimestamp(data.sys_stats);
    // Collect all unique timestamps.
    const allTimestamps = new Set([...gpuMap.keys(), ...sysMap.keys()]);
    if (allTimestamps.size === 0) return;
    stampLastSeen(state);
    for (const ts of allTimestamps) {
      const gpu = gpuMap.get(ts) ?? null;
      const sys = sysMap.get(ts) ?? null;
      await sql`
        INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
        VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
        ON CONFLICT (provider_id, ts) DO NOTHING
      `;
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_perf' as const,
        seq,
        providerId,
        ts,
        gpu,
        sys,
      });
    }
  } catch (err: unknown) {
    // A8 fix: log the error instead of swallowing silently.
    const msg = err instanceof Error ? err.message : String(err);
    console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
  }
}
// ─── fleet-state rebuild from DB (A1/F2 fix) ─────────────────────────────────
/**
 * Seed the in-memory fleet state from persisted history at startup.
 *
 * Every provider that has model events, requests or perf samples gets a host
 * entry. Hosts seen through model events are marked `down` here. The last
 * persisted state of each model is restored; `inflight` always restarts at 0.
 *
 * Rows whose model is `'*'` are skipped when restoring models: they are
 * `gap_suspected` markers written by the reconcile path, not real models, and
 * the live modelStatus handler never puts a `'*'` entry into `state.models`.
 */
async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
  // Query control_model_events for latest model state per provider.
  // B3: ORDER BY ASC so iteration processes oldest first; Map.set() overwrites
  // with the latest state for each model, so the newest event wins.
  const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
    SELECT provider_id, model, state, ts, detail
    FROM control_model_events
    WHERE ts IN (
      SELECT MAX(ts) FROM control_model_events
      GROUP BY provider_id, model, state
    )
    ORDER BY ts ASC
  `;
  for (const row of modelEvents) {
    const state = ensureHostState(fleet, row.provider_id);
    state.liveness = 'down';
    stampLastSeen(state);
    // Marker rows (model '*') keep the host registered but are not models.
    if (row.model === '*') continue;
    // row.detail is jsonb (porsager returns it parsed); jsonbObject tolerates
    // both a parsed object and a JSON string.
    const detail: unknown = jsonbObject(row.detail);
    // B4: when the event detail carries a ttl (seconds), derive the deadline
    // from the event timestamp so it reflects when the model was actually
    // loaded, not when we rebuild. Without a ttl the deadline stays null.
    const ttl = (detail as { ttl?: number })?.ttl;
    const eventTs = new Date(row.ts).getTime();
    const ttlDeadline = ttl ? new Date(eventTs + ttl * 1000) : null;
    state.models.set(row.model, {
      model: row.model,
      state: row.state,
      ts: new Date(row.ts),
      ttlDeadline,
      inflight: 0,
    });
  }
  // Query control_requests for last activity; only the provider id is used,
  // to make sure the host exists and is stamped as seen.
  const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_requests
    WHERE ts IN (
      SELECT MAX(ts) FROM control_requests GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;
  for (const row of lastRequests) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }
  // Query control_perf_samples for latest perf sample (same purpose as above).
  const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_perf_samples
    WHERE ts IN (
      SELECT MAX(ts) FROM control_perf_samples GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;
  for (const row of lastPerf) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }
}
// ─── main ───────────────────────────────────────────────────────────────────
/**
 * Service entry point: load config, prepare the database, register all routes,
 * rebuild fleet state, start one fleet connector per enabled provider, start
 * the perf-poll / retention / report timers, install signal handlers, and
 * begin listening.
 *
 * Background timers never let an error escape: a rejection inside a
 * `setInterval` callback has no handler and would surface as an unhandled
 * rejection, so each unit of timer work is wrapped and logged.
 */
async function main() {
  const config = loadConfig();
  const app = Fastify({ logger: { level: config.LOG_LEVEL } });
  const errText = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  // Replace the default JSON parser so an empty body parses as {} instead of
  // being rejected.
  app.removeContentTypeParser(['application/json']);
  app.addContentTypeParser('application/json', { parseAs: 'string' }, (_req: unknown, body: unknown, done: (err: Error | null, body: unknown) => void) => {
    const str = (body as string) ?? '';
    if (str.trim().length === 0) {
      done(null, {});
      return;
    }
    try {
      done(null, JSON.parse(str));
    } catch (err) {
      done(err as Error, undefined);
    }
  });
  const sql = getSql(config);
  // Startup ordering guard: wait for server-owned tables before applying schema.
  await waitForTable(sql, 'sessions', 30_000);
  await applySchema(sql);
  app.log.info('database schema applied');
  // Register WebSocket endpoint.
  const fleet = createFleetState();
  const emitter = createDeltaEmitter();
  // P2: Action queue + log relay
  const actionQueue = new ActionQueue();
  const logRelay = new LogRelay();
  registerControlWebSocket(app, fleet, emitter, logRelay);
  registerActionRoutes(app, actionQueue, fleet, emitter);
  registerCaptureRoutes(app, sql);
  setBenchApp(app.log);
  registerBenchRoutes(app, sql, fleet, emitter);
  registerPlaygroundRoutes(app);
  registerEvalRoutes(app, sql, fleet, emitter);
  registerRoutingRoutes(app, sql, fleet);
  registerReportRoutes(app, sql);
  registerGatewayRoutes(app, sql, fleet, emitter);
  registerPolicyRoutes(app, sql);
  registerSshConfigRoutes(app, sql, config, fleet, emitter);
  // Health endpoint: 200 when the database answers, 503 otherwise.
  app.get('/api/health', async (_req: unknown, reply: import('fastify').FastifyReply) => {
    const dbOk = await pingDb(sql);
    const status = dbOk ? 200 : 503;
    return reply.status(status).send({
      ok: dbOk,
      db: dbOk,
    });
  });
  // Rebuild fleet state from DB on startup (A1/F2 fix). Best effort: a failure
  // is logged and startup continues with an empty fleet.
  await rebuildFleetFromDB(fleet, sql).catch((err) => {
    app.log.warn({ err: (err as Error).message }, 'fleet: rebuild from DB failed');
  });
  // Load the provider registry — baseUrl comes from the registry, never from ssh_host.
  const registry = loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
  app.log.info({ count: registry.providers.length }, 'fleet: provider registry loaded');
  // P7.2: the auto:* gateway is itself a registry entry (kind boocontrol-gateway)
  // so BooChat adopts it as a provider. BooControl must NOT treat it as a fleet
  // host — it has no llama-swap SSE/perf surface and its baseUrl points back at
  // this service. Filter it out of every fleet operation.
  const fleetProviders = registry.providers.filter((p) => p.kind !== 'boocontrol-gateway');
  // JOIN registry providers with control_hosts for the enabled flag.
  // Insert a control_hosts row ON CONFLICT DO NOTHING for any registry provider
  // missing one, so the fleet state has a row to key off.
  const enabledHosts = await sql<{ provider_id: string; enabled: boolean }[]>`
    SELECT provider_id, enabled FROM control_hosts
    WHERE provider_id = ANY(${fleetProviders.map((p) => p.id)}::text[])
  `;
  const enabledMap = new Map<string, boolean>();
  for (const row of enabledHosts) {
    enabledMap.set(row.provider_id, row.enabled);
  }
  // Seed missing control_hosts rows so the registry is the source of truth.
  for (const provider of fleetProviders) {
    if (!enabledMap.has(provider.id)) {
      await sql`
        INSERT INTO control_hosts (provider_id, enabled)
        VALUES (${provider.id}, true)
        ON CONFLICT (provider_id) DO NOTHING
      `;
      enabledMap.set(provider.id, true);
    }
  }
  const abortControllers = new Map<string, AbortController>();
  for (const provider of fleetProviders) {
    const enabled = enabledMap.get(provider.id) ?? true;
    if (!enabled) continue;
    const baseUrl = provider.baseUrl;
    // P2: Register host with action queue
    actionQueue.registerHost(provider.id, {
      baseUrl,
      isLivenessUp: () => {
        const hs = fleet.hosts.get(provider.id);
        return hs?.liveness !== 'down';
      },
      isInflightRequests: () => {
        // Host-level total from the SSE inflight event (per-model is not published).
        return fleet.hosts.get(provider.id)?.inflightTotal ?? 0;
      },
      log: app.log,
    });
    const abort = startFleetConnector(provider.id, baseUrl, {
      isUp: () => true,
      sql,
      log: app.log,
      onEvent: (pid, event) => handleLlamaSweepEvent(fleet, sql, config, pid, emitter, event, logRelay),
      onReconcile: (pid, metrics) => handleReconcile(fleet, sql, config, pid, emitter, metrics),
      onReconnectGiveUp: async (pid) => {
        const state = ensureHostState(fleet, pid);
        state.liveness = 'down';
      },
      sleep: (ms) => new Promise((r) => setTimeout(r, ms)),
    });
    abortControllers.set(provider.id, abort);
  }
  // Perf poller: 5s interval per enabled provider — baseUrl from registry.
  // A sweep visits providers sequentially and each request may take up to its
  // own timeout, so a tick is skipped while the previous sweep is still running.
  let perfSweepRunning = false;
  const pollTimer = setInterval(async () => {
    if (perfSweepRunning) return;
    perfSweepRunning = true;
    try {
      for (const provider of fleetProviders) {
        const enabled = enabledMap.get(provider.id) ?? true;
        if (!enabled) continue;
        try {
          await pollPerformance(sql, config, provider.id, provider.baseUrl, fleet, emitter);
        } catch (err) {
          app.log.warn({ providerId: provider.id, err: errText(err) }, 'fleet: perf poll failed');
        }
      }
    } finally {
      perfSweepRunning = false;
    }
  }, 5_000);
  // Retention job: daily timer — iterate registry providers. A failure for one
  // provider is logged and does not stop the remaining providers.
  const retentionConfig = buildRetentionConfig(config);
  const retentionTimer = setInterval(async () => {
    for (const provider of fleetProviders) {
      const enabled = enabledMap.get(provider.id) ?? true;
      if (!enabled) continue;
      try {
        await runRollup(sql, provider.id, retentionConfig.rawHours);
        // A2 fix: chunk pruneRawSamples (already chunked), also chunk pruneActivity and pruneModelEvents.
        await pruneRawSamples(sql, provider.id, retentionConfig.rawHours);
        await pruneActivity(sql, retentionConfig.rawHours);
        await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
      } catch (err) {
        app.log.error({ providerId: provider.id, err: errText(err) }, 'retention: run failed');
      }
    }
  }, 24 * 3600_000); // daily
  // P6.2: Report digest scheduler (catch-up on boot, then hourly).
  const stopReportScheduler = startReportScheduler(sql, app.log);
  app.addHook('onClose', async () => {
    clearInterval(pollTimer);
    clearInterval(retentionTimer);
    stopReportScheduler();
    for (const abort of abortControllers.values()) {
      abort.abort();
    }
  });
  // Graceful shutdown. Idempotent: a second signal while shutdown is already
  // in progress is ignored instead of closing the server and pool twice.
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return;
    shuttingDown = true;
    app.log.info('shutting down');
    try {
      await app.close();
      await sql.end({ timeout: 5 });
    } catch (err) {
      app.log.error({ err: errText(err) }, 'shutdown failed');
      process.exit(1);
    }
    process.exit(0);
  };
  process.on('SIGTERM', () => { void shutdown(); });
  process.on('SIGINT', () => { void shutdown(); });
  await app.listen({ port: config.PORT, host: config.HOST });
  app.log.info(`BooControl listening on ${config.HOST}:${config.PORT}`);
}
// Re-exports consumed by the test suites, grouped by the phase that added them.
// P2
export { ActionQueue } from './services/action-queue.js';
export { LogRelay } from './services/log-relay.js';
// P3
export {
  runSingleBenchRequest,
  parseLlamaTimings,
  computeAggregates,
  computeRegressionFlag,
} from './services/bench-engine.js';
// P5
export { loadEvalSuitesFromData } from './services/eval-suites.js';
export { runCodeEval } from './services/sandbox-runner.js';

// Start the service unless the VITEST environment variable is set, so tests
// can import this module without booting it.
const startedUnderVitest = Boolean(process.env.VITEST);
if (!startedUnderVitest) {
  main().catch((fatal) => {
    console.error('fatal:', fatal);
    process.exit(1);
  });
}

View File

@@ -0,0 +1,108 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { randomUUID } from 'node:crypto';
import type { ActionQueue } from '../services/action-queue.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
/**
 * Register action submission routes.
 *
 * POST /api/action/submit — enqueue a warm or unload action
 *   - 400 when `type` is not warm/unload, `providerId` is missing or not a
 *     string, or `model` is present but not a string. A missing, null or
 *     non-object body is treated as an empty object and yields the same 400.
 *   - 409 when the host is unknown/down, the queue needs confirmation, or the
 *     queue rejects the action for another reason.
 *   - 429 when the queue reports pending work.
 *   - 202 with `{ actionId, status: 'queued' }` on success; a `control_job`
 *     delta is published first.
 * GET /api/action/queue/:providerId — get current queue state (404 for an
 *   unknown host)
 */
export function registerActionRoutes(
  app: FastifyInstance,
  actionQueue: ActionQueue,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  app.post('/api/action/submit', async (req: FastifyRequest, reply: FastifyReply) => {
    // The body is untrusted JSON: it may be null, an array or a primitive.
    // Normalise anything that is not a plain object to {} so the field checks
    // below produce a 400 instead of a TypeError (which would become a 500).
    const raw: unknown = req.body;
    const body: Record<string, unknown> =
      typeof raw === 'object' && raw !== null && !Array.isArray(raw)
        ? (raw as Record<string, unknown>)
        : {};
    const { type, providerId, model } = body;
    const confirmed = body.confirmed === true;
    if (type !== 'warm' && type !== 'unload') {
      return reply.status(400).send({ error: 'type must be warm or unload' });
    }
    if (typeof providerId !== 'string' || providerId.length === 0) {
      return reply.status(400).send({ error: 'providerId is required' });
    }
    if (model !== undefined && model !== null && typeof model !== 'string') {
      return reply.status(400).send({ error: 'model must be a string' });
    }
    // Check host liveness
    const hostState = fleet.hosts.get(providerId);
    if (!hostState || hostState.liveness === 'down') {
      return reply.status(409).send({ error: 'host offline' });
    }
    const action = {
      actionId: randomUUID(),
      type,
      providerId,
      model: typeof model === 'string' ? model : undefined,
      confirmed,
      createdAt: new Date(),
    };
    const result = actionQueue.submit(action);
    if (!result.ok) {
      if (result.requiresConfirmation) {
        return reply.status(409).send({
          error: result.error,
          requiresConfirmation: true,
        });
      }
      if (result.pending) {
        return reply.status(429).send({
          error: result.error,
          pending: result.pending,
        });
      }
      return reply.status(409).send({ error: result.error });
    }
    // Publish action queued event
    emitter.publish({
      type: 'control_job' as const,
      seq: hostState.seq,
      jobType: 'action' as const,
      jobId: action.actionId,
      status: 'queued' as const,
      detail: {
        actionType: action.type,
        providerId: action.providerId,
        model: action.model ?? null,
      },
    });
    return reply.status(202).send({
      actionId: action.actionId,
      status: 'queued',
    });
  });
  app.get('/api/action/queue/:providerId', async (req: FastifyRequest, reply: FastifyReply) => {
    const { providerId } = req.params as { providerId: string };
    const state = actionQueue.getState(providerId);
    if (!state) {
      return reply.status(404).send({ error: 'host not found' });
    }
    return reply.send({
      providerId,
      depth: state.queue.length,
      running: state.running,
      entries: state.queue.map((e) => ({
        actionId: e.action.actionId,
        type: e.action.type,
        model: e.action.model ?? null,
        status: e.status,
        error: e.error ?? null,
        enqueuedAt: e.enqueuedAt.toISOString(),
      })),
    });
  });
}

View File

@@ -0,0 +1,492 @@
import { randomUUID } from 'node:crypto';
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { acquireHostAccess } from '../services/host-access.js';
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
import { runBenchSuite } from '../services/bench-engine.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
/**
* Register bench routes.
*
* POST /api/bench/suite — create a suite definition
* GET /api/bench/suites — list suites
* GET /api/bench/suites/:id — get suite
* POST /api/bench/run — start a bench run (gated through acquireHostAccess)
* GET /api/bench/runs — list runs
* GET /api/bench/runs/:id — get run + samples
* GET /api/bench/baselines — get baselines per (provider_id, model)
*/
export function registerBenchRoutes(
app: FastifyInstance,
sql: Sql,
fleet: FleetState,
emitter: DeltaEmitter,
): void {
// ─── suite CRUD ──────────────────────────────────────────────────────────
app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const suiteId = body.id as string;
const name = body.name as string;
const providerId = body.providerId as string;
const model = body.model as string;
const promptTokens = body.promptTokens as number[];
const genTokens = body.genTokens as number[];
const concurrency = body.concurrency as number[];
const repetitions = (body.repetitions as number) ?? 1;
const metadata = body.metadata as Record<string, unknown> | undefined;
if (!name || !providerId || !model) {
return reply.status(400).send({ error: 'name, providerId, and model are required' });
}
if (!promptTokens?.length || !genTokens?.length || !concurrency?.length) {
return reply.status(400).send({ error: 'promptTokens, genTokens, and concurrency must each have at least one value' });
}
const id = suiteId ?? randomUUID();
await sql`
INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`})
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
provider_id = EXCLUDED.provider_id,
model = EXCLUDED.model,
prompt_tokens = EXCLUDED.prompt_tokens,
gen_tokens = EXCLUDED.gen_tokens,
concurrency = EXCLUDED.concurrency,
repetitions = EXCLUDED.repetitions,
metadata = EXCLUDED.metadata
`;
return reply.status(201).send({ id });
});
app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
const suites = await sql<{
id: string;
name: string;
provider_id: string;
model: string;
prompt_tokens: string;
gen_tokens: string;
concurrency: string;
repetitions: number;
metadata: string | null;
created_at: string;
}[]>`
SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
FROM bench_suites
ORDER BY created_at DESC
`;
return reply.send({
suites: suites.map((s) => ({
id: s.id,
name: s.name,
providerId: s.provider_id,
model: s.model,
promptTokens: jsonbNumberArray(s.prompt_tokens),
genTokens: jsonbNumberArray(s.gen_tokens),
concurrency: jsonbNumberArray(s.concurrency),
repetitions: s.repetitions,
metadata: jsonbObject(s.metadata) ?? undefined,
createdAt: s.created_at,
})),
});
});
app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
const { id } = req.params as { id: string };
const rows = await sql<{
id: string;
name: string;
provider_id: string;
model: string;
prompt_tokens: string;
gen_tokens: string;
concurrency: string;
repetitions: number;
metadata: string | null;
created_at: string;
}[]>`
SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
FROM bench_suites WHERE id = ${id}
`;
if (rows.length === 0) {
return reply.status(404).send({ error: 'suite not found' });
}
const s = rows[0]!;
return reply.send({
id: s.id,
name: s.name,
providerId: s.provider_id,
model: s.model,
promptTokens: jsonbNumberArray(s.prompt_tokens),
genTokens: jsonbNumberArray(s.gen_tokens),
concurrency: jsonbNumberArray(s.concurrency),
repetitions: s.repetitions,
metadata: jsonbObject(s.metadata) ?? undefined,
createdAt: s.created_at,
});
});
// ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────
app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const suiteId = body.suiteId as string;
const temperature = (body.temperature as number) ?? 0.7;
const topP = (body.topP as number) ?? 0.9;
if (!suiteId) {
return reply.status(400).send({ error: 'suiteId is required' });
}
// Load suite.
const suiteRows = await sql<{
id: string;
name: string;
provider_id: string;
model: string;
prompt_tokens: string;
gen_tokens: string;
concurrency: string;
repetitions: number;
metadata: string | null;
}[]>`
SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
FROM bench_suites WHERE id = ${suiteId}
`;
if (suiteRows.length === 0) {
return reply.status(404).send({ error: 'suite not found' });
}
const s = suiteRows[0]!;
const suite: BenchSuite = {
id: s.id,
name: s.name,
providerId: s.provider_id,
model: s.model,
promptTokens: jsonbNumberArray(s.prompt_tokens),
genTokens: jsonbNumberArray(s.gen_tokens),
concurrency: jsonbNumberArray(s.concurrency),
repetitions: s.repetitions,
metadata: jsonbObject(s.metadata) ?? undefined,
};
// P3.3: Safety check — check recent traffic on the target host.
const hostState = fleet.hosts.get(suite.providerId);
const recentTraffic = checkRecentTraffic(hostState);
// P3.4: Gate through acquireHostAccess seam.
const grant = await acquireHostAccess(suite.providerId, 'bench');
if (!grant.ok) {
return reply.status(409).send({
error: 'host access denied',
reason: grant.reason,
});
}
// Resolve base URL from registry.
const baseUrl = resolveBaseUrl(suite.providerId);
if (!baseUrl) {
return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
}
// Get seq for the host.
const seq = hostState?.seq ?? 0;
// Run the bench suite asynchronously (non-blocking HTTP response).
void runBenchAsync(
{ suite, baseUrl, temperature, topP },
sql,
emitter,
seq,
suite.providerId,
);
return reply.status(202).send({
status: 'queued',
suiteId: suite.id,
recentTraffic,
});
});
// ─── runs listing ────────────────────────────────────────────────────────
app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
const query = req.query as Record<string, string | undefined>;
const suiteId = query.suiteId;
let runs: Array<{
id: string;
suite_id: string;
job_type: string;
status: string;
started_at: string | null;
finished_at: string | null;
total_samples: number;
completed_samples: number;
concurrent_foreign_requests: number;
regression_flag: string | null;
aggregate: string | null;
error: string | null;
created_at: string;
}>;
if (suiteId) {
runs = await sql`
SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
FROM bench_runs WHERE suite_id = ${suiteId}
ORDER BY created_at DESC
`;
} else {
runs = await sql`
SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
FROM bench_runs
ORDER BY created_at DESC
LIMIT 100
`;
}
return reply.send({
runs: runs.map((r) => ({
id: r.id,
suiteId: r.suite_id,
jobType: r.job_type,
status: r.status,
startedAt: r.started_at,
finishedAt: r.finished_at,
totalSamples: r.total_samples,
completedSamples: r.completed_samples,
concurrentForeignRequests: r.concurrent_foreign_requests,
regressionFlag: r.regression_flag,
aggregate: jsonbObject(r.aggregate),
error: r.error,
createdAt: r.created_at,
})),
});
});
app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
const { id } = req.params as { id: string };
const runRows = await sql<{
id: string;
suite_id: string;
job_type: string;
status: string;
started_at: string | null;
finished_at: string | null;
total_samples: number;
completed_samples: number;
concurrent_foreign_requests: number;
regression_flag: string | null;
aggregate: string | null;
error: string | null;
created_at: string;
}[]>`
SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
FROM bench_runs WHERE id = ${id}
`;
if (runRows.length === 0) {
return reply.status(404).send({ error: 'run not found' });
}
const r = runRows[0]!;
const samples = await sql<{
id: number;
prompt_tokens: number;
gen_tokens: number;
concurrency: number;
repetition: number;
ttft_ms: number | null;
total_ms: number | null;
prompt_tps: number | null;
gen_tps: number | null;
cache_n: number | null;
error: string | null;
}[]>`
SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
FROM bench_samples WHERE run_id = ${id}
ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
`;
return reply.send({
run: {
id: r.id,
suiteId: r.suite_id,
jobType: r.job_type,
status: r.status,
startedAt: r.started_at,
finishedAt: r.finished_at,
totalSamples: r.total_samples,
completedSamples: r.completed_samples,
concurrentForeignRequests: r.concurrent_foreign_requests,
regressionFlag: r.regression_flag,
aggregate: jsonbObject(r.aggregate),
error: r.error,
createdAt: r.created_at,
},
samples: samples.map((s) => ({
id: s.id,
promptTokens: s.prompt_tokens,
genTokens: s.gen_tokens,
concurrency: s.concurrency,
repetition: s.repetition,
ttftMs: s.ttft_ms,
totalMs: s.total_ms,
promptTps: s.prompt_tps,
genTps: s.gen_tps,
cacheN: s.cache_n,
error: s.error,
})),
});
});
// ─── baselines ───────────────────────────────────────────────────────────
app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
const rows = await sql<{
provider_id: string;
model: string;
run_id: string;
aggregate: string;
created_at: string;
}[]>`
SELECT provider_id, model, run_id, aggregate, created_at
FROM bench_baselines
ORDER BY provider_id, model
`;
return reply.send({
baselines: rows.map((r) => ({
providerId: r.provider_id,
model: r.model,
runId: r.run_id,
aggregate: jsonbObject(r.aggregate),
createdAt: r.created_at,
})),
});
});
}
/**
 * P3.3: Summarise in-flight load on the target host (used for takeover confirmation).
 *
 * Adds up the `inflight` counter of every model tracked for the host.
 *
 * @param hostState Host entry holding a per-model map, or `undefined` when the host is not tracked.
 * @returns `inflightCount` is the sum across all models; `hasRecentTraffic` is true
 *   when that sum is greater than zero. An untracked host reports no traffic.
 */
function checkRecentTraffic(hostState: { models: Map<string, { inflight: number }> } | undefined): { hasRecentTraffic: boolean; inflightCount: number } {
  const inflightCount =
    hostState === undefined || !hostState
      ? 0
      : [...hostState.models.values()].reduce((sum, entry) => sum + entry.inflight, 0);
  return { hasRecentTraffic: inflightCount > 0, inflightCount };
}
/**
 * Resolve the base URL for a provider from the loaded registry.
 * baseUrl comes from LlamaProvider.baseUrl, never from ssh_host.
 *
 * This is a thin delegate to `resolveProviderBaseUrl`; it adds no caching or
 * normalisation of its own.
 *
 * @param providerId Provider id to look up.
 * @returns The value produced by `resolveProviderBaseUrl`, typed here as
 *   `string | null`. NOTE(review): presumably `null` means the provider is not
 *   in the registry — confirm against `resolveProviderBaseUrl`.
 */
function resolveBaseUrl(providerId: string): string | null {
return resolveProviderBaseUrl(providerId);
}
/**
 * Async bench runner, started fire-and-forget by the run launcher.
 *
 * Steps:
 *  1. Look up the newest `bench_runs` row for the suite whose status is `running`.
 *  2. Execute the suite through `runBenchSuite`.
 *  3. A6: record `concurrent_foreign_requests` — the number of `control_requests`
 *     rows for the provider inside the run's [started_at, finished_at] window,
 *     minus the run's own `completed_samples`, clamped at zero.
 *
 * Because the caller discards the returned promise, database and suite failures
 * are caught and logged here rather than surfacing as unhandled rejections:
 *  - a failed lookup is logged and the function returns;
 *  - a failed suite marks the run `failed` and publishes a `control_job` event;
 *  - a failed accounting step (3) is only logged, so it cannot overwrite the
 *    outcome of a suite that already finished.
 *
 * @param params     Suite, base URL and sampling parameters, forwarded to `runBenchSuite`.
 * @param sql        postgres client used for all queries.
 * @param emitter    Receives the `failed` `control_job` event when the suite throws.
 * @param seq        Sequence number stamped on published events.
 * @param providerId Provider whose `control_requests` rows are counted in step 3.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Step 1: find the latest running run for this suite.
  // NOTE(review): this lookup happens before `runBenchSuite` is called and the
  // visible launcher does not insert a run row — confirm where the `running`
  // row is created, otherwise this returns early on every first launch.
  let runId: string;
  try {
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;
    const latest = latestRun[0];
    if (!latest) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    runId = latest.id;
  } catch (err) {
    benchLogger?.error?.({ err: describe(err) }, 'bench: run lookup failed');
    return;
  }

  const progressHandler = (_progress: BenchRunProgress) => {
    // Progress is published via emitter in runBenchSuite.
  };

  // Step 2: run the suite. Only a failure here marks the run as failed.
  try {
    await runBenchSuite(params, sql, emitter, seq, progressHandler);
  } catch (err) {
    const msg = describe(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');
    // Best effort: a failing UPDATE must not prevent the failure event below.
    await sql`
      UPDATE bench_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `.catch((dbErr: unknown) => {
      benchLogger?.error?.({ err: describe(dbErr) }, 'bench: could not record run failure');
    });
    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'bench' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
    return;
  }

  // Step 3 (A6): record concurrent_foreign_requests from the activity stream
  // during the run window, minus the bench's own sample count.
  try {
    const runData = await sql<{ started_at: string | null; finished_at: string | null; completed_samples: number }[]>`
      SELECT started_at, finished_at, completed_samples FROM bench_runs WHERE id = ${runId}
    `;
    const rd = runData[0];
    if (!rd || !rd.started_at || !rd.finished_at) {
      // Row vanished or the window is open-ended: nothing to count against.
      return;
    }
    const foreignCount = await sql<{ count: number }[]>`
      SELECT COUNT(*)::INT AS count FROM control_requests
      WHERE provider_id = ${providerId}
        AND ts >= ${rd.started_at}::timestamptz
        AND ts <= ${rd.finished_at}::timestamptz
    `;
    const totalForeign = (foreignCount[0]?.count ?? 0) - rd.completed_samples;
    await sql`
      UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
      WHERE id = ${runId}
    `;
  } catch (err) {
    benchLogger?.error?.({ err: describe(err) }, 'bench: foreign request accounting failed');
  }
}
/**
 * Module-level logger used by the async bench runner.
 * Stays `undefined` until `setBenchApp` is called; the runner reaches it through
 * optional chaining, so logging is silently skipped while it is unset.
 */
let benchLogger: FastifyBaseLogger | undefined;
/**
 * Set the Fastify logger for the async bench runner.
 *
 * @param logger Logger stored in the module-level `benchLogger`, replacing any
 *   previously stored one.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
benchLogger = logger;
}

View File

@@ -0,0 +1,52 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import { fetchCapture, persistCapture } from '../services/capture-fetch.js';
/**
 * Register capture inspection routes.
 *
 * GET /api/capture/:providerId/:swapEntryId — fetch capture from host, persist trimmed copy
 *
 * Responses:
 *  - 400 when `swapEntryId` is not a plain non-negative decimal integer;
 *  - 404 when the provider has no row / no `ssh_host` in `control_hosts`, or the
 *    capture fetch reports failure or yields no capture;
 *  - otherwise the capture as returned by `fetchCapture`. Persisting it is best
 *    effort: a persistence error is logged and the capture is still returned.
 *
 * @param app Fastify instance the route is attached to.
 * @param sql postgres client used for the host lookup and for persistence.
 */
export function registerCaptureRoutes(
  app: FastifyInstance,
  sql: Sql,
): void {
  // NOTE(review): the capture endpoint is addressed as ssh_host on a fixed port,
  // unlike the bench routes which resolve base URLs from the provider registry.
  const capturePort = 8401;

  app.get(
    '/api/capture/:providerId/:swapEntryId',
    async (req: FastifyRequest, reply: FastifyReply) => {
      const params = req.params as { providerId: string; swapEntryId: string };

      // parseInt alone would accept "12abc" as 12, so require the whole path
      // segment to be decimal digits and to fit in a safe integer.
      const swapEntryId = /^\d+$/.test(params.swapEntryId)
        ? Number(params.swapEntryId)
        : Number.NaN;
      if (!Number.isSafeInteger(swapEntryId)) {
        return reply.status(400).send({ error: 'invalid swapEntryId' });
      }

      // Resolve host URL from control_hosts
      const hosts = await sql<{ ssh_host: string }[]>`
        SELECT ssh_host FROM control_hosts WHERE provider_id = ${params.providerId}
      `;
      const sshHost = hosts[0]?.ssh_host;
      if (!sshHost) {
        return reply.status(404).send({ error: 'host not found or no SSH host configured' });
      }

      const baseUrl = `http://${sshHost}:${capturePort}`;
      const result = await fetchCapture(baseUrl, params.providerId, swapEntryId);
      if (!result.ok) {
        return reply.status(404).send({ error: result.error });
      }
      const capture = result.capture;
      if (!capture) {
        // An "ok" result without a payload has nothing to persist or return.
        return reply.status(404).send({ error: 'capture not found' });
      }

      // Persist trimmed copy
      try {
        await persistCapture(sql, capture);
      } catch (err) {
        // Persistence failure is non-fatal — still return the capture
        app.log.warn(
          { err: err instanceof Error ? err.message : String(err) },
          'capture: persist failed',
        );
      }
      return reply.send(capture);
    },
  );
}

View File

@@ -0,0 +1,366 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import type { FleetState } from '../services/fleet-state.js';
import {
listEvalSuites,
getEvalSuite,
upsertEvalSuite,
listEvalRuns,
getEvalResults,
seedEvalSuites,
} from '../services/eval-suites.js';
import { jsonbArray, jsonbObject } from '../services/jsonb.js';
/**
 * Register eval routes.
 *
 * POST /api/eval/suite — create/update an eval suite
 * GET /api/eval/suites — list suites
 * GET /api/eval/suites/:id — get suite
 * POST /api/eval/seed — seed suites from data/ YAML
 * POST /api/eval/run — start an eval run
 * GET /api/eval/runs — list runs
 * GET /api/eval/runs/:id — get run + results
 * GET /api/eval/leaderboard — per (provider_id, model) aggregate scores
 *
 * Request bodies are validated by hand: a missing body is treated as an empty
 * object so the handlers answer 400 instead of crashing on property access.
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     postgres client handed to the eval-suite service helpers.
 * @param fleet   Fleet state; only the per-host `seq` is read when launching a run.
 * @param emitter Event emitter handed to the async eval runner.
 */
export function registerEvalRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  // Seed suites from data/ YAML on startup (idempotent).
  // A seed failure is logged and must not block the server from becoming ready.
  app.addHook('onReady', async () => {
    try {
      await seedEvalSuites(sql);
    } catch (err) {
      app.log.warn({ err: err instanceof Error ? err.message : String(err) }, 'eval: seed failed');
    }
  });

  // ─── suite CRUD ──────────────────────────────────────────────────────────
  app.post('/api/eval/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body ?? {}) as Record<string, unknown>;
    const id = (body.id as string) ?? null;
    const { name, kind, tasks } = body;
    const judgeModel = (body.judgeModel as string) ?? null;
    const metadata = body.metadata as Record<string, unknown> | undefined;

    // `tasks` must be a real, non-empty array — a string also has `.length`.
    if (typeof name !== 'string' || !name || !kind || !Array.isArray(tasks) || tasks.length === 0) {
      return reply.status(400).send({ error: 'name, kind, and tasks are required' });
    }
    if (kind !== 'chat' && kind !== 'code') {
      return reply.status(400).send({ error: "kind must be 'chat' or 'code'" });
    }

    const suiteId = await upsertEvalSuite(sql, id, name, kind, tasks, judgeModel, metadata);
    return reply.status(201).send({ id: suiteId });
  });

  app.get('/api/eval/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await listEvalSuites(sql);
    return reply.send({
      suites: suites.map((s) => ({
        id: s.id,
        name: s.name,
        kind: s.kind,
        version: s.version,
        tasks: jsonbArray(s.tasks),
        judgeModel: s.judge_model,
        judgeModelVersion: s.judge_model_version,
        metadata: jsonbObject(s.metadata) ?? undefined,
        createdAt: s.created_at,
      })),
    });
  });

  app.get('/api/eval/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const suite = await getEvalSuite(sql, id);
    if (!suite) {
      return reply.status(404).send({ error: 'suite not found' });
    }
    return reply.send({
      id: suite.id,
      name: suite.name,
      kind: suite.kind,
      version: suite.version,
      tasks: jsonbArray(suite.tasks),
      judgeModel: suite.judge_model,
      judgeModelVersion: suite.judge_model_version,
      metadata: jsonbObject(suite.metadata) ?? undefined,
      createdAt: suite.created_at,
    });
  });

  // ─── seed from data/ ─────────────────────────────────────────────────────
  app.post('/api/eval/seed', async (_req: FastifyRequest, reply: FastifyReply) => {
    await seedEvalSuites(sql);
    return reply.send({ ok: true });
  });

  // ─── run launcher ────────────────────────────────────────────────────────
  app.post('/api/eval/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body ?? {}) as Record<string, unknown>;
    const suiteId = body.suiteId as string;
    const providerId = body.providerId as string;
    const model = body.model as string;
    const quant = (body.quant as string) ?? null;
    if (!suiteId || !providerId || !model) {
      return reply.status(400).send({ error: 'suiteId, providerId, and model are required' });
    }

    const suite = await getEvalSuite(sql, suiteId);
    if (!suite) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    const tasks = jsonbArray(suite.tasks);
    const judgeModel = suite.judge_model;
    // Hosts that are not tracked in fleet state publish with seq 0.
    const seq = fleet.hosts.get(providerId)?.seq ?? 0;

    // Start the eval run asynchronously; the 202 below does not wait for it.
    void runEvalAsync(
      { suiteId, providerId, model, quant, tasks, judgeModel },
      sql,
      emitter,
      seq,
      app.log,
    );
    return reply.status(202).send({ status: 'queued', suiteId, providerId, model });
  });

  // ─── runs listing ────────────────────────────────────────────────────────
  app.get('/api/eval/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const runs = await listEvalRuns(sql, query.suiteId, query.providerId);
    return reply.send({
      runs: runs.map((r) => ({
        id: r.id,
        suiteId: r.suite_id,
        jobType: r.job_type,
        providerId: r.provider_id,
        model: r.model,
        quant: r.quant,
        status: r.status,
        judgeModel: r.judge_model,
        startedAt: r.started_at,
        finishedAt: r.finished_at,
        totalTasks: r.total_tasks,
        completedTasks: r.completed_tasks,
        aggregate: jsonbObject(r.aggregate),
        error: r.error,
        createdAt: r.created_at,
      })),
    });
  });

  app.get('/api/eval/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    // NOTE(review): this loads the unfiltered run list and searches it in memory.
    // If `listEvalRuns` caps its result set, older runs will answer 404 here —
    // consider a dedicated by-id lookup in the eval-suites service.
    const runs = await listEvalRuns(sql);
    const run = runs.find((r) => r.id === id);
    if (!run) {
      return reply.status(404).send({ error: 'run not found' });
    }
    const results = await getEvalResults(sql, id);
    return reply.send({
      run: {
        id: run.id,
        suiteId: run.suite_id,
        jobType: run.job_type,
        providerId: run.provider_id,
        model: run.model,
        quant: run.quant,
        status: run.status,
        judgeModel: run.judge_model,
        startedAt: run.started_at,
        finishedAt: run.finished_at,
        totalTasks: run.total_tasks,
        completedTasks: run.completed_tasks,
        aggregate: jsonbObject(run.aggregate),
        error: run.error,
        createdAt: run.created_at,
      },
      results: results.map((r) => ({
        id: r.id,
        taskId: r.task_id,
        taskIndex: r.task_index,
        score: r.score,
        maxScore: r.max_score,
        rationale: r.rationale,
        sandboxExitCode: r.sandbox_exit_code,
        sandboxStderr: r.sandbox_stderr,
        sandboxStdout: r.sandbox_stdout,
        executionMs: r.execution_ms,
        error: r.error,
      })),
    });
  });

  // ─── leaderboard ─────────────────────────────────────────────────────────
  app.get('/api/eval/leaderboard', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const kind = query.kind as 'chat' | 'code' | undefined;

    // Aggregate scores per (provider_id, model) from completed eval_runs.
    // COUNT is cast to INT so the driver returns a JS number instead of a
    // bigint string; AVG is NULL when no run in the group has an aggregate.
    const rows = await sql<{
      provider_id: string;
      model: string;
      quant: string | null;
      suite_kind: string;
      avg_score: number | null;
      run_count: number;
      latest_run_at: string;
    }[]>`
      SELECT
        er.provider_id,
        er.model,
        er.quant,
        es.kind AS suite_kind,
        AVG(CASE WHEN er.aggregate IS NOT NULL THEN (er.aggregate::jsonb ->> 'avgScore')::float ELSE NULL END) AS avg_score,
        COUNT(DISTINCT er.id)::INT AS run_count,
        MAX(er.finished_at) AS latest_run_at
      FROM eval_runs er
      JOIN eval_suites es ON er.suite_id = es.id
      WHERE er.status = 'completed'
      ${kind ? sql`AND es.kind = ${kind}` : sql``}
      GROUP BY er.provider_id, er.model, er.quant, es.kind
      ORDER BY avg_score DESC NULLS LAST
    `;
    return reply.send({
      leaderboard: rows.map((r) => ({
        providerId: r.provider_id,
        model: r.model,
        quant: r.quant,
        suiteKind: r.suite_kind,
        avgScore: r.avg_score,
        runCount: r.run_count,
        latestRunAt: r.latest_run_at,
      })),
    });
  });
}
/**
 * Fold per-task eval results into the aggregate stored on the run.
 *
 * Only results with a non-null score are counted. A scored result passes when
 * `score / max_score >= 0.7`; when `max_score` is null or 0 any scored result
 * counts as passed. Each score is compared against the `max_score` of its own
 * row.
 *
 * @param results    Rows read from `eval_results` for the run.
 * @param totalTasks Number of tasks in the suite, copied into the aggregate.
 * @returns The aggregate, or `null` when no result carries a score.
 */
function summarizeEvalScores(
  results: ReadonlyArray<{ score: number | null; max_score: number | null }>,
  totalTasks: number,
): { avgScore: number; totalTasks: number; passedTasks: number } | null {
  const passRatio = 0.7;
  let scoreSum = 0;
  let scoredCount = 0;
  let passedTasks = 0;
  for (const { score, max_score: maxScore } of results) {
    if (score == null) continue;
    scoredCount += 1;
    scoreSum += score;
    if (!maxScore || score / maxScore >= passRatio) passedTasks += 1;
  }
  if (scoredCount === 0) return null;
  return { avgScore: scoreSum / scoredCount, totalTasks, passedTasks };
}

/**
 * Async eval runner: fire-and-forget.
 * Delegates to judge runner (chat) or sandbox runner (code).
 *
 * Inserts an `eval_runs` row with status `running`, publishes a `running`
 * `control_job` event, executes the tasks, then stores the aggregate and the
 * final status (`failed` when the runner reported an error, else `completed`)
 * and publishes the matching event. Any thrown error marks the run `failed`
 * (best effort) and publishes a `failed` event.
 *
 * @param params  Suite/provider/model identifiers plus the task list and judge model.
 * @param sql     postgres client.
 * @param emitter Receives the `control_job` lifecycle events.
 * @param seq     Sequence number stamped on published events.
 * @param logger  Logger for run failures; also passed to the judge runner.
 */
async function runEvalAsync(
  params: {
    suiteId: string;
    providerId: string;
    model: string;
    quant: string | null;
    tasks: unknown[];
    judgeModel: string | null;
  },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  logger: import('fastify').FastifyBaseLogger,
): Promise<void> {
  const { suiteId, providerId, model, quant, tasks, judgeModel } = params;
  const runId = `eval_${Date.now()}_${crypto.randomUUID().slice(0, 8)}`;
  try {
    await sql`
      INSERT INTO eval_runs (id, suite_id, job_type, provider_id, model, quant, status, judge_model, started_at, total_tasks)
      VALUES (${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant}, 'running', ${judgeModel}, clock_timestamp(), ${tasks.length})
    `;
    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: 'running' as const,
      detail: { suiteId, providerId, model, totalTasks: tasks.length },
    });

    // The suite type is inferred from the first task: a truthy `test_code`
    // field selects the sandbox (code) runner, anything else the judge runner.
    const firstTask = tasks[0] as Record<string, unknown> | undefined;
    const isCodeSuite = Boolean(firstTask?.test_code);

    let completed = 0;
    let error: string | null = null;

    // Import runners dynamically to avoid circular deps.
    if (isCodeSuite) {
      const { runCodeEval } = await import('../services/sandbox-runner.js');
      const result = await runCodeEval(
        { runId, providerId, model, tasks: tasks as Array<Record<string, unknown>>, quant },
        sql,
        emitter,
        seq,
        (progress) => {
          completed = progress.completedTasks;
        },
      );
      if (result.error) error = result.error;
    } else {
      const { runJudgeEval } = await import('../services/judge-runner.js');
      const result = await runJudgeEval(
        { runId, providerId, model, tasks: tasks as Array<Record<string, unknown>>, judgeModel, quant },
        sql,
        emitter,
        seq,
        logger,
        (progress) => {
          completed = progress.completedTasks;
        },
      );
      if (result.error) error = result.error;
    }

    // Compute aggregate.
    const results = await sql<{ score: number | null; max_score: number | null }[]>`
      SELECT score, max_score FROM eval_results WHERE run_id = ${runId}
    `;
    const aggregate = summarizeEvalScores(results, tasks.length);
    const avgScore = aggregate?.avgScore ?? null;

    await sql`
      UPDATE eval_runs
      SET status = ${error ? 'failed' : 'completed'},
          finished_at = clock_timestamp(),
          completed_tasks = ${completed},
          aggregate = ${aggregate ? sql.json(aggregate as never) : sql`NULL::jsonb`},
          error = ${error}
      WHERE id = ${runId}
    `;
    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: error ? 'failed' as const : 'completed' as const,
      detail: { avgScore, error },
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    logger.error({ err: msg }, 'eval: run failed');
    // Best effort: the row may not exist if the INSERT itself failed.
    await sql`
      UPDATE eval_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `.catch(() => {});
    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  }
}

View File

@@ -0,0 +1,205 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import {
VIRTUAL_MODELS,
resolveCandidates,
splitComposite,
} from '../services/gateway.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 * GET /v1/models — advertise the virtual models
 * POST /v1/chat/completions — resolve a policy, dispatch with failover
 * GET /upstream/:model/props — props for getModelContext (best candidate)
 *
 * Every dispatch forwards X-Boo-Source to the chosen target so attribution
 * survives the extra hop, and is recorded in route_dispatch_log.
 *
 * Failover applies only until a candidate answers with a 2xx status (plus, for
 * non-streaming requests, a parseable JSON body). Once a streamed response has
 * started, an upstream error ends the stream; it never retries another
 * candidate on the already-started response.
 *
 * @param app      Fastify instance the routes are attached to.
 * @param sql      postgres client used for candidate resolution and dispatch logging.
 * @param fleet    Fleet state handed to `resolveCandidates`.
 * @param _emitter Unused; kept for a uniform registration signature.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  _emitter: DeltaEmitter,
): void {
  /** Strip trailing slashes so joined paths never contain `//`. */
  const trimBase = (baseUrl: string): string => baseUrl.replace(/\/+$/, '');
  /** Render an unknown thrown value as a log-friendly string. */
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  /** Release an upstream response that will not be read, freeing its connection. */
  const discard = async (res: Response): Promise<void> => {
    await res.body?.cancel().catch(() => {});
  };

  // ─── model catalog ───────────────────────────────────────────────────────
  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  // ─── props (for getModelContext) ─────────────────────────────────────────
  // Resolve candidates and proxy the first healthy candidate's props so the
  // caller can read default_generation_settings.n_ctx.
  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);
    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      try {
        const url = `${trimBase(baseUrl)}/upstream/${encodeURIComponent(split.model)}/props`;
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          await discard(res);
          continue;
        }
        const body = await res.json();
        return reply.send(body);
      } catch {
        // Unreachable host or unparseable body: try the next candidate.
        continue;
      }
    }
    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  // ─── chat completions (dispatch with failover) ───────────────────────────
  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as Record<string, unknown> | undefined;
    const requestedModel = body?.model;
    if (!body || typeof requestedModel !== 'string' || !requestedModel) {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    // A repeated header arrives as an array; attribute to the first value.
    const rawSource = req.headers['x-boo-source'];
    const source = (Array.isArray(rawSource) ? rawSource[0] : rawSource) ?? null;
    const stream = body.stream === true;

    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);
    if (candidates.length === 0) {
      await logDispatch(sql, { virtualModel, chosen: null, tried: [], status: 'no_candidates', source, error: 'no healthy candidates', durationMs: 0 });
      return reply.status(503).send({
        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
      });
    }

    const tried: string[] = [];
    let lastError: string | null = null;
    const startedAt = Date.now();

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      tried.push(compositeId);

      const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
      if (source) upstreamHeaders['X-Boo-Source'] = source;
      // The target sees its own model name, not the virtual one.
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      let res: Response;
      try {
        res = await fetch(`${trimBase(baseUrl)}/v1/chat/completions`, {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });
      } catch (err) {
        // Connection error — failover to the next candidate.
        lastError = `${compositeId}: ${describe(err)}`;
        continue;
      }
      if (!res.ok) {
        // HTTP error before body — eligible for failover to the next candidate.
        lastError = `${compositeId}: HTTP ${res.status}`;
        await discard(res);
        continue;
      }

      // Success: dispatch chosen. Log and stream/return through.
      await logDispatch(sql, {
        virtualModel,
        chosen: compositeId,
        tried,
        status: 'dispatched',
        source,
        error: null,
        durationMs: Date.now() - startedAt,
      });

      if (stream) {
        reply.header('Content-Type', 'text/event-stream');
        reply.header('Cache-Control', 'no-cache');
        reply.header('Connection', 'keep-alive');
        // We write to the raw response from here on. Headers set through
        // reply.header() are only flushed by reply.send(), so pass them to
        // writeHead explicitly and tell Fastify the response is ours.
        reply.hijack();
        reply.raw.writeHead(200, reply.getHeaders());

        const reader = res.body?.getReader();
        if (!reader) {
          reply.raw.end();
          return;
        }
        // Stop pulling from the upstream once the client has gone away.
        const stopUpstream = (): void => {
          void reader.cancel().catch(() => {});
        };
        reply.raw.on('close', stopUpstream);
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            // Forward the bytes untouched; no decode/re-encode round trip.
            reply.raw.write(value);
          }
        } catch (err) {
          // The response has already started, so failover is no longer
          // possible: log and end the stream.
          app.log.warn({ err: describe(err), target: compositeId }, 'gateway: upstream stream aborted');
        } finally {
          reply.raw.off('close', stopUpstream);
          reply.raw.end();
        }
        return;
      }

      // Non-streaming: pass JSON through. An unreadable body is treated like a
      // failed candidate.
      let json: unknown;
      try {
        json = await res.json();
      } catch (err) {
        lastError = `${compositeId}: ${describe(err)}`;
        continue;
      }
      return reply.send(json);
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: lastError ? `all candidates failed (last: ${lastError})` : 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
    });
  });
}
/**
 * Append one row to `route_dispatch_log` describing a gateway dispatch attempt.
 *
 * The chosen composite id (when there is one) is split into provider and model
 * columns; both are stored as NULL when nothing was chosen or the id does not
 * split. A failing insert is swallowed so that logging never breaks dispatch.
 *
 * @param sql   postgres client.
 * @param entry Dispatch outcome: virtual model, chosen target, candidates tried,
 *   status, source attribution, error text and elapsed milliseconds.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;

  let target: ReturnType<typeof splitComposite> | null = null;
  if (chosen) {
    target = splitComposite(chosen);
  }
  const chosenProviderId = target?.providerId ?? null;
  const chosenModel = target?.model ?? null;

  await sql`
    INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
    VALUES (
      ${virtualModel},
      ${chosenProviderId},
      ${chosenModel},
      ${sql.json(tried as never)},
      ${status},
      ${source},
      ${error},
      ${durationMs}
    )
  `.catch(() => { /* logging must never break dispatch */ });
}

View File

@@ -0,0 +1,235 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';
/**
* Playground routes: model select, param controls, streaming chat.
*
* GET /api/playground/models — list available models from providers
* POST /api/playground/chat — streaming chat against a model
* POST /api/playground/chat-ab — side-by-side A/B compare
*/
export function registerPlaygroundRoutes(
app: FastifyInstance,
): void {
// ─── model catalog ───────────────────────────────────────────────────────
app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
// Resolve provider URLs from the loaded registry.
const registry = getLlamaProviders();
const providers = registry.providers.map((p) => ({
id: p.id,
baseUrl: p.baseUrl,
}));
const results = await Promise.allSettled(
providers.map(async (p) => {
try {
const res = await fetch(`${p.baseUrl}/v1/models`, {
signal: AbortSignal.timeout(5_000),
});
if (!res.ok) return null;
const data = await res.json() as { data?: Array<{ id: string }> };
return {
providerId: p.id,
models: data?.data?.map((m) => m.id) ?? [],
};
} catch {
return null;
}
}),
);
const models: Array<{ providerId: string; models: string[] }> = [];
for (const r of results) {
if (r.status === 'fulfilled' && r.value) {
models.push(r.value);
}
}
return reply.send({ models });
});
// ─── streaming chat ──────────────────────────────────────────────────────
app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const providerId = body.providerId as string;
const model = body.model as string;
const messages = body.messages as Array<{ role: string; content: string }>;
const temperature = (body.temperature as number) ?? 0.7;
const topP = (body.topP as number) ?? 0.9;
const maxTokens = (body.maxTokens as number) ?? 1024;
if (!providerId || !model || !messages?.length) {
return reply.status(400).send({ error: 'providerId, model, and messages are required' });
}
const baseUrl = resolveProviderBaseUrl(providerId);
if (!baseUrl) {
return reply.status(400).send({ error: `unknown provider: ${providerId}` });
}
// Stream the response back to the client via SSE.
reply.header('Content-Type', 'text/event-stream');
reply.header('Cache-Control', 'no-cache');
reply.header('Connection', 'keep-alive');
reply.raw.writeHead(200);
try {
const res = await fetch(`${baseUrl}/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
temperature,
top_p: topP,
max_tokens: maxTokens,
stream: true,
}),
signal: AbortSignal.timeout(120_000),
});
if (!res.ok) {
const errBody = await res.text().catch(() => '');
reply.raw.write(`data: ${JSON.stringify({ error: `Request failed: ${res.status} ${errBody.slice(0, 200)}` })}\n\n`);
reply.raw.end();
return;
}
const reader = res.body?.getReader();
if (!reader) {
reply.raw.write('data: {"error": "No response body"}\n\n');
reply.raw.end();
return;
}
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed) continue;
if (trimmed === 'data: [DONE]') {
reply.raw.write('data: [DONE]\n\n');
continue;
}
// N3: pass through the raw SSE line from upstream as-is.
// If it already has 'data: ' prefix, don't double-prefix.
const payload = trimmed.startsWith('data: ') ? trimmed : `data: ${trimmed}`;
reply.raw.write(`${payload}\n\n`);
}
}
reply.raw.write('data: [DONE]\n\n');
} catch (err) {
const msg = (err as Error).message ?? String(err);
reply.raw.write(`data: ${JSON.stringify({ error: msg })}\n\n`);
} finally {
reply.raw.end();
}
});
// ─── A/B compare ─────────────────────────────────────────────────────────
/**
 * POST /api/playground/chat-ab — A/B compare two models on the same prompt.
 *
 * Body: { providerIdA, modelA, providerIdB, modelB, messages,
 *         temperature?, topP?, maxTokens? }.
 *
 * Both upstream `/v1/chat/completions` streams run concurrently and are
 * multiplexed onto a single SSE response. Every event is a JSON object tagged
 * with its lane:
 *   { lane, raw }        — one upstream SSE payload ('data: ' prefix stripped)
 *   { lane, done: true } — the lane finished (sent at most once per lane)
 *   { lane, error }      — the lane failed
 *
 * Replies 400 (plain JSON) when inputs are missing or a provider is unknown.
 */
app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
  // A missing/unparsed body must produce the 400 below, not a TypeError (500).
  const body = (req.body as Record<string, unknown> | undefined) ?? {};
  const providerIdA = body.providerIdA as string;
  const modelA = body.modelA as string;
  const providerIdB = body.providerIdB as string;
  const modelB = body.modelB as string;
  const messages = body.messages as Array<{ role: string; content: string }>;
  const temperature = (body.temperature as number) ?? 0.7;
  const topP = (body.topP as number) ?? 0.9;
  const maxTokens = (body.maxTokens as number) ?? 1024;
  if (!providerIdA || !modelA || !providerIdB || !modelB || !messages?.length) {
    return reply.status(400).send({ error: 'Both models and messages are required' });
  }
  const baseUrlA = resolveProviderBaseUrl(providerIdA);
  const baseUrlB = resolveProviderBaseUrl(providerIdB);
  if (!baseUrlA || !baseUrlB) {
    return reply.status(400).send({ error: 'One or both providers unknown' });
  }

  // Stream both responses via SSE with lane identifiers.
  // Headers set through reply.header() are kept in Fastify's own header store
  // and are only flushed by reply.send(). Because this handler writes to
  // reply.raw directly, they must be passed to writeHead explicitly; otherwise
  // the response goes out without the text/event-stream Content-Type.
  reply.header('Content-Type', 'text/event-stream');
  reply.header('Cache-Control', 'no-cache');
  reply.header('Connection', 'keep-alive');
  reply.raw.writeHead(200, reply.getHeaders());

  /** Write one SSE event, skipping the write once the client has gone away. */
  const writeEvent = (payload: Record<string, unknown>): void => {
    if (reply.raw.writableEnded || reply.raw.destroyed) return;
    reply.raw.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  const streamModel = async (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> => {
    // Upstream usually terminates with 'data: [DONE]' and then closes the
    // stream; guard so the client sees exactly one `done` event per lane.
    let doneSent = false;
    const markDone = (): void => {
      if (doneSent) return;
      doneSent = true;
      writeEvent({ lane, done: true });
    };

    /** Re-wrap a single upstream SSE line with lane info. */
    const forwardLine = (line: string): void => {
      const trimmed = line.trim();
      if (!trimmed) return;
      if (trimmed === 'data: [DONE]') {
        markDone();
        return;
      }
      // N3: strip 'data: ' prefix from upstream before re-wrapping with lane info.
      const payload = trimmed.startsWith('data: ') ? trimmed.slice(6) : trimmed;
      writeEvent({ lane, raw: payload });
    };

    try {
      const res = await fetch(`${baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model,
          messages,
          temperature,
          top_p: topP,
          max_tokens: maxTokens,
          stream: true,
        }),
        signal: AbortSignal.timeout(120_000),
      });
      if (!res.ok) {
        // Include a bounded excerpt of the upstream body, matching the
        // single-model playground route, so failures are diagnosable.
        const errBody = await res.text().catch(() => '');
        writeEvent({ lane, error: `Request failed: ${res.status} ${errBody.slice(0, 200)}`.trim() });
        return;
      }
      const reader = res.body?.getReader();
      if (!reader) {
        // Previously this returned silently, leaving the lane with neither a
        // `done` nor an `error` event.
        writeEvent({ lane, error: 'No response body' });
        return;
      }
      const decoder = new TextDecoder();
      let buffer = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is a (possibly empty) partial line; keep it buffered.
        buffer = lines.pop() ?? '';
        for (const line of lines) forwardLine(line);
      }
      // Flush the decoder and any final line that arrived without a newline.
      buffer += decoder.decode();
      forwardLine(buffer);
      markDone();
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      writeEvent({ lane, error: msg });
    }
  };

  try {
    // Run both streams concurrently. streamModel reports failures as lane
    // events rather than rejecting, so one lane cannot abort the other.
    await Promise.all([
      streamModel('A', baseUrlA, modelA),
      streamModel('B', baseUrlB, modelB),
    ]);
  } finally {
    reply.raw.end();
  }
});
}

View File

@@ -0,0 +1,136 @@
import { randomUUID } from 'node:crypto';
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import { VIRTUAL_MODELS } from '../services/gateway.js';
import { jsonbStringArray } from '../services/jsonb.js';
/**
 * P7.4: Route policy CRUD + dispatch log.
 *
 * GET    /api/policies                — list policies
 * POST   /api/policies                — create/update a policy (upsert by virtual_model)
 * DELETE /api/policies/:id            — delete a policy
 * GET    /api/policies/dispatch-log   — recent gateway dispatches (max 200,
 *                                       optional ?virtualModel= filter)
 * GET    /api/policies/virtual-models — the available virtual model tokens
 *
 * @param app Fastify instance the routes are registered on.
 * @param sql postgres tagged-template client used for all queries.
 */
export function registerPolicyRoutes(app: FastifyInstance, sql: Sql): void {
  app.get('/api/policies/virtual-models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({ virtualModels: VIRTUAL_MODELS });
  });

  app.get('/api/policies', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      id: string;
      name: string;
      virtual_model: string;
      // JSONB column: may arrive already parsed, so it is not typed as string.
      candidates: unknown;
      fallback: string | null;
      enabled: boolean;
      created_at: string;
      updated_at: string;
    }[]>`
      SELECT id, name, virtual_model, candidates, fallback, enabled, created_at, updated_at
      FROM route_policies
      ORDER BY virtual_model
    `;
    return reply.send({
      policies: rows.map((r) => ({
        id: r.id,
        name: r.name,
        virtualModel: r.virtual_model,
        candidates: safeParseArray(r.candidates),
        fallback: r.fallback,
        enabled: r.enabled,
        createdAt: r.created_at,
        updatedAt: r.updated_at,
      })),
    });
  });

  app.post('/api/policies', async (req: FastifyRequest, reply: FastifyReply) => {
    // A missing body must produce the 400 below rather than a TypeError (500).
    const body = (req.body as Record<string, unknown> | undefined) ?? {};
    const id = typeof body.id === 'string' && body.id !== '' ? body.id : randomUUID();
    const name = body.name;
    const virtualModel = body.virtualModel;
    const candidates = body.candidates;
    const fallback = typeof body.fallback === 'string' ? body.fallback : null;
    const enabled = body.enabled !== false;
    if (typeof name !== 'string' || !name || typeof virtualModel !== 'string' || !virtualModel) {
      return reply.status(400).send({ error: 'name and virtualModel are required' });
    }
    if (!(VIRTUAL_MODELS as readonly string[]).includes(virtualModel)) {
      return reply.status(400).send({ error: `virtualModel must be one of ${VIRTUAL_MODELS.join(', ')}` });
    }
    // Non-string entries are dropped; a non-array value becomes an empty list.
    const candidateList = Array.isArray(candidates)
      ? candidates.filter((c): c is string => typeof c === 'string')
      : [];
    // Upsert by virtual_model (UNIQUE) so there is one policy per virtual model.
    // On conflict the existing row keeps its original id, so the id is read
    // back with RETURNING instead of echoing the id proposed by this request.
    const rows = await sql<{ id: string }[]>`
      INSERT INTO route_policies (id, name, virtual_model, candidates, fallback, enabled, updated_at)
      VALUES (${id}, ${name}, ${virtualModel}, ${sql.json(candidateList as never)}, ${fallback}, ${enabled}, clock_timestamp())
      ON CONFLICT (virtual_model) DO UPDATE SET
        name = EXCLUDED.name,
        candidates = EXCLUDED.candidates,
        fallback = EXCLUDED.fallback,
        enabled = EXCLUDED.enabled,
        updated_at = clock_timestamp()
      RETURNING id
    `;
    return reply.status(201).send({ id: rows[0]?.id ?? id });
  });

  app.delete('/api/policies/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    // Deleting an unknown id is not treated as an error.
    await sql`DELETE FROM route_policies WHERE id = ${id}`;
    return reply.send({ ok: true });
  });

  app.get('/api/policies/dispatch-log', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const virtualModel = query.virtualModel;
    const rows = virtualModel
      ? await sql<DispatchLogRow[]>`
          SELECT id, ts, virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms
          FROM route_dispatch_log WHERE virtual_model = ${virtualModel}
          ORDER BY ts DESC LIMIT 200
        `
      : await sql<DispatchLogRow[]>`
          SELECT id, ts, virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms
          FROM route_dispatch_log
          ORDER BY ts DESC LIMIT 200
        `;
    return reply.send({
      dispatches: rows.map((r) => ({
        id: r.id,
        ts: r.ts,
        virtualModel: r.virtual_model,
        chosenProviderId: r.chosen_provider_id,
        chosenModel: r.chosen_model,
        candidatesTried: safeParseArray(r.candidates_tried),
        status: r.status,
        source: r.source,
        error: r.error,
        durationMs: r.duration_ms,
      })),
    });
  });
}
/**
 * Row shape selected from `route_dispatch_log` by the dispatch-log route.
 * Field names mirror the table's snake_case columns.
 *
 * NOTE(review): `id` is BIGSERIAL and `ts` is TIMESTAMPTZ in the schema; the
 * runtime values depend on how the postgres client is configured to parse
 * int8/timestamptz — confirm that `number` / `string` match what it returns.
 */
interface DispatchLogRow {
id: number;
ts: string;
virtual_model: string;
// Nullable: the schema allows a dispatch row without a chosen target.
chosen_provider_id: string | null;
chosen_model: string | null;
// JSONB column; normalised with safeParseArray before being sent to clients.
candidates_tried: unknown;
status: string;
source: string | null;
error: string | null;
duration_ms: number | null;
}
// JSONB columns come back already parsed from the postgres client (porsager/postgres);
// jsonbStringArray tolerates both parsed and still-serialised values, so the
// routes above use it under a local name to normalise `candidates` and
// `candidates_tried`.
const safeParseArray = jsonbStringArray;

View File

@@ -0,0 +1,122 @@
import type { FastifyInstance, FastifyRequest, FastifyReply, FastifyBaseLogger } from 'fastify';
import type { Sql } from '../db.js';
import { generateReport, runReportSchedulerTick } from '../services/reports.js';
import { jsonbObject } from '../services/jsonb.js';
/**
 * P6.2: Reports tab API + scheduled digest.
 *
 * GET  /api/reports          — list generated reports (newest first, max 100)
 * GET  /api/reports/:id      — single report (markdown + stats)
 * POST /api/reports/generate — manually trigger a digest now
 * GET  /api/reports/schedule — current schedule meta
 * POST /api/reports/schedule — replace schedule meta {interval, enabled}
 *
 * @param app Fastify instance the routes are registered on.
 * @param sql postgres tagged-template client used for all queries.
 */
export function registerReportRoutes(app: FastifyInstance, sql: Sql): void {
  app.get('/api/reports', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      id: string;
      kind: string;
      interval: string;
      period_start: string;
      period_end: string;
      created_at: string;
    }[]>`
      SELECT id, kind, interval, period_start, period_end, created_at
      FROM control_reports
      ORDER BY created_at DESC
      LIMIT 100
    `;
    return reply.send({
      reports: rows.map((r) => ({
        id: r.id,
        kind: r.kind,
        interval: r.interval,
        periodStart: r.period_start,
        periodEnd: r.period_end,
        createdAt: r.created_at,
      })),
    });
  });

  app.get('/api/reports/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const rows = await sql<{
      id: string;
      kind: string;
      interval: string;
      period_start: string;
      period_end: string;
      markdown: string;
      stats: unknown;
      created_at: string;
    }[]>`
      SELECT id, kind, interval, period_start, period_end, markdown, stats, created_at
      FROM control_reports WHERE id = ${id}
    `;
    // Guarding on the row itself narrows the type without a `!` assertion.
    const r = rows[0];
    if (!r) {
      return reply.status(404).send({ error: 'report not found' });
    }
    return reply.send({
      id: r.id,
      kind: r.kind,
      interval: r.interval,
      periodStart: r.period_start,
      periodEnd: r.period_end,
      markdown: r.markdown,
      stats: jsonbObject(r.stats),
      createdAt: r.created_at,
    });
  });

  app.post('/api/reports/generate', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body as Record<string, unknown>) ?? {};
    // Anything other than 'weekly' falls back to a daily digest.
    const interval = body.interval === 'weekly' ? 'weekly' : 'daily';
    const id = await generateReport(sql, interval);
    return reply.status(201).send({ id });
  });

  app.get('/api/reports/schedule', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{ interval: string; enabled: boolean; last_run_at: string | null }[]>`
      SELECT interval, enabled, last_run_at FROM control_schedule_meta WHERE name = 'report-digest'
    `;
    // Defaults are reported when the schedule row does not exist.
    const m = rows[0];
    return reply.send({
      interval: m?.interval ?? 'daily',
      enabled: m?.enabled ?? true,
      lastRunAt: m?.last_run_at ?? null,
    });
  });

  app.post('/api/reports/schedule', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body as Record<string, unknown>) ?? {};
    // Full replace: an omitted `interval` means 'daily', an omitted `enabled` means true.
    const interval = body.interval === 'weekly' ? 'weekly' : 'daily';
    const enabled = body.enabled !== false;
    // Upsert rather than a bare UPDATE: if the seed row is missing, an UPDATE
    // would match nothing while this route still reported success.
    // last_run_at is left untouched on conflict.
    await sql`
      INSERT INTO control_schedule_meta (name, interval, enabled)
      VALUES ('report-digest', ${interval}, ${enabled})
      ON CONFLICT (name) DO UPDATE SET
        interval = EXCLUDED.interval,
        enabled = EXCLUDED.enabled
    `;
    return reply.send({ interval, enabled });
  });
}
/**
 * Launch the in-process report scheduler.
 *
 * One tick is fired immediately (catch-up on boot) and then one every hour.
 * A failing tick is logged at warn level and never propagates.
 *
 * @returns a function that cancels the hourly timer; intended for onClose.
 */
export function startReportScheduler(sql: Sql, log: FastifyBaseLogger): () => void {
  const HOUR_MS = 3600_000;

  async function runTick(): Promise<void> {
    try {
      const outcome = await runReportSchedulerTick(sql);
      if (outcome.ran) {
        log.info({ reportId: outcome.reportId }, 'reports: digest generated');
      }
    } catch (err) {
      log.warn({ err: (err as Error).message }, 'reports: scheduler tick failed');
    }
  }

  // Boot-time catch-up; deliberately not awaited.
  void runTick();

  const hourly = setInterval(runTick, HOUR_MS);
  const stop = (): void => clearInterval(hourly);
  return stop;
}

View File

@@ -0,0 +1,32 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import { computeRoutingScores, BADGE_LABELS } from '../services/routing-scores.js';
/**
 * P6.1: Advisory routing scores.
 *
 * GET /api/routing/scores responds with:
 *   - `scores`      — the full result of computeRoutingScores
 *   - `badges`      — compositeId -> badge kinds, only for entries that have at
 *                     least one badge (cheap lookup for the model picker)
 *   - `badgeLabels` — the BADGE_LABELS table
 *
 * Advisory only; nothing here enforces routing.
 */
export function registerRoutingRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
): void {
  app.get('/api/routing/scores', async (_req: FastifyRequest, reply: FastifyReply) => {
    const scores = await computeRoutingScores(sql, fleet);

    // Fold the scored entries into the badge lookup, skipping badge-less ones.
    const badges = scores.reduce<Record<string, string[]>>((lookup, entry) => {
      if (entry.badges.length > 0) {
        lookup[entry.compositeId] = entry.badges;
      }
      return lookup;
    }, {});

    return reply.send({ scores, badges, badgeLabels: BADGE_LABELS });
  });
}

View File

@@ -0,0 +1,262 @@
import { readFileSync } from 'node:fs';
import { randomUUID } from 'node:crypto';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { Config } from '../config.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import {
validateLlamaConfig,
computeDiff,
readRemoteConfig,
applyRemoteConfig,
sshExec,
type SshTarget,
type SshExec,
type SshMode,
} from '../services/ssh-config.js';
import { runModelPull, validateRepoId } from '../services/model-pull.js';
/**
 * P9.1: SSH config editor for llama-swap hosts.
 *
 * GET   /api/hosts                     — list control_hosts with SSH config status
 * PATCH /api/hosts/:id                 — set ssh_host/ssh_user/ssh_key_path/config_path/restart_cmd/ssh_mode
 *                                        (every field is written; omitted ones become NULL / 'shell')
 * GET   /api/hosts/:id/config          — SSH read the remote config
 * POST  /api/hosts/:id/config/validate — validate a candidate config (no host touch)
 * POST  /api/hosts/:id/config/diff     — diff a candidate vs the live remote config
 * POST  /api/hosts/:id/config/apply    — validate -> backup -> write -> restart -> health-wait
 * POST  /api/hosts/:id/pull            — pull a HuggingFace model (non-blocking job)
 *
 * `exec` is injectable for tests; production uses the real `sshExec` (spawn ssh).
 *
 * @param app     Fastify instance the routes are registered on.
 * @param sql     postgres client used to read/update control_hosts.
 * @param config  service config; supplies the optional schema path override.
 * @param fleet   in-memory fleet state; only the host's current seq is read (pull route).
 * @param emitter delta emitter handed to the model-pull job for progress frames.
 * @param exec    SSH executor, defaults to `sshExec`.
 */
export function registerSshConfigRoutes(
  app: FastifyInstance,
  sql: Sql,
  config: Config,
  fleet: FleetState,
  emitter: DeltaEmitter,
  exec: SshExec = sshExec,
): void {
  // Loaded once at registration; null means validate/apply answer 500.
  const schema = loadConfigSchema(config);

  app.get('/api/hosts', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<HostRow[]>`
      SELECT provider_id, ssh_host, ssh_user, ssh_key_path, config_path, restart_cmd, ssh_mode, os, gpu_label, enabled
      FROM control_hosts ORDER BY provider_id
    `;
    return reply.send({
      hosts: rows.map((r) => ({
        providerId: r.provider_id,
        sshHost: r.ssh_host,
        sshUser: r.ssh_user,
        sshKeyPath: r.ssh_key_path,
        configPath: r.config_path,
        restartCmd: r.restart_cmd,
        sshMode: r.ssh_mode ?? 'shell',
        os: r.os,
        gpuLabel: r.gpu_label,
        enabled: r.enabled,
        sshConfigured: !!(r.ssh_host && r.ssh_user && r.ssh_key_path && r.config_path),
      })),
    });
  });

  app.patch('/api/hosts/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = (req.body as Record<string, unknown>) ?? {};
    const sshHost = (body.sshHost as string) ?? null;
    const sshUser = (body.sshUser as string) ?? null;
    const sshKeyPath = (body.sshKeyPath as string) ?? null;
    const configPath = (body.configPath as string) ?? null;
    const restartCmd = (body.restartCmd as string) ?? null;
    const sshMode: SshMode = body.sshMode === 'wrapper' ? 'wrapper' : 'shell';
    const rows = await sql`
      UPDATE control_hosts
      SET ssh_host = ${sshHost}, ssh_user = ${sshUser}, ssh_key_path = ${sshKeyPath},
          config_path = ${configPath}, restart_cmd = ${restartCmd}, ssh_mode = ${sshMode}
      WHERE provider_id = ${id}
      RETURNING provider_id
    `;
    if (rows.length === 0) {
      return reply.status(404).send({ error: 'host not found' });
    }
    return reply.send({ ok: true });
  });

  app.get('/api/hosts/:id/config', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target || !host.config_path) {
      return reply.status(400).send({ error: 'host has no SSH config configured (set ssh_host/ssh_user/ssh_key_path/config_path first)' });
    }
    try {
      const content = await readRemoteConfig(target, host.config_path, exec, hostMode(host));
      return reply.send({ configPath: host.config_path, content });
    } catch (err) {
      // A failed remote read is reported as a bad-gateway error.
      return reply.status(502).send({ error: (err as Error).message });
    }
  });

  app.post('/api/hosts/:id/config/validate', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body as Record<string, unknown>) ?? {};
    const content = body.content as string;
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    if (!schema) {
      return reply.status(500).send({ error: 'config schema not available on this host' });
    }
    const result = validateLlamaConfig(content, schema);
    return reply.send({ valid: result.valid, errors: result.errors });
  });

  app.post('/api/hosts/:id/config/diff', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = (req.body as Record<string, unknown>) ?? {};
    const content = body.content as string;
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target || !host.config_path) {
      return reply.status(400).send({ error: 'host has no SSH config configured' });
    }
    try {
      const current = await readRemoteConfig(target, host.config_path, exec, hostMode(host));
      return reply.send({ diff: computeDiff(current, content) });
    } catch (err) {
      return reply.status(502).send({ error: (err as Error).message });
    }
  });

  app.post('/api/hosts/:id/config/apply', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = (req.body as Record<string, unknown>) ?? {};
    const content = body.content as string;
    // Only a literal `true` counts as confirmation.
    const confirm = body.confirm === true;
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    if (!confirm) {
      return reply.status(409).send({ error: 'apply requires confirmation', requiresConfirmation: true });
    }
    if (!schema) {
      return reply.status(500).send({ error: 'config schema not available on this host' });
    }
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    const mode = hostMode(host);
    // restart_cmd is only used in shell mode; in wrapper mode the wrapper's
    // `restart` verb hardcodes the service, so restart_cmd is not required.
    if (!target || !host.config_path || (mode === 'shell' && !host.restart_cmd)) {
      return reply.status(400).send({ error: 'host needs ssh_host/ssh_user/ssh_key_path/config_path (+ restart_cmd in shell mode) set first' });
    }
    const baseUrl = resolveProviderBaseUrl(id);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL in registry for provider ${id}` });
    }
    const result = await applyRemoteConfig({
      target,
      configPath: host.config_path,
      restartCmd: host.restart_cmd ?? '',
      newConfig: content,
      schema,
      baseUrl,
      exec,
      mode,
    });
    // Validation failures are the caller's fault (400); any later step is a host-side failure (502).
    const status = result.ok ? 200 : (result.step === 'validate' ? 400 : 502);
    return reply.status(status).send(result);
  });

  // ─── model pull (non-blocking job) ─────────────────────────────────────────
  app.post('/api/hosts/:id/pull', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = (req.body as Record<string, unknown>) ?? {};
    const repo = body.repo as string;
    if (typeof repo !== 'string' || !validateRepoId(repo)) {
      return reply.status(400).send({ error: 'repo must be a valid HuggingFace id (org/name)' });
    }
    // modelsDir is forwarded to the pull job, so reject non-string values here
    // instead of trusting a cast.
    const rawModelsDir = body.modelsDir;
    if (rawModelsDir != null && typeof rawModelsDir !== 'string') {
      return reply.status(400).send({ error: 'modelsDir must be a string' });
    }
    const modelsDir = typeof rawModelsDir === 'string' ? rawModelsDir : undefined;
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target) {
      return reply.status(400).send({ error: 'host has no SSH configured' });
    }
    const mode = hostMode(host);
    if (mode === 'shell' && !modelsDir) {
      return reply.status(400).send({ error: 'shell-mode host requires a modelsDir in the request body' });
    }
    const jobId = `pull_${Date.now()}_${randomUUID().slice(0, 8)}`;
    const seq = fleet.hosts.get(id)?.seq ?? 0;
    // Fire and forget; progress streams over control_job frames. The rejection
    // handler keeps a failed job from surfacing as an unhandled promise
    // rejection after the 202 below has already been sent.
    void runModelPull({ jobId, target, repo, mode, modelsDir }, exec, emitter, seq).catch((err: unknown) => {
      req.log.error({ err, jobId, providerId: id }, 'model pull job failed');
    });
    return reply.status(202).send({ status: 'queued', jobId, repo });
  });
}
/**
 * Resolve the SSH command mode of a host row.
 * Only the exact value 'wrapper' selects wrapper mode; anything else
 * (including null) is treated as 'shell'.
 */
function hostMode(host: HostRow): SshMode {
  if (host.ssh_mode === 'wrapper') {
    return 'wrapper';
  }
  return 'shell';
}
/**
 * Row shape selected from `control_hosts` by the routes in this file.
 * Field names mirror the table's snake_case columns.
 */
interface HostRow {
provider_id: string;
// ssh_host, ssh_user and ssh_key_path must all be set for sshTargetOf to
// produce a target; they stay null until configured through PATCH /api/hosts/:id.
ssh_host: string | null;
ssh_user: string | null;
ssh_key_path: string | null;
// Path of the remote config file; required by the config read/diff/apply routes.
config_path: string | null;
// Only required by the apply route when the host is in shell mode.
restart_cmd: string | null;
// Interpreted by hostMode: 'wrapper' selects wrapper mode, anything else is 'shell'.
ssh_mode: string | null;
os: string | null;
gpu_label: string | null;
enabled: boolean;
}
/**
 * Look up a single control_hosts row by provider id.
 *
 * @returns the matching row, or null when no host has that provider id.
 */
async function loadHost(sql: Sql, id: string): Promise<HostRow | null> {
  const [match] = await sql<HostRow[]>`
    SELECT provider_id, ssh_host, ssh_user, ssh_key_path, config_path, restart_cmd, ssh_mode, os, gpu_label, enabled
    FROM control_hosts WHERE provider_id = ${id}
  `;
  return match ?? null;
}
/**
 * Build the SSH connection target for a host row.
 *
 * @returns `{ host, user, keyPath }` when ssh_host, ssh_user and ssh_key_path
 *          are all non-empty; otherwise null.
 */
function sshTargetOf(host: HostRow): SshTarget | null {
  const { ssh_host: address, ssh_user: user, ssh_key_path: keyPath } = host;
  if (address && user && keyPath) {
    return { host: address, user, keyPath };
  }
  return null;
}
/**
 * Load the llama-swap config schema.
 *
 * The path from `LLAMA_CONFIG_SCHEMA_PATH` is tried first when set; if it
 * cannot be read or parsed, the copy bundled next to the compiled routes
 * (`../data/config-schema.json`) is tried. Failures are swallowed.
 *
 * @returns the parsed schema, or null when no candidate could be loaded.
 */
function loadConfigSchema(config: Config): object | null {
  // dist/routes/ssh-config.js -> dist/data/config-schema.json
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const bundled = resolve(moduleDir, '../data/config-schema.json');
  const preferred = config.LLAMA_CONFIG_SCHEMA_PATH ?? bundled;

  // Try the preferred path, then the bundled copy (once, if they differ).
  const candidates = preferred === bundled ? [bundled] : [preferred, bundled];
  for (const candidate of candidates) {
    try {
      return JSON.parse(readFileSync(candidate, 'utf8'));
    } catch {
      // Unreadable or invalid JSON: fall through to the next candidate.
    }
  }
  return null;
}

View File

@@ -0,0 +1,109 @@
import type { FastifyInstance } from 'fastify';
import WebSocket from 'ws';
import type { FleetState, HostState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import type { LogRelay } from '../services/log-relay.js';
/**
 * WS endpoint: /api/ws/control
 *
 * Per connection, in order:
 *   1. send a `control_fleet` snapshot of the current fleet state, stamped
 *      with the highest per-host seq;
 *   2. B6: replay the in-memory log tail (when a log relay is supplied) so
 *      late joiners see recent lines;
 *   3. forward every delta from the emitter while the socket is open;
 *   4. send a `ping` frame every 30 seconds.
 *
 * Client rule: buffer pre-snapshot deltas, replay after snapshot applying only
 * seq > snapshot_seq. On service restart, rebuild fleet state from DB before
 * serving snapshots.
 */
export function registerControlWebSocket(
  app: FastifyInstance,
  fleet: FleetState,
  emitter: DeltaEmitter,
  logRelay: LogRelay | null = null,
): void {
  app.get('/api/ws/control', { websocket: true }, (socket, _req) => {
    const { hosts } = buildSnapshot(fleet);

    // B4 fix: send snapshot at top level matching ControlFleetFrame Zod schema.
    let maxSeq = 0;
    for (const h of hosts) {
      maxSeq = Math.max(maxSeq, h.seq);
    }
    socket.send(JSON.stringify({
      type: 'control_fleet' as const,
      seq: maxSeq,
      hosts,
    }));

    // B6: Replay in-memory log tail for late joiners.
    if (logRelay && socket.readyState === WebSocket.OPEN) {
      for (const entry of logRelay.getAllTails()) {
        const frame = {
          type: 'control_log' as const,
          seq: maxSeq, // tail lines don't carry per-host seq; use snapshot seq
          providerId: entry.providerId,
          source: entry.source,
          line: entry.line,
        };
        socket.send(JSON.stringify(frame));
      }
    }

    // B3 fix: subscribe to delta emitter so WS clients receive live updates.
    const unsubscribe = emitter.subscribe((delta: unknown) => {
      if (socket.readyState !== WebSocket.OPEN) return;
      socket.send(JSON.stringify(delta));
    });

    const heartbeat = setInterval(() => {
      if (socket.readyState === WebSocket.OPEN) {
        socket.send(JSON.stringify({ type: 'ping' as const }));
      } else {
        // Socket is no longer open: stop pinging.
        clearInterval(heartbeat);
      }
    }, 30_000);

    // Shared teardown for both terminal socket events.
    const teardown = (): void => {
      clearInterval(heartbeat);
      unsubscribe();
    };
    socket.on('close', teardown);
    socket.on('error', teardown);
  });
}
/**
 * Project the in-memory fleet state into the plain, JSON-friendly snapshot
 * sent to WebSocket clients: Maps become arrays and Dates become ISO strings
 * (absent dates become null).
 * On restart, the fleet state is rebuilt from DB before serving snapshots.
 */
function buildSnapshot(fleet: FleetState): { hosts: Array<{
  providerId: string;
  liveness: 'connected' | 'reconnecting' | 'down';
  lastSeenAt: string | null;
  seq: number;
  models: Array<{
    model: string;
    state: string;
    ts: string;
    ttlDeadline: string | null;
    inflight: number;
  }>;
}> } {
  const hosts = [];
  for (const host of fleet.hosts.values()) {
    const models = [];
    for (const entry of host.models.values()) {
      models.push({
        model: entry.model,
        state: entry.state,
        ts: entry.ts.toISOString(),
        ttlDeadline: entry.ttlDeadline?.toISOString() ?? null,
        inflight: entry.inflight,
      });
    }
    hosts.push({
      providerId: host.providerId,
      liveness: host.liveness,
      lastSeenAt: host.lastSeenAt?.toISOString() ?? null,
      seq: host.seq,
      models,
    });
  }
  return { hosts };
}

291
apps/control/src/schema.sql Normal file
View File

@@ -0,0 +1,291 @@
-- P1: BooControl schema -- read-only fleet cockpit tables.
-- Applied on startup by apps/control/src/db.ts:applySchema().
-- Lives in the same 'boochat' database as BooChat's tables.
-- Host registry: one row per enabled llama-swap instance.
-- provider_id is the lookup key used by the /api/hosts routes. The SSH and
-- config columns are nullable: SSH-backed routes reject a host until
-- ssh_host, ssh_user and ssh_key_path are all set.
CREATE TABLE IF NOT EXISTS control_hosts (
provider_id TEXT PRIMARY KEY,
ssh_host TEXT,
ssh_user TEXT,
ssh_key_path TEXT,
config_path TEXT,
restart_cmd TEXT,
os TEXT,
gpu_label TEXT,
enabled BOOLEAN NOT NULL DEFAULT true
);
-- P9 verb-mode: per-host SSH command mode. 'shell' = raw commands (default,
-- backward compatible); 'wrapper' = fixed verbs for a forced-command-locked key.
-- Added via idempotent ALTER so databases created before this column pick it up.
ALTER TABLE control_hosts ADD COLUMN IF NOT EXISTS ssh_mode TEXT NOT NULL DEFAULT 'shell';
-- Seed display metadata; SSH/config columns are NULL until P9.
-- ON CONFLICT DO NOTHING leaves existing rows (and any edits to them) untouched.
INSERT INTO control_hosts (provider_id, os, gpu_label)
VALUES
('sam-desktop', 'Windows', 'RTX 5090 32GB'),
('embedding', 'Linux', 'P104-100 8GB')
ON CONFLICT (provider_id) DO NOTHING;
-- Request log: ingested from llama-swap /api/metrics ring.
-- A row is unique per (provider_id, swap_entry_id, ts).
-- NOTE(review): presumably the ingester relies on that constraint to make
-- re-ingesting the same ring entry idempotent — confirm against the ingest code.
CREATE TABLE IF NOT EXISTS control_requests (
id BIGSERIAL PRIMARY KEY,
provider_id TEXT NOT NULL,
swap_entry_id INT NOT NULL,
ts TIMESTAMPTZ NOT NULL,
model TEXT,
req_path TEXT,
status_code INT,
duration_ms INT,
cache_tokens INT,
input_tokens INT,
output_tokens INT,
prompt_tps REAL,
gen_tps REAL,
has_capture BOOLEAN NOT NULL DEFAULT false,
capture JSONB,
UNIQUE (provider_id, swap_entry_id, ts)
);
-- P4: Per-consumer attribution column. Added via idempotent ALTER so existing
-- DBs pick it up on next restart. See design §7 "Implementation notes" for the
-- llama-swap ActivityLogEntry discrepancy.
ALTER TABLE control_requests ADD COLUMN IF NOT EXISTS source TEXT;
-- Supports newest-first reads of one provider's requests.
CREATE INDEX IF NOT EXISTS idx_control_requests_provider_ts
ON control_requests (provider_id, ts DESC);
-- Raw performance samples from llama-swap /api/performance.
-- At most one sample per (provider_id, ts); gpu/sys hold the sampled payloads as JSONB.
CREATE TABLE IF NOT EXISTS control_perf_samples (
provider_id TEXT NOT NULL,
ts TIMESTAMPTZ NOT NULL,
gpu JSONB,
sys JSONB,
UNIQUE (provider_id, ts)
);
-- Supports newest-first reads of one provider's samples.
CREATE INDEX IF NOT EXISTS idx_control_perf_samples_provider_ts
ON control_perf_samples (provider_id, ts DESC);
-- 5-minute rollup aggregates.
-- One row per (provider_id, bucket); `bucket` is the rollup timestamp and
-- gpu_agg/sys_agg hold the aggregated values as JSONB.
CREATE TABLE IF NOT EXISTS control_perf_rollup_5m (
provider_id TEXT NOT NULL,
bucket TIMESTAMPTZ NOT NULL,
gpu_agg JSONB,
sys_agg JSONB,
UNIQUE (provider_id, bucket)
);
-- Model state transitions + gap events.
-- The UNIQUE constraint allows a given (provider_id, model, state, ts) event
-- to be stored only once; `detail` carries optional event-specific JSONB.
CREATE TABLE IF NOT EXISTS control_model_events (
provider_id TEXT NOT NULL,
model TEXT NOT NULL,
state TEXT NOT NULL,
ts TIMESTAMPTZ NOT NULL,
detail JSONB,
UNIQUE (provider_id, model, state, ts)
);
-- Supports newest-first reads of one provider's events.
CREATE INDEX IF NOT EXISTS idx_control_model_events_provider_ts
ON control_model_events (provider_id, ts DESC);
-- P3: Bench engine tables -- additive schema change.
-- Suite definitions: grid of prompt_tokens x gen_tokens x concurrency x repetitions.
-- The three grid axes are INT arrays; repetitions is a single count (default 1).
-- A suite targets exactly one (provider_id, model).
CREATE TABLE IF NOT EXISTS bench_suites (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
provider_id TEXT NOT NULL,
model TEXT NOT NULL,
prompt_tokens INT[] NOT NULL,
gen_tokens INT[] NOT NULL,
concurrency INT[] NOT NULL,
repetitions INT NOT NULL DEFAULT 1,
metadata JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Individual bench runs (one per suite execution).
-- suite_id references bench_suites without an ON DELETE action, so a suite
-- with runs cannot be deleted. New rows start as status 'queued' with zeroed
-- sample counters.
-- NOTE(review): the full set of status values and the meaning of
-- regression_flag / concurrent_foreign_requests are defined by the bench
-- engine, not by this schema — confirm there.
CREATE TABLE IF NOT EXISTS bench_runs (
id TEXT PRIMARY KEY,
suite_id TEXT NOT NULL REFERENCES bench_suites(id),
job_type TEXT NOT NULL DEFAULT 'bench',
status TEXT NOT NULL DEFAULT 'queued',
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
total_samples INT NOT NULL DEFAULT 0,
completed_samples INT NOT NULL DEFAULT 0,
concurrent_foreign_requests INT NOT NULL DEFAULT 0,
temperature REAL,
top_p REAL,
aggregate JSONB,
regression_flag TEXT,
error TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Lookup of runs by suite and by status.
CREATE INDEX IF NOT EXISTS idx_bench_runs_suite_id
ON bench_runs (suite_id);
CREATE INDEX IF NOT EXISTS idx_bench_runs_status
ON bench_runs (status);
-- Raw per-request samples from a bench run.
-- Each sample records the grid cell it belongs to (prompt_tokens, gen_tokens,
-- concurrency, repetition). The measurement columns are nullable and `error`
-- is a free-text column.
CREATE TABLE IF NOT EXISTS bench_samples (
id BIGSERIAL PRIMARY KEY,
run_id TEXT NOT NULL REFERENCES bench_runs(id),
prompt_tokens INT NOT NULL,
gen_tokens INT NOT NULL,
concurrency INT NOT NULL,
repetition INT NOT NULL,
ttft_ms REAL,
total_ms REAL,
prompt_tps REAL,
gen_tps REAL,
cache_n INT,
error TEXT
);
-- Lookup of all samples belonging to a run.
CREATE INDEX IF NOT EXISTS idx_bench_samples_run_id
ON bench_samples (run_id);
-- P3: Baseline aggregates per (provider_id, model).
-- First completed run seeds the baseline; subsequent runs compare against it.
-- The composite primary key enforces a single baseline per (provider_id, model).
-- run_id records the originating run but is not declared as a foreign key.
CREATE TABLE IF NOT EXISTS bench_baselines (
provider_id TEXT NOT NULL,
model TEXT NOT NULL,
aggregate JSONB NOT NULL,
run_id TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
PRIMARY KEY (provider_id, model)
);
-- P5: Quality evals + sandbox tables.
-- Eval suite definitions: kind (chat|code), tasks JSONB, judge_model.
-- Suites are versioned: (name, version) is unique, so a new version of a suite
-- is a new row with the same name.
CREATE TABLE IF NOT EXISTS eval_suites (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
kind TEXT NOT NULL,
version INT NOT NULL DEFAULT 1,
tasks JSONB NOT NULL,
judge_model TEXT,
judge_model_version TEXT,
metadata JSONB,
UNIQUE (name, version),
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Lookup of suites by kind.
CREATE INDEX IF NOT EXISTS idx_eval_suites_kind
ON eval_suites (kind);
-- Individual eval runs (one per suite execution against a model).
-- The run carries its own judge_model / judge_model_version columns in
-- addition to the ones on eval_suites.
-- NOTE(review): presumably these snapshot the judge used for this particular
-- run — confirm against the eval runner.
CREATE TABLE IF NOT EXISTS eval_runs (
id TEXT PRIMARY KEY,
suite_id TEXT NOT NULL REFERENCES eval_suites(id),
job_type TEXT NOT NULL DEFAULT 'eval',
provider_id TEXT NOT NULL,
model TEXT NOT NULL,
quant TEXT,
status TEXT NOT NULL DEFAULT 'queued',
judge_model TEXT,
judge_model_version TEXT,
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
total_tasks INT NOT NULL DEFAULT 0,
completed_tasks INT NOT NULL DEFAULT 0,
aggregate JSONB,
error TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Lookup of runs by suite, by status, and by target (provider_id, model).
CREATE INDEX IF NOT EXISTS idx_eval_runs_suite_id
ON eval_runs (suite_id);
CREATE INDEX IF NOT EXISTS idx_eval_runs_status
ON eval_runs (status);
CREATE INDEX IF NOT EXISTS idx_eval_runs_provider_model
ON eval_runs (provider_id, model);
-- Per-task eval results: score, judge rationale, sandbox exit info.
-- task_id / task_index identify the task within the run's suite; all result
-- columns are nullable.
CREATE TABLE IF NOT EXISTS eval_results (
id BIGSERIAL PRIMARY KEY,
run_id TEXT NOT NULL REFERENCES eval_runs(id),
task_id TEXT NOT NULL,
task_index INT NOT NULL,
score REAL,
max_score REAL,
rationale TEXT,
sandbox_exit_code INT,
sandbox_stderr TEXT,
sandbox_stdout TEXT,
execution_ms INT,
error TEXT
);
-- Lookup of all results belonging to a run.
CREATE INDEX IF NOT EXISTS idx_eval_results_run_id
ON eval_results (run_id);
-- P6.2: Generated fleet reports (markdown digest + JSONB stats).
-- Each report covers the window [period_start, period_end]; `interval` records
-- the cadence it was generated for (the reports API generates 'daily' or 'weekly').
CREATE TABLE IF NOT EXISTS control_reports (
id TEXT PRIMARY KEY,
kind TEXT NOT NULL DEFAULT 'digest',
interval TEXT NOT NULL DEFAULT 'daily',
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
markdown TEXT NOT NULL,
stats JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Backs the newest-first report listing (ORDER BY created_at DESC).
CREATE INDEX IF NOT EXISTS idx_control_reports_created
ON control_reports (created_at DESC);
-- P6.2: Scheduler metadata for the in-process report timer. Single row keyed by
-- schedule name; last_run_at drives catch-up-on-boot (same pattern as retention).
-- The reports API reads and updates the row named 'report-digest'.
CREATE TABLE IF NOT EXISTS control_schedule_meta (
name TEXT PRIMARY KEY,
interval TEXT NOT NULL DEFAULT 'daily',
enabled BOOLEAN NOT NULL DEFAULT true,
last_run_at TIMESTAMPTZ
);
-- Seed the digest schedule once; an existing row (and its settings) is kept.
INSERT INTO control_schedule_meta (name, interval, enabled)
VALUES ('report-digest', 'daily', true)
ON CONFLICT (name) DO NOTHING;
-- P7.1: Routing policies for the auto:* gateway. `virtual_model` selects which
-- virtual model a policy serves (e.g. 'auto:code'); `candidates` is an ordered
-- list of composite ids ('provider/model'); `fallback` is the last-resort
-- composite id.
-- UNIQUE (virtual_model) allows one policy per virtual model; the policy API
-- upserts with ON CONFLICT (virtual_model) and depends on this constraint.
CREATE TABLE IF NOT EXISTS route_policies (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
virtual_model TEXT NOT NULL,
candidates JSONB NOT NULL,
fallback TEXT,
enabled BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
UNIQUE (virtual_model)
);
-- P7.1/P7.4: Per-dispatch log for the gateway. One row per resolved completion
-- routed through a virtual model, recording the chosen target + outcome.
-- chosen_provider_id / chosen_model are nullable, so a dispatch can be logged
-- without a chosen target; candidates_tried is stored as JSONB.
CREATE TABLE IF NOT EXISTS route_dispatch_log (
id BIGSERIAL PRIMARY KEY,
ts TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
virtual_model TEXT NOT NULL,
chosen_provider_id TEXT,
chosen_model TEXT,
candidates_tried JSONB,
status TEXT NOT NULL,
source TEXT,
error TEXT,
duration_ms INT
);
-- Back the dispatch-log API: newest-first overall, and newest-first per virtual model.
CREATE INDEX IF NOT EXISTS idx_route_dispatch_log_ts
ON route_dispatch_log (ts DESC);
CREATE INDEX IF NOT EXISTS idx_route_dispatch_log_virtual
ON route_dispatch_log (virtual_model, ts DESC);

View File

@@ -0,0 +1,194 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { ActionQueue } from '../action-queue.js';
import type { ActionQueueDeps, QueuedAction } from '../action-queue.js';
/**
 * Unit tests for ActionQueue: submission gating (offline host, full queue,
 * unconfirmed unload while requests are in flight) and per-host state reporting.
 */
describe('ActionQueue', () => {
  /** Payload shape accepted by `ActionQueue.submit`. */
  type Submission = Parameters<ActionQueue['submit']>[0];

  let queue: ActionQueue;
  let deps: ActionQueueDeps;

  /**
   * Builds a submission payload. `model` is only present on the object when it
   * is supplied; `confirmed` defaults to false.
   */
  const buildAction = (
    actionId: string,
    type: Submission['type'],
    providerId: string,
    extra: { model?: string; confirmed?: boolean } = {},
  ): Submission => ({
    actionId,
    type,
    providerId,
    ...(extra.model === undefined ? {} : { model: extra.model }),
    confirmed: extra.confirmed ?? false,
    createdAt: new Date(),
  });

  /** Creates a separate queue with one host registered from `deps` plus overrides. */
  const queueWithHost = (hostId: string, overrides: Partial<ActionQueueDeps>): ActionQueue => {
    const isolated = new ActionQueue();
    isolated.registerHost(hostId, { ...deps, ...overrides });
    return isolated;
  };

  beforeEach(() => {
    // Silent logger stub; child() hands back the same stub.
    const noop = (): void => {};
    const silentLog: Record<string, unknown> = {
      error: noop,
      warn: noop,
      info: noop,
      debug: noop,
      trace: noop,
      fatal: noop,
    };
    silentLog.child = () => silentLog;

    queue = new ActionQueue();
    deps = {
      baseUrl: 'http://test-host:8401',
      isLivenessUp: () => true,
      isInflightRequests: () => 0,
      log: silentLog as any,
    };
    queue.registerHost('host1', deps);
  });

  describe('submit', () => {
    it('rejects submission when host is down', () => {
      const offlineQueue = queueWithHost('down-host', { isLivenessUp: () => false });

      const outcome = offlineQueue.submit(buildAction('a1', 'warm', 'down-host'));

      expect(outcome.ok).toBe(false);
      if (outcome.ok) return;
      expect(outcome.error).toBe('host offline');
    });

    it('rejects submission when queue is full (depth 4)', () => {
      // Fill the queue to capacity
      Array.from({ length: 4 }, (_, slot) => slot).forEach((slot) => {
        const accepted = queue.submit(buildAction(`fill-${slot}`, 'warm', 'host1', { model: 'model1' }));
        expect(accepted.ok).toBe(true);
      });

      // 5th submission should be rejected
      const overflow = queue.submit(buildAction('overflow', 'warm', 'host1', { model: 'model1' }));

      expect(overflow.ok).toBe(false);
      if (overflow.ok) return;
      expect(overflow.error).toContain('queue full');
      expect(overflow.pending).toHaveLength(4);
    });

    // Only the result object is asserted here (error text + requiresConfirmation);
    // no HTTP status code is checked despite the "409" in the title.
    it('returns 409 with requiresConfirmation for unload during inflight', () => {
      const busyQueue = queueWithHost('busy-host', { isInflightRequests: () => 5 });

      const outcome = busyQueue.submit(buildAction('unload-1', 'unload', 'busy-host'));

      expect(outcome.ok).toBe(false);
      if (outcome.ok) return;
      expect(outcome.error).toBe('bench in progress');
      expect(outcome.requiresConfirmation).toBe(true);
    });

    it('allows confirmed unload during inflight', () => {
      const busyQueue = queueWithHost('busy-host', { isInflightRequests: () => 5 });

      const outcome = busyQueue.submit(
        buildAction('unload-confirmed', 'unload', 'busy-host', { confirmed: true }),
      );

      expect(outcome.ok).toBe(true);
    });

    it('accepts a warm action when queue has capacity', () => {
      const outcome = queue.submit(buildAction('warm-1', 'warm', 'host1', { model: 'llama3' }));
      expect(outcome.ok).toBe(true);
    });
  });

  describe('getState', () => {
    it('returns null for unknown host', () => {
      expect(queue.getState('unknown')).toBeNull();
    });

    it('returns state with entries after submission', () => {
      queue.submit(buildAction('test-1', 'warm', 'host1', { model: 'llama3' }));

      const snapshot = queue.getState('host1');

      expect(snapshot).not.toBeNull();
      expect(snapshot!.queue.length).toBe(1);
      const [head] = snapshot!.queue;
      expect(head.action.actionId).toBe('test-1');
      // Status transitions to 'running' as processNext kicks off asynchronously
      expect(['pending', 'running']).toContain(head.status);
    });
  });

  describe('processNext (stale action skip)', () => {
    // NOTE(review): this test only asserts that the entry is still queued after the
    // host flips to down; it never observes the skip itself. Confirm whether a
    // stronger assertion is possible once processNext can be awaited.
    it('skips an action when host goes down during processing', async () => {
      let hostIsUp = true;
      const flakyQueue = queueWithHost('flaky-host', { isLivenessUp: () => hostIsUp });

      // Submit an action
      flakyQueue.submit(buildAction('stale-1', 'warm', 'flaky-host', { model: 'llama3' }));

      // Turn host down before processing
      hostIsUp = false;

      // The queue processor will skip the action
      // We can't easily test the async processNext directly, but we can verify
      // the state reflects the skip logic by checking the queue state
      const snapshot = flakyQueue.getState('flaky-host');
      expect(snapshot).not.toBeNull();
      expect(snapshot!.queue.length).toBe(1);
      // The entry is still pending; processNext would mark it skipped
    });
  });
});

View File

@@ -0,0 +1,300 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { parseLlamaTimings, computeAggregates, runSingleBenchRequest } from '../../index.js';
import { computeRegressionFlag } from '../bench-engine.js';
import { createFleetState, ensureHostState } from '../fleet-state.js';
import { createDeltaEmitter } from '../../index.js';
import type { Sql } from '../../db.js';
import type { Config } from '../../config.js';
import type { BenchSuite } from '../bench-engine.js';
// ─── parseLlamaTimings tests ────────────────────────────────────────────────
/** Tests for parseLlamaTimings: extracting the `timings` object from a stream chunk. */
describe('parseLlamaTimings', () => {
  /** Serialises a payload as a `data: `-prefixed SSE line (compact JSON, no spaces). */
  const sseLine = (payload: unknown): string => `data: ${JSON.stringify(payload)}`;

  it('parses timings from a standard llama.cpp chunk', () => {
    const parsed = parseLlamaTimings(
      sseLine({
        choices: [],
        timings: { prompt_per_second: 150, predicted_per_second: 80, cache_n: 50 },
      }),
    );

    expect(parsed).not.toBeNull();
    expect(parsed!.promptPerSecond).toBe(150);
    expect(parsed!.predictedPerSecond).toBe(80);
    expect(parsed!.cacheN).toBe(50);
  });

  it('parses timings without data: prefix', () => {
    const bareJson = JSON.stringify({
      timings: { prompt_per_second: 200, predicted_per_second: 100, cache_n: 0 },
    });

    const parsed = parseLlamaTimings(bareJson);

    expect(parsed).not.toBeNull();
    expect(parsed!.promptPerSecond).toBe(200);
  });

  it('returns null for [DONE] chunk', () => {
    const parsed = parseLlamaTimings('data: [DONE]');
    expect(parsed).toBeNull();
  });

  it('returns null for chunk without timings', () => {
    const contentOnly = sseLine({ choices: [{ delta: { content: 'hello' } }] });
    expect(parseLlamaTimings(contentOnly)).toBeNull();
  });

  it('returns null for malformed JSON', () => {
    const parsed = parseLlamaTimings('data: not-json');
    expect(parsed).toBeNull();
  });
});
// ─── computeAggregates tests ────────────────────────────────────────────────
/** Tests for computeAggregates: averages, medians, p95 TTFT and error counting. */
describe('computeAggregates', () => {
  /** A successful sample carrying only the fields these tests read. */
  const okSample = (ttftMs: number, genTps: number, promptTps: number) =>
    ({ ttftMs, genTps, promptTps, error: null }) as any;

  /** `count` successful samples: ttft 100, 200, ... / gen 50, 100, ... / prompt 100, 200, ... */
  const ramp = (count: number) =>
    Array.from({ length: count }, (_, idx) => okSample(100 * (idx + 1), 50 * (idx + 1), 100 * (idx + 1)));

  it('returns nulls for empty samples', () => {
    const stats = computeAggregates([]);
    expect(stats.totalSamples).toBe(0);
    expect(stats.avgTtftMs).toBeNull();
    expect(stats.avgGenTps).toBeNull();
  });

  it('computes averages correctly', () => {
    const stats = computeAggregates(ramp(3));
    expect(stats.avgTtftMs).toBe(200);
    expect(stats.avgGenTps).toBe(100);
    expect(stats.avgPromptTps).toBe(200);
    expect(stats.totalSamples).toBe(3);
    expect(stats.errorSamples).toBe(0);
  });

  it('computes median correctly for odd count', () => {
    const stats = computeAggregates(ramp(3));
    expect(stats.medianTtftMs).toBe(200);
    expect(stats.medianGenTps).toBe(100);
  });

  it('computes median correctly for even count', () => {
    const stats = computeAggregates(ramp(4));
    expect(stats.medianTtftMs).toBe(250);
    expect(stats.medianGenTps).toBe(125);
  });

  it('computes p95 TTFT', () => {
    // TTFT values 10, 20, ..., 200 with constant throughput.
    const twenty = Array.from({ length: 20 }, (_, idx) => okSample((idx + 1) * 10, 50, 100)) as any[];
    const stats = computeAggregates(twenty);
    // Precision -1 accepts anything within 5 of 190, so the assertion does not
    // pin a specific percentile interpolation method.
    expect(stats.p95TtftMs).toBeCloseTo(190, -1);
  });

  it('filters out null values', () => {
    const failed = { ttftMs: null, genTps: null, promptTps: null, error: 'timeout' } as any;
    const stats = computeAggregates([okSample(100, 50, 100), failed]);
    expect(stats.avgTtftMs).toBe(100);
    expect(stats.errorSamples).toBe(1);
  });
});
// ─── bench runner pipeline test (mock fetch + real functions) ────────────────
/**
 * Exercises runSingleBenchRequest against a stubbed global `fetch`: a successful
 * streamed response, an HTTP error response, and a rejected fetch.
 */
describe('bench runner pipeline', () => {
  // NOTE(review): mockSql / executedQueries are prepared for every test but no
  // test in this block reads them yet - presumably reserved for upcoming cases.
  let mockSql: Sql;
  let executedQueries: Array<{ query: string; values: unknown[] }>;

  beforeEach(() => {
    executedQueries = [];
    // Tagged-template stub: records the interpolated query text and resolves to [].
    mockSql = Object.assign(
      (strings: TemplateStringsArray, ...values: unknown[]) => {
        const query = strings.reduce((acc: string, s: string, i: number) => acc + s + (values[i] ?? ''), '');
        executedQueries.push({ query, values });
        return Promise.resolve([]);
      },
      {
        json: (v: unknown) => v,
        unsafe: async (q: string) => { executedQueries.push({ query: q, values: [] }); return []; },
      },
    ) as unknown as Sql;
  });

  // Restore the fetch spy after every test, including tests that fail an
  // assertion part-way through, so a stubbed fetch never leaks into later tests.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('runSingleBenchRequest captures TTFT and timings on successful stream', async () => {
    const fakeStream = createFakeStreamResponse([
      'data: {"choices":[{"delta":{"content":"H"}}]}',
      'data: {"choices":[{"delta":{"content":"ello"}}]}',
      'data: {"choices":[],"timings":{"prompt_per_second":150,"predicted_per_second":80,"cache_n":10}}',
      'data: [DONE]',
    ]);
    vi.spyOn(global, 'fetch').mockResolvedValueOnce(fakeStream);

    const sample = await runSingleBenchRequest(
      'http://localhost:8401',
      'test-model',
      10,
      20,
      0,
      0.7,
      0.9,
    );

    expect(sample.error).toBeNull();
    // Wall-clock measurements: only sanity-check the range.
    expect(sample.ttftMs).toBeGreaterThanOrEqual(0);
    expect(sample.ttftMs).toBeLessThan(5000);
    expect(sample.totalMs).toBeGreaterThanOrEqual(0);
    // Throughput and cache figures come from the `timings` chunk of the stream.
    expect(sample.promptTps).toBe(150);
    expect(sample.genTps).toBe(80);
    expect(sample.cacheN).toBe(10);
    expect(sample.promptTokens).toBe(10);
    expect(sample.genTokens).toBe(20);
    expect(sample.repetition).toBe(0);
  });

  it('runSingleBenchRequest captures error on HTTP failure', async () => {
    vi.spyOn(global, 'fetch').mockResolvedValueOnce({
      ok: false,
      status: 500,
      text: async () => 'Internal Server Error',
    } as Response);

    const sample = await runSingleBenchRequest(
      'http://localhost:8401',
      'test-model',
      10,
      20,
      0,
    );

    expect(sample.error).toContain('500');
    expect(sample.ttftMs).toBeNull();
  });

  it('runSingleBenchRequest captures error on fetch exception', async () => {
    vi.spyOn(global, 'fetch').mockRejectedValueOnce(new Error('ECONNREFUSED'));

    const sample = await runSingleBenchRequest(
      'http://localhost:8401',
      'test-model',
      10,
      20,
      0,
    );

    expect(sample.error).toContain('ECONNREFUSED');
  });
});
// ─── helper: create a fake streaming Response ────────────────────────────────
/**
 * Builds a 200 `text/event-stream` Response whose body emits one entry of
 * `lines` per pull, each followed by a blank line, and closes once the entries
 * are exhausted.
 *
 * @param lines Stream lines to emit, in order.
 * @returns A Response wrapping the generated stream.
 */
function createFakeStreamResponse(lines: string[]): Response {
  const utf8 = new TextEncoder();
  const remaining = lines[Symbol.iterator]();

  const body = new ReadableStream({
    async pull(controller) {
      const step = remaining.next();
      if (step.done) {
        controller.close();
        return;
      }
      controller.enqueue(utf8.encode(`${step.value}\n\n`));
      // Small delay to simulate network latency for TTFT measurement
      await new Promise((resolve) => setTimeout(resolve, 5));
    },
  });

  return new Response(body, {
    status: 200,
    headers: { 'Content-Type': 'text/event-stream' },
  });
}
// ─── computeRegressionFlag tests (A1) ────────────────────────────────────────
/**
 * Tests for computeRegressionFlag: comparing current aggregates with a
 * JSON-encoded baseline.
 */
describe('computeRegressionFlag', () => {
  /** Aggregates computed from a single successful sample. */
  const aggregateOf = (ttftMs: number, genTps: number, promptTps: number) =>
    computeAggregates([{ ttftMs, genTps, promptTps, error: null } as any]);

  /** JSON baseline with the given avgGenTps, avgTtftMs 100 and one sample. */
  const baselineWith = (avgGenTps: number): string =>
    JSON.stringify({ avgGenTps, avgTtftMs: 100, totalSamples: 1 });

  it('returns baseline for first run (no baseline)', () => {
    const now = aggregateOf(100, 80, 150);
    expect(computeRegressionFlag(now, undefined)).toBe('baseline');
  });

  it('returns regression when gen tok/s drops below -10%', () => {
    const now = aggregateOf(200, 70, 100);
    expect(computeRegressionFlag(now, baselineWith(100))).toBe('regression');
  });

  it('returns improvement when gen tok/s rises above +5%', () => {
    const now = aggregateOf(80, 120, 200);
    expect(computeRegressionFlag(now, baselineWith(100))).toBe('improvement');
  });

  it('returns baseline when within threshold', () => {
    const now = aggregateOf(100, 98, 150);
    expect(computeRegressionFlag(now, baselineWith(100))).toBe('baseline');
  });

  it('returns null for divide-by-zero (N5: baseline avgGenTps is 0)', () => {
    const now = aggregateOf(100, 50, 100);
    expect(computeRegressionFlag(now, baselineWith(0))).toBeNull();
  });

  it('returns null for null current avgGenTps', () => {
    // Aggregates over no samples carry a null avgGenTps.
    const now = computeAggregates([]);
    const partialBaseline = JSON.stringify({ avgGenTps: 100 });
    expect(computeRegressionFlag(now, partialBaseline)).toBeNull();
  });

  it('returns null for malformed baseline JSON', () => {
    const now = aggregateOf(100, 80, 150);
    expect(computeRegressionFlag(now, 'not-json')).toBeNull();
  });
});

View File

@@ -0,0 +1,60 @@
import { describe, it, expect } from 'vitest';
import { parseCapture } from '../capture-fetch.js';
/** Tests for parseCapture: base64 body decoding and the 256KB truncation cap. */
describe('parseCapture', () => {
  const CAPTURED_AT = '2024-01-01T00:00:00Z';

  /** Base64-encodes UTF-8 text the way the capture payloads carry bodies. */
  const toBase64 = (text: string): string => Buffer.from(text).toString('base64');

  it('trims response body when total exceeds 256KB cap', () => {
    const oversizedResponse = 'y'.repeat(300_000);

    const parsed = parseCapture({
      request_headers: { 'Content-Type': 'application/json' },
      response_headers: {},
      request_body: toBase64('x'.repeat(100_000)),
      response_body: toBase64(oversizedResponse),
      timestamp: CAPTURED_AT,
      model: 'test-model',
      duration_ms: 100,
    }, 'host1', 1);

    expect(parsed.responseBody).toContain('[truncated: capture exceeds 256KB cap]');
    // The extra 100 bytes leave room for the truncation marker itself.
    const combinedBytes = Buffer.byteLength(parsed.requestBody + parsed.responseBody);
    expect(combinedBytes).toBeLessThanOrEqual(256 * 1024 + 100);
  });

  it('does not trim when under cap', () => {
    const parsed = parseCapture({
      request_headers: {},
      response_headers: {},
      request_body: toBase64('small request'),
      response_body: toBase64('small response'),
      timestamp: CAPTURED_AT,
      model: 'test-model',
      duration_ms: 50,
    }, 'host1', 2);

    expect(parsed.requestBody).toBe('small request');
    expect(parsed.responseBody).toBe('small response');
    expect(parsed.responseBody).not.toContain('[truncated');
  });

  it('handles missing base64 bodies gracefully', () => {
    const parsed = parseCapture({ timestamp: CAPTURED_AT }, 'host1', 3);

    expect(parsed.requestBody).toBe('');
    expect(parsed.responseBody).toBe('');
  });

  // NOTE(review): despite the title, only valid base64 is fed in below; the
  // invalid-base64 path is described by the comment but not exercised.
  it('decodes base64 (invalid base64 produces binary, not raw string)', () => {
    // Buffer.from(str, 'base64') does not throw on invalid base64 —
    // it decodes what it can. The catch block only triggers on actual
    // Buffer.from exceptions, which are rare.
    const parsed = parseCapture({
      request_body: toBase64('valid json'),
      response_body: toBase64('{"result": true}'),
      timestamp: CAPTURED_AT,
    }, 'host1', 4);

    expect(parsed.requestBody).toBe('valid json');
    expect(parsed.responseBody).toBe('{"result": true}');
  });
});

View File

@@ -0,0 +1,50 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { loadEvalSuitesFromData } from '../../index.js';
// ─── loadEvalSuitesFromData tests ───────────────────────────────────────────
/** Tests for loadEvalSuitesFromData against the suite definitions shipped under data/. */
describe('loadEvalSuitesFromData', () => {
  /** Loads all suites and returns the one with the given id, if any. */
  const suiteById = (id: string) => loadEvalSuitesFromData().find((suite) => suite.id === id);

  it('loads suites from data/ YAML files', () => {
    const loaded = loadEvalSuitesFromData();
    expect(loaded.length).toBeGreaterThanOrEqual(4);

    const loadedIds = loaded.map((suite) => suite.id);
    for (const expectedId of ['agent-coding', 'chat-quality', 'long-context-retrieval', 'utility-calls']) {
      expect(loadedIds).toContain(expectedId);
    }
  });

  it('loads code suite with correct structure', () => {
    const coding = suiteById('agent-coding');
    expect(coding).not.toBeUndefined();
    expect(coding!.kind).toBe('code');
    expect(coding!.tasks.length).toBeGreaterThan(0);

    const firstTask = coding!.tasks[0] as Record<string, unknown>;
    expect(firstTask.id).toBeDefined();
    expect(firstTask.prompt).toBeDefined();
    expect(firstTask.test_code).toBeDefined();
    expect(firstTask.expected_output).toBeDefined();
    expect(firstTask.language).toBe('typescript');
  });

  it('loads chat suite with rubric structure', () => {
    const chat = suiteById('chat-quality');
    expect(chat).not.toBeUndefined();
    expect(chat!.kind).toBe('chat');

    const firstTask = chat!.tasks[0] as Record<string, unknown>;
    expect(firstTask.rubric).toBeDefined();
    const rubric = firstTask.rubric as Record<string, unknown>;
    expect(rubric.max_score).toBeGreaterThan(0);
  });

  it('handles missing data/ directory gracefully', () => {
    // The function catches errors and returns empty array.
    // We can't easily test this without mocking fs, but the try-catch is there.
    // As written, this only checks that an array comes back.
    const loaded = loadEvalSuitesFromData();
    expect(Array.isArray(loaded)).toBe(true);
  });
});

View File

@@ -0,0 +1,82 @@
import { describe, it, expect } from 'vitest';
import { addJitter, reconnectDecision, DEFAULT_RECONNECT_POLICY } from '../fleet-connector.js';
/** Tests for addJitter: the result stays within [delay, 1.5 * delay] and varies between calls. */
describe('addJitter', () => {
  const BASE_DELAY_MS = 1000;

  it('returns a value >= the input delay', () => {
    expect(addJitter(BASE_DELAY_MS)).toBeGreaterThanOrEqual(1000);
  });

  it('returns a value <= 1.5x the input delay', () => {
    expect(addJitter(BASE_DELAY_MS)).toBeLessThanOrEqual(1500);
  });

  it('0ms delay stays 0ms', () => {
    const jittered = addJitter(0);
    expect(jittered).toBe(0);
  });

  it('returns different values on repeated calls (stochastic)', () => {
    // Twenty draws collapsed into a set: more than one distinct value is expected.
    const distinct = new Set<number>(Array.from({ length: 20 }, () => addJitter(BASE_DELAY_MS)));
    expect(distinct.size).toBeGreaterThan(1);
  });
});
/**
 * Tests for reconnectDecision: exponential back-off with jitter, the maxMs cap,
 * and giving up after maxAttempts.
 */
describe('reconnectDecision', () => {
  const { baseMs, maxMs, maxAttempts } = DEFAULT_RECONNECT_POLICY;

  /** Asserts a 'reconnect' decision whose delay lies in [minMs, maxDelayMs]. */
  const expectReconnectBetween = (
    decision: ReturnType<typeof reconnectDecision>,
    minMs: number,
    maxDelayMs: number,
  ): void => {
    expect(decision.action).toBe('reconnect');
    expect(decision.delayMs).toBeGreaterThanOrEqual(minMs);
    expect(decision.delayMs).toBeLessThanOrEqual(maxDelayMs);
  };

  it('first failure returns baseMs with jitter', () => {
    expectReconnectBetween(reconnectDecision(1), baseMs, baseMs * 1.5);
  });

  it('exponential growth: failure 2 returns 2x baseMs with jitter', () => {
    expectReconnectBetween(reconnectDecision(2), baseMs * 2, baseMs * 3);
  });

  it('exponential growth: failure 3 returns 4x baseMs with jitter', () => {
    expectReconnectBetween(reconnectDecision(3), baseMs * 4, baseMs * 6);
  });

  it('capped at maxMs with jitter', () => {
    expectReconnectBetween(reconnectDecision(6), maxMs, maxMs * 1.5);
  });

  it('gives up after maxAttempts', () => {
    const beyondLimit = reconnectDecision(maxAttempts + 1);
    expect(beyondLimit).toEqual({ action: 'give-up' });
  });

  it('custom policy works with jitter', () => {
    const policy = { baseMs: 500, maxMs: 5000, maxAttempts: 3 };

    // [failure count, minimum delay, maximum delay]
    const expectations: Array<[number, number, number]> = [
      [1, 500, 750],
      [2, 1000, 1500],
      [3, 2000, 3000],
    ];
    for (const [failures, minMs, maxDelayMs] of expectations) {
      expectReconnectBetween(reconnectDecision(failures, policy), minMs, maxDelayMs);
    }

    expect(reconnectDecision(4, policy)).toEqual({ action: 'give-up' });
  });
});

View File

@@ -0,0 +1,42 @@
import { describe, it, expect } from 'vitest';
import { createFleetState, ensureHostState, stampLastSeen } from '../fleet-state.js';
/** A freshly created fleet state has no hosts. */
describe('createFleetState', () => {
  it('creates an empty fleet', () => {
    const { hosts } = createFleetState();
    expect(hosts.size).toBe(0);
  });
});
/** Tests for ensureHostState (get-or-create per provider id) and stampLastSeen. */
describe('ensureHostState', () => {
  const HOST_ID = 'test-host';

  /** Host state for HOST_ID inside a brand-new fleet. */
  const freshHost = () => ensureHostState(createFleetState(), HOST_ID);

  it('creates a new host state if none exists', () => {
    const host = freshHost();
    expect(host.providerId).toBe('test-host');
    expect(host.liveness).toBe('down');
    expect(host.lastSeenAt).toBeNull();
    expect(host.seq).toBe(0);
    expect(host.models.size).toBe(0);
  });

  it('returns existing host state', () => {
    const fleet = createFleetState();
    const first = ensureHostState(fleet, HOST_ID);
    const second = ensureHostState(fleet, HOST_ID);
    // Same object identity, not merely an equal copy.
    expect(first).toBe(second);
  });

  it('seq is 0 on first call', () => {
    expect(freshHost().seq).toBe(0);
  });

  it('stamps lastSeenAt on connection', () => {
    const host = freshHost();
    expect(host.lastSeenAt).toBeNull();

    stampLastSeen(host);

    expect(host.lastSeenAt).not.toBeNull();
  });
});

View File

@@ -0,0 +1,92 @@
import { describe, it, expect } from 'vitest';
import {
isGatewayVirtualModel,
parseVirtualModel,
orderCandidates,
splitComposite,
} from '../gateway.js';
import type { ModelScore } from '../routing-scores.js';
/**
 * Builds a ModelScore fixture for a 'provider/model' composite id.
 *
 * The provider is the text before the first '/', the model is everything after
 * it. All metrics default to null, sampleCount to 0, healthy to true and badges
 * to an empty list; `partial` overrides any of these.
 */
function score(compositeId: string, partial: Partial<ModelScore> = {}): ModelScore {
  const [providerId, ...modelSegments] = compositeId.split('/');
  const defaults: ModelScore = {
    compositeId,
    providerId: providerId!,
    model: modelSegments.join('/'),
    codeScore: null,
    chatScore: null,
    evalScore: null,
    avgGenTps: null,
    avgLatencyMs: null,
    sampleCount: 0,
    healthy: true,
    badges: [],
  };
  return { ...defaults, ...partial };
}
/** isGatewayVirtualModel accepts 'auto' and 'auto:*' and rejects everything else. */
describe('isGatewayVirtualModel', () => {
  it('matches auto and auto:* tokens', () => {
    for (const virtualName of ['auto', 'auto:code', 'auto:fast']) {
      expect(isGatewayVirtualModel(virtualName)).toBe(true);
    }
  });

  it('does not match ordinary models', () => {
    // 'autobahn' starts with 'auto' but is not a virtual model token.
    for (const ordinaryName of ['qwopus-35b', 'autobahn']) {
      expect(isGatewayVirtualModel(ordinaryName)).toBe(false);
    }
  });
});
/** parseVirtualModel yields the bare virtual model token, with or without a provider prefix. */
describe('parseVirtualModel', () => {
  it('strips a gateway provider prefix', () => {
    const parsed = parseVirtualModel('auto/auto:code');
    expect(parsed).toBe('auto:code');
  });

  it('passes a bare virtual model through', () => {
    const parsed = parseVirtualModel('auto:fast');
    expect(parsed).toBe('auto:fast');
  });
});
/** splitComposite breaks 'provider/model' into its parts and returns null for ids without a provider. */
describe('splitComposite', () => {
  it('splits provider/model', () => {
    const parts = splitComposite('sam-desktop/qwopus-35b');
    expect(parts).toEqual({ providerId: 'sam-desktop', model: 'qwopus-35b' });
  });

  it('returns null for a bare id', () => {
    const parts = splitComposite('qwopus-35b');
    expect(parts).toBeNull();
  });
});
/**
 * Tests for orderCandidates: score-based ordering when no policy is given, and
 * policy-driven ordering with health filtering and fallback handling.
 */
describe('orderCandidates', () => {
  it('orders auto:code by code score among healthy hosts', () => {
    const known = [
      score('a/m1', { codeScore: 0.6 }),
      score('a/m2', { codeScore: 0.9 }),
      score('a/m3', { codeScore: 0.7, healthy: false }),
    ];

    const ranked = orderCandidates('auto:code', null, known);

    // a/m3 is unhealthy and therefore absent despite its score.
    expect(ranked).toEqual(['a/m2', 'a/m1']);
  });

  it('orders auto:fast by throughput', () => {
    const known = [score('a/slow', { avgGenTps: 10 }), score('a/fast', { avgGenTps: 50 })];

    const ranked = orderCandidates('auto:fast', null, known);

    expect(ranked).toEqual(['a/fast', 'a/slow']);
  });

  it('honors an explicit policy order and appends the fallback', () => {
    const known = ['a/m1', 'a/m2', 'a/fb'].map((id) => score(id));
    const policy = { candidates: ['a/m2', 'a/m1'], fallback: 'a/fb' };

    const ranked = orderCandidates('auto:code', policy, known);

    expect(ranked).toEqual(['a/m2', 'a/m1', 'a/fb']);
  });

  it('drops policy candidates whose host is unhealthy', () => {
    const known = [score('a/m1', { healthy: false }), score('a/m2', { healthy: true })];
    const policy = { candidates: ['a/m1', 'a/m2'], fallback: null };

    const ranked = orderCandidates('auto:code', policy, known);

    expect(ranked).toEqual(['a/m2']);
  });

  it('keeps a never-seen policy candidate (unknown health) for dispatch to try', () => {
    const known = [score('a/known', { healthy: true })];
    const policy = { candidates: ['a/never-seen', 'a/known'], fallback: null };

    const ranked = orderCandidates('auto:code', policy, known);

    expect(ranked).toEqual(['a/never-seen', 'a/known']);
  });
});

View File

@@ -0,0 +1,60 @@
import { describe, it, expect } from 'vitest';
import { jsonbStringArray, jsonbArray, jsonbNumberArray, jsonbObject } from '../jsonb.js';
/** jsonbStringArray normalises a JSONB column value (parsed or raw JSON text) into string[]. */
describe('jsonbStringArray', () => {
  it('passes through an already-parsed array (porsager behavior)', () => {
    const alreadyParsed = ['a', 'b'];
    expect(jsonbStringArray(alreadyParsed)).toEqual(['a', 'b']);
  });

  it('parses a JSON string array', () => {
    const rawJson = '["a","b"]';
    expect(jsonbStringArray(rawJson)).toEqual(['a', 'b']);
  });

  it('filters non-strings out of a parsed array', () => {
    const mixed = ['a', 1, null, 'b'];
    expect(jsonbStringArray(mixed)).toEqual(['a', 'b']);
  });

  it('returns [] for null / invalid', () => {
    const unusable: unknown[] = [null, 'not json', {}];
    for (const input of unusable) {
      expect(jsonbStringArray(input)).toEqual([]);
    }
  });
});
/** jsonbArray normalises a JSONB column value into an array, whatever the element type. */
describe('jsonbArray', () => {
  it('passes through an already-parsed array of objects (eval tasks)', () => {
    const alreadyParsed = [{ id: 't1' }];
    expect(jsonbArray(alreadyParsed)).toEqual([{ id: 't1' }]);
  });

  it('parses a JSON string array', () => {
    const rawJson = '[{"id":"t1"}]';
    expect(jsonbArray(rawJson)).toEqual([{ id: 't1' }]);
  });

  it('returns [] for null / invalid / non-array', () => {
    const unusable: unknown[] = [null, 'nope', {}];
    for (const input of unusable) {
      expect(jsonbArray(input)).toEqual([]);
    }
  });
});
/** jsonbNumberArray normalises a JSONB column value into number[], dropping other element types. */
describe('jsonbNumberArray', () => {
  it('passes through an already-parsed number array (bench token grids)', () => {
    const alreadyParsed = [128, 512];
    expect(jsonbNumberArray(alreadyParsed)).toEqual([128, 512]);
  });

  it('parses a JSON string array and filters non-numbers', () => {
    const rawJson = '[128,"x",512]';
    expect(jsonbNumberArray(rawJson)).toEqual([128, 512]);
  });

  it('returns [] for null / invalid', () => {
    const unusable: unknown[] = [null, 'nope'];
    for (const input of unusable) {
      expect(jsonbNumberArray(input)).toEqual([]);
    }
  });
});
/** jsonbObject normalises a JSONB column value into a plain object, or null when it is not one. */
describe('jsonbObject', () => {
  it('passes through an already-parsed object', () => {
    const alreadyParsed = { a: 1 };
    expect(jsonbObject(alreadyParsed)).toEqual({ a: 1 });
  });

  it('parses a JSON string object', () => {
    const rawJson = '{"a":1}';
    expect(jsonbObject(rawJson)).toEqual({ a: 1 });
  });

  it('returns null for arrays, null, and invalid', () => {
    const unusable: unknown[] = [[1, 2], null, 'nope'];
    for (const input of unusable) {
      expect(jsonbObject(input)).toBeNull();
    }
  });
});

View File

@@ -0,0 +1,55 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
// ─── Judge runner tests (mock sql + real functions) ─────────────────────────
/**
 * Smoke tests for the judge runner module: it exposes runJudgeEval, and
 * runJudgeEval reports an error result when the provider has no base URL.
 */
describe('judge runner', () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  // NOTE(review): the title predates the assertion - this only checks that the
  // module loads and exports runJudgeEval as a function.
  it('runJudgeError', async () => {
    // Test that the judge runner imports correctly and has the expected interface.
    const mod = await import('../judge-runner.js');
    expect(typeof mod.runJudgeEval).toBe('function');
  });

  // NOTE(review): generateResponse is never reached here; the assertion covers
  // the early "no base URL" error returned for an unknown provider.
  it('generateResponse rejects on bad URL', async () => {
    // The generateResponse function is internal, but we can test the public API.
    const { runJudgeEval } = await import('../judge-runner.js');

    // Mock sql operations. Object.assign attaches `tag` without assigning an
    // undeclared property onto the mock type (which fails type-checking).
    const mockSql = Object.assign(vi.fn().mockResolvedValue([]), {
      tag: vi.fn().mockReturnValue({ SQL: '' }),
    });
    const mockEmitter = {
      publish: vi.fn(),
    };
    const mockLogger = {
      info: vi.fn(),
      warn: vi.fn(),
      error: vi.fn(),
    };
    const progressHandler = vi.fn();

    // This will fail because resolveProviderBaseUrl returns null for unknown provider.
    const result = await runJudgeEval(
      {
        runId: 'test_run',
        providerId: 'nonexistent-provider',
        model: 'test-model',
        quant: null,
        tasks: [],
        judgeModel: null,
      },
      mockSql as unknown as import('../../db.js').Sql,
      mockEmitter as unknown as import('../../index.js').DeltaEmitter,
      0,
      mockLogger as unknown as import('fastify').FastifyBaseLogger,
      progressHandler,
    );

    expect(result.error).toContain('no base URL');
  });
});

View File

@@ -0,0 +1,102 @@
import { describe, it, expect } from 'vitest';
import type { HostState } from '../fleet-state.js';
type Liveness = 'connected' | 'reconnecting' | 'down';
type LivenessEvent = 'connect' | 'disconnect' | 'reconnect_attempt' | 'reconnect_success';

/** Liveness reached after each event. */
const LIVENESS_AFTER: Record<LivenessEvent, Liveness> = {
  connect: 'connected',
  disconnect: 'down',
  reconnect_attempt: 'reconnecting',
  reconnect_success: 'connected',
};

/**
 * Reference transition function for the liveness state machine, local to this
 * test file.
 *
 * @param current Liveness before the event; the result does not depend on it.
 * @param event The connection event being applied.
 * @returns The liveness after the event.
 */
function transitionLiveness(current: Liveness, event: LivenessEvent): Liveness {
  return LIVENESS_AFTER[event];
}
/**
 * Walks the liveness transitions on a HostState fixture using the
 * transitionLiveness helper defined in this file.
 */
describe('liveness state machine', () => {
  /** HostState fixture for provider 'test' with the given liveness. */
  const hostIn = (
    liveness: HostState['liveness'],
    lastSeenAt: HostState['lastSeenAt'] = null,
  ): HostState => ({
    providerId: 'test',
    liveness,
    lastSeenAt,
    seq: 0,
    models: new Map(),
  });

  it('starts as down', () => {
    const host = hostIn('down');
    expect(host.liveness).toBe('down');
  });

  it('connect -> connected', () => {
    const host = hostIn('down');
    host.liveness = transitionLiveness(host.liveness, 'connect');
    expect(host.liveness).toBe('connected');
  });

  it('connected -> down on disconnect', () => {
    const host = hostIn('connected', new Date());
    host.liveness = transitionLiveness(host.liveness, 'disconnect');
    expect(host.liveness).toBe('down');
  });

  it('down -> reconnecting on reconnect attempt', () => {
    const host = hostIn('down');
    host.liveness = transitionLiveness(host.liveness, 'reconnect_attempt');
    expect(host.liveness).toBe('reconnecting');
  });

  it('reconnecting -> connected on reconnect success', () => {
    const host = hostIn('reconnecting');
    host.liveness = transitionLiveness(host.liveness, 'reconnect_success');
    expect(host.liveness).toBe('connected');
  });

  it('connected -> reconnecting on reconnect attempt', () => {
    const host = hostIn('connected', new Date());
    host.liveness = transitionLiveness(host.liveness, 'reconnect_attempt');
    expect(host.liveness).toBe('reconnecting');
  });

  it('reconnecting -> down on reconnect failure', () => {
    // A failed reconnect is modelled as a 'disconnect' event.
    const host = hostIn('reconnecting');
    host.liveness = transitionLiveness(host.liveness, 'disconnect');
    expect(host.liveness).toBe('down');
  });
});

View File

@@ -0,0 +1,115 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { writeFileSync, unlinkSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from '../llama-providers.js';
/**
 * Writes a providers JSON fixture to a uniquely named file in the OS temp
 * directory and returns its path. The caller is responsible for deleting it.
 *
 * The first provider's id becomes `defaultProvider`, so `providers` must not be
 * empty. Providers without a `kind` get 'llama-swap'.
 *
 * @param providers Provider entries to serialise.
 * @returns Absolute path of the written fixture file.
 */
function loadFixture(
  providers: Array<{ id: string; label: string; baseUrl: string; kind?: string }>,
): string {
  const defaultProvider = providers[0]!.id;
  const normalised = providers.map((entry) => ({ ...entry, kind: entry.kind ?? 'llama-swap' }));
  const contents = JSON.stringify({ defaultProvider, providers: normalised });

  const uniqueSuffix = Math.random().toString(36).slice(2);
  const fixturePath = join(tmpdir(), `llama-providers-test-${uniqueSuffix}.json`);
  writeFileSync(fixturePath, contents, 'utf8');
  return fixturePath;
}
/**
 * Tests for loadLlamaProviders: reading a providers file and falling back to a
 * single legacy 'llama-swap' provider when the file is missing, unspecified or
 * not valid JSON.
 */
describe('loadLlamaProviders', () => {
  const LEGACY_URL = 'http://legacy.test:8080';

  // Console spies installed by individual tests are restored here, even when a
  // test fails before reaching its own cleanup.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('loads a valid providers file', () => {
    const path = loadFixture([
      { id: 'sam-desktop', label: 'Sam Desktop', baseUrl: 'http://100.101.41.16:8401' },
      { id: 'embedding', label: 'Embedding', baseUrl: 'http://100.90.172.55:8411' },
    ]);
    try {
      const result = loadLlamaProviders(path, LEGACY_URL);
      expect(result.providers).toHaveLength(2);
      expect(result.providers[0]!.id).toBe('sam-desktop');
      expect(result.providers[0]!.baseUrl).toBe('http://100.101.41.16:8401');
      expect(result.providers[1]!.id).toBe('embedding');
      expect(result.providers[1]!.baseUrl).toBe('http://100.90.172.55:8411');
    } finally {
      // Remove the temp fixture even if an assertion above throws.
      unlinkSync(path);
    }
  });

  it('falls back to legacy when file is missing', () => {
    // Silence the expected warning; restored by afterEach.
    vi.spyOn(console, 'warn').mockImplementation(() => {});
    const result = loadLlamaProviders('/nonexistent/path.json', LEGACY_URL);
    expect(result.providers).toHaveLength(1);
    expect(result.providers[0]!.id).toBe('llama-swap');
    expect(result.providers[0]!.baseUrl).toBe('http://legacy.test:8080');
  });

  it('falls back to legacy when path is undefined', () => {
    const result = loadLlamaProviders(undefined, LEGACY_URL);
    expect(result.providers).toHaveLength(1);
    expect(result.providers[0]!.id).toBe('llama-swap');
    expect(result.providers[0]!.baseUrl).toBe('http://legacy.test:8080');
  });

  it('falls back to legacy when JSON is invalid', () => {
    const path = join(tmpdir(), `llama-providers-bad-${Math.random().toString(36).slice(2)}.json`);
    writeFileSync(path, '{not valid json', 'utf8');
    try {
      // Silence the expected error log; restored by afterEach.
      vi.spyOn(console, 'error').mockImplementation(() => {});
      const result = loadLlamaProviders(path, LEGACY_URL);
      expect(result.providers).toHaveLength(1);
      expect(result.providers[0]!.id).toBe('llama-swap');
    } finally {
      // Remove the temp fixture even if an assertion above throws.
      unlinkSync(path);
    }
  });
});
/**
 * getLlamaProviders: returns what the last load produced, and still returns
 * at least one provider when no load has happened.
 */
describe('getLlamaProviders', () => {
  it('returns cached result after load', () => {
    loadLlamaProviders(undefined, 'http://test.example:9999');
    const cached = getLlamaProviders();
    expect(cached.providers[0]!.baseUrl).toBe('http://test.example:9999');
  });

  it('returns legacy fallback when nothing loaded', async () => {
    // The statically imported module has already been loaded by earlier tests,
    // so calling it here would only re-read that result. Re-import a fresh
    // module instance on which loadLlamaProviders has never run, so the
    // nothing-loaded path is the one actually exercised.
    vi.resetModules();
    const fresh = await import('../llama-providers.js');
    const result = fresh.getLlamaProviders();
    expect(result).toBeDefined();
    expect(result.providers.length).toBeGreaterThanOrEqual(1);
  });
});
/**
 * resolveProviderBaseUrl: maps a provider id to its base URL using the most
 * recently loaded providers, and returns null for an id that is not present.
 */
describe('resolveProviderBaseUrl', () => {
  it('resolves baseUrl for a known provider', () => {
    loadLlamaProviders(undefined, 'http://test.example:9999');
    expect(resolveProviderBaseUrl('llama-swap')).toBe('http://test.example:9999');
  });

  it('returns null for unknown provider', () => {
    loadLlamaProviders(undefined, 'http://test.example:9999');
    expect(resolveProviderBaseUrl('nonexistent')).toBeNull();
  });

  it('resolves correct URLs for both seeded providers', () => {
    const path = loadFixture([
      { id: 'sam-desktop', label: 'Sam Desktop', baseUrl: 'http://100.101.41.16:8401' },
      { id: 'embedding', label: 'Embedding', baseUrl: 'http://100.90.172.55:8411' },
    ]);
    try {
      loadLlamaProviders(path, 'http://legacy.test:8080');
      expect(resolveProviderBaseUrl('sam-desktop')).toBe('http://100.101.41.16:8401');
      expect(resolveProviderBaseUrl('embedding')).toBe('http://100.90.172.55:8411');
    } finally {
      // Remove the fixture even when an assertion above throws.
      unlinkSync(path);
    }
  });
});

View File

@@ -0,0 +1,63 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { LogRelay } from '../log-relay.js';
/**
 * LogRelay: per-host log tails with a bounded length, plus aggregate views
 * across hosts (all lines, distinct sources).
 */
describe('LogRelay', () => {
  let relay: LogRelay;

  // Every test starts from an empty relay.
  beforeEach(() => {
    relay = new LogRelay();
  });

  it('appends log lines to per-host tail', () => {
    relay.append('host1', 'proxy', 'connection established');
    relay.append('host1', 'upstream', 'request completed');

    const lines = relay.getTail('host1');
    expect(lines).toHaveLength(2);
    expect(lines[0].source).toBe('proxy');
    expect(lines[1].source).toBe('upstream');
  });

  it('trims tail to MAX_LOG_LINES (2000)', () => {
    // 2500 appends: only the newest 2000 (lines 500..2499) are expected to remain.
    for (let n = 0; n < 2500; n += 1) {
      relay.append('host1', 'proxy', `line ${n}`);
    }

    const lines = relay.getTail('host1');
    const newest = lines[lines.length - 1];
    expect(lines.length).toBe(2000);
    expect(lines[0].line).toBe('line 500');
    expect(newest.line).toBe('line 2499');
  });

  it('returns empty array for unknown host', () => {
    const lines = relay.getTail('unknown');
    expect(lines).toEqual([]);
  });

  it('getAllTails returns lines from all hosts', () => {
    relay.append('host1', 'proxy', 'line1');
    relay.append('host2', 'upstream', 'line2');

    const everything = relay.getAllTails();
    expect(everything).toHaveLength(2);
    expect(everything.map((entry) => entry.providerId)).toContain('host1');
    expect(everything.map((entry) => entry.providerId)).toContain('host2');
  });

  it('getSources returns unique source values', () => {
    relay.append('host1', 'proxy', 'line1');
    relay.append('host1', 'upstream', 'line2');
    relay.append('host2', 'model', 'line3');

    const distinct = relay.getSources();
    for (const name of ['proxy', 'upstream', 'model']) {
      expect(distinct).toContain(name);
    }
    expect(distinct.length).toBe(3);
  });

  it('timestamps are set on each line', () => {
    relay.append('host1', 'proxy', 'test');
    const [first] = relay.getTail('host1');
    expect(first.ts).toBeInstanceOf(Date);
  });
});

View File

@@ -0,0 +1,83 @@
import { describe, it, expect } from 'vitest';
import { validateRepoId, buildPullCommand, runModelPull } from '../model-pull.js';
import type { SshExec, ExecResult } from '../ssh-config.js';
import type { DeltaEmitter } from '../../index.js';
/** validateRepoId: only a single `org/name` pair without unsafe characters is accepted. */
describe('validateRepoId', () => {
  it('accepts org/name', () => {
    const accepted = ['Qwen/Qwen3.5-9B', 'lmstudio-community/model.gguf-q4'];
    for (const repo of accepted) {
      expect(validateRepoId(repo)).toBe(true);
    }
  });

  it('rejects traversal, spaces, metacharacters, and bare names', () => {
    // Path traversal, shell metacharacters, a space, no slash, and too many segments.
    const rejected = ['../etc/passwd', 'a/b; rm -rf /', 'a b/c', 'justname', 'a/b/c'];
    for (const repo of rejected) {
      expect(validateRepoId(repo)).toBe(false);
    }
  });
});
/** buildPullCommand: the command text produced for each pull mode. */
describe('buildPullCommand', () => {
  it('wrapper mode emits the pull verb', () => {
    const command = buildPullCommand('wrapper', 'Qwen/Q3');
    expect(command).toBe('pull Qwen/Q3');
  });

  it('shell mode emits huggingface-cli into a sanitized local dir', () => {
    // The repo's slash is expected to become a double underscore in the target directory.
    const expected = "huggingface-cli download Qwen/Q3 --local-dir '/home/u/models/Qwen__Q3'";
    const command = buildPullCommand('shell', 'Qwen/Q3', '/home/u/models/');
    expect(command).toBe(expected);
  });
});
/**
 * Builds a DeltaEmitter test double that records every published frame.
 *
 * @returns The emitter plus the array its `publish` appends to; `subscribe`
 *          registers nothing and hands back a no-op unsubscribe function.
 */
function emitterSpy(): { emitter: DeltaEmitter; frames: Record<string, unknown>[] } {
  const captured: Record<string, unknown>[] = [];
  const spy: DeltaEmitter = {
    subscribe() {
      return () => {};
    },
    publish(delta) {
      captured.push(delta as Record<string, unknown>);
    },
  };
  return { emitter: spy, frames: captured };
}
/**
 * Builds an SshExec stub that always resolves to `result`.
 *
 * @param result The exec result every invocation resolves with.
 * @returns The stub plus the list of command strings it has been called with, in order.
 */
function execReturning(result: ExecResult): { exec: SshExec; calls: string[] } {
  const issued: string[] = [];
  const stub: SshExec = async (_t, command) => {
    issued.push(command);
    return result;
  };
  return { exec: stub, calls: issued };
}
// SSH target shared by every runModelPull case; the stubbed exec ignores it.
const target = { host: 'h', user: 'u', keyPath: '/k' };

/**
 * runModelPull: validation happens before any command is issued, and the
 * emitted control_job frames reflect the outcome of the remote command.
 */
describe('runModelPull', () => {
  it('rejects an invalid repo id before issuing any command', async () => {
    const { emitter, frames } = emitterSpy();
    const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });

    const outcome = await runModelPull({ jobId: 'j1', target, repo: '../x', mode: 'wrapper' }, exec, emitter);

    expect(outcome.ok).toBe(false);
    expect(calls).toHaveLength(0);
    const lastFrame = frames[frames.length - 1];
    expect(lastFrame).toMatchObject({ type: 'control_job', status: 'failed' });
  });

  it('runs the wrapper pull verb and emits running then completed', async () => {
    const { emitter, frames } = emitterSpy();
    const { exec, calls } = execReturning({ code: 0, stdout: 'done', stderr: '' });

    const outcome = await runModelPull({ jobId: 'j2', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);

    expect(outcome.ok).toBe(true);
    expect(calls).toEqual(['pull Qwen/Q3']);
    const statuses = frames.map((frame) => frame.status);
    expect(statuses).toEqual(['running', 'completed']);
    // Every frame must be tagged as belonging to a pull job.
    const allPull = frames.every((frame) => (frame.detail as { kind?: string }).kind === 'pull');
    expect(allPull).toBe(true);
  });

  it('reports a non-zero exit as failed', async () => {
    const { emitter, frames } = emitterSpy();
    const { exec } = execReturning({ code: 1, stdout: '', stderr: 'no such repo' });

    const outcome = await runModelPull({ jobId: 'j3', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);

    expect(outcome.ok).toBe(false);
    const lastFrame = frames[frames.length - 1];
    expect(lastFrame).toMatchObject({ status: 'failed' });
  });

  it('shell mode without a models dir fails fast', async () => {
    const { emitter } = emitterSpy();
    const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });

    const outcome = await runModelPull({ jobId: 'j4', target, repo: 'Qwen/Q3', mode: 'shell' }, exec, emitter);

    expect(outcome.ok).toBe(false);
    expect(calls).toHaveLength(0);
  });
});

View File

@@ -0,0 +1,337 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { parseSseLine } from '../fleet-connector.js';
import type { LlamaSweepSSEEvent, MetricsEntry, ModelStatusEntry } from '../fleet-connector.js';
import { createFleetState, ensureHostState, incrementSeq } from '../fleet-state.js';
import { createDeltaEmitter, handleLlamaSweepEvent } from '../../index.js';
import type { DeltaEmitter } from '../../index.js';
import type { Sql } from '../../db.js';
import type { Config } from '../../config.js';
// ─── SSE parser tests (REAL wire shapes from apigroup.go) ────────────────────
// Real format: event:message / data:{"type":"<TYPE>","data":"<ESCAPED JSON>"}
/**
 * parseSseLine: only `data:` lines carrying a typed envelope with a
 * JSON-encoded inner payload produce an event; everything else yields null.
 */
describe('parseSseLine (real wire shapes)', () => {
  it('parses double-encoded modelStatus (real full-fleet array payload)', () => {
    const models = [
      { id: 'llama3', name: '', description: '', state: 'ready', unlisted: false, peerID: '' },
    ];
    // The wire format nests JSON inside JSON: the inner array is a string field of the envelope.
    const envelope = JSON.stringify({ type: 'modelStatus', data: JSON.stringify(models) });

    const parsed = parseSseLine(`data: ${envelope}`);

    expect(parsed).not.toBeNull();
    expect(parsed!.type).toBe('modelStatus');
    expect(parsed!.data).toEqual(models);
  });

  it('ignores event: lines (always event:message)', () => {
    const parsed = parseSseLine('event:message');
    expect(parsed).toBeNull();
  });

  it('returns null for data: with missing inner data field', () => {
    const parsed = parseSseLine('data:{"type":"modelStatus"}');
    expect(parsed).toBeNull();
  });

  it('returns null for empty line', () => {
    for (const blank of ['', ' ']) {
      expect(parseSseLine(blank)).toBeNull();
    }
  });

  it('returns null for malformed JSON', () => {
    const parsed = parseSseLine('data: not-json');
    expect(parsed).toBeNull();
  });
});
// ─── Pipeline integration test (real functions) ──────────────────────────────
/**
 * Builds a model-status entry with the given id and state; name, description
 * and peerID are empty strings and the entry is listed (`unlisted: false`).
 */
function apiModel(id: string, state: string): ModelStatusEntry {
  const entry: ModelStatusEntry = {
    id,
    name: '',
    description: '',
    state,
    unlisted: false,
    peerID: '',
  };
  return entry;
}
/**
 * Drives handleLlamaSweepEvent with a recording SQL stub and a real delta
 * emitter, then checks the emitted deltas and the SQL text that was issued.
 */
describe('SSE pipeline: parse -> handleLlamaSweepEvent -> emit deltas', () => {
  let mockSql: Sql;
  let mockConfig: Config;
  let executedQueries: string[];

  beforeEach(() => {
    executedQueries = [];

    // Tagged-template stand-in for the SQL client: interpolates the values
    // into the query text, records it, and resolves to an empty row set.
    const tagged = (strings: TemplateStringsArray, ...values: unknown[]) => {
      let query = '';
      strings.forEach((chunk, i) => {
        query = query + chunk + (values[i] ?? '');
      });
      executedQueries.push(query);
      return Promise.resolve([]);
    };
    const extras = {
      json: (v: unknown) => v,
      unsafe: async (q: string) => {
        executedQueries.push(q);
        return [];
      },
    };
    mockSql = Object.assign(tagged, extras) as unknown as Sql;

    // Only a subset of Config is populated, hence the cast.
    mockConfig = {
      NODE_ENV: 'production',
      PORT: 9503,
      HOST: '127.0.0.1',
      DATABASE_URL: 'postgres://test',
      LOG_LEVEL: 'info',
      RETENTION_RAW_HOURS: 48,
      RETENTION_ROLLUP_DAYS: 90,
      CAPTURE_SIZE_KB: 256,
      CAPTURE_BUDGET_MB: 50,
    } as unknown as Config;
  });

  /** Fresh fleet state, emitter, and the list that collects every published delta. */
  function harness() {
    const fleet = createFleetState();
    const emitter = createDeltaEmitter();
    const deltas: unknown[] = [];
    emitter.subscribe((d) => deltas.push(d));
    return { fleet, emitter, deltas };
  }

  it('processes modelStatus SSE event and emits delta with seq=1', async () => {
    type FleetDelta = {
      type: string;
      seq: number;
      hosts: Array<{ seq: number; models: Array<{ model: string; state: string }> }>;
    };
    const { fleet, emitter, deltas } = harness();
    const event: LlamaSweepSSEEvent = {
      type: 'modelStatus',
      data: [apiModel('llama3', 'ready')],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, event);

    // Exactly one fleet delta, carrying the first sequence number.
    expect(deltas).toHaveLength(1);
    const delta = deltas[0] as FleetDelta;
    expect(delta.type).toBe('control_fleet');
    expect(delta.seq).toBe(1);
    expect(delta.hosts[0].seq).toBe(1);
    expect(delta.hosts[0].models[0].model).toBe('llama3');
    expect(delta.hosts[0].models[0].state).toBe('ready');

    // Exactly one statement, targeting the model-events table for this model.
    expect(executedQueries.length).toBe(1);
    expect(executedQueries[0]).toContain('control_model_events');
    expect(executedQueries[0]).toContain('llama3');
  });

  it('increments seq monotonically across multiple events', async () => {
    const { fleet, emitter, deltas } = harness();

    // Each snapshot introduces a model not seen before, so each one yields a delta.
    for (let round = 0; round < 3; round += 1) {
      const event: LlamaSweepSSEEvent = {
        type: 'modelStatus',
        data: [apiModel(`model${round}`, 'ready')],
      };
      await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, event);
    }

    expect(deltas).toHaveLength(3);
    const seqs = deltas.map((d) => (d as { seq: number }).seq);
    expect(seqs).toEqual([1, 2, 3]);
  });

  it('processes metrics event with multiple entries and emits activity deltas', async () => {
    const { fleet, emitter, deltas } = harness();
    const metricsEvent: LlamaSweepSSEEvent = {
      type: 'metrics',
      data: [
        {
          id: 1,
          timestamp: '2024-01-01T00:00:00Z',
          model: 'llama3',
          req_path: '/v1/chat/completions',
          resp_status_code: 200,
          duration_ms: 1500,
          tokens: {
            cache_tokens: 100,
            input_tokens: 50,
            output_tokens: 200,
            prompt_per_second: 30,
            tokens_per_second: 50,
          },
          has_capture: false,
        },
        {
          id: 2,
          timestamp: '2024-01-01T00:01:00Z',
          model: 'llama3',
          req_path: '/v1/chat/completions',
          resp_status_code: 200,
          duration_ms: 1200,
          tokens: {
            cache_tokens: 0,
            input_tokens: 100,
            output_tokens: 300,
            prompt_per_second: 25,
            tokens_per_second: 45,
          },
          has_capture: false,
        },
      ],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, metricsEvent);

    // At least one statement per metrics entry must have reached the SQL stub.
    expect(executedQueries.length).toBeGreaterThanOrEqual(2);

    // One activity delta per entry, in input order.
    const activity = deltas.filter((d) => (d as { type: string }).type === 'control_activity');
    expect(activity).toHaveLength(2);
    const [firstDelta, secondDelta] = activity as Array<{ entry: { id: number } }>;
    expect(firstDelta.entry.id).toBe(1);
    expect(secondDelta.entry.id).toBe(2);
  });

  it('snapshot seq is max of all host seqs', () => {
    const fleet = createFleetState();

    // host1 is bumped twice, host2 three times.
    const host1 = ensureHostState(fleet, 'host1');
    for (let n = 0; n < 2; n += 1) {
      incrementSeq(host1);
    }
    const host2 = ensureHostState(fleet, 'host2');
    for (let n = 0; n < 3; n += 1) {
      incrementSeq(host2);
    }

    const summaries = Array.from(fleet.hosts.values()).map((h) => ({
      providerId: h.providerId,
      seq: h.seq,
    }));
    const snapshotMaxSeq = Math.max(0, ...summaries.map((h) => h.seq));
    expect(snapshotMaxSeq).toBe(3);
  });
});
// ─── P4: source column mapping ──────────────────────────────────────────────
/**
 * P4: the statement that persists an SSE metrics entry must mention the
 * `source` column.
 */
describe('P4: source column in metrics ingest', () => {
  let mockSql: Sql;
  let mockConfig: Config;
  let executedQueries: string[];

  beforeEach(() => {
    executedQueries = [];

    // Recording SQL stub: renders the tagged template to text and keeps it.
    const tagged = (strings: TemplateStringsArray, ...values: unknown[]) => {
      let query = '';
      strings.forEach((chunk, i) => {
        query = query + chunk + (values[i] ?? '');
      });
      executedQueries.push(query);
      return Promise.resolve([]);
    };
    const extras = {
      json: (v: unknown) => v,
      unsafe: async (q: string) => {
        executedQueries.push(q);
        return [];
      },
    };
    mockSql = Object.assign(tagged, extras) as unknown as Sql;

    // Only a subset of Config is populated, hence the cast.
    mockConfig = {
      NODE_ENV: 'production',
      PORT: 9503,
      HOST: '127.0.0.1',
      DATABASE_URL: 'postgres://test',
      LOG_LEVEL: 'info',
      RETENTION_RAW_HOURS: 48,
      RETENTION_ROLLUP_DAYS: 90,
      CAPTURE_SIZE_KB: 256,
      CAPTURE_BUDGET_MB: 50,
    } as unknown as Config;
  });

  it('maps source as NULL for ring data (ActivityLogEntry has no headers)', async () => {
    const fleet = createFleetState();
    const emitter = createDeltaEmitter();
    const deltas: unknown[] = [];
    emitter.subscribe((d) => deltas.push(d));

    const metricsEvent: LlamaSweepSSEEvent = {
      type: 'metrics',
      data: [
        {
          id: 1,
          timestamp: '2024-01-01T00:00:00Z',
          model: 'llama3',
          req_path: '/v1/chat/completions',
          resp_status_code: 200,
          duration_ms: 1500,
          tokens: {
            cache_tokens: 100,
            input_tokens: 50,
            output_tokens: 200,
            prompt_per_second: 30,
            tokens_per_second: 50,
          },
          has_capture: false,
        },
      ],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, metricsEvent);

    // Two or more statements must touch control_requests; the second one is
    // the statement checked for the `source` column.
    const requestQueries = executedQueries.filter((q) => q.includes('control_requests'));
    expect(requestQueries.length).toBeGreaterThanOrEqual(2);
    const [, secondQuery] = requestQueries;
    expect(secondQuery).toContain('source');
  });
});
// B9: merging a per-host delta into the known host list must replace only the
// matching host and leave the others in place. The merge is modelled locally
// (after useControlStream.tsx); no production code is called here.
describe('2-host delta merge (B9)', () => {
  /** Builds a connected host entry with no models. */
  const host = (providerId: string, seq: number) => ({
    providerId,
    liveness: 'connected' as const,
    lastSeenAt: '',
    seq,
    models: [],
  });

  /** Replaces hosts that share a providerId with an incoming entry; appends the rest. */
  function mergeHosts<T extends { providerId: string }>(current: readonly T[], incoming: readonly T[]): T[] {
    const merged = [...current];
    for (const update of incoming) {
      const position = merged.findIndex((known) => known.providerId === update.providerId);
      if (position < 0) {
        merged.push(update);
      } else {
        merged[position] = update;
      }
    }
    return merged;
  }

  it('delta for host2 does not wipe host1 from the hosts array', () => {
    const hosts = [host('host1', 5), host('host2', 3)];
    // Delta arrives for host2 only.
    const deltaHosts = [host('host2', 4)];

    const merged = mergeHosts(hosts, deltaHosts);

    expect(merged).toHaveLength(2);
    expect(merged.find((h) => h.providerId === 'host1')).toBeDefined();
    expect(merged.find((h) => h.providerId === 'host2')!.seq).toBe(4);
    expect(merged.find((h) => h.providerId === 'host1')!.seq).toBe(5);
  });

  it('new host is appended when not in existing array', () => {
    const hosts = [host('host1', 5)];
    const deltaHosts = [host('host3', 1)];

    const merged = mergeHosts(hosts, deltaHosts);

    expect(merged).toHaveLength(2);
    expect(merged.map((h) => h.providerId)).toEqual(['host1', 'host3']);
  });
});

View File

@@ -0,0 +1,34 @@
import { describe, it, expect } from 'vitest';
import { detectGap } from '../reconcile.js';
/**
 * detectGap(oldestReconcile, newestPersisted): true only when both timestamps
 * are present and the oldest reconcile entry is strictly newer than the
 * newest persisted entry.
 */
describe('detectGap', () => {
  const JAN_1 = '2024-01-01T00:00:00Z';
  const JAN_2 = '2024-01-02T00:00:00Z';

  it('detects gap when oldest reconcile is newer than newest persisted', () => {
    const gap = detectGap(JAN_2, JAN_1);
    expect(gap).toBe(true);
  });

  it('does not detect gap when overlap exists', () => {
    const gap = detectGap(JAN_1, JAN_2);
    expect(gap).toBe(false);
  });

  it('does not detect gap when timestamps are equal', () => {
    const gap = detectGap(JAN_1, JAN_1);
    expect(gap).toBe(false);
  });

  it('returns false when oldest reconcile is null', () => {
    const gap = detectGap(null, JAN_1);
    expect(gap).toBe(false);
  });

  it('returns false when newest persisted is null', () => {
    const gap = detectGap(JAN_1, null);
    expect(gap).toBe(false);
  });

  it('returns false when both are null', () => {
    const gap = detectGap(null, null);
    expect(gap).toBe(false);
  });

  it('handles timezone offsets correctly', () => {
    // 14:00 at +02:00 is the same instant as 12:00Z.
    const persisted = '2024-01-01T14:00:00+02:00';
    expect(detectGap('2024-01-01T12:00:00Z', persisted)).toBe(false);
    expect(detectGap('2024-01-01T13:00:00Z', persisted)).toBe(true);
  });
});

View File

@@ -0,0 +1,66 @@
import { describe, it, expect } from 'vitest';
import { renderReportMarkdown, isReportDue, type ReportStats } from '../reports.js';
/**
 * Builds a ReportStats fixture for a one-day period; any field supplied in
 * `partial` overrides the default value.
 */
function makeStats(partial: Partial<ReportStats> = {}): ReportStats {
  const defaults: ReportStats = {
    periodStart: '2026-06-11T00:00:00.000Z',
    periodEnd: '2026-06-12T00:00:00.000Z',
    interval: 'daily',
    totalRequests: 100,
    priorRequests: 50,
    totalInputTokens: 1000,
    totalOutputTokens: 2000,
    bySource: [{ source: 'boochat', requests: 80, inputTokens: 800, outputTokens: 1600 }],
    byProvider: [{ providerId: 'sam-desktop', requests: 100, swaps: 4 }],
    leaderboard: [{ providerId: 'sam-desktop', model: 'qwopus-35b', kind: 'code', avgScore: 0.82 }],
    regressions: [],
  };
  return { ...defaults, ...partial };
}
/** renderReportMarkdown: headline, usage trend, per-source/provider rows, and regressions. */
describe('renderReportMarkdown', () => {
  it('renders usage with a trend vs the prior period', () => {
    const md = renderReportMarkdown(makeStats());
    const expectedFragments = [
      '# Fleet daily report',
      'Requests: 100 (+100% vs prior period)',
      '| boochat | 80 |',
      '| sam-desktop | 100 | 4 |',
      'No speed regressions flagged this period.',
    ];
    for (const fragment of expectedFragments) {
      expect(md).toContain(fragment);
    }
  });

  it('renders regression anomalies when present', () => {
    const stats = makeStats({
      regressions: [{ providerId: 'sam-desktop', model: 'qwopus-35b', avgGenTps: 42.5 }],
    });
    const md = renderReportMarkdown(stats);
    expect(md).toContain('Regression: sam-desktop/qwopus-35b');
    expect(md).toContain('42.5 tok/s');
  });

  it('handles a zero prior period without dividing by zero', () => {
    // With no prior requests the trend is reported as "new" rather than a percentage.
    const stats = makeStats({ totalRequests: 5, priorRequests: 0 });
    const md = renderReportMarkdown(stats);
    expect(md).toContain('Requests: 5 (new vs prior period)');
  });
});
/** isReportDue(lastRun, interval, now): whether a report should be generated at `now`. */
describe('isReportDue', () => {
  // Fixed reference instant for every case below.
  const now = new Date('2026-06-12T12:00:00.000Z');

  it('is due when never run', () => {
    const due = isReportDue(null, 'daily', now);
    expect(due).toBe(true);
  });

  it('is not due within the interval', () => {
    // Last run 6 hours before `now`.
    const lastRun = new Date('2026-06-12T06:00:00.000Z');
    const due = isReportDue(lastRun, 'daily', now);
    expect(due).toBe(false);
  });

  it('is due once the interval has elapsed', () => {
    // Last run 30 hours before `now`.
    const lastRun = new Date('2026-06-11T06:00:00.000Z');
    const due = isReportDue(lastRun, 'daily', now);
    expect(due).toBe(true);
  });

  it('uses a 7-day window for weekly', () => {
    // Last run 3 days before `now`: inside a weekly window.
    const lastRun = new Date('2026-06-09T12:00:00.000Z');
    const due = isReportDue(lastRun, 'weekly', now);
    expect(due).toBe(false);
  });
});

View File

@@ -0,0 +1,68 @@
import { describe, it, expect } from 'vitest';
import { trimCapture, parseCaptureJson } from '../retention.js';
/** trimCapture(capture, sizeKB): passes small captures through and shortens oversized ones. */
describe('trimCapture', () => {
  const CAP_KB = 256;

  it('returns null for null input', () => {
    const trimmed = trimCapture(null, CAP_KB);
    expect(trimmed).toBeNull();
  });

  it('returns unchanged capture when within cap', () => {
    const small = JSON.stringify({ data: 'x'.repeat(100) });
    const trimmed = trimCapture(small, CAP_KB);
    expect(trimmed).toBe(small);
  });

  it('trims capture when over cap', () => {
    // 300,000 characters of payload: larger than a 256 KB cap.
    const large = JSON.stringify({ data: 'x'.repeat(300_000) });
    const trimmed = trimCapture(large, CAP_KB);
    expect(trimmed).not.toBe(large);
    expect(trimmed!.length).toBeLessThan(large.length);
  });

  it('trims to roughly the cap size', () => {
    const large = JSON.stringify({ data: 'x'.repeat(1_000_000) });
    const trimmed = trimCapture(large, CAP_KB);
    // The result may not exceed sizeKB * 1024 in length.
    const limit = Math.floor(CAP_KB * 1024);
    expect(trimmed!.length).toBeLessThanOrEqual(limit);
  });
});
/** parseCaptureJson: JSON text in, parsed value out; null for null or unparseable input. */
describe('parseCaptureJson', () => {
  it('parses valid JSON string into object', () => {
    const capture = { requestHeaders: {}, requestBody: '{}', responseHeaders: {}, responseBody: '{}' };
    const parsed = parseCaptureJson(JSON.stringify(capture));
    expect(parsed).toEqual(capture);
  });

  it('returns null for null input', () => {
    const parsed = parseCaptureJson(null);
    expect(parsed).toBeNull();
  });

  it('returns null for invalid JSON', () => {
    const parsed = parseCaptureJson('not json');
    expect(parsed).toBeNull();
  });

  it('B7: trimmed capture produces a JSONB-ready object, not a string', () => {
    // A capture small enough to need no trimming must parse to an object
    // (or array), since passing a string on to sql.json() would serialise it twice.
    const withinCap = JSON.stringify({ requestHeaders: {}, requestBody: '{}', responseBody: '{}' });
    const parsed = parseCaptureJson(withinCap);
    expect(typeof parsed).toBe('object');
    expect(parsed).not.toBeNull();
    const isObjectOrArray = Array.isArray(parsed) || typeof parsed === 'object';
    expect(isObjectOrArray).toBe(true);
  });

  it('B7: oversized capture trims to invalid JSON -> parseCaptureJson returns null -> stored as NULL', () => {
    // Trimming an oversized capture cuts it mid-document, so the remainder
    // no longer parses and parseCaptureJson reports null for it.
    const raw = JSON.stringify({ data: 'x'.repeat(300_000) });
    const trimmed = trimCapture(raw, 256);
    expect(trimmed).not.toBeNull();

    const parsed = parseCaptureJson(trimmed!);
    expect(parsed).toBeNull();
  });
});

View File

@@ -0,0 +1,57 @@
import { describe, it, expect } from 'vitest';
import { assignBadges, type ModelScore } from '../routing-scores.js';
/**
 * Builds a ModelScore fixture from a `provider/model` composite id.
 *
 * The text before the first slash becomes `providerId`, the remainder (which
 * may itself contain slashes) becomes `model`. All metrics default to null,
 * the host is healthy, and no badges are assigned; `partial` overrides any of these.
 */
function makeScore(partial: Partial<ModelScore> & { compositeId: string }): ModelScore {
  const [providerId, ...modelSegments] = partial.compositeId.split('/');
  return {
    providerId: providerId!,
    model: modelSegments.join('/'),
    codeScore: null,
    chatScore: null,
    evalScore: null,
    avgGenTps: null,
    avgLatencyMs: null,
    sampleCount: 0,
    healthy: true,
    badges: [],
    ...partial,
  };
}
/** assignBadges: mutates the given scores, tagging the best healthy model per category. */
describe('assignBadges', () => {
  /** Badges of the score with the given composite id. */
  const badgesOf = (scores: ModelScore[], id: string) =>
    scores.find((s) => s.compositeId === id)!.badges;

  it('awards best-code to the highest healthy code score', () => {
    const scores = [
      makeScore({ compositeId: 'a/m1', codeScore: 0.7 }),
      makeScore({ compositeId: 'a/m2', codeScore: 0.9 }),
      makeScore({ compositeId: 'a/m3', codeScore: 0.5 }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/m2')).toContain('best-code');
    expect(badgesOf(scores, 'a/m1')).not.toContain('best-code');
  });

  it('excludes unhealthy hosts from winning any badge', () => {
    // The unhealthy model has the better score but must not win.
    const scores = [
      makeScore({ compositeId: 'a/m1', codeScore: 0.95, healthy: false }),
      makeScore({ compositeId: 'a/m2', codeScore: 0.6, healthy: true }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/m1')).toHaveLength(0);
    expect(badgesOf(scores, 'a/m2')).toContain('best-code');
  });

  it('awards best-fast by throughput independently of eval scores', () => {
    const scores = [
      makeScore({ compositeId: 'a/slow', codeScore: 0.9, avgGenTps: 10 }),
      makeScore({ compositeId: 'a/fast', codeScore: 0.4, avgGenTps: 80 }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/fast')).toContain('best-fast');
    expect(badgesOf(scores, 'a/slow')).toContain('best-code');
  });

  it('awards nothing for a category when no model has that metric', () => {
    // Only throughput is present, so only the throughput badge may appear.
    const scores = [makeScore({ compositeId: 'a/m1', avgGenTps: 20 })];

    assignBadges(scores);

    const [only] = scores;
    expect(only!.badges).toEqual(['best-fast']);
  });
});

View File

@@ -0,0 +1,130 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
// ─── Sandbox lifecycle tests (mock docker spawn, test orchestration) ─────────
/**
 * Sandbox runner lifecycle. Apart from the import smoke checks, these cases
 * exercise the orchestration patterns (settled fan-out, per-task cleanup,
 * kill on timeout) with local stand-ins rather than calling the runner itself.
 */
describe('sandbox runner lifecycle', () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('runCodeEval is importable', async () => {
    const mod = await import('../sandbox-runner.js');
    expect(typeof mod.runCodeEval).toBe('function');
  });

  it('bounded fan-out via Promise.allSettled', async () => {
    // Only the first `concurrency` tasks are started, so at most that many
    // can be in flight at once.
    const tasks = Array.from({ length: 10 }, (_, i) => ({ id: `task_${i}` }));
    const concurrency = 4;
    const activeCount: number[] = [];
    let currentlyActive = 0;

    const results = await Promise.allSettled(
      tasks.slice(0, concurrency).map(async (task, idx) => {
        currentlyActive++;
        activeCount.push(currentlyActive);
        // Staggered delays so the tasks finish at different times.
        await new Promise((r) => setTimeout(r, 10 + idx * 5));
        currentlyActive--;
        return { taskId: task.id, idx };
      }),
    );

    // All should fulfill.
    expect(results.filter((r) => r.status === 'fulfilled').length).toBe(concurrency);
    // Max concurrent should not exceed concurrency limit.
    expect(Math.max(...activeCount)).toBeLessThanOrEqual(concurrency);
  });

  it('per-task finally cleanup runs on error', async () => {
    const cleanupCalls: string[] = [];
    const tasks = [
      { id: 'task_ok' },
      { id: 'task_fail' },
      { id: 'task_ok2' },
    ];

    const results = await Promise.allSettled(
      tasks.map(async (task) => {
        try {
          if (task.id === 'task_fail') {
            throw new Error('simulated failure');
          }
          return { ok: true };
        } finally {
          cleanupCalls.push(task.id);
        }
      }),
    );

    // All cleanup calls should run, even for the failed task.
    expect(cleanupCalls).toContain('task_ok');
    expect(cleanupCalls).toContain('task_fail');
    expect(cleanupCalls).toContain('task_ok2');
    // One rejection, two fulfillments.
    expect(results.filter((r) => r.status === 'fulfilled').length).toBe(2);
    expect(results.filter((r) => r.status === 'rejected').length).toBe(1);
  });

  it('kill-on-timeout pattern', async () => {
    // Spawn a long-running child and kill it from a timer.
    const { spawn } = await import('node:child_process');
    const child = spawn('sleep', ['300']);
    const timeoutHandle = setTimeout(() => {
      child.kill('SIGKILL');
    }, 100);

    try {
      const exit = await new Promise<{ code: number | null; signal: string | null }>((resolve, reject) => {
        // A failed spawn (e.g. no `sleep` binary) emits 'error'; without a
        // listener that would surface as an uncaught exception instead of a
        // failure of this test.
        child.once('error', reject);
        child.once('close', (code, signal) => resolve({ code, signal }));
      });

      // A process terminated by SIGKILL reports the signal and no exit code.
      expect(child.killed).toBe(true);
      expect(exit.code).toBeNull();
      expect(exit.signal).toBe('SIGKILL');
    } finally {
      // Never leave the kill timer pending, whichever way the promise settled.
      clearTimeout(timeoutHandle);
    }
  });

  it('allSettled isolation: one failure does not abort others', async () => {
    const completed: string[] = [];

    const results = await Promise.allSettled([
      (async () => {
        await new Promise((r) => setTimeout(r, 50));
        completed.push('task1');
        return 'ok1';
      })(),
      (async () => {
        // Fails while the other two tasks are still pending.
        await new Promise((r) => setTimeout(r, 20));
        throw new Error('fail');
      })(),
      (async () => {
        await new Promise((r) => setTimeout(r, 50));
        completed.push('task3');
        return 'ok3';
      })(),
    ]);

    // Both successful tasks completed despite the failure.
    expect(completed).toContain('task1');
    expect(completed).toContain('task3');
    expect(results[0].status).toBe('fulfilled');
    expect(results[1].status).toBe('rejected');
    expect(results[2].status).toBe('fulfilled');
  });

  it('pruneOrphanContainers handles missing docker gracefully', async () => {
    // pruneOrphanContainers is not exported, so this only verifies that the
    // module loads and exposes runCodeEval.
    const mod = await import('../sandbox-runner.js');
    expect(typeof mod.runCodeEval).toBe('function');
  });
});

View File

@@ -0,0 +1,106 @@
import { describe, it, expect } from 'vitest';
// Seq logic test: verify the buffer-then-filter rule.
// Client buffers pre-snapshot deltas, discards seq <= snapshot_seq per-host.
/** Incremental fleet update: a top-level sequence number plus the hosts it touches. */
interface Delta {
  type: 'control_fleet';
  /** Sequence number of the delta as a whole. */
  seq: number;
  /** Affected hosts, each with its own sequence number. */
  hosts: { providerId: string; seq: number }[];
}
/** Full fleet snapshot; structurally the same shape as a delta. */
interface Snapshot {
  type: 'control_fleet';
  /** Sequence number of the snapshot as a whole. */
  seq: number;
  /** Every host in the snapshot with its sequence number at snapshot time. */
  hosts: { providerId: string; seq: number }[];
}
/**
 * Decides whether a delta should be applied on top of the last snapshot.
 *
 * Only the delta's first host is consulted: the delta's top-level `seq` must
 * be strictly greater than the snapshot seq recorded for that host (0 when
 * none is recorded). A delta without hosts is never applied.
 *
 * @param delta The incoming delta.
 * @param snapshotSeqs Snapshot sequence numbers keyed by provider id.
 * @returns true when the delta is newer than the snapshot and should be applied.
 */
function applyDelta(delta: Delta, snapshotSeqs: Map<string, number>): boolean {
  const [leadHost] = delta.hosts;
  if (!leadHost) {
    return false;
  }
  const recorded = snapshotSeqs.get(leadHost.providerId);
  const baseline = recorded ?? 0;
  return baseline < delta.seq;
}
/**
 * Records each host's sequence number from the snapshot into `snapshotSeqs`,
 * overwriting any entry already stored for that provider id.
 */
function applySnapshot(snapshot: Snapshot, snapshotSeqs: Map<string, number>): void {
  snapshot.hosts.forEach(({ providerId, seq }) => {
    snapshotSeqs.set(providerId, seq);
  });
}
/**
 * Buffer-then-filter rule: deltas at or below the snapshot sequence number
 * for their host are discarded; newer ones are applied.
 */
describe('seq logic: buffer-then-filter', () => {
  /** Single-host delta whose top-level seq equals the host seq. */
  const deltaAt = (providerId: string, seq: number): Delta => ({
    type: 'control_fleet',
    seq,
    hosts: [{ providerId, seq }],
  });

  /** Snapshot with top-level seq 0 and the given hosts. */
  const snapshotOf = (hosts: Snapshot['hosts']): Snapshot => ({
    type: 'control_fleet',
    seq: 0,
    hosts,
  });

  it('applies delta when seq > snapshot seq', () => {
    const snapshotSeqs = new Map([['host1', 5]]);
    const accepted = applyDelta(deltaAt('host1', 10), snapshotSeqs);
    expect(accepted).toBe(true);
  });

  it('discards delta when seq <= snapshot seq', () => {
    const snapshotSeqs = new Map([['host1', 10]]);
    const accepted = applyDelta(deltaAt('host1', 5), snapshotSeqs);
    expect(accepted).toBe(false);
  });

  it('discards delta when seq equals snapshot seq', () => {
    const snapshotSeqs = new Map([['host1', 10]]);
    const accepted = applyDelta(deltaAt('host1', 10), snapshotSeqs);
    expect(accepted).toBe(false);
  });

  it('updates snapshot seqs on snapshot apply', () => {
    const snapshotSeqs = new Map<string, number>();
    const snapshot = snapshotOf([
      { providerId: 'host1', seq: 100 },
      { providerId: 'host2', seq: 50 },
    ]);

    applySnapshot(snapshot, snapshotSeqs);

    expect(snapshotSeqs.get('host1')).toBe(100);
    expect(snapshotSeqs.get('host2')).toBe(50);
  });

  it('handles missing snapshot seq (treats as 0)', () => {
    const snapshotSeqs = new Map<string, number>();
    // With nothing recorded the baseline is 0, so seq 1 is newer and applies.
    const accepted = applyDelta(deltaAt('host1', 1), snapshotSeqs);
    expect(accepted).toBe(true);
  });

  it('discards out-of-order delta after snapshot', () => {
    // The snapshot puts host1 at seq 10; a delta at seq 5 arrives afterwards.
    const snapshotSeqs = new Map<string, number>();
    applySnapshot(snapshotOf([{ providerId: 'host1', seq: 10 }]), snapshotSeqs);

    const accepted = applyDelta(deltaAt('host1', 5), snapshotSeqs);
    expect(accepted).toBe(false);
  });
});

View File

@@ -0,0 +1,234 @@
import { describe, it, expect } from 'vitest';
import {
validateLlamaConfig,
computeDiff,
backupFilename,
applyRemoteConfig,
healthWait,
type SshExec,
type ExecResult,
} from '../ssh-config.js';
// A minimal subset of the llama-swap config schema sufficient for these tests:
// top-level object with a required non-empty `models` object.
// Each model entry must itself be an object; `cmd`, when present, must be a string.
const SCHEMA = {
type: 'object',
required: ['models'],
properties: {
models: {
type: 'object',
minProperties: 1,
additionalProperties: {
type: 'object',
properties: { cmd: { type: 'string' } },
},
},
},
} as const;
// Config text used as the "good" input by the tests below: one model (`m1`) with a `cmd`.
const VALID_YAML = `models:\n m1:\n cmd: "llama-server -m m1.gguf"\n`;
// validateLlamaConfig: YAML parsing plus schema validation, reported as { valid, errors }.
describe('validateLlamaConfig', () => {
  it('accepts a valid config', () => {
    const { valid, errors } = validateLlamaConfig(VALID_YAML, SCHEMA);
    expect(valid).toBe(true);
    expect(errors).toEqual([]);
  });

  it('rejects broken YAML with a parse error', () => {
    const brokenYaml = 'models:\n m1:\n cmd: "x\n : :';
    const outcome = validateLlamaConfig(brokenYaml, SCHEMA);
    expect(outcome.valid).toBe(false);
    expect(outcome.errors[0]).toMatch(/YAML parse error/);
  });

  it('rejects a config missing required models', () => {
    const outcome = validateLlamaConfig('healthCheckTimeout: 30\n', SCHEMA);
    expect(outcome.valid).toBe(false);
    // The error text must name the missing key somewhere.
    expect(outcome.errors.join(' ')).toMatch(/models/);
  });

  it('rejects a non-mapping document', () => {
    const listDocument = '- just\n- a\n- list\n';
    expect(validateLlamaConfig(listDocument, SCHEMA).valid).toBe(false);
  });
});
// computeDiff: line diff between two config texts.
describe('computeDiff', () => {
  it('returns empty for identical text', () => {
    const unchanged = 'a\nb\n';
    expect(computeDiff(unchanged, 'a\nb\n')).toBe('');
  });

  it('marks changed lines with -/+', () => {
    const before = 'a\nb\nc\n';
    const after = 'a\nX\nc\n';
    const diff = computeDiff(before, after);
    expect(diff).toContain('- b');
    expect(diff).toContain('+ X');
  });
});
// backupFilename: config path plus a compact UTC timestamp suffix (milliseconds dropped).
describe('backupFilename', () => {
  it('produces a timestamped path', () => {
    const when = new Date('2026-06-12T03:04:05.678Z');
    const produced = backupFilename('/etc/llama/config.yaml', when);
    expect(produced).toBe('/etc/llama/config.yaml.bak-20260612T030405Z');
  });
});
// ─── apply pipeline failure paths ────────────────────────────────────────────
/**
 * Build a fake SshExec driven by a substring -> result table.
 *
 * Every command is recorded in `calls`. The result is taken from the first
 * handler (in the table's insertion order) whose key occurs in the command;
 * commands that match nothing succeed with empty output.
 */
function makeExec(handlers: Record<string, ExecResult>): { exec: SshExec; calls: string[] } {
  const calls: string[] = [];
  const exec: SshExec = async (_target, command) => {
    calls.push(command);
    const hit = Object.entries(handlers).find(([pattern]) => command.includes(pattern));
    return hit ? hit[1] : { code: 0, stdout: '', stderr: '' };
  };
  return { exec, calls };
}
// Dummy SSH target passed to applyRemoteConfig; the fake exec built by makeExec ignores it.
const target = { host: 'h', user: 'u', keyPath: '/k' };
// Fetch stand-in that always answers 200 with an empty JSON object (a fresh Response per call).
const okFetcher = (async () => new Response('{}', { status: 200 })) as unknown as typeof fetch;
// Failure-path and happy-path coverage for applyRemoteConfig in its default (raw shell) mode.
// Handler keys are matched as substrings in insertion order by makeExec, so the
// order of entries in each table matters ("cat '" = read, 'cp ' = backup, 'cat >' = write).
describe('applyRemoteConfig', () => {
it('aborts at validate for an invalid config and never touches the host', async () => {
const { exec, calls } = makeExec({});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart', newConfig: 'not: valid: yaml: here:::',
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher,
});
expect(r.ok).toBe(false);
expect(r.step).toBe('validate');
// No remote command may have been issued.
expect(calls).toHaveLength(0);
});
it('aborts at validate when the host config is unreadable', async () => {
// The read of the current config fails with a non-zero exit code.
const { exec } = makeExec({ "cat '": { code: 1, stdout: '', stderr: 'no such file' } });
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher,
});
expect(r.ok).toBe(false);
expect(r.step).toBe('validate');
expect(r.error).toMatch(/read current failed/);
});
it('backs up BEFORE write and aborts on write failure (backup retained)', async () => {
const { exec, calls } = makeExec({
"cat '": { code: 0, stdout: 'models:\n old: {}\n', stderr: '' }, // read current
'cp ': { code: 0, stdout: '', stderr: '' }, // backup
'cat >': { code: 1, stdout: '', stderr: 'disk full' }, // write fails
});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher,
// Fixed clock so the backup file name is deterministic.
now: new Date('2026-06-12T00:00:00Z'),
});
expect(r.ok).toBe(false);
expect(r.step).toBe('write');
expect(r.backupPath).toBe('/c.yaml.bak-20260612T000000Z');
// backup (cp) must precede write (cat >)
const cpIdx = calls.findIndex((c) => c.startsWith('cp '));
const writeIdx = calls.findIndex((c) => c.startsWith('cat >'));
expect(cpIdx).toBeGreaterThanOrEqual(0);
expect(writeIdx).toBeGreaterThan(cpIdx);
});
it('aborts at restart on restart failure', async () => {
const { exec } = makeExec({
"cat '": { code: 0, stdout: 'models:\n old: {}\n', stderr: '' },
'cp ': { code: 0, stdout: '', stderr: '' },
'cat >': { code: 0, stdout: '', stderr: '' },
// 'restart' is a substring of the 'restart-svc' command used below.
restart: { code: 1, stdout: '', stderr: 'service not found' },
});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart-svc', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher,
});
expect(r.ok).toBe(false);
expect(r.step).toBe('restart');
});
it('aborts at health when the service never comes back', async () => {
const { exec } = makeExec({
"cat '": { code: 0, stdout: 'models:\n old: {}\n', stderr: '' },
'cp ': { code: 0, stdout: '', stderr: '' },
'cat >': { code: 0, stdout: '', stderr: '' },
'restart-svc': { code: 0, stdout: '', stderr: '' },
});
// Every health probe throws, as if the connection were refused.
const downFetcher = (async () => { throw new Error('refused'); }) as unknown as typeof fetch;
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart-svc', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: downFetcher,
// Tiny retry budget keeps the test fast.
healthAttempts: 2, healthDelayMs: 1,
});
expect(r.ok).toBe(false);
expect(r.step).toBe('health');
});
it('succeeds through the full pipeline', async () => {
const { exec } = makeExec({
"cat '": { code: 0, stdout: 'models:\n old: {}\n', stderr: '' },
'cp ': { code: 0, stdout: '', stderr: '' },
'cat >': { code: 0, stdout: '', stderr: '' },
'restart-svc': { code: 0, stdout: '', stderr: '' },
});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'restart-svc', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher,
healthAttempts: 1, healthDelayMs: 1,
});
expect(r.ok).toBe(true);
expect(r.step).toBe('done');
expect(r.backupPath).toBeDefined();
});
});
// healthWait: polls the service and reports whether it answered OK within the attempt budget.
describe('healthWait', () => {
  it('returns true on first OK', async () => {
    const healthy = await healthWait('http://h', okFetcher, 3, 1);
    expect(healthy).toBe(true);
  });

  it('returns false after exhausting attempts', async () => {
    // Every probe answers 503, so no attempt can succeed.
    const unavailable = (async () => new Response('', { status: 503 })) as unknown as typeof fetch;
    const healthy = await healthWait('http://h', unavailable, 2, 1);
    expect(healthy).toBe(false);
  });
});
// ─── wrapper mode (forced-command verbs) ─────────────────────────────────────
// Wrapper mode: applyRemoteConfig is expected to send bare verbs
// ('read', 'backup', 'write', 'restart') instead of shell command lines.
describe('applyRemoteConfig wrapper mode', () => {
it('sends verbs (not raw shell) and reads the backup path from the backup verb', async () => {
const { exec, calls } = makeExec({
read: { code: 0, stdout: 'models:\n old: {}\n', stderr: '' },
// The backup verb reports the path it created on stdout.
backup: { code: 0, stdout: '/c.yaml.bak-WRAP\n', stderr: '' },
write: { code: 0, stdout: '', stderr: '' },
restart: { code: 0, stdout: '', stderr: '' },
});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'ignored-in-wrapper', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher, mode: 'wrapper',
healthAttempts: 1, healthDelayMs: 1,
});
expect(r.ok).toBe(true);
// backup path comes from the wrapper's stdout, not a client-computed name
expect(r.backupPath).toBe('/c.yaml.bak-WRAP');
// verbs only — no cat/cp/cat > shell commands
expect(calls).toEqual(['read', 'backup', 'write', 'restart']);
expect(calls.some((c) => c.includes('cat') || c.includes('cp '))).toBe(false);
});
it('aborts at write when the wrapper write verb fails (backup retained)', async () => {
const { exec } = makeExec({
read: { code: 0, stdout: 'old\n', stderr: '' },
backup: { code: 0, stdout: '/c.yaml.bak-WRAP\n', stderr: '' },
write: { code: 1, stdout: '', stderr: 'denied' },
});
const r = await applyRemoteConfig({
target, configPath: '/c.yaml', restartCmd: 'x', newConfig: VALID_YAML,
schema: SCHEMA, baseUrl: 'http://h', exec, fetcher: okFetcher, mode: 'wrapper',
});
expect(r.ok).toBe(false);
expect(r.step).toBe('write');
// The backup taken before the failed write is still reported to the caller.
expect(r.backupPath).toBe('/c.yaml.bak-WRAP');
});
});

View File

@@ -0,0 +1,236 @@
/**
* Per-host FIFO action queue.
*
* All host-mutating actions (warm, unload) from BooControl serialize through
* a single FIFO queue per provider_id. Queue discipline:
*
* - Submissions rejected immediately while host liveness is 'down'
* - Queue depth capped at 4; reject-on-full includes pending queue contents
* - Each action re-checks liveness on dequeue and skips if stale
* - Unload-during-bench returns 409 {error: 'bench in progress', requiresConfirmation: true}
*
* Pattern: arena-runner.ts advanceChain promise-chain + read-fresh-state-or-skip.
*/
import type { FastifyBaseLogger } from 'fastify';
// The two host-mutating operations the queue knows how to execute.
export type ActionType = 'warm' | 'unload';
// A single requested action, as submitted by a caller.
export interface QueuedAction {
actionId: string;
type: ActionType;
// Selects the per-host queue and the registered deps used to run the action.
providerId: string;
model?: string; // for warm: target model; for unload: specific model or undefined for all
confirmed: boolean; // true if client confirmed takeover
createdAt: Date;
}
// Queue bookkeeping wrapped around a submitted action.
export interface ActionQueueEntry {
action: QueuedAction;
status: 'pending' | 'running' | 'completed' | 'failed' | 'skipped';
// Set when the entry is skipped or fails.
error?: string;
// Time the entry was accepted by submit().
enqueuedAt: Date;
}
// Per-host queue state; queue[0] is the entry currently being processed (if any).
export interface ActionQueueState {
queue: ActionQueueEntry[];
// True while an entry is being processed; guards against concurrent processors.
running: boolean;
}
// Host-specific collaborators supplied via registerHost().
export interface ActionQueueDeps {
// Base URL the warm/unload HTTP requests are sent to.
baseUrl: string;
// Liveness probe; checked on submit and again when an entry is dequeued.
isLivenessUp: () => boolean;
// Returns a count (despite the `is` prefix); a value > 0 blocks unconfirmed unloads.
isInflightRequests: () => number;
log: FastifyBaseLogger;
}
// Maximum number of entries per host queue, including the one currently running.
const MAX_QUEUE_DEPTH = 4;
/**
 * Per-host FIFO queue of host-mutating actions (warm / unload).
 *
 * Each registered host gets its own queue. At most one action per host runs at
 * a time; the entry being processed stays at the head of the queue until it
 * finishes, is skipped, or fails, and is then removed.
 */
export class ActionQueue {
  private queues: Map<string, ActionQueueState> = new Map();
  private depsMap: Map<string, ActionQueueDeps> = new Map();
  // Stale-warm guard; answers "not loaded" until setModelLoadedCheck() wires the real check.
  private modelLoadedCheck: (providerId: string, model: string | undefined) => boolean = () => false;

  /**
   * Register (or re-register) a host. Re-registering replaces the deps used for
   * future submissions but keeps the existing queue.
   */
  registerHost(providerId: string, deps: ActionQueueDeps): void {
    this.depsMap.set(providerId, deps);
    if (!this.queues.has(providerId)) {
      this.queues.set(providerId, { queue: [], running: false });
    }
  }

  /**
   * Submit an action to the per-host queue.
   *
   * Rejections, checked in this order: unknown host, host offline, bench in
   * progress (unconfirmed unload while requests are in flight), queue full.
   * On queue-full the response lists the actions currently queued.
   *
   * @returns `{ ok: true }` when the action was enqueued, otherwise the rejection
   */
  submit(action: QueuedAction): { ok: true } | { ok: false; error: string; pending?: QueuedAction[]; requiresConfirmation?: boolean } {
    const deps = this.depsMap.get(action.providerId);
    if (!deps) {
      return { ok: false, error: `unknown host: ${action.providerId}` };
    }

    // Reject if host is down
    if (!deps.isLivenessUp()) {
      return { ok: false, error: 'host offline' };
    }

    const state = this.queues.get(action.providerId);
    if (!state) {
      return { ok: false, error: `queue not initialized for ${action.providerId}` };
    }

    // An unload while requests are in flight needs explicit confirmation.
    if (action.type === 'unload' && !action.confirmed && deps.isInflightRequests() > 0) {
      return {
        ok: false,
        error: 'bench in progress',
        requiresConfirmation: true,
      };
    }

    // Depth cap (the running entry counts towards the depth).
    if (state.queue.length >= MAX_QUEUE_DEPTH) {
      return {
        ok: false,
        error: `queue full (${state.queue.length}/${MAX_QUEUE_DEPTH})`,
        pending: state.queue.map((e) => e.action),
      };
    }

    state.queue.push({
      action,
      status: 'pending',
      enqueuedAt: new Date(),
    });

    // Kick the processor; it is a no-op when one is already running for this host.
    void this.processNext(action.providerId, deps);
    return { ok: true };
  }

  /**
   * Get the current queue state for a host, or null for an unregistered host.
   * The returned object is the live internal state, not a copy.
   */
  getState(providerId: string): ActionQueueState | null {
    return this.queues.get(providerId) ?? null;
  }

  /**
   * Process the head of a host's queue, then chain to the next entry.
   * Each action runs to completion (or is skipped / fails) before the next starts.
   */
  private async processNext(providerId: string, deps: ActionQueueDeps): Promise<void> {
    const state = this.queues.get(providerId);
    if (!state || state.running) return;
    const entry = state.queue[0];
    if (!entry) return;

    state.running = true;
    entry.status = 'running';
    try {
      // Re-validate on dequeue: the host may have gone down, or the warm may be moot.
      const staleReason = this.staleReason(entry.action, deps);
      if (staleReason !== null) {
        entry.status = 'skipped';
        entry.error = staleReason;
      } else {
        await this.executeAction(entry.action, deps);
        entry.status = 'completed';
      }
    } catch (err) {
      entry.status = 'failed';
      entry.error = err instanceof Error ? err.message : String(err);
      deps.log.error({ actionId: entry.action.actionId, err: entry.error }, 'action: failed');
    } finally {
      // Always release the slot, even if logging throws, so the queue cannot wedge.
      state.queue.shift();
      state.running = false;
    }
    void this.processNext(providerId, deps);
  }

  /** Reason the action should be skipped at dequeue time, or null when it should run. */
  private staleReason(action: QueuedAction, deps: ActionQueueDeps): string | null {
    if (!deps.isLivenessUp()) {
      return 'host went down during queue wait';
    }
    if (action.type === 'warm' && this.isModelAlreadyLoaded(action.providerId, action.model)) {
      return 'model already loaded';
    }
    return null;
  }

  /**
   * Perform the HTTP call for one action.
   *
   * @throws Error when a warm action has no model, or when the host answers non-2xx
   */
  private async executeAction(action: QueuedAction, deps: ActionQueueDeps): Promise<void> {
    const { baseUrl } = deps;
    switch (action.type) {
      case 'warm': {
        if (!action.model) {
          throw new Error('warm action requires model');
        }
        // 1-token chat completion. The chat endpoint takes `messages`;
        // `prompt` belongs to the legacy /v1/completions API.
        await this.post(`${baseUrl}/v1/chat/completions`, 'warm', 60_000, {
          model: action.model,
          messages: [{ role: 'user', content: '.' }],
          max_tokens: 1,
          stream: false,
        });
        break;
      }
      case 'unload': {
        // With a model: unload just that model; without: unload everything.
        const url = action.model
          ? `${baseUrl}/api/models/unload/${encodeURIComponent(action.model)}`
          : `${baseUrl}/api/models/unload`;
        await this.post(url, 'unload', 30_000);
        break;
      }
    }
  }

  /**
   * POST to the host and fail on a non-2xx answer.
   * The error message is `<label> failed: <status> <first 200 chars of body>`.
   */
  private async post(url: string, label: string, timeoutMs: number, jsonBody?: unknown): Promise<void> {
    const res = await fetch(url, {
      method: 'POST',
      ...(jsonBody === undefined
        ? {}
        : { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(jsonBody) }),
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      throw new Error(`${label} failed: ${res.status} ${body.slice(0, 200)}`);
    }
    // The payload is not needed; cancel it so the connection is released promptly.
    await res.body?.cancel().catch(() => undefined);
  }

  /**
   * Check if a model is already loaded on the host (stale-action guard).
   * Delegates to the callback installed by setModelLoadedCheck().
   */
  private isModelAlreadyLoaded(providerId: string, model: string | undefined): boolean {
    return this.modelLoadedCheck(providerId, model);
  }

  /**
   * Set the model-loaded check callback (wired from index.ts).
   */
  setModelLoadedCheck(fn: (providerId: string, model: string | undefined) => boolean): void {
    this.modelLoadedCheck = fn;
  }
}

View File

@@ -0,0 +1,517 @@
/**
* Bench engine: speed benchmark runner.
*
* Suite = grid of (prompt_tokens x gen_tokens x concurrency) x repetitions.
* TTFT measured client-side at first stream delta.
* llama.cpp timings parsed from final stream chunk.
* Bounded fan-out via Promise.allSettled at suite-declared concurrency.
* Warmup excluded from results.
*/
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import { jsonbObject } from './jsonb.js';
// ─── types ──────────────────────────────────────────────────────────────────
// Declarative description of a benchmark: the grid axes plus the target model.
export interface BenchSuite {
id: string;
name: string;
providerId: string;
model: string;
// Grid axes: every combination of promptTokens x genTokens x concurrency is run.
promptTokens: number[];
genTokens: number[];
concurrency: number[];
// Number of requests issued per grid cell.
repetitions: number;
// Optional sampling overrides; take precedence over BenchRunParams values.
temperature?: number;
topP?: number;
metadata?: Record<string, unknown>;
}
// Inputs to runBenchSuite.
export interface BenchRunParams {
suite: BenchSuite;
// Base URL of the OpenAI-compatible endpoint being benchmarked.
baseUrl: string;
// Used only when the suite does not define the value itself.
temperature?: number;
topP?: number;
}
// Throughput figures extracted from a llama.cpp `timings` object.
export interface BenchTimings {
promptPerSecond: number;
predictedPerSecond: number;
cacheN: number;
}
// Result of one bench request; measurement fields stay null when not captured.
export interface BenchSample {
promptTokens: number;
genTokens: number;
concurrency: number;
repetition: number;
// Milliseconds from request start to the first stream line.
ttftMs: number | null;
// Milliseconds from request start to end of stream.
totalMs: number | null;
promptTps: number | null;
genTps: number | null;
cacheN: number | null;
// Error message when the request failed, otherwise null.
error: string | null;
}
// ─── stream parser ──────────────────────────────────────────────────────────
/**
 * Parse llama.cpp timings from one line of a streaming response.
 *
 * Accepts either a raw JSON document or an SSE line. The `data:` prefix may be
 * followed by an optional space, as the SSE format allows.
 *
 * Recognized payloads:
 *   { "timings": { "prompt_per_second": N, "predicted_per_second": N, "cache_n": N } }
 *   { "usage": { ... } }   -> all-zero timings (no throughput is derivable from usage)
 *
 * Missing or non-numeric timing fields are reported as 0.
 *
 * @param chunk - one stream line, with or without the SSE `data:` prefix
 * @returns the timings, or null for `[DONE]`, non-JSON input, non-object JSON,
 *          or an object carrying neither `timings` nor `usage`
 */
export function parseLlamaTimings(chunk: string): BenchTimings | null {
  const line = chunk.trim();
  const payload = line.startsWith('data:') ? line.slice('data:'.length).trim() : line;
  if (payload === '[DONE]') return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null) return null;

  const doc = parsed as Record<string, unknown>;
  const asNumber = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : 0;

  // Preferred source: the llama.cpp `timings` object.
  const timings = doc.timings;
  if (typeof timings === 'object' && timings !== null) {
    const t = timings as Record<string, unknown>;
    return {
      promptPerSecond: asNumber(t.prompt_per_second),
      predictedPerSecond: asNumber(t.predicted_per_second),
      cacheN: asNumber(t.cache_n),
    };
  }

  // Fallback: a chunk that only carries `usage` still counts as a timings chunk.
  // NOTE(review): callers record these zeros as 0 tok/s — confirm that is intended.
  if (doc.usage) {
    return {
      promptPerSecond: 0,
      predictedPerSecond: 0,
      cacheN: 0,
    };
  }
  return null;
}
// ─── single request runner ──────────────────────────────────────────────────
/**
 * Run a single bench request: stream a chat completion, capture TTFT, parse timings.
 *
 * Never rejects: any failure (HTTP error, timeout, stream error) is reported
 * through `sample.error`, with the measurement fields left null.
 *
 * @param baseUrl - base URL of the OpenAI-compatible endpoint
 * @param model - model identifier sent in the request
 * @param promptTokens - approximate prompt size; also recorded on the sample
 * @param genTokens - `max_tokens` for the request; also recorded on the sample
 * @param repetition - repetition index recorded on the sample
 * @param temperature - sampling temperature (default 0.7)
 * @param topP - nucleus sampling value (default 0.9)
 * @returns the sample; `concurrency` is always 1 here and is overwritten by the fan-out caller
 */
export async function runSingleBenchRequest(
  baseUrl: string,
  model: string,
  promptTokens: number,
  genTokens: number,
  repetition: number,
  temperature: number = 0.7,
  topP: number = 0.9,
): Promise<BenchSample> {
  const sample: BenchSample = {
    promptTokens,
    genTokens,
    concurrency: 1, // set by the fan-out caller
    repetition,
    ttftMs: null,
    totalMs: null,
    promptTps: null,
    genTps: null,
    cacheN: null,
    error: null,
  };

  // Generate a deterministic prompt of the target length.
  const prompt = generatePrompt(promptTokens);
  const startTime = Date.now();
  let firstDeltaTime: number | null = null;
  let timings: BenchTimings | null = null;

  try {
    const res = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [{ role: 'user', content: prompt }],
        temperature,
        top_p: topP,
        max_tokens: genTokens,
        stream: true,
      }),
      signal: AbortSignal.timeout(120_000),
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`bench request failed: ${res.status} ${errBody.slice(0, 200)}`);
    }
    const reader = res.body?.getReader();
    if (!reader) {
      throw new Error('no response body');
    }

    const decoder = new TextDecoder();
    let buffer = '';
    try {
      let finished = false;
      while (!finished) {
        const chunk = await reader.read();
        finished = chunk.done;
        // On the last iteration flush the decoder so a trailing partial sequence is emitted.
        buffer += chunk.done ? decoder.decode() : decoder.decode(chunk.value, { stream: true });
        const lines = buffer.split('\n');
        // Carry the unterminated tail into the next read. At end of stream there
        // is no next read, so the tail is processed as a final line instead of dropped.
        buffer = finished ? '' : (lines.pop() ?? '');
        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed || trimmed === 'data: [DONE]') continue;
          // TTFT: capture at the first non-empty stream line.
          if (firstDeltaTime === null) {
            firstDeltaTime = Date.now();
          }
          // Keep the most recent timings seen; they normally arrive in the final chunk.
          const parsed = parseLlamaTimings(trimmed);
          if (parsed) {
            timings = parsed;
          }
        }
      }
    } finally {
      // Release the stream on every exit path; a no-op once the stream has ended.
      await reader.cancel().catch(() => undefined);
    }

    sample.ttftMs = firstDeltaTime !== null ? firstDeltaTime - startTime : null;
    sample.totalMs = Date.now() - startTime;
    if (timings) {
      sample.promptTps = timings.promptPerSecond;
      sample.genTps = timings.predictedPerSecond;
      sample.cacheN = timings.cacheN;
    }
  } catch (err) {
    sample.error = err instanceof Error ? err.message : String(err);
  }
  return sample;
}
/**
 * Generate a deterministic prompt with approximately the target token count.
 *
 * The length is estimated at 4 characters per token, and the text is a fixed
 * sentence repeated and cut to exactly that many characters.
 *
 * @param targetTokens - desired prompt size in tokens
 * @returns the prompt text; empty when the target is zero, negative or NaN
 */
function generatePrompt(targetTokens: number): string {
  const charsPerToken = 4;
  const sentence = 'The quick brown fox jumps over the lazy dog. ';
  const wantedChars = targetTokens * charsPerToken;
  // Covers 0, negatives and NaN, for which no text is produced.
  if (!(wantedChars > 0)) {
    return '';
  }
  const copies = Math.ceil(wantedChars / sentence.length);
  return sentence.repeat(copies).slice(0, wantedChars);
}
// ─── bench runner ───────────────────────────────────────────────────────────
// Progress snapshot handed to the onProgress callback of runBenchSuite
// after each finished request.
export interface BenchRunProgress {
// Identifier of the run (the generated run id, not the suite id).
jobId: string;
totalSamples: number;
completedSamples: number;
// Grid coordinates reported alongside the progress update.
currentPromptTokens: number;
currentGenTokens: number;
currentConcurrency: number;
currentRepetition: number;
}
/**
 * Run a full bench suite: every (promptTokens, genTokens, concurrency) cell,
 * `repetitions` times each.
 *
 * Flow: insert the run row -> publish "running" -> warmup (one discarded
 * request per unique (promptTokens, genTokens) pair) -> measured requests in
 * batches of at most `concurrency` -> persist samples -> aggregate -> compare
 * with and replace the stored baseline -> mark the run completed -> publish
 * "completed".
 *
 * NOTE(review): there is no failure path here. If a query throws after the run
 * row is inserted, the row keeps status 'running' — confirm callers handle that.
 *
 * @param params - suite, target base URL and fallback sampling params
 * @param sql - tagged-template SQL client
 * @param emitter - sink for control_job events
 * @param seq - sequence number stamped on every published event
 * @param onProgress - invoked once per finished request
 */
export async function runBenchSuite(
  params: BenchRunParams,
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  onProgress: (progress: BenchRunProgress) => void,
): Promise<void> {
  const { suite, baseUrl } = params;
  // A4: suite-defined sampling params with fallback defaults.
  const temperature = suite.temperature ?? params.temperature ?? 0.7;
  const topP = suite.topP ?? params.topP ?? 0.9;
  const jobId = suite.id;

  interface GridItem {
    promptTokens: number;
    genTokens: number;
    concurrency: number;
    repetition: number;
  }

  // Build the full grid of combinations.
  const grid: GridItem[] = [];
  for (const pt of suite.promptTokens) {
    for (const gt of suite.genTokens) {
      for (const conc of suite.concurrency) {
        for (let rep = 0; rep < suite.repetitions; rep++) {
          grid.push({ promptTokens: pt, genTokens: gt, concurrency: conc, repetition: rep });
        }
      }
    }
  }
  const totalSamples = grid.length;

  // Persist the run record with jobType (A2) and sampling params (A4).
  const runId = `${jobId}_${Date.now()}`;
  await sql`
    INSERT INTO bench_runs (id, suite_id, job_type, status, started_at, total_samples, temperature, top_p)
    VALUES (${runId}, ${suite.id}, 'bench', 'running', clock_timestamp(), ${totalSamples}, ${temperature}, ${topP})
  `;

  // Publish run started.
  emitter.publish({
    type: 'control_job' as const,
    seq,
    jobType: 'bench' as const,
    jobId: runId,
    status: 'running' as const,
    detail: {
      suiteId: suite.id,
      providerId: suite.providerId,
      model: suite.model,
      totalSamples,
    },
  });

  // A5: Warmup pass — 1 request per unique (promptTokens, genTokens) cell, discarded.
  // The cell values are kept next to the key so they never have to be parsed back.
  const warmupCells = new Map<string, { promptTokens: number; genTokens: number }>();
  for (const { promptTokens, genTokens } of grid) {
    warmupCells.set(`${promptTokens}_${genTokens}`, { promptTokens, genTokens });
  }
  await Promise.allSettled(
    Array.from(warmupCells.values(), (cell) =>
      runSingleBenchRequest(baseUrl, suite.model, cell.promptTokens, cell.genTokens, 0, temperature, topP),
    ),
  );

  let completed = 0;
  const samples: BenchSample[] = [];

  // Group by (promptTokens, genTokens, concurrency); each group holds the
  // repetitions of one grid cell.
  const groups = new Map<string, GridItem[]>();
  for (const item of grid) {
    const key = `${item.promptTokens}_${item.genTokens}_${item.concurrency}`;
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      groups.set(key, [item]);
    }
  }

  for (const group of groups.values()) {
    const declared = group[0]?.concurrency ?? 1;
    // Clamp to a whole number >= 1: a step of 0 (or below) would never advance
    // the batch loop.
    const batchSize = Math.max(1, Math.min(Math.floor(declared) || 1, group.length));

    // Process in batches of 'concurrency' size using Promise.allSettled.
    for (let batchStart = 0; batchStart < group.length; batchStart += batchSize) {
      const batch = group.slice(batchStart, batchStart + batchSize);
      const results = await Promise.allSettled(
        batch.map(async (item) => {
          const sample = await runSingleBenchRequest(
            baseUrl,
            suite.model,
            item.promptTokens,
            item.genTokens,
            item.repetition,
            temperature,
            topP,
          );
          sample.concurrency = item.concurrency;
          return sample;
        }),
      );

      for (const [index, item] of batch.entries()) {
        const result = results[index];
        if (result?.status === 'fulfilled') {
          samples.push(result.value);
        }
        completed++;

        // Progress callback, reporting the item this result belongs to.
        onProgress({
          jobId: runId,
          totalSamples,
          completedSamples: completed,
          currentPromptTokens: item.promptTokens,
          currentGenTokens: item.genTokens,
          currentConcurrency: item.concurrency,
          currentRepetition: item.repetition,
        });

        // Publish progress
        emitter.publish({
          type: 'control_job' as const,
          seq,
          jobType: 'bench' as const,
          jobId: runId,
          status: 'running' as const,
          detail: {
            completedSamples: completed,
            totalSamples,
            percent: Math.round((completed / totalSamples) * 100),
          },
        });
      }
    }
  }

  // Persist all samples.
  for (const s of samples) {
    await sql`
      INSERT INTO bench_samples (run_id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error)
      VALUES (${runId}, ${s.promptTokens}, ${s.genTokens}, ${s.concurrency}, ${s.repetition}, ${s.ttftMs ?? null}, ${s.totalMs ?? null}, ${s.promptTps ?? null}, ${s.genTps ?? null}, ${s.cacheN ?? null}, ${s.error ?? null})
    `;
  }

  // Compute aggregates over successful samples that reported generation throughput.
  const validSamples = samples.filter((s) => !s.error && s.genTps != null);
  const aggregate = computeAggregates(validSamples);

  // A1: Baseline persistence + regression flag.
  // Compare against existing baseline; first run seeds it.
  const baselineRows = await sql<{ aggregate: string }[]>`
    SELECT aggregate FROM bench_baselines
    WHERE provider_id = ${suite.providerId} AND model = ${suite.model}
  `;
  const regressionFlag = computeRegressionFlag(aggregate, baselineRows[0]?.aggregate);

  // Upsert baseline.
  await sql`
    INSERT INTO bench_baselines (provider_id, model, aggregate, run_id)
    VALUES (${suite.providerId}, ${suite.model}, ${sql.json(aggregate as never)}, ${runId})
    ON CONFLICT (provider_id, model) DO UPDATE SET
      aggregate = EXCLUDED.aggregate,
      run_id = EXCLUDED.run_id,
      created_at = clock_timestamp()
  `;

  // Update run record with regression flag.
  await sql`
    UPDATE bench_runs
    SET status = 'completed', finished_at = clock_timestamp(), completed_samples = ${completed},
        aggregate = ${sql.json(aggregate as never)}, regression_flag = ${regressionFlag}
    WHERE id = ${runId}
  `;

  // Publish completion.
  emitter.publish({
    type: 'control_job' as const,
    seq,
    jobType: 'bench' as const,
    jobId: runId,
    status: 'completed' as const,
    detail: { ...aggregate, regressionFlag },
  });
}
/**
 * A1: Classify the current run against the stored baseline by average gen tok/s.
 *
 * - more than 10% below the baseline -> 'regression'
 * - more than 5% above the baseline  -> 'improvement'
 * - anything in between              -> 'baseline'
 *
 * N5: a missing or zero baseline throughput yields null rather than dividing by zero.
 *
 * @param current - aggregate of the run being classified
 * @param baselineJson - raw bench_baselines.aggregate value. porsager returns
 *        jsonb already parsed (object) while tests pass a JSON string;
 *        jsonbObject handles both. A falsy value means no baseline row exists
 *        yet, so this run seeds it.
 * @returns the flag, or null when either side has no usable throughput
 */
export function computeRegressionFlag(
  current: BenchAggregate,
  baselineJson: unknown,
): 'baseline' | 'regression' | 'improvement' | null {
  const currentTps = current.avgGenTps;
  if (!currentTps) {
    return null;
  }
  if (!baselineJson) {
    return 'baseline';
  }

  const baseline = jsonbObject(baselineJson) as BenchAggregate | null;
  if (!baseline || !baseline.avgGenTps) {
    return null;
  }

  const baselineTps = baseline.avgGenTps;
  const relativeChange = (currentTps - baselineTps) / baselineTps;
  return relativeChange < -0.1 ? 'regression' : relativeChange > 0.05 ? 'improvement' : 'baseline';
}
// Summary statistics over a set of bench samples, as produced by computeAggregates.
// Each statistic is null when no sample carried the underlying measurement.
export interface BenchAggregate {
avgTtftMs: number | null;
medianTtftMs: number | null;
avgGenTps: number | null;
medianGenTps: number | null;
avgPromptTps: number | null;
medianPromptTps: number | null;
// Number of samples the aggregate was computed from.
totalSamples: number;
// Number of those samples with a non-empty error.
errorSamples: number;
// 95th percentile of time-to-first-token.
p95TtftMs: number | null;
}
/**
 * Summarize a set of samples: mean / median of TTFT, generation throughput and
 * prompt throughput, plus the 95th-percentile TTFT.
 *
 * Null measurements are ignored per metric. A metric with no values — including
 * every metric of an empty input — is reported as null.
 *
 * @param samples - samples to summarize
 * @returns the aggregate; `totalSamples` is the input length and `errorSamples`
 *          counts inputs with a non-empty `error`
 */
export function computeAggregates(samples: BenchSample[]): BenchAggregate {
  // Collect one metric's non-null values in ascending order.
  const sortedColumn = (pick: (s: BenchSample) => number | null): number[] => {
    const values: number[] = [];
    for (const sample of samples) {
      const value = pick(sample);
      if (value != null) values.push(value);
    }
    return values.sort((a, b) => a - b);
  };

  const mean = (xs: number[]): number | null => {
    if (xs.length === 0) return null;
    let total = 0;
    for (const x of xs) total += x;
    return total / xs.length;
  };

  // Middle value, or the mean of the two middle values for an even count.
  const middle = (xs: number[]): number | null => {
    if (xs.length === 0) return null;
    const mid = Math.floor(xs.length / 2);
    if (xs.length % 2 === 1) return xs[mid] ?? null;
    return ((xs[mid - 1] ?? 0) + (xs[mid] ?? 0)) / 2;
  };

  // Nearest-rank 95th percentile.
  const percentile95 = (xs: number[]): number | null => {
    if (xs.length === 0) return null;
    const rank = Math.ceil(xs.length * 0.95) - 1;
    return xs[Math.max(0, rank)] ?? null;
  };

  const ttftValues = sortedColumn((s) => s.ttftMs);
  const genValues = sortedColumn((s) => s.genTps);
  const promptValues = sortedColumn((s) => s.promptTps);

  let failed = 0;
  for (const sample of samples) {
    if (sample.error) failed += 1;
  }

  return {
    avgTtftMs: mean(ttftValues),
    medianTtftMs: middle(ttftValues),
    avgGenTps: mean(genValues),
    medianGenTps: middle(genValues),
    avgPromptTps: mean(promptValues),
    medianPromptTps: middle(promptValues),
    totalSamples: samples.length,
    errorSamples: failed,
    p95TtftMs: percentile95(ttftValues),
  };
}

View File

@@ -0,0 +1,142 @@
/**
* Capture fetch: GET /api/captures/:id on llama-swap host, decode base64,
* persist trimmed copy (256KB cap app-enforced), render with shiki JSON.
*
* The 256KB cap is application-enforced in the fetch handler, not a DB constraint.
* Total budget: 50MB default, configurable via CAPTURE_BUDGET_MB env var.
*/
import type { Sql } from '../db.js';
// Size budget applied by parseCapture to the decoded request + response bodies.
const MAX_CAPTURE_BYTES = 256 * 1024; // 256KB
// A capture after decoding and trimming, in the shape this service works with.
export interface CaptureData {
// The swap_entry_id the capture was fetched by.
id: number;
providerId: string;
timestamp: string;
model: string;
requestHeaders: Record<string, string>;
// Decoded (no longer base64) request body.
requestBody: string;
responseHeaders: Record<string, string>;
// Decoded response body; may end with a truncation marker.
responseBody: string;
durationMs: number;
// UTF-8 byte length of requestBody + responseBody as stored here.
sizeBytes: number;
}
// Outcome of fetchCapture: `capture` is set on success, `error` on failure.
export interface CaptureFetchResult {
ok: boolean;
capture?: CaptureData;
error?: string;
}
/**
 * Fetch a capture from a llama-swap host by its swap_entry_id.
 *
 * Never rejects: HTTP errors, timeouts (10 s) and parse failures are all
 * reported through the result's `error` field.
 *
 * @param baseUrl - base URL of the llama-swap host
 * @param providerId - provider the capture is attributed to
 * @param swapEntryId - capture id on the host
 * @returns `{ ok: true, capture }` on success, otherwise `{ ok: false, error }`
 */
export async function fetchCapture(
  baseUrl: string,
  providerId: string,
  swapEntryId: number,
): Promise<CaptureFetchResult> {
  const failure = (error: string): CaptureFetchResult => ({ ok: false, error });
  try {
    const endpoint = `${baseUrl}/api/captures/${swapEntryId}`;
    const res = await fetch(endpoint, { signal: AbortSignal.timeout(10_000) });
    if (res.ok) {
      const raw = await res.json() as Record<string, unknown>;
      return { ok: true, capture: parseCapture(raw, providerId, swapEntryId) };
    }
    // A missing capture gets its own message; every other status is reported verbatim.
    return failure(res.status === 404 ? 'capture not found on host' : `fetch failed: ${res.status}`);
  } catch (err) {
    return failure((err as Error).message ?? String(err));
  }
}
/**
 * Decode a base64 body field. Anything that is not a non-empty string yields ''.
 * Buffer's base64 decoder skips invalid characters instead of throwing, so no
 * error handling is needed here.
 */
function decodeBase64Body(value: unknown): string {
  return typeof value === 'string' && value.length > 0
    ? Buffer.from(value, 'base64').toString('utf8')
    : '';
}

/**
 * Cut `text` to at most `maxBytes` of UTF-8 without splitting a multi-byte character.
 */
function truncateUtf8(text: string, maxBytes: number): string {
  const bytes = Buffer.from(text, 'utf8');
  if (bytes.length <= maxBytes) return text;
  let end = Math.max(0, Math.floor(maxBytes));
  // Back up while the first excluded byte is a continuation byte (10xxxxxx).
  while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) end--;
  return bytes.subarray(0, end).toString('utf8');
}

/**
 * Parse raw capture data from llama-swap into our structured format.
 *
 * Bodies are base64-decoded, then trimmed so that request + response fit in
 * `maxBytes` of UTF-8. The response body is trimmed first; the request body is
 * only cut when it exceeds the budget on its own. Every trimmed body gets a
 * truncation marker appended, and the marker is not counted against the budget.
 *
 * @param raw - capture JSON as returned by the host
 * @param providerId - provider the capture is attributed to
 * @param swapEntryId - capture id; becomes `id` in the result
 * @param maxBytes - byte budget for the two bodies (default: 256KB)
 * @returns the structured capture; `sizeBytes` is the UTF-8 size of both bodies as returned
 */
export function parseCapture(
  raw: Record<string, unknown>,
  providerId: string,
  swapEntryId: number,
  maxBytes: number = MAX_CAPTURE_BYTES,
): CaptureData {
  const requestHeaders = (raw.request_headers ?? raw.headers ?? {}) as Record<string, string>;
  const responseHeaders = (raw.response_headers ?? {}) as Record<string, string>;

  let requestBody = decodeBase64Body(raw.request_body);
  let responseBody = decodeBase64Body(raw.response_body);

  // Enforce the cap in bytes: string length undercounts non-ASCII content.
  const requestBytes = Buffer.byteLength(requestBody);
  const responseBytes = Buffer.byteLength(responseBody);
  if (requestBytes + responseBytes > maxBytes) {
    const marker = `\n\n[truncated: capture exceeds ${Math.round(maxBytes / 1024)}KB cap]`;
    if (requestBytes > maxBytes) {
      requestBody = truncateUtf8(requestBody, maxBytes) + marker;
    }
    const remaining = Math.max(0, maxBytes - Math.min(requestBytes, maxBytes));
    responseBody = truncateUtf8(responseBody, remaining) + marker;
  }

  const sizeBytes = Buffer.byteLength(requestBody + responseBody);
  return {
    id: swapEntryId,
    providerId,
    timestamp: (raw.timestamp ?? raw.ts ?? new Date().toISOString()) as string,
    model: (raw.model ?? '') as string,
    requestHeaders,
    requestBody,
    responseHeaders,
    responseBody,
    durationMs: (raw.duration_ms ?? 0) as number,
    sizeBytes,
  };
}
/**
 * Persist a trimmed capture to the control_requests table.
 *
 * Headers, bodies and duration are stored together in the JSONB `capture`
 * column. A row with the same (provider_id, swap_entry_id, ts) has its
 * capture replaced.
 */
export async function persistCapture(
  sql: Sql,
  capture: CaptureData,
): Promise<void> {
  const { requestHeaders, requestBody, responseHeaders, responseBody, durationMs } = capture;
  // sql.json must receive the OBJECT: handing it a pre-stringified value would
  // store a JSON string in the JSONB column (the double-serialization gotcha).
  const payload = { requestHeaders, requestBody, responseHeaders, responseBody, durationMs };
  await sql`
    INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, capture)
    VALUES (${capture.providerId}, ${capture.id}, ${capture.timestamp}, ${capture.model}, ${sql.json(payload as never)})
    ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE SET
      capture = EXCLUDED.capture
  `;
}

View File

@@ -0,0 +1,409 @@
import { randomUUID } from 'node:crypto';
import { readFileSync, readdirSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { load as loadYaml } from 'js-yaml';
import type { Sql } from '../db.js';
// ES modules have no __filename/__dirname globals; derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// ─── types ──────────────────────────────────────────────────────────────────
/**
 * A code task. The YAML loader builds one of these for any task that has
 * `test_code` and no `rubric`.
 */
export interface CodeTask {
id: string;
prompt: string;
test_code: string;
expected_output: string;
language: string;
}
/** One weighted criterion of a chat-task rubric. */
export interface RubricCriterion {
criterion: string;
description: string;
// Relative weight; the loader may rescale weights so they sum to the rubric's max_score.
weight: number;
}
/**
 * A chat task. The YAML loader builds one of these for any task that has a
 * `rubric`.
 */
export interface ChatTask {
id: string;
prompt: string;
prompt_template?: string;
context_generator?: string;
rubric: {
criteria: RubricCriterion[];
// The loader defaults this to 7 when the YAML omits it.
max_score: number;
};
}
/** An eval suite as loaded from a data/suite-*.yaml file. */
export interface EvalSuiteData {
id: string;
name: string;
kind: 'chat' | 'code';
// The loader defaults this to 1 when the YAML omits it.
version: number;
description?: string;
judge_model: string | null;
tasks: (CodeTask | ChatTask)[];
}
/** Column shape selected from the eval_suites table. */
export interface EvalSuiteRow {
id: string;
name: string;
kind: string;
version: number;
// NOTE(review): written via sql.json, so this is presumably a jsonb column and
// the driver may hand back a parsed value rather than a string — confirm.
tasks: string;
judge_model: string | null;
judge_model_version: string | null;
// NOTE(review): same jsonb caveat as `tasks` — confirm the runtime type.
metadata: string | null;
created_at: string;
}
// ─── YAML loader ────────────────────────────────────────────────────────────
const DATA_DIR = resolve(dirname(__filename), '../../data');

/**
 * Parse one suite YAML file into an EvalSuiteData.
 *
 * Returns null when the document is not a mapping or has no `tasks` array.
 * Throws when the file cannot be read or is not valid YAML.
 */
function parseSuiteFile(path: string): EvalSuiteData | null {
  const parsed: unknown = loadYaml(readFileSync(path, 'utf8'));
  // An empty file parses to undefined/null; a scalar or list is not a suite.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null;
  const doc = parsed as Record<string, unknown>;
  if (!Array.isArray(doc.tasks)) return null;

  const chatTasks: ChatTask[] = [];
  const codeTasks: CodeTask[] = [];
  for (const task of doc.tasks as unknown[]) {
    // Skip blank list items (`- ` in YAML parses to null) and stray scalars.
    if (typeof task !== 'object' || task === null) continue;
    const t = task as Record<string, unknown>;
    if (t.rubric) {
      const rubric = t.rubric as Record<string, unknown>;
      chatTasks.push({
        id: t.id as string,
        prompt: t.prompt as string,
        prompt_template: (t.prompt_template as string) ?? undefined,
        context_generator: (t.context_generator as string) ?? undefined,
        rubric: {
          criteria: normalizeCriteria(rubric),
          max_score: (rubric.max_score as number) ?? 7,
        },
      });
    } else if (t.test_code) {
      codeTasks.push({
        id: t.id as string,
        prompt: t.prompt as string,
        test_code: t.test_code as string,
        expected_output: t.expected_output as string,
        language: t.language as string,
      });
    }
  }

  return {
    id: doc.id as string,
    name: doc.name as string,
    kind: doc.kind as 'chat' | 'code',
    version: (doc.version as number) ?? 1,
    description: (doc.description as string) ?? undefined,
    judge_model: (doc.judge_model as string) ?? null,
    // Code tasks are listed first, then chat tasks (not raw file order).
    tasks: [...codeTasks, ...chatTasks],
  };
}

/**
 * Load all eval suite YAML files (`suite-*.yaml`) from the data/ directory.
 *
 * Each file is handled independently: one that is unreadable or malformed is
 * logged and skipped, and one without a `tasks` array is skipped silently,
 * without affecting the remaining suites. If the directory itself cannot be
 * listed, a warning is logged and an empty list is returned.
 *
 * @returns The suites that loaded successfully, in directory-listing order.
 */
export function loadEvalSuitesFromData(): EvalSuiteData[] {
  let files: string[];
  try {
    files = readdirSync(DATA_DIR).filter((f) => f.startsWith('suite-') && f.endsWith('.yaml'));
  } catch (err) {
    console.warn({ err: (err as Error).message }, 'eval: failed to load suites from data/');
    return [];
  }

  const suites: EvalSuiteData[] = [];
  for (const file of files) {
    try {
      const suite = parseSuiteFile(resolve(DATA_DIR, file));
      if (suite) suites.push(suite);
    } catch (err) {
      // Keep going: one broken file must not hide every suite after it.
      console.warn({ file, err: (err as Error).message }, 'eval: failed to load suites from data/');
    }
  }
  return suites;
}
/**
 * Normalize a task rubric into a flat list of weighted criteria.
 *
 * Three rubric shapes are accepted, tried in this order:
 *  1. `criteria: [...]` — an explicit array; entries lacking a criterion name
 *     or a non-zero weight are dropped. Returned as-is, without rescaling.
 *  2. `<name>: { description, weight }` — keyed entries with weights. When
 *     `max_score` is set, weights are rescaled (one decimal place) so that
 *     they sum to it.
 *  3. `<name>: <anything else>` — used only when shape 2 matched nothing;
 *     every key becomes a criterion of weight 1.
 *
 * Null or scalar entries are tolerated rather than throwing.
 *
 * @param rubric - The raw `rubric` mapping from the suite YAML.
 * @returns The normalized criteria (possibly empty).
 */
function normalizeCriteria(rubric: Record<string, unknown>): RubricCriterion[] {
  const criteria = rubric.criteria as RubricCriterion[] | undefined;
  if (criteria && Array.isArray(criteria)) {
    return criteria.filter((c) => c != null && c.criterion && c.weight);
  }

  const maxScore = rubric.max_score as number | undefined;
  const entries = Object.entries(rubric).filter(
    ([key]) => key !== 'max_score' && key !== 'criteria',
  );

  // Shape 2: keyed entries that carry both a description and a weight.
  const result: RubricCriterion[] = [];
  let totalWeight = 0;
  for (const [key, val] of entries) {
    // `name:` with no value parses to null — not a weighted entry.
    if (typeof val !== 'object' || val === null) continue;
    const entry = val as { description?: string; weight?: number };
    if (entry.weight && entry.description) {
      result.push({ criterion: key, description: entry.description, weight: entry.weight });
      totalWeight += entry.weight;
    }
  }

  // Shape 3: nothing was weighted, so treat every key as an equal criterion.
  if (result.length === 0) {
    const describe = (val: unknown): string => {
      if (val == null) return '';
      if (typeof val === 'object') {
        const description = (val as { description?: unknown }).description;
        // Avoid the useless "[object Object]" that String() would produce.
        return typeof description === 'string' ? description : JSON.stringify(val);
      }
      return String(val);
    };
    for (const [key, val] of entries) {
      result.push({ criterion: key, description: describe(val), weight: 1 });
    }
  }

  if (maxScore && totalWeight > 0) {
    const scale = maxScore / totalWeight;
    for (const c of result) {
      c.weight = Math.round(c.weight * scale * 10) / 10;
    }
  }
  return result;
}
// ─── DB operations ──────────────────────────────────────────────────────────
/**
 * Insert every suite found under data/ into eval_suites.
 *
 * Idempotent: rows are inserted with ON CONFLICT (id) DO NOTHING, so a suite
 * whose id already exists is left untouched. Suites are inserted one at a
 * time, in the order the loader returns them.
 */
export async function seedEvalSuites(sql: Sql): Promise<void> {
  for (const { id, name, kind, version, tasks, judge_model, description } of loadEvalSuitesFromData()) {
    // Metadata holds only the description; absent description → SQL NULL.
    const metadata = description
      ? sql.json({ description } as never)
      : sql`NULL::jsonb`;
    await sql`
      INSERT INTO eval_suites (id, name, kind, version, tasks, judge_model, judge_model_version, metadata)
      VALUES (
      ${id},
      ${name},
      ${kind},
      ${version},
      ${sql.json(tasks as never)},
      ${judge_model},
      NULL,
      ${metadata}
      )
      ON CONFLICT (id) DO NOTHING
    `;
  }
}
/**
 * List all eval suites, newest first (ordered by created_at descending).
 *
 * @param sql - Database handle.
 * @returns Every row of eval_suites; no limit is applied.
 */
export async function listEvalSuites(sql: Sql): Promise<EvalSuiteRow[]> {
return await sql<EvalSuiteRow[]>`
SELECT id, name, kind, version, tasks, judge_model, judge_model_version, metadata, created_at
FROM eval_suites
ORDER BY created_at DESC
`;
}
/**
 * Look up one eval suite by its id.
 *
 * @returns The matching row, or null when no suite has that id.
 */
export async function getEvalSuite(sql: Sql, id: string): Promise<EvalSuiteRow | null> {
  const [suite] = await sql<EvalSuiteRow[]>`
    SELECT id, name, kind, version, tasks, judge_model, judge_model_version, metadata, created_at
    FROM eval_suites WHERE id = ${id}
  `;
  return suite ?? null;
}
/**
 * Create or update an eval suite.
 *
 * A new suite is inserted at version 1. Updating an existing suite replaces
 * its name, kind, tasks, judge model and metadata, and bumps `version` by one.
 * The bump happens inside the ON CONFLICT clause, so it is atomic: two
 * concurrent updates cannot both read the same old version and write the same
 * new one, and no separate lookup round trip is needed.
 *
 * `judge_model_version` is set to NULL on insert and not touched on update.
 *
 * @param id - Suite id to update, or null to create one under a fresh UUID.
 * @param metadata - Optional metadata object; omitted → SQL NULL.
 * @returns The id of the suite that was written.
 */
export async function upsertEvalSuite(
  sql: Sql,
  id: string | null,
  name: string,
  kind: 'chat' | 'code',
  tasks: unknown[],
  judgeModel: string | null,
  metadata?: Record<string, unknown>,
): Promise<string> {
  const suiteId = id ?? randomUUID();
  await sql`
    INSERT INTO eval_suites (id, name, kind, version, tasks, judge_model, judge_model_version, metadata)
    VALUES (
      ${suiteId},
      ${name},
      ${kind},
      1,
      ${sql.json(tasks as never)},
      ${judgeModel},
      NULL,
      ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`}
    )
    ON CONFLICT (id) DO UPDATE SET
      name = EXCLUDED.name,
      kind = EXCLUDED.kind,
      version = eval_suites.version + 1,
      tasks = EXCLUDED.tasks,
      judge_model = EXCLUDED.judge_model,
      metadata = EXCLUDED.metadata
  `;
  return suiteId;
}
/**
 * Insert a fresh eval_runs row in the 'running' state and return its id.
 *
 * The id has the form `eval_<epoch ms>_<first 8 chars of a UUID>`. The row is
 * written with job_type 'eval' and started_at taken from the database clock.
 */
export async function createEvalRun(
  sql: Sql,
  suiteId: string,
  providerId: string,
  model: string,
  quant: string | null,
  judgeModel: string | null,
  judgeModelVersion: string | null,
  totalTasks: number,
): Promise<string> {
  const stamp = Date.now();
  const suffix = randomUUID().slice(0, 8);
  const runId = ['eval', stamp, suffix].join('_');
  await sql`
    INSERT INTO eval_runs (id, suite_id, job_type, provider_id, model, quant, status, judge_model, judge_model_version, started_at, total_tasks)
    VALUES (
    ${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant},
    'running', ${judgeModel}, ${judgeModelVersion},
    clock_timestamp(), ${totalTasks}
    )
  `;
  return runId;
}
/**
 * Record a single eval result as one eval_results row.
 *
 * Every value is inserted as given; nullable arguments are stored as NULL.
 * This is a plain INSERT with no conflict handling.
 *
 * @param runId - Run the result belongs to.
 * @param taskId - Task identifier within the suite.
 * @param taskIndex - Position of the task within the run.
 * @param score - Achieved score, or null when none was produced.
 * @param maxScore - Maximum achievable score, or null.
 * @param rationale - Free-text explanation of the score, or null.
 * @param sandboxExitCode - Stored in sandbox_exit_code.
 * @param sandboxStderr - Stored in sandbox_stderr.
 * @param sandboxStdout - Stored in sandbox_stdout.
 * @param executionMs - Stored in execution_ms.
 * @param error - Error message for a failed task, or null.
 */
export async function recordEvalResult(
sql: Sql,
runId: string,
taskId: string,
taskIndex: number,
score: number | null,
maxScore: number | null,
rationale: string | null,
sandboxExitCode: number | null,
sandboxStderr: string | null,
sandboxStdout: string | null,
executionMs: number | null,
error: string | null,
): Promise<void> {
await sql`
INSERT INTO eval_results (run_id, task_id, task_index, score, max_score, rationale, sandbox_exit_code, sandbox_stderr, sandbox_stdout, execution_ms, error)
VALUES (
${runId}, ${taskId}, ${taskIndex}, ${score}, ${maxScore},
${rationale}, ${sandboxExitCode}, ${sandboxStderr}, ${sandboxStdout},
${executionMs}, ${error}
)
`;
}
/**
 * Close out an eval run.
 *
 * Sets status to 'failed' when `error` is truthy and 'completed' otherwise,
 * stamps finished_at from the database clock, and stores the completed-task
 * count, the aggregate (SQL NULL when absent) and the error text.
 */
export async function completeEvalRun(
  sql: Sql,
  runId: string,
  completedTasks: number,
  aggregate: Record<string, unknown> | null,
  error: string | null,
): Promise<void> {
  const status = error ? 'failed' : 'completed';
  const aggregateValue = aggregate ? sql.json(aggregate as never) : sql`NULL::jsonb`;
  await sql`
    UPDATE eval_runs
    SET status = ${status},
    finished_at = clock_timestamp(),
    completed_tasks = ${completedTasks},
    aggregate = ${aggregateValue},
    error = ${error}
    WHERE id = ${runId}
  `;
}
/** Column shape selected from the eval_runs table by listEvalRuns. */
export interface EvalRunRow {
  id: string;
  suite_id: string;
  job_type: string;
  provider_id: string;
  model: string;
  quant: string | null;
  status: string;
  judge_model: string | null;
  started_at: string | null;
  finished_at: string | null;
  total_tasks: number;
  completed_tasks: number;
  aggregate: string | null;
  error: string | null;
  created_at: string;
}

/**
 * List eval runs, newest first, capped at 200 rows.
 *
 * Both filters are optional and combine with AND. An omitted (or empty-string)
 * filter contributes an empty SQL fragment, i.e. no restriction.
 *
 * @param suiteId - Only return runs of this suite.
 * @param providerId - Only return runs against this provider.
 */
export async function listEvalRuns(
  sql: Sql,
  suiteId?: string,
  providerId?: string,
): Promise<EvalRunRow[]> {
  return await sql<EvalRunRow[]>`
    SELECT id, suite_id, job_type, provider_id, model, quant, status, judge_model,
      started_at, finished_at, total_tasks, completed_tasks, aggregate, error, created_at
    FROM eval_runs
    WHERE 1=1
    ${suiteId ? sql`AND suite_id = ${suiteId}` : sql``}
    ${providerId ? sql`AND provider_id = ${providerId}` : sql``}
    ORDER BY created_at DESC LIMIT 200
  `;
}
/**
 * Get all eval results recorded for a run, ordered by task_index ascending.
 *
 * @param sql - Database handle.
 * @param runId - Run whose results to fetch.
 * @returns One entry per eval_results row; empty when the run has none.
 */
export async function getEvalResults(
sql: Sql,
runId: string,
): Promise<Array<{
id: number;
task_id: string;
task_index: number;
score: number | null;
max_score: number | null;
rationale: string | null;
sandbox_exit_code: number | null;
sandbox_stderr: string | null;
sandbox_stdout: string | null;
execution_ms: number | null;
error: string | null;
}>> {
// The row type below repeats the declared return type field-for-field.
return await sql<Array<{
id: number;
task_id: string;
task_index: number;
score: number | null;
max_score: number | null;
rationale: string | null;
sandbox_exit_code: number | null;
sandbox_stderr: string | null;
sandbox_stdout: string | null;
execution_ms: number | null;
error: string | null;
}>>`
SELECT id, task_id, task_index, score, max_score, rationale,
sandbox_exit_code, sandbox_stderr, sandbox_stdout, execution_ms, error
FROM eval_results WHERE run_id = ${runId}
ORDER BY task_index
`;
}

View File

@@ -0,0 +1,264 @@
/**
* Fleet connector: SSE client consuming llama-swap /api/events per enabled host.
*
* Ports the opencode-sse.ts reconnectDecision pattern (exponential backoff +
* circuit-breaker) with one critical addition: **jitter**. The source pattern
* has NO jitter, which causes thundering-herd reconnections across N hosts.
*
* Jitter: random 0-50% of computed delay. Pure function for testability.
*
* Event parsing is NEW code — llama-swap's SSE envelope (modelStatus | logData |
* metrics | inflight) differs from the opencode SDK's Event type.
*/
import type { FastifyBaseLogger } from 'fastify';
import type { Sql } from '../db.js';
// ─── jitter (pure) ──────────────────────────────────────────────────────────
/**
 * Stretch a delay by a random extra of 0–50% of its value.
 * The result always lies in [delayMs, 1.5 * delayMs).
 */
export function addJitter(delayMs: number): number {
  const roll = Math.random();
  const extra = delayMs * roll * 0.5;
  return delayMs + extra;
}
// ─── reconnect backoff ──────────────────────────────────────────────────────
/** Tunables for the SSE reconnect backoff used by reconnectDecision. */
export interface ReconnectPolicy {
// Delay after the first failure; doubles with each further failure.
baseMs: number;
// Upper bound on the delay before jitter is added.
maxMs: number;
// Failure count above which reconnectDecision gives up.
maxAttempts: number;
}
/** Defaults: 1s base delay, 30s cap, give up once failures exceed 6. */
export const DEFAULT_RECONNECT_POLICY: ReconnectPolicy = {
baseMs: 1_000,
maxMs: 30_000,
maxAttempts: 6,
};
/** Outcome of reconnectDecision: retry after `delayMs`, or stop retrying. */
export type ReconnectDecision =
| { action: 'reconnect'; delayMs: number }
| { action: 'give-up' };
/**
 * Decide what to do after `failures` failed connection attempts.
 *
 * Gives up once `failures` exceeds `policy.maxAttempts`. Otherwise the delay
 * is baseMs * 2^(failures - 1), capped at maxMs, with jitter added on top of
 * the capped value (so the returned delay may exceed maxMs).
 */
export function reconnectDecision(
  failures: number,
  policy: ReconnectPolicy = DEFAULT_RECONNECT_POLICY,
): ReconnectDecision {
  const { baseMs, maxMs, maxAttempts } = policy;
  if (failures > maxAttempts) {
    return { action: 'give-up' };
  }
  const backoff = baseMs * Math.pow(2, failures - 1);
  const delayMs = addJitter(Math.min(maxMs, backoff));
  return { action: 'reconnect', delayMs };
}
// ─── llama-swap SSE envelope types ──────────────────────────────────────────
// Real wire shape (apigroup.go):
// event:message
// data:{"type":"modelStatus|logData|metrics|inflight","data":"<ESCAPED JSON STRING>"}
// The SSE event name is ALWAYS 'message'. The discriminator is the outer JSON's
// .type field. The payload is DOUBLE-ENCODED: JSON.parse(data) gives {type, data:string},
// then JSON.parse(that.data) gives the actual payload.
// Per-type payload shapes, verified against the fork source
// (/opt/forks/llama-swap/internal/server/apigroup.go sendModels/sendLogData/
// sendMetrics/sendInFlight, apiModel struct at :20):
// modelStatus -> []apiModel (FULL-FLEET snapshot array, not a single transition)
// logData -> {source, data} (field is 'data', not 'line')
// metrics -> []ActivityLogEntry (BARE array, tokens nested)
// inflight -> {total} (host-level total, NOT per-model)
/**
 * A fully decoded llama-swap SSE event: the outer envelope's `type`
 * discriminator paired with the payload after the second JSON.parse.
 */
export type LlamaSweepSSEEvent =
| { type: 'modelStatus'; data: ModelStatusEntry[] }
| { type: 'logData'; data: LogData }
| { type: 'metrics'; data: MetricsEntry[] }
| { type: 'inflight'; data: InflightData };
/** One entry of the modelStatus full-fleet array (fork apiModel struct). */
export interface ModelStatusEntry {
id: string;
name: string;
description: string;
state: string;
unlisted: boolean;
peerID: string;
aliases?: string[];
}
/** Payload of a logData event. The log text is in `data` (not `line`). */
export interface LogData {
source: string;
data: string;
}
// Real /api/metrics shape: bare JSON array of entries with NESTED tokens.
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
// NOTE: ActivityLogEntry does NOT carry request headers or source field.
// Headers exist only in ReqRespCapture (fetched on-demand via /api/captures/:id).
// See design §7 "Implementation notes" for the discrepancy.
export interface MetricsEntry {
id: number;
timestamp: string;
model: string;
req_path: string;
resp_status_code: number;
tokens: {
cache_tokens: number;
input_tokens: number;
output_tokens: number;
prompt_per_second: number;
tokens_per_second: number;
};
duration_ms: number;
has_capture: boolean;
// NOTE(review): not listed in the wire shape described above — confirm when this is populated.
capture?: string;
}
/** Payload of an inflight event: a host-level total, not a per-model count. */
export interface InflightData {
total: number;
}
// ─── the loop ───────────────────────────────────────────────────────────────
/** Collaborators injected into the fleet connector loop. */
export interface FleetConnectorDeps {
// Polled by the loop; once it returns false the connector stops.
isUp: () => boolean;
sql: Sql;
log: FastifyBaseLogger;
// Called for every decoded SSE event; the loop awaits it and logs (does not rethrow) failures.
onEvent: (providerId: string, event: LlamaSweepSSEEvent) => void | Promise<void>;
// NOTE(review): not invoked anywhere in runFleetConnector — confirm where reconciliation is driven from.
onReconcile: (providerId: string, metrics: MetricsEntry[]) => Promise<boolean>;
// Called once when the reconnect circuit breaker gives up.
onReconnectGiveUp: (providerId: string) => Promise<void>;
// Optional sleep override (e.g. for tests); defaults to a setTimeout-based sleep.
sleep?: (ms: number) => Promise<void>;
// Optional backoff policy; defaults to DEFAULT_RECONNECT_POLICY.
policy?: ReconnectPolicy;
}
/** Resolve after `ms` milliseconds, using setTimeout. */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Parse one line of the llama-swap SSE stream.
 *
 * Real wire shape (apigroup.go):
 *   event:message
 *   data:{"type":"modelStatus","data":"<ESCAPED JSON STRING>"}
 *
 * The SSE event name is always 'message' and is ignored. The discriminator is
 * the outer JSON's `.type` field. The payload is DOUBLE-ENCODED:
 * JSON.parse(data) gives {type, data: string}, then JSON.parse(that.data)
 * gives the actual payload.
 *
 * This function never throws. Anything that is not a well-formed data line —
 * blank lines, `event:` lines, invalid JSON, a JSON value that is not an
 * object (e.g. `data:null`), a missing/empty/non-string `type` or `data` —
 * yields null. The `type` value is not checked against the known event
 * names, and the inner payload's shape is not validated.
 *
 * @param line - A single line of the stream, with or without a trailing CR.
 * @returns The fully decoded event, or null when the line carries none.
 */
export function parseSseLine(line: string): LlamaSweepSSEEvent | null {
  const trimmed = line.trim();
  // Only "data:" lines carry payload.
  if (!trimmed.startsWith('data:')) return null;
  const dataStr = trimmed.slice(5).trimStart();
  if (!dataStr) return null;

  // First JSON parse: { type: "modelStatus", data: "<escaped json>" }
  let outer: unknown;
  try {
    outer = JSON.parse(dataStr);
  } catch {
    return null;
  }
  // JSON.parse legitimately returns null or primitives; reading a property
  // off null would throw and be mistaken for a connection failure upstream.
  if (typeof outer !== 'object' || outer === null) return null;
  const { type, data } = outer as { type?: unknown; data?: unknown };
  if (typeof type !== 'string' || !type || typeof data !== 'string' || !data) {
    return null;
  }

  // Second JSON parse: the actual payload (double-encoded string).
  let inner: unknown;
  try {
    inner = JSON.parse(data);
  } catch {
    return null;
  }
  return { type, data: inner } as LlamaSweepSSEEvent;
}
/**
 * Start the SSE connector loop for one host in the background.
 *
 * The loop runs fire-and-forget. If it rejects (for example because
 * `onReconnectGiveUp` throws), the failure is logged here instead of
 * surfacing as an unhandled promise rejection.
 *
 * @returns The AbortController that stops the loop when aborted.
 */
export function startFleetConnector(providerId: string, baseUrl: string, deps: FleetConnectorDeps): AbortController {
  const abort = new AbortController();
  void runFleetConnector(providerId, baseUrl, abort, deps).catch((err: unknown) => {
    const message = err instanceof Error ? err.message : String(err);
    deps.log.error({ providerId, err: message }, 'fleet: connector loop crashed');
  });
  return abort;
}
/**
 * Connector loop for one host: consume `${baseUrl}/api/events` as an SSE
 * stream and forward each decoded event to `deps.onEvent`, reconnecting with
 * backoff until `deps.isUp()` turns false or `abort` is signalled.
 *
 * Failure accounting: `failures` counts CONSECUTIVE failed attempts. It is
 * reset as soon as a connection delivers data (and on a clean stream end), so
 * occasional mid-stream drops spread over a long uptime do not add up and
 * trip the circuit breaker. Once consecutive failures exceed the policy's
 * maxAttempts, `deps.onReconnectGiveUp` is called and the loop exits.
 *
 * A handler error in `onEvent` is logged and does not tear down the stream.
 * A rejection from `onReconnectGiveUp` propagates to the caller.
 */
export async function runFleetConnector(
  providerId: string,
  baseUrl: string,
  abort: AbortController,
  deps: FleetConnectorDeps,
): Promise<void> {
  const signal = abort.signal;
  const sleep = deps.sleep ?? defaultSleep;
  const policy = deps.policy ?? DEFAULT_RECONNECT_POLICY;
  const url = `${baseUrl}/api/events`;
  let failures = 0;
  while (deps.isUp() && !signal.aborted) {
    try {
      const res = await fetch(url, { signal });
      if (!res.ok) {
        // Release the unread body so the underlying connection is not held.
        await res.body?.cancel().catch(() => {});
        throw new Error(`SSE connect failed: ${res.status} ${res.statusText}`);
      }
      const reader = res.body?.getReader();
      if (!reader) throw new Error('no response body');
      const decoder = new TextDecoder();
      let buffer = '';
      while (!signal.aborted) {
        const { done, value } = await reader.read();
        if (done) break;
        // Data arrived, so this connection is healthy: the next failure
        // starts a new backoff sequence instead of continuing the old one.
        failures = 0;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() ?? '';
        for (const line of lines) {
          if (signal.aborted) break;
          const event = parseSseLine(line);
          if (!event) continue;
          try {
            await deps.onEvent(providerId, event);
          } catch (err) {
            deps.log.error({ providerId, err: (err as Error).message }, 'fleet: onEvent failed');
          }
        }
      }
      // Clean stream end — healthy reconnect at base delay (pre-hardening).
      failures = 0;
      if (deps.isUp() && !signal.aborted) {
        await sleep(policy.baseMs);
      }
    } catch (err) {
      if (!deps.isUp() || signal.aborted) break;
      failures += 1;
      const decision = reconnectDecision(failures, policy);
      deps.log.warn(
        { providerId, failures, action: decision.action, err: (err as Error).message },
        'fleet: SSE error; reconnecting',
      );
      if (decision.action === 'give-up') {
        deps.log.warn({ providerId, failures }, 'fleet: SSE reconnect gave up (circuit breaker)');
        await deps.onReconnectGiveUp(providerId);
        break;
      }
      await sleep(decision.delayMs);
    }
  }
}

View File

@@ -0,0 +1,89 @@
/** Static configuration of one host. */
export interface HostConfig {
providerId: string;
baseUrl: string;
enabled: boolean;
}
/** In-memory state of the whole fleet, keyed by provider id. */
export interface FleetState {
hosts: Map<string, HostState>;
}
/** In-memory state of a single host. New hosts start as 'down' with seq 0. */
export interface HostState {
providerId: string;
liveness: 'connected' | 'reconnecting' | 'down';
// Set by stampLastSeen; null until the host has been seen.
lastSeenAt: Date | null;
// Counter advanced by incrementSeq.
seq: number;
/** Host-level inflight total (the fork's SSE publishes only a total, not per-model). */
inflightTotal: number;
// Per-model state. NOTE(review): presumably keyed by model id — confirm against the writer.
models: Map<string, ModelState>;
}
/** In-memory state of one model on a host. */
export interface ModelState {
model: string;
state: string;
ts: Date;
ttlDeadline: Date | null;
inflight: number;
}
/**
 * Serializable snapshot of fleet state: the same fields as HostState and
 * ModelState with dates carried as strings. `inflightTotal` is not included.
 */
export interface SnapshotData {
hosts: Array<{
providerId: string;
liveness: 'connected' | 'reconnecting' | 'down';
lastSeenAt: string | null;
seq: number;
models: Array<{
model: string;
state: string;
ts: string;
ttlDeadline: string | null;
inflight: number;
}>;
}>;
// Optional list of request records.
requests?: Array<{
id: number;
providerId: string;
ts: string;
model: string | null;
reqPath: string | null;
statusCode: number | null;
durationMs: number | null;
}>;
// Optional performance samples; gpu/sys payloads are left untyped here.
perfSamples?: Array<{
providerId: string;
ts: string;
gpu: unknown;
sys: unknown;
}>;
}
// ─── helpers for tests ──────────────────────────────────────────────────────
/** Build an empty fleet state with no hosts registered. */
export function createFleetState(): FleetState {
  const hosts: FleetState['hosts'] = new Map();
  return { hosts };
}
/**
 * Return the state for `providerId`, registering a fresh entry first if the
 * fleet has none. A fresh entry is 'down', never seen, with seq 0, no
 * inflight requests and no models.
 */
export function ensureHostState(fleet: FleetState, providerId: string): HostState {
  const existing = fleet.hosts.get(providerId);
  if (existing) return existing;

  const fresh: HostState = {
    providerId,
    liveness: 'down',
    lastSeenAt: null,
    seq: 0,
    inflightTotal: 0,
    models: new Map(),
  };
  fleet.hosts.set(providerId, fresh);
  return fresh;
}
/** Mark the host as seen right now (mutates `state.lastSeenAt`). */
export function stampLastSeen(state: HostState): void {
  const now = new Date();
  state.lastSeenAt = now;
}
/** Advance the host's sequence counter by one and return the new value. */
export function incrementSeq(state: HostState): number {
  const next = state.seq + 1;
  state.seq = next;
  return next;
}

View File

@@ -0,0 +1,140 @@
/**
* P7.1: auto:* gateway candidate resolution.
*
* The gateway exposes OpenAI-compatible virtual models. A completion against
* `auto:code` (etc.) is resolved to an ordered list of concrete candidate
* composite ids ('provider/model'), then dispatched with failover.
*
* Ordering source:
* - An explicit route_policy for the virtual model (admin-curated candidates).
* - Otherwise, advisory routing scores ranked by the category metric.
*
* Health filtering (only connected hosts are eligible) is applied last so a
* curated policy never dispatches to a down host.
*
* Pure helpers (orderCandidates, parseVirtualModel) are unit-tested; the DB
* read lives in resolveCandidates().
*/
import type { Sql } from '../db.js';
import type { FleetState } from './fleet-state.js';
import { computeRoutingScores, type ModelScore } from './routing-scores.js';
import { jsonbStringArray } from './jsonb.js';
/** The virtual model ids the gateway knows by name. */
export const VIRTUAL_MODELS = ['auto', 'auto:code', 'auto:fast', 'auto:cheap'] as const;
/** Union of the literal ids listed in VIRTUAL_MODELS. */
export type VirtualModel = (typeof VIRTUAL_MODELS)[number];
/**
 * True for the bare id 'auto' and for anything starting with 'auto:'.
 * The check is on the id as given — a composite form such as
 * 'auto/auto:code' does not match.
 */
export function isGatewayVirtualModel(id: string): boolean {
  if (id === 'auto') return true;
  return id.startsWith('auto:');
}
/**
 * Reduce a model id to the bare virtual model token.
 *
 * The picker may prepend a provider prefix: the gateway's registry provider
 * id is 'auto', so BooChat may send 'auto/auto:code'. Everything up to and
 * including the FIRST '/' is dropped; an id without '/' is returned as-is.
 */
export function parseVirtualModel(modelId: string): string {
  const slashAt = modelId.indexOf('/');
  if (slashAt < 0) {
    // Already bare.
    return modelId;
  }
  // Composite form '<gatewayProviderId>/<virtual>' — keep the part after '/'.
  return modelId.slice(slashAt + 1);
}
/** Column shape read from the route_policies table. */
export interface RoutePolicyRow {
virtual_model: string;
candidates: unknown; // jsonb: porsager returns a parsed array (see jsonb.ts)
// Composite id appended after the candidates when not already listed; may be null.
fallback: string | null;
// Only rows with enabled = true are considered when resolving candidates.
enabled: boolean;
}
/**
 * Order concrete candidates for a virtual model. Pure.
 *
 * With an explicit policy, its candidate list defines the order, with the
 * fallback appended last unless it is already listed. Health filtering then
 * drops every candidate that appears in `scores` only as unhealthy. A
 * candidate that does not appear in `scores` at all is KEPT: its health is
 * unknown, so dispatch is allowed to try it.
 *
 * Without a policy, candidates are derived from the advisory scores: healthy
 * entries that have a value for the virtual model's category metric, sorted
 * by that metric in descending order.
 *   - 'auto:code'               → codeScore
 *   - 'auto:fast', 'auto:cheap' → avgGenTps
 *   - 'auto' and anything else  → evalScore, else avgGenTps / 1000
 *
 * @param virtualModel - Bare virtual model token (e.g. 'auto:code').
 * @param policy - Curated policy for this virtual model, or null.
 * @param scores - Advisory scores for the known composite ids.
 * @returns Composite ids in dispatch order.
 */
export function orderCandidates(
  virtualModel: string,
  policy: { candidates: string[]; fallback: string | null } | null,
  scores: ModelScore[],
): string[] {
  if (policy) {
    // Index the scores once so the filter below is a set lookup per
    // candidate instead of a scan of `scores` per candidate.
    const known = new Set<string>();
    const healthy = new Set<string>();
    for (const s of scores) {
      known.add(s.compositeId);
      if (s.healthy) healthy.add(s.compositeId);
    }
    const ordered = [...policy.candidates];
    if (policy.fallback && !ordered.includes(policy.fallback)) ordered.push(policy.fallback);
    // Keep curated order; drop known-unhealthy, keep never-seen.
    return ordered.filter((id) => !known.has(id) || healthy.has(id));
  }

  // Derive from advisory scores by category metric.
  const metric = (s: ModelScore): number | null => {
    switch (virtualModel) {
      case 'auto:code':
        return s.codeScore;
      case 'auto:fast':
      case 'auto:cheap':
        return s.avgGenTps;
      case 'auto':
      default:
        // Overall: prefer eval score, then throughput.
        return s.evalScore ?? (s.avgGenTps != null ? s.avgGenTps / 1000 : null);
    }
  };

  // Compute each metric once up front rather than inside the comparator.
  return scores
    .filter((s) => s.healthy)
    .map((s) => ({ id: s.compositeId, value: metric(s) }))
    .filter((r): r is { id: string; value: number } => r.value != null)
    .sort((a, b) => b.value - a.value)
    .map((r) => r.id);
}
/** Result of resolveCandidates. */
export interface ResolvedCandidates {
// The bare virtual model token after any provider prefix was stripped.
virtualModel: string;
// Concrete composite ids ('provider/model') in dispatch order.
candidates: string[];
// Name of the route policy that was applied, or null when none matched.
policyName: string | null;
}
/**
 * Work out the ordered candidate list for a requested virtual model.
 *
 * Reads at most one enabled route policy for the virtual model, computes the
 * advisory routing scores for the live fleet, and hands both to
 * orderCandidates. When no enabled policy exists, ordering falls back to the
 * scores alone and `policyName` is null.
 */
export async function resolveCandidates(
  sql: Sql,
  fleet: FleetState,
  modelId: string,
): Promise<ResolvedCandidates> {
  const virtualModel = parseVirtualModel(modelId);
  const [row] = await sql<(RoutePolicyRow & { name: string })[]>`
    SELECT name, virtual_model, candidates, fallback, enabled
    FROM route_policies
    WHERE virtual_model = ${virtualModel} AND enabled = true
    LIMIT 1
  `;
  const scores = await computeRoutingScores(sql, fleet);

  const policy = row
    ? { candidates: jsonbStringArray(row.candidates), fallback: row.fallback }
    : null;
  const policyName = row ? row.name : null;

  return {
    virtualModel,
    candidates: orderCandidates(virtualModel, policy, scores),
    policyName,
  };
}
/**
 * Split a composite id 'provider/model' into its parts.
 *
 * The split is on the FIRST '/', so the model part may itself contain
 * slashes ('p/org/model' → provider 'p', model 'org/model').
 *
 * @returns The parts, or null when the id has no '/', an empty provider
 *   ('/model') or an empty model ('provider/').
 */
export function splitComposite(compositeId: string): { providerId: string; model: string } | null {
  const slash = compositeId.indexOf('/');
  // slash <= 0 covers both "no slash" and "empty provider"; a trailing slash
  // would otherwise produce an empty model id.
  if (slash <= 0 || slash === compositeId.length - 1) return null;
  return { providerId: compositeId.slice(0, slash), model: compositeId.slice(slash + 1) };
}

View File

@@ -0,0 +1,19 @@
/**
* Host-access seam: acquire exclusive access to a host for a purpose.
*
* V1 body: no-op returning {ok: true}. This is the P8 seam — P8 swaps the
* body for a DB lease without touching the bench engine.
*/
/** Outcome of a host-access request. */
export interface HostGrant {
// True when access was granted.
ok: boolean;
// Optional explanation; the V1 implementation never sets it.
reason?: string;
}
/**
 * Request access to a host for a given purpose.
 *
 * V1 is a placeholder: both arguments are ignored and access is always
 * granted.
 */
export async function acquireHostAccess(
  providerId: string,
  purpose: string,
): Promise<HostGrant> {
  // V1: no-op — always grant access.
  const grant: HostGrant = { ok: true };
  return grant;
}

View File

@@ -0,0 +1,41 @@
/**
* JSONB read helpers.
*
* porsager/postgres returns `jsonb` columns already parsed into JS values (an
* object/array), NOT a JSON string. Calling JSON.parse on that throws
* ("[object Object] is not valid JSON"). These helpers accept either shape so a
* read works whether the driver parsed the column or handed back a string.
*/
/**
 * Read a JSONB column value as a list of strings.
 *
 * Accepts either an already-parsed value or a JSON string. Non-string
 * elements are dropped; anything that is not an array (or is unparseable)
 * yields [].
 */
export function jsonbStringArray(value: unknown): string[] {
  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return [];
    }
  }
  if (!Array.isArray(parsed)) return [];

  const strings: string[] = [];
  for (const item of parsed) {
    if (typeof item === 'string') strings.push(item);
  }
  return strings;
}
/**
 * Read a JSONB column value as an array of untyped elements.
 *
 * Accepts either an already-parsed value or a JSON string. An array passed in
 * directly is returned as the same instance; anything that is not an array
 * (or is unparseable) yields [].
 */
export function jsonbArray(value: unknown): unknown[] {
  if (typeof value !== 'string') {
    return Array.isArray(value) ? value : [];
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(value);
  } catch {
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
}
/**
 * Read a JSONB column value as a list of numbers.
 * Built on jsonbArray; every element that is not a number is dropped.
 */
export function jsonbNumberArray(value: unknown): number[] {
  const numbers: number[] = [];
  for (const item of jsonbArray(value)) {
    if (typeof item === 'number') numbers.push(item);
  }
  return numbers;
}
/**
 * Read a JSONB column value as a plain object.
 *
 * Accepts either an already-parsed value or a JSON string. Returns null for
 * null/undefined, for unparseable strings, for arrays and for primitives.
 * An object passed in directly is returned as the same instance.
 */
export function jsonbObject(value: unknown): Record<string, unknown> | null {
  if (value == null) return null;

  let candidate: unknown = value;
  if (typeof value === 'string') {
    try {
      candidate = JSON.parse(value);
    } catch {
      return null;
    }
  }

  if (!candidate || typeof candidate !== 'object' || Array.isArray(candidate)) {
    return null;
  }
  return candidate as Record<string, unknown>;
}

View File

@@ -0,0 +1,288 @@
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import { recordEvalResult, completeEvalRun } from './eval-suites.js';
import { resolveProviderBaseUrl } from './llama-providers.js';
// ─── types ──────────────────────────────────────────────────────────────────
/** Inputs describing one judge-based eval run. */
export interface JudgeEvalParams {
// Id of the eval run that results are recorded against.
runId: string;
// Provider whose base URL serves both the target model and the judge.
providerId: string;
// Model under evaluation.
model: string;
quant: string | null;
// Raw task objects; `id`, `prompt` and `rubric` are read from each.
tasks: Array<Record<string, unknown>>;
// Judge model override; null means "use the default judge model".
judgeModel: string | null;
}
/** Progress payload passed to the onProgress callback after every task. */
export interface JudgeProgress {
// Number of tasks processed so far, including tasks that failed.
completedTasks: number;
}
/** Outcome of runJudgeEval. */
export interface JudgeResult {
// Run-level error message, or null.
error: string | null;
}
// ─── judge runner ───────────────────────────────────────────────────────────
/**
 * Run a judge-based eval (chat quality, rubric scoring).
 *
 * Judge requests go through llama-swap with:
 * - temperature 0
 * - judge model + version pinned per run
 * - X-Boo-Source: control-eval
 * - BARE wire model id
 *
 * Rubric scoring: each criterion gets a score, weighted average produces the task score.
 * Rationale is captured per criterion.
 *
 * Flow, per task and strictly in order:
 *  1. Ask the target model for a response to the task prompt.
 *  2. Score it — via the judge when the task has a rubric, otherwise a simple
 *     1/0 for non-empty/empty output.
 *  3. Record the result, bump the counter, report progress, publish a delta.
 * A task that throws is recorded with its error message and still counts
 * towards `completedTasks`; it does not abort the run.
 *
 * The target model and the judge are both called on the provider's base URL.
 *
 * @param params - Run description (ids, target model, tasks, judge override).
 * @param sql - Database handle used to record results.
 * @param emitter - Receives one 'control_job' delta per successful task.
 * @param seq - Passed through unchanged as `seq` on every published delta.
 * @param logger - Run/task level logging.
 * @param onProgress - Called after every task, successful or failed.
 * @returns `{ error }` — set only when the provider has no base URL.
 */
export async function runJudgeEval(
params: JudgeEvalParams,
sql: Sql,
emitter: DeltaEmitter,
seq: number,
logger: import('fastify').FastifyBaseLogger,
onProgress: (progress: JudgeProgress) => void,
): Promise<JudgeResult> {
// NOTE(review): `quant` is destructured but never used in this function.
const { runId, providerId, model, tasks, judgeModel, quant } = params;
// Resolve the target model's base URL.
const baseUrl = resolveProviderBaseUrl(providerId);
if (!baseUrl) {
const err = `no base URL for provider ${providerId}`;
// Best effort: mark the run failed; a failure to do so is deliberately ignored.
await completeEvalRun(sql, runId, 0, null, err).catch(() => {});
return { error: err };
}
// Determine judge model: suite default -> strongest local model.
const judgeModelId = judgeModel ?? resolveDefaultJudgeModel();
// NOTE(review): computed but never read or persisted here — confirm whether
// the pinned judge version is meant to be stored on the run.
const judgeModelVersion = `${judgeModelId}@${Date.now()}`;
logger.info(
{ runId, judgeModel: judgeModelId, targetModel: model, taskCount: tasks.length },
'eval: judge run started',
);
let completedTasks = 0;
// NOTE(review): never reassigned, so the value returned after the loop is always null.
let error: string | null = null;
for (let i = 0; i < tasks.length; i++) {
const task = tasks[i];
if (!task) continue;
// Missing fields fall back to a positional id and an empty prompt.
const taskId = (task.id as string) ?? `task_${i}`;
const prompt = (task.prompt as string) ?? '';
const rubric = (task.rubric as { criteria: Array<{ criterion: string; description: string; weight: number }>; max_score: number }) ?? null;
const startTime = Date.now();
try {
// Generate the response from the target model.
const response = await generateResponse(baseUrl, model, prompt);
// Score the response.
let score: number | null = null;
let maxScore: number | null = null;
let rationale: string | null = null;
if (rubric) {
// The judge is called on the same base URL as the target model.
const scoring = await scoreWithRubric(
baseUrl,
judgeModelId,
prompt,
response,
rubric,
);
score = scoring.score;
maxScore = scoring.maxScore;
rationale = scoring.rationale;
} else {
// Simple pass/fail for tasks without rubric.
score = response.trim().length > 0 ? 1 : 0;
maxScore = 1;
rationale = response.trim().length > 0 ? 'Response generated' : 'Empty response';
}
// Covers both generation and scoring time.
const executionMs = Date.now() - startTime;
// The three nulls after `rationale` are the sandbox fields, unused for judge evals.
await recordEvalResult(
sql,
runId,
taskId,
i,
score,
maxScore,
rationale,
null,
null,
null,
executionMs,
null,
);
completedTasks++;
onProgress({ completedTasks });
emitter.publish({
type: 'control_job' as const,
seq,
jobType: 'eval' as const,
jobId: runId,
status: 'running' as const,
detail: {
completedTasks,
totalTasks: tasks.length,
taskId,
score,
},
});
} catch (err) {
const msg = (err as Error).message ?? String(err);
logger.warn({ taskId, err: msg }, 'eval: judge task failed');
// Record the failure; if even that fails, carry on with the next task.
await recordEvalResult(
sql,
runId,
taskId,
i,
null,
null,
null,
null,
null,
null,
Date.now() - startTime,
msg,
).catch(() => {});
// A failed task still counts as processed. No delta is published on this path.
completedTasks++;
onProgress({ completedTasks });
}
}
return { error };
}
/**
 * Ask the target model for a completion, via llama-swap's OpenAI-compatible
 * chat endpoint.
 *
 * The request is tagged with X-Boo-Source: control-eval and times out after
 * 120 seconds.
 *
 * @returns The first choice's message content, or '' when the reply has none.
 * @throws Error when the endpoint answers with a non-2xx status; the message
 *   carries the status and the first 200 characters of the body.
 */
async function generateResponse(
  baseUrl: string,
  model: string,
  prompt: string,
): Promise<string> {
  const payload = {
    model,
    messages: [{ role: 'user', content: prompt }],
    // Design S8: temperature 0 everywhere in the eval pipeline -- response
    // generation must be as reproducible as the judging (audit B1).
    temperature: 0,
    max_tokens: 2048,
  };
  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(120_000),
  });

  if (res.ok) {
    const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
    const firstChoice = data.choices?.[0];
    return firstChoice?.message?.content ?? '';
  }

  // Reading the error body is best effort; an unreadable body becomes ''.
  const detail = await res.text().catch(() => '');
  throw new Error(`model response failed: ${res.status} ${detail.slice(0, 200)}`);
}
/**
 * Extract the judge's verdict object from the raw completion text.
 *
 * Tries the whole text as JSON first, then the contents of the first markdown
 * code fence. Anything that is not a JSON object (invalid JSON, `null`, an
 * array, a bare number) yields an empty verdict instead of throwing.
 */
function parseJudgeVerdict(
  content: string,
): { weighted_score?: unknown; overall_rationale?: unknown } {
  const candidates = [content];
  const fenced = content.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced && fenced[1]) candidates.push(fenced[1]);
  for (const text of candidates) {
    try {
      const value: unknown = JSON.parse(text);
      if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
        return value as { weighted_score?: unknown; overall_rationale?: unknown };
      }
    } catch {
      // Not valid JSON -- fall through to the next candidate.
    }
  }
  return {};
}

/**
 * Score a response using a rubric via LLM-as-judge.
 *
 * Builds a judging prompt from the rubric criteria, sends it to
 * `${baseUrl}/v1/chat/completions` with temperature 0 and a JSON response
 * format, and reads `weighted_score` / `overall_rationale` from the reply.
 *
 * @param baseUrl - Base URL of the chat-completions endpoint.
 * @param judgeModelId - Model id used as the judge.
 * @param prompt - The task prompt the response was generated for.
 * @param response - The response being judged.
 * @param rubric - Criteria (name, description, weight) and the maximum score.
 * @returns The score clamped to `[0, rubric.max_score]`, the max score, and the
 *   judge's rationale. An unusable judge reply (unparseable, non-object, or a
 *   non-numeric score) produces score 0 and/or 'No rationale provided'.
 * @throws Error when the HTTP status is not ok.
 */
async function scoreWithRubric(
  baseUrl: string,
  judgeModelId: string,
  prompt: string,
  response: string,
  rubric: { criteria: Array<{ criterion: string; description: string; weight: number }>; max_score: number },
): Promise<{ score: number; maxScore: number; rationale: string }> {
  const criteriaText = rubric.criteria
    .map((c, i) => `${i + 1}. **${c.criterion}** (weight: ${c.weight}): ${c.description}`)
    .join('\n');
  const judgePrompt = `You are an evaluation judge. Score the following response against the given prompt using the rubric criteria.
**Prompt:**
${prompt}
**Response:**
${response}
**Rubric Criteria (score each 0-3, then compute weighted average):**
${criteriaText}
**Max Score:** ${rubric.max_score}
Return your evaluation in JSON format:
{
"criterion_scores": {
"criterion_name": { "score": 0-3, "rationale": "explanation" }
},
"weighted_score": <number>,
"overall_rationale": "<summary>"
}`;
  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify({
      model: judgeModelId,
      messages: [{ role: 'user', content: judgePrompt }],
      temperature: 0,
      max_tokens: 1024,
      response_format: { type: 'json_object' },
    }),
    signal: AbortSignal.timeout(120_000),
  });
  if (!res.ok) {
    const body = await res.text().catch(() => '');
    throw new Error(`judge failed: ${res.status} ${body.slice(0, 200)}`);
  }
  const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
  const content = data.choices?.[0]?.message?.content ?? '{}';
  const verdict = parseJudgeVerdict(content);

  // Judges sometimes quote numbers; accept numeric strings, reject everything
  // that does not end up as a finite number so NaN never reaches the caller.
  const rawScore =
    typeof verdict.weighted_score === 'string'
      ? Number(verdict.weighted_score)
      : verdict.weighted_score;
  const score = typeof rawScore === 'number' && Number.isFinite(rawScore) ? rawScore : 0;
  const rationale =
    typeof verdict.overall_rationale === 'string'
      ? verdict.overall_rationale
      : 'No rationale provided';
  return {
    score: Math.min(Math.max(score, 0), rubric.max_score),
    maxScore: rubric.max_score,
    rationale,
  };
}
/**
 * Resolve the default judge model.
 *
 * Reads the `EVAL_JUDGE_MODEL` environment variable; when it is unset, empty,
 * or whitespace-only, falls back to 'qwen2.5-72b-instruct'.
 *
 * @returns The trimmed model id from the environment, or the built-in default.
 */
function resolveDefaultJudgeModel(): string {
  // `??` alone would let EVAL_JUDGE_MODEL="" through as an empty model id.
  const configured = process.env.EVAL_JUDGE_MODEL?.trim();
  return configured ? configured : 'qwen2.5-72b-instruct';
}

View File

@@ -0,0 +1,101 @@
/**
* Local provider registry loader (control-side).
*
* Reads the shared llama-providers config file at startup and caches the
* parsed result. When the file is absent or invalid, synthesizes a single
* legacy provider from LLAMA_SWAP_URL so the service starts with only
* legacy env vars (D-1).
*
* Schema and pure helpers live in @boocode/contracts/llama-providers.
* File I/O stays app-local per D-1.
*/
import { readFileSync } from 'node:fs';
import {
LlamaProvidersFileSchema,
type LlamaProvidersFile,
type LlamaProvider,
} from '@boocode/contracts/llama-providers';
export type { LlamaProvidersFile, LlamaProvider };
/** Synthesize a single legacy provider (id 'llama-swap') pointing at `llamaSwapUrl`. */
function buildLegacyProvider(llamaSwapUrl: string): LlamaProvidersFile {
  return {
    defaultProvider: 'llama-swap',
    providers: [
      {
        id: 'llama-swap',
        label: 'llama-swap',
        baseUrl: llamaSwapUrl,
        kind: 'llama-swap',
      },
    ],
  };
}

/** Result of the most recent loadLlamaProviders() call; null until first load. */
let cached: LlamaProvidersFile | null = null;

/**
 * Read, parse, and validate the providers file.
 *
 * Logs the reason and returns null on any failure (unreadable file, invalid
 * JSON, schema mismatch) so the caller can fall back. A missing file is a
 * warning; every other failure is logged as an error together with its cause.
 */
function readProvidersFile(providersPath: string): LlamaProvidersFile | null {
  let raw: string;
  try {
    raw = readFileSync(providersPath, 'utf8');
  } catch (err) {
    // Only ENOENT really means "not found"; permission or is-a-directory
    // errors get their own message and keep the underlying error visible.
    const code = (err as { code?: unknown } | null)?.code;
    if (code === 'ENOENT') {
      console.warn(
        `llama-providers: file not found at ${providersPath} -- falling back to legacy single-provider`,
      );
    } else {
      console.error(
        `llama-providers: could not read ${providersPath} -- falling back to legacy single-provider`,
        err,
      );
    }
    return null;
  }
  let json: unknown;
  try {
    json = JSON.parse(raw);
  } catch (err) {
    console.error(
      `llama-providers: invalid JSON in ${providersPath} -- falling back to legacy single-provider`,
      err,
    );
    return null;
  }
  const parsed = LlamaProvidersFileSchema.safeParse(json);
  if (!parsed.success) {
    console.error(
      `llama-providers: schema validation failed for ${providersPath} -- falling back to legacy single-provider`,
      parsed.error.flatten(),
    );
    return null;
  }
  return parsed.data;
}

/**
 * Load (or re-load) the local provider config. Never throws on bad input --
 * falls back to the legacy single-provider shape.
 *
 * @param providersPath - Path to the providers JSON file; when undefined or
 *   empty the legacy provider is used without touching the filesystem.
 * @param llamaSwapUrl - Base URL used for the synthesized legacy provider.
 * @returns The loaded (or synthesized) config, which is also cached for
 *   getLlamaProviders().
 */
export function loadLlamaProviders(
  providersPath: string | undefined,
  llamaSwapUrl: string,
): LlamaProvidersFile {
  const loaded = providersPath ? readProvidersFile(providersPath) : null;
  cached = loaded ?? buildLegacyProvider(llamaSwapUrl);
  return cached;
}

/**
 * The cached provider config. Before the first loadLlamaProviders() call this
 * returns a freshly built legacy fallback pointing at 'http://localhost:8080'.
 */
export function getLlamaProviders(): LlamaProvidersFile {
  return cached ?? buildLegacyProvider('http://localhost:8080');
}

/**
 * Resolve a provider's baseUrl by id from the cached registry.
 * Returns null if the provider is not found.
 */
export function resolveProviderBaseUrl(providerId: string): string | null {
  const file = getLlamaProviders();
  const provider = file.providers.find((p) => p.id === providerId);
  return provider?.baseUrl ?? null;
}

View File

@@ -0,0 +1,67 @@
/**
* Log relay: in-memory tail buffer per host for logData SSE events.
*
* - 2k-line tail per host for late joiners
* - Relays /api/events logData into control_log frames
* - Source filter: proxy | upstream | model
*/
/** Maximum number of lines retained per provider. */
const MAX_LOG_LINES = 2000;

/** One relayed log line, stamped with the time it was appended. */
export interface LogLine {
  providerId: string;
  source: 'proxy' | 'upstream' | 'model';
  line: string;
  ts: Date;
}

/**
 * In-memory tail buffer of log lines, one bounded list per provider.
 *
 * Each provider keeps at most MAX_LOG_LINES entries; appending beyond that
 * drops the oldest entries. Read methods return fresh arrays, so callers can
 * hold or mutate a result without affecting the buffer.
 */
export class LogRelay {
  private readonly tails = new Map<string, LogLine[]>();

  /**
   * Append a log line to the per-host tail buffer, timestamped with the
   * current time, and drop the oldest entries once the cap is exceeded.
   */
  append(providerId: string, source: 'proxy' | 'upstream' | 'model', line: string): void {
    let tail = this.tails.get(providerId);
    if (!tail) {
      tail = [];
      this.tails.set(providerId, tail);
    }
    tail.push({ providerId, source, line, ts: new Date() });
    const overflow = tail.length - MAX_LOG_LINES;
    if (overflow > 0) {
      tail.splice(0, overflow);
    }
  }

  /**
   * Get the tail buffer for a host (for late joiners).
   *
   * Returns a snapshot copy, oldest first; empty when the host is unknown.
   * Returning the internal array would let callers mutate the buffer and
   * would make a "snapshot" change under them as new lines arrive.
   */
  getTail(providerId: string): LogLine[] {
    const tail = this.tails.get(providerId);
    return tail ? tail.slice() : [];
  }

  /**
   * Get all tails (for snapshot-on-join), concatenated host by host in the
   * order hosts were first seen.
   */
  getAllTails(): LogLine[] {
    const all: LogLine[] = [];
    for (const tail of this.tails.values()) {
      for (const entry of tail) {
        all.push(entry);
      }
    }
    return all;
  }

  /**
   * Get unique source values across all buffered lines.
   */
  getSources(): string[] {
    const sources = new Set<string>();
    for (const tail of this.tails.values()) {
      for (const entry of tail) {
        sources.add(entry.source);
      }
    }
    return Array.from(sources);
  }
}

View File

@@ -0,0 +1,105 @@
/**
* P9 model pull: download a HuggingFace repo onto a host into its models dir.
*
* Non-blocking job (fire-and-forget like bench/eval), progress over the existing
* control_job frame (jobType 'action', detail.kind = 'pull'). The repo id is
* validated server-side as defense in depth on top of the wrapper's own check,
* then passed as a single token (never interpolated into a shell string in
* wrapper mode; in shell mode it is the only argument and is regex-clean).
*/
import type { DeltaEmitter } from '../index.js';
import type { SshExec, SshTarget, SshMode } from './ssh-config.js';
/**
* HF repo id: org/name. Each segment MUST start with an alphanumeric (HF's own
* rule), which also rejects `..`/`.` traversal segments that a plain `[._-]+`
* class would let through (e.g. `../x`). Exactly one slash; no spaces/metachars.
*/
export const REPO_ID_RE = /^[A-Za-z0-9][A-Za-z0-9._-]*\/[A-Za-z0-9][A-Za-z0-9._-]*$/;
export function validateRepoId(repo: string): boolean {
return REPO_ID_RE.test(repo);
}
/**
 * Build the pull command for a host. Pure helper for testing.
 * - wrapper mode: the `pull <repo>` verb (wrapper hardcodes the models dir).
 * - shell mode: a direct `huggingface-cli download` into <modelsDir>/<repo__>,
 *   where `<repo__>` is the repo id with every `/` replaced by `__`.
 *
 * This function does not validate `repo`; it is inserted into the command
 * unquoted, so callers must check it (see validateRepoId) first.
 *
 * @param mode - 'wrapper' selects the wrapper verb; anything else builds the
 *   shell command.
 * @param repo - Repo id, e.g. `org/name`.
 * @param modelsDir - Target models directory for shell mode; trailing slashes
 *   are stripped. Treated as '' when omitted.
 * @returns The command string to execute on the host.
 */
export function buildPullCommand(mode: SshMode, repo: string, modelsDir?: string): string {
  if (mode === 'wrapper') return `pull ${repo}`;
  const dir = (modelsDir ?? '').replace(/\/+$/, '');
  const local = `${dir}/${repo.replace(/\//g, '__')}`;
  // POSIX single-quote escaping: close the quote, emit an escaped quote,
  // reopen. Without it a `'` in the directory would end the quoted argument.
  const quotedLocal = `'${local.replace(/'/g, "'\\''")}'`;
  return `huggingface-cli download ${repo} --local-dir ${quotedLocal}`;
}
/** Inputs for one model-pull job. */
export interface PullParams {
  /** Job id echoed on every control_job frame published for this pull. */
  jobId: string;
  /** Host the pull command is executed on. */
  target: SshTarget;
  /** Repo id to download; validated before anything is executed. */
  repo: string;
  /** Selects the command form built by buildPullCommand. */
  mode: SshMode;
  modelsDir?: string; // required for shell mode
}

/** Outcome of a pull: `ok` plus an error message when it failed. */
export interface PullResult {
  ok: boolean;
  error?: string;
}

/**
 * Run a model pull as a control_job. Resolves when the pull finishes; callers
 * invoke it fire-and-forget so the HTTP response can return 202 immediately.
 *
 * Publishes a 'failed' frame and returns early when the repo id is invalid or
 * shell mode lacks a models directory. Otherwise publishes 'running', executes
 * the command, and publishes 'completed' (with the last 500 characters of
 * stdout) or 'failed' (non-zero exit or thrown error).
 *
 * @param params - Job id, target host, repo, mode and optional models dir.
 * @param exec - Executes a command on the target and yields code/stdout/stderr.
 * @param emitter - Receives the control_job frames.
 * @param seq - Sequence number stamped on every frame (default 0).
 * @returns `{ ok: true }` on exit code 0, otherwise `{ ok: false, error }`.
 */
export async function runModelPull(
  params: PullParams,
  exec: SshExec,
  emitter: DeltaEmitter,
  seq: number = 0,
): Promise<PullResult> {
  const { jobId, target, repo, mode, modelsDir } = params;

  // Publish a 'failed' frame and build the matching result in one step.
  const fail = (error: string): PullResult => {
    emitter.publish({
      type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
      status: 'failed' as const, detail: { kind: 'pull', repo, error },
    });
    return { ok: false, error };
  };

  if (!validateRepoId(repo)) {
    return fail('invalid repo id');
  }
  if (mode === 'shell' && !modelsDir) {
    return fail('shell mode requires a models directory');
  }

  emitter.publish({
    type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
    status: 'running' as const, detail: { kind: 'pull', repo },
  });

  try {
    const outcome = await exec(target, buildPullCommand(mode, repo, modelsDir));
    if (outcome.code === 0) {
      emitter.publish({
        type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
        status: 'completed' as const, detail: { kind: 'pull', repo, output: outcome.stdout.slice(-500) },
      });
      return { ok: true };
    }
    return fail(`pull failed (exit ${outcome.code}): ${outcome.stderr.slice(0, 500)}`);
  } catch (err) {
    return fail((err as Error).message ?? String(err));
  }
}

View File

@@ -0,0 +1,12 @@
/**
 * Reconcile gap detection.
 *
 * A gap exists when the oldest entry of a reconcile fetch is strictly newer
 * than the newest entry already persisted for that provider: the ring wrapped
 * past our tail, so entries in between were lost.
 *
 * @param oldestReconcileTs - Timestamp string of the oldest fetched entry, or null.
 * @param newestPersistedTs - Timestamp string of the newest persisted entry, or null.
 * @returns true only when both timestamps are present and the fetched one is
 *   later; false when either is missing/empty or cannot be compared.
 */
export function detectGap(
  oldestReconcileTs: string | null,
  newestPersistedTs: string | null,
): boolean {
  if (oldestReconcileTs && newestPersistedTs) {
    const oldestFetchedMs = new Date(oldestReconcileTs).getTime();
    const newestStoredMs = new Date(newestPersistedTs).getTime();
    // An unparseable timestamp yields NaN, and any comparison with NaN is false.
    return oldestFetchedMs > newestStoredMs;
  }
  return false;
}

View File

@@ -0,0 +1,299 @@
/**
* P6.2: Scheduled fleet digest reports.
*
* Same in-process timer pattern as the retention job (design §3/§6): an hourly
* tick reads control_schedule_meta.last_run_at and runs the digest when due,
* so a boot after a missed window catches up immediately. No cron dependency,
* no new scheduler abstraction.
*
* The report gathers usage, trends vs the prior period, swap counts, the eval
* leaderboard, and bench regression anomalies, renders a markdown digest, and
* persists both the markdown and the structured stats to control_reports.
*/
import type { Sql } from '../db.js';
/** How often the digest covers: a 24-hour or a 7-day window. */
export type ReportInterval = 'daily' | 'weekly';

/** Structured numbers behind one digest report. */
export interface ReportStats {
  /** Window start, ISO-8601 string (inclusive bound in the queries). */
  periodStart: string;
  /** Window end, ISO-8601 string (exclusive bound in the queries). */
  periodEnd: string;
  interval: ReportInterval;
  /** Requests inside the window. */
  totalRequests: number;
  /** Requests in the equally long window immediately before this one. */
  priorRequests: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  /** Usage grouped by request source. */
  bySource: Array<{ source: string; requests: number; inputTokens: number; outputTokens: number }>;
  /** Request and swap counts per provider. */
  byProvider: Array<{ providerId: string; requests: number; swaps: number }>;
  /** Eval average scores per provider/model/suite kind. */
  leaderboard: Array<{ providerId: string; model: string; kind: string; avgScore: number | null }>;
  /** Bench runs flagged as regressions inside the window. */
  regressions: Array<{ providerId: string; model: string; avgGenTps: number | null }>;
}

/** Length of a report window in hours: 168 for 'weekly', otherwise 24. */
function intervalHours(interval: ReportInterval): number {
  const hoursPerDay = 24;
  if (interval === 'weekly') {
    return hoursPerDay * 7;
  }
  return hoursPerDay;
}
/**
 * Gather the structured stats for a report window. Pure read; no writes.
 *
 * The window is the `intervalHours(interval)` hours ending at `now`; the
 * "prior" window is the equally long span immediately before it. Bounds are
 * passed to SQL as ISO-8601 strings and applied half-open
 * (`ts >= start AND ts < end`). The queries run one after another.
 *
 * @param sql - Tagged-template query function used for every read.
 * @param interval - 'daily' or 'weekly'; selects the window length.
 * @param now - End of the reporting window.
 * @returns Totals, per-source and per-provider breakdowns, the eval
 *   leaderboard (at most 20 rows) and regression-flagged bench runs.
 */
export async function gatherReportStats(
sql: Sql,
interval: ReportInterval,
now: Date,
): Promise<ReportStats> {
const hours = intervalHours(interval);
const periodEnd = now;
const periodStart = new Date(now.getTime() - hours * 3600_000);
// The comparison window: same length, ending where the report window starts.
const priorStart = new Date(periodStart.getTime() - hours * 3600_000);
const startIso = periodStart.toISOString();
const endIso = periodEnd.toISOString();
const priorIso = priorStart.toISOString();
// Window totals.
// NOTE(review): the SUM(...)::int casts presumably fail with "integer out of
// range" if a window ever exceeds 2^31-1 tokens -- confirm expected volumes.
const totals = await sql<{ requests: number; in_tokens: number; out_tokens: number }[]>`
SELECT COUNT(*)::int AS requests,
COALESCE(SUM(input_tokens), 0)::int AS in_tokens,
COALESCE(SUM(output_tokens), 0)::int AS out_tokens
FROM control_requests
WHERE ts >= ${startIso} AND ts < ${endIso}
`;
// Request count of the prior window, used only for the trend figure.
const prior = await sql<{ requests: number }[]>`
SELECT COUNT(*)::int AS requests
FROM control_requests
WHERE ts >= ${priorIso} AND ts < ${startIso}
`;
// Usage per source; a NULL source is relabelled when the result is mapped below.
const bySource = await sql<{ source: string | null; requests: number; in_tokens: number; out_tokens: number }[]>`
SELECT source,
COUNT(*)::int AS requests,
COALESCE(SUM(input_tokens), 0)::int AS in_tokens,
COALESCE(SUM(output_tokens), 0)::int AS out_tokens
FROM control_requests
WHERE ts >= ${startIso} AND ts < ${endIso}
GROUP BY source
ORDER BY requests DESC
`;
const byProviderReqs = await sql<{ provider_id: string; requests: number }[]>`
SELECT provider_id, COUNT(*)::int AS requests
FROM control_requests
WHERE ts >= ${startIso} AND ts < ${endIso}
GROUP BY provider_id
`;
// Swap counts: a model entering 'ready' / 'starting' marks a load/swap.
const swaps = await sql<{ provider_id: string; swaps: number }[]>`
SELECT provider_id, COUNT(*)::int AS swaps
FROM control_model_events
WHERE ts >= ${startIso} AND ts < ${endIso}
AND state IN ('ready', 'starting')
GROUP BY provider_id
`;
// Merge the two per-provider result sets: a provider appears if it had
// requests OR swaps, with 0 filled in for the missing side; sorted by id.
const swapMap = new Map<string, number>();
for (const r of swaps) swapMap.set(r.provider_id, r.swaps);
const providerIds = new Set<string>([
...byProviderReqs.map((r) => r.provider_id),
...swaps.map((r) => r.provider_id),
]);
const reqMap = new Map<string, number>();
for (const r of byProviderReqs) reqMap.set(r.provider_id, r.requests);
const byProvider = Array.from(providerIds)
.sort()
.map((providerId) => ({
providerId,
requests: reqMap.get(providerId) ?? 0,
swaps: swapMap.get(providerId) ?? 0,
}));
// Leaderboard: latest completed eval avgScore per (provider, model, kind).
// This query is not restricted to the report window.
const leaderboard = await sql<{ provider_id: string; model: string; kind: string; avg_score: number | null }[]>`
SELECT er.provider_id, er.model, es.kind,
(er.aggregate::jsonb ->> 'avgScore')::float AS avg_score
FROM eval_runs er
JOIN eval_suites es ON er.suite_id = es.id
WHERE er.status = 'completed' AND er.aggregate IS NOT NULL
AND er.finished_at = (
SELECT MAX(er2.finished_at) FROM eval_runs er2
JOIN eval_suites es2 ON er2.suite_id = es2.id
WHERE er2.provider_id = er.provider_id AND er2.model = er.model
AND es2.kind = es.kind AND er2.status = 'completed'
)
ORDER BY avg_score DESC NULLS LAST
LIMIT 20
`;
// Regression anomalies: bench runs flagged 'regression' in the window.
const regressions = await sql<{ provider_id: string; model: string; avg_gen_tps: number | null }[]>`
SELECT bs.provider_id, bs.model,
(br.aggregate::jsonb ->> 'avgGenTps')::float AS avg_gen_tps
FROM bench_runs br
JOIN bench_suites bs ON br.suite_id = bs.id
WHERE br.regression_flag = 'regression'
AND br.finished_at >= ${startIso} AND br.finished_at < ${endIso}
ORDER BY br.finished_at DESC
`;
// Convert snake_case rows to the camelCase ReportStats shape; absent total
// rows default to 0.
return {
periodStart: startIso,
periodEnd: endIso,
interval,
totalRequests: totals[0]?.requests ?? 0,
priorRequests: prior[0]?.requests ?? 0,
totalInputTokens: totals[0]?.in_tokens ?? 0,
totalOutputTokens: totals[0]?.out_tokens ?? 0,
bySource: bySource.map((r) => ({
source: r.source ?? '(unattributed)',
requests: r.requests,
inputTokens: r.in_tokens,
outputTokens: r.out_tokens,
})),
byProvider,
leaderboard: leaderboard.map((r) => ({
providerId: r.provider_id,
model: r.model,
kind: r.kind,
avgScore: r.avg_score,
})),
regressions: regressions.map((r) => ({
providerId: r.provider_id,
model: r.model,
avgGenTps: r.avg_gen_tps,
})),
};
}
/**
 * Render a markdown digest from gathered stats. Pure — unit-testable.
 *
 * Layout: title, period line, a "Usage" list, then optional "By source",
 * "By host" and "Leaderboard" tables (each omitted when it has no rows), and
 * an always-present "Anomalies" section. Lines are joined with '\n' and the
 * result ends with a trailing newline.
 *
 * @param stats - Stats as produced by gatherReportStats.
 * @returns The markdown document.
 */
export function renderReportMarkdown(stats: ReportStats): string {
  // Change vs. the prior period: signed whole percent, or 'new' / '0%' when
  // there is no baseline to divide by.
  const trend = (cur: number, prev: number): string => {
    if (prev !== 0) {
      const delta = ((cur - prev) / prev) * 100;
      const sign = delta >= 0 ? '+' : '';
      return `${sign}${delta.toFixed(0)}%`;
    }
    return cur === 0 ? '0%' : 'new';
  };

  // One table section with a trailing blank line; nothing at all without rows.
  const tableSection = (
    heading: string,
    header: string,
    divider: string,
    rows: string[],
  ): string[] => (rows.length > 0 ? [heading, '', header, divider, ...rows, ''] : []);

  const sourceRows = stats.bySource.map(
    (s) => `| ${s.source} | ${s.requests} | ${s.inputTokens} | ${s.outputTokens} |`,
  );
  const hostRows = stats.byProvider.map(
    (p) => `| ${p.providerId} | ${p.requests} | ${p.swaps} |`,
  );
  const leaderboardRows = stats.leaderboard.map(
    (l) => `| ${l.providerId}/${l.model} | ${l.kind} | ${l.avgScore != null ? l.avgScore.toFixed(3) : 'n/a'} |`,
  );
  const anomalyLines =
    stats.regressions.length === 0
      ? ['No speed regressions flagged this period.']
      : stats.regressions.map(
          (r) => `- Regression: ${r.providerId}/${r.model} (avg gen ${r.avgGenTps != null ? r.avgGenTps.toFixed(1) : 'n/a'} tok/s)`,
        );

  const document: string[] = [
    `# Fleet ${stats.interval} report`,
    '',
    `Period: ${stats.periodStart} to ${stats.periodEnd}`,
    '',
    '## Usage',
    '',
    `- Requests: ${stats.totalRequests} (${trend(stats.totalRequests, stats.priorRequests)} vs prior period)`,
    `- Input tokens: ${stats.totalInputTokens}`,
    `- Output tokens: ${stats.totalOutputTokens}`,
    '',
    ...tableSection('## By source', '| Source | Requests | Input tok | Output tok |', '| --- | ---: | ---: | ---: |', sourceRows),
    ...tableSection('## By host', '| Host | Requests | Swaps |', '| --- | ---: | ---: |', hostRows),
    ...tableSection('## Leaderboard', '| Model | Kind | Score |', '| --- | --- | ---: |', leaderboardRows),
    '## Anomalies',
    '',
    ...anomalyLines,
    '',
  ];
  return document.join('\n');
}
/**
 * Generate a report for the given interval and persist it.
 *
 * Gathers the stats, renders the markdown, and inserts one `control_reports`
 * row of kind 'digest'. The id is `report_<now in epoch ms>_<interval>`; an
 * existing row with the same id is left untouched (ON CONFLICT DO NOTHING).
 *
 * @param sql - Tagged-template query function.
 * @param interval - 'daily' or 'weekly'.
 * @param now - End of the report window; defaults to the current time.
 * @returns The report id, whether or not a new row was inserted.
 */
export async function generateReport(
  sql: Sql,
  interval: ReportInterval,
  now: Date = new Date(),
): Promise<string> {
  const id = `report_${now.getTime()}_${interval}`;
  const stats = await gatherReportStats(sql, interval, now);
  const markdown = renderReportMarkdown(stats);
  const { periodStart, periodEnd } = stats;
  await sql`
INSERT INTO control_reports (id, kind, interval, period_start, period_end, markdown, stats)
VALUES (${id}, 'digest', ${interval}, ${periodStart}, ${periodEnd}, ${markdown}, ${sql.json(stats as never)})
ON CONFLICT (id) DO NOTHING
`;
  return id;
}
/**
 * Decide whether a scheduled report is due. Pure helper for testing.
 *
 * @param lastRunAt - When the report last ran, or null if it never has.
 * @param interval - 'daily' or 'weekly'; sets the required spacing.
 * @param now - The time to evaluate against.
 * @returns true when there was no previous run, or when at least one full
 *   interval has passed since it.
 */
export function isReportDue(
  lastRunAt: Date | null,
  interval: ReportInterval,
  now: Date,
): boolean {
  if (lastRunAt == null) {
    return true;
  }
  const sinceLastRunMs = now.getTime() - lastRunAt.getTime();
  const periodMs = intervalHours(interval) * 3600_000;
  return sinceLastRunMs >= periodMs;
}
/**
 * Run one scheduler tick: check control_schedule_meta and generate the digest
 * if due. Catch-up-on-boot is achieved by calling this once at startup, then
 * hourly.
 *
 * Reads the 'report-digest' row; does nothing when the row is missing or
 * disabled. Any stored interval other than 'weekly' is treated as 'daily'.
 * After a report is generated, `last_run_at` is set to `now`.
 *
 * @param sql - Tagged-template query function.
 * @param now - The tick time; defaults to the current time.
 * @returns `{ ran: false }` when nothing was generated, otherwise
 *   `{ ran: true, reportId }`.
 */
export async function runReportSchedulerTick(
  sql: Sql,
  now: Date = new Date(),
): Promise<{ ran: boolean; reportId?: string }> {
  const [meta] = await sql<{ interval: string; enabled: boolean; last_run_at: string | null }[]>`
SELECT interval, enabled, last_run_at
FROM control_schedule_meta WHERE name = 'report-digest'
`;
  if (!meta?.enabled) {
    return { ran: false };
  }
  const interval: ReportInterval = meta.interval === 'weekly' ? 'weekly' : 'daily';
  const previousRun = meta.last_run_at ? new Date(meta.last_run_at) : null;
  if (isReportDue(previousRun, interval, now)) {
    const reportId = await generateReport(sql, interval, now);
    await sql`
UPDATE control_schedule_meta SET last_run_at = ${now.toISOString()}
WHERE name = 'report-digest'
`;
    return { ran: true, reportId };
  }
  return { ran: false };
}

View File

@@ -0,0 +1,159 @@
/**
* Retention job: daily in-process timer that rolls up raw perf samples and
* prunes old data.
*
* Crash-safe by construction:
* 1. Rollup is an idempotent upsert (INSERT ... ON CONFLICT DO UPDATE).
* 2. Delete raw only AFTER covering buckets are committed.
* 3. Chunked transactions: one per provider per 1-hour window.
*/
import type { Sql } from '../db.js';
import type { Config } from '../config.js';
/** Retention settings, renamed from the env-style Config keys. */
export interface RetentionConfig {
  /** From RETENTION_RAW_HOURS. */
  rawHours: number;
  /** From RETENTION_ROLLUP_DAYS. */
  rollupDays: number;
  /** From CAPTURE_SIZE_KB. */
  captureSizeKB: number;
  /** From CAPTURE_BUDGET_MB. */
  captureBudgetMB: number;
}

/** Project the four retention-related fields of `cfg` into a RetentionConfig. */
export function buildRetentionConfig(cfg: Config): RetentionConfig {
  const {
    RETENTION_RAW_HOURS: rawHours,
    RETENTION_ROLLUP_DAYS: rollupDays,
    CAPTURE_SIZE_KB: captureSizeKB,
    CAPTURE_BUDGET_MB: captureBudgetMB,
  } = cfg;
  return { rawHours, rollupDays, captureSizeKB, captureBudgetMB };
}
/**
 * Roll up raw perf samples into 5-minute buckets.
 * Idempotent: re-running the same window produces identical rollups.
 *
 * Finds every 5-minute bucket that has at least one sample for `providerId`
 * newer than `hours` ago, then upserts one `control_perf_rollup_5m` row per
 * bucket, recomputed from all samples inside that bucket.
 *
 * Buckets are computed by flooring the epoch seconds to a multiple of 300.
 * `date_trunc` cannot be used for this: it only accepts unit names such as
 * 'minute' or 'hour' and rejects '5 minutes'.
 *
 * @param sql - Tagged-template query function.
 * @param providerId - Provider whose samples are rolled up.
 * @param hours - How far back (from now) to look for samples.
 */
export async function runRollup(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoff = new Date(Date.now() - hours * 3600_000);
  const buckets = await sql<{ bucket: Date }[]>`
SELECT to_timestamp(floor(extract(epoch FROM ts) / 300) * 300) AS bucket
FROM control_perf_samples
WHERE provider_id = ${providerId}
AND ts >= ${cutoff.toISOString()}
GROUP BY bucket
ORDER BY bucket
`;
  for (const { bucket } of buckets) {
    const bucketStart = new Date(bucket);
    const bucketEnd = new Date(bucketStart.getTime() + 5 * 60_000);
    // Idempotent upsert: re-run recomputes the same buckets, never double-counts.
    await sql`
INSERT INTO control_perf_rollup_5m (provider_id, bucket, gpu_agg, sys_agg)
SELECT
${providerId},
${bucketStart.toISOString()},
jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'gpu', gpu)) AS gpu_agg,
jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'sys', sys)) AS sys_agg
FROM control_perf_samples
WHERE provider_id = ${providerId}
AND ts >= ${bucketStart.toISOString()}
AND ts < ${bucketEnd.toISOString()}
GROUP BY provider_id
ON CONFLICT (provider_id, bucket) DO UPDATE SET
gpu_agg = EXCLUDED.gpu_agg,
sys_agg = EXCLUDED.sys_agg
`;
  }
}
/**
 * Prune raw perf samples older than the retention window.
 *
 * Deletes in chunks of up to 1000 rows, one DELETE statement per chunk, until
 * a chunk comes back short. Rows are selected by `ctid` inside the database
 * rather than by round-tripping their timestamps through JS `Date`, which
 * only keeps millisecond precision: a stored timestamp with finer precision
 * would never match again, so nothing would be deleted and the loop would
 * never end.
 *
 * @param sql - Tagged-template query function; the result's `count` is read as
 *   the number of deleted rows.
 * @param providerId - Provider whose samples are pruned.
 * @param hours - Samples older than this many hours are deleted.
 */
export async function pruneRawSamples(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;
  for (;;) {
    // The outer predicates are repeated so a ctid can only ever remove a row
    // that itself qualifies for pruning.
    const deleted = await sql`
      DELETE FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts < ${cutoffIso}
        AND ctid IN (
          SELECT ctid FROM control_perf_samples
          WHERE provider_id = ${providerId}
            AND ts < ${cutoffIso}
          LIMIT ${chunkSize}
        )
    `;
    // A short (or unknown) count means nothing is left; always terminates.
    const removed = Number(deleted.count) || 0;
    if (removed < chunkSize) break;
  }
}
/**
 * Prune activity (control_requests) older than the retention window.
 *
 * Deletes in chunks of up to 1000 rows, one DELETE statement per chunk, until
 * a chunk comes back short. Rows are selected by `ctid` inside the database
 * rather than by round-tripping their timestamps through JS `Date`, which
 * only keeps millisecond precision: a stored timestamp with finer precision
 * would never match again, so nothing would be deleted and the loop would
 * never end.
 *
 * @param sql - Tagged-template query function; the result's `count` is read as
 *   the number of deleted rows.
 * @param hours - Rows older than this many hours are deleted.
 */
export async function pruneActivity(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;
  for (;;) {
    // The outer predicate is repeated so a ctid can only ever remove a row
    // that itself qualifies for pruning.
    const deleted = await sql`
      DELETE FROM control_requests
      WHERE ts < ${cutoffIso}
        AND ctid IN (
          SELECT ctid FROM control_requests
          WHERE ts < ${cutoffIso}
          LIMIT ${chunkSize}
        )
    `;
    // A short (or unknown) count means nothing is left; always terminates.
    const removed = Number(deleted.count) || 0;
    if (removed < chunkSize) break;
  }
}
/**
 * Prune model events older than the retention window.
 *
 * Deletes in chunks of up to 1000 rows, one DELETE statement per chunk, until
 * a chunk comes back short. Rows are selected by `ctid` inside the database
 * rather than by round-tripping their timestamps through JS `Date`, which
 * only keeps millisecond precision: a stored timestamp with finer precision
 * would never match again, so nothing would be deleted and the loop would
 * never end.
 *
 * @param sql - Tagged-template query function; the result's `count` is read as
 *   the number of deleted rows.
 * @param hours - Rows older than this many hours are deleted.
 */
export async function pruneModelEvents(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;
  for (;;) {
    // The outer predicate is repeated so a ctid can only ever remove a row
    // that itself qualifies for pruning.
    const deleted = await sql`
      DELETE FROM control_model_events
      WHERE ts < ${cutoffIso}
        AND ctid IN (
          SELECT ctid FROM control_model_events
          WHERE ts < ${cutoffIso}
          LIMIT ${chunkSize}
        )
    `;
    // A short (or unknown) count means nothing is left; always terminates.
    const removed = Number(deleted.count) || 0;
    if (removed < chunkSize) break;
  }
}
/**
 * Trim a capture JSON string to the configured size cap.
 *
 * The cap is measured in UTF-8 bytes. Text within the cap is returned
 * unchanged; longer text is cut to the longest prefix whose UTF-8 encoding
 * fits, never splitting a multi-byte character or surrogate pair.
 *
 * Note that a truncated prefix is usually no longer valid JSON.
 *
 * @param captureJson - The serialized capture, or null.
 * @param sizeKB - Cap in kibibytes (1024 bytes each).
 * @returns null for null/empty input, otherwise the (possibly cut) string.
 */
export function trimCapture(captureJson: string | null, sizeKB: number): string | null {
  if (!captureJson) return null;
  const limitBytes = Math.max(0, Math.floor(sizeKB * 1024));
  if (Buffer.byteLength(captureJson, 'utf8') <= limitBytes) return captureJson;
  // encodeInto stops before the first character that would not fit and reports
  // how many UTF-16 code units it consumed. Slicing by `limitBytes` characters
  // instead would let multi-byte text exceed the byte cap.
  const { read } = new TextEncoder().encodeInto(captureJson, new Uint8Array(limitBytes));
  return captureJson.slice(0, read);
}
/**
 * Parse a capture JSON string into a value for sql.json().
 *
 * @param captureJson - Serialized capture, or null.
 * @returns The parsed value, or null when the input is null, empty, or not
 *   valid JSON. The parsed value is not checked to actually be an object.
 */
export function parseCaptureJson(captureJson: string | null): Record<string, unknown> | null {
  let decoded: Record<string, unknown> | null = null;
  if (captureJson) {
    try {
      decoded = JSON.parse(captureJson) as Record<string, unknown>;
    } catch {
      decoded = null;
    }
  }
  return decoded;
}

View File

@@ -0,0 +1,194 @@
/**
* P6.1: Advisory routing scores.
*
* Combines three signals per (provider_id, model) into an advisory score and
* a set of category badges surfaced in the BooChat model picker:
* - eval results (eval_runs.aggregate.avgScore, split by suite kind)
* - live latency (control_requests gen_tps + duration over a recent window)
* - host health (fleet liveness — an unhealthy host can win no badge)
*
* Advisory only: this never enforces routing. It powers display badges
* ("best code model right now") and the P7 gateway candidate ordering.
*
* The pure scoring/badge helpers are extracted for unit testing per the
* turn-guard.ts pattern; the DB read lives in computeRoutingScores().
*/
import type { Sql } from '../db.js';
import type { FleetState } from './fleet-state.js';
/** Recent-activity window for live latency signals. */
const LIVE_WINDOW_HOURS = 24;
/** Merged advisory signals for one (provider, model) pair. */
export interface ModelScore {
/** Composite picker id: `${providerId}/${model}` (matches /api/models). */
compositeId: string;
/** Id of the provider (host) that serves the model. */
providerId: string;
/** Model id as recorded in eval runs / requests. */
model: string;
/** Avg score (0..1) from completed code-suite eval runs, or null. */
codeScore: number | null;
/** Avg score (0..1) from completed chat-suite eval runs, or null. */
chatScore: number | null;
/** Best eval score across kinds, or null when never evaluated. */
evalScore: number | null;
/** Avg gen tok/s over the live window, or null when no recent traffic. */
avgGenTps: number | null;
/** Avg request duration (ms) over the live window, or null. */
avgLatencyMs: number | null;
/** Recent request count in the live window. */
sampleCount: number;
/** Whether the owning host is currently connected. */
healthy: boolean;
/** Category badges this model currently wins. */
badges: BadgeKind[];
}
/** Badge categories; at most one model wins each. */
export type BadgeKind = 'best-code' | 'best-chat' | 'best-fast';
/** Display text for each badge. */
export const BADGE_LABELS: Record<BadgeKind, string> = {
'best-code': 'Best code model now',
'best-chat': 'Best chat model now',
'best-fast': 'Fastest model now',
};
/** Row shape of the eval query: one avg score per (provider, model, suite kind). */
interface EvalRow {
provider_id: string;
model: string;
suite_kind: string;
avg_score: number | null;
}
/** Row shape of the live-traffic query: averages and count per (provider, model). */
interface LatencyRow {
provider_id: string;
model: string;
avg_gen_tps: number | null;
avg_duration_ms: number | null;
sample_count: number;
}
/**
 * Pure badge assignment: award one winner per category from the per-model
 * signals, mutating the winners' `badges` arrays in place.
 *
 * Only models with `healthy === true` compete. Within a category the highest
 * non-null metric wins; on a tie the model that appears first in `scores`
 * keeps the badge (callers sort deterministically before passing in). A
 * category with no eligible non-null metric awards nothing.
 *
 * Categories, in award order: 'best-code' (codeScore), 'best-chat'
 * (chatScore), 'best-fast' (avgGenTps).
 */
export function assignBadges(scores: ModelScore[]): void {
  const categories: Array<[BadgeKind, (s: ModelScore) => number | null]> = [
    ['best-code', (s) => s.codeScore],
    ['best-chat', (s) => s.chatScore],
    ['best-fast', (s) => s.avgGenTps],
  ];
  const contenders = scores.filter((s) => s.healthy);

  for (const [badge, metricOf] of categories) {
    let winner: ModelScore | undefined;
    let top = Number.NEGATIVE_INFINITY;
    for (const candidate of contenders) {
      const value = metricOf(candidate);
      // Strictly greater: an equal later value never displaces the leader.
      if (value != null && value > top) {
        top = value;
        winner = candidate;
      }
    }
    winner?.badges.push(badge);
  }
}
/**
 * Compute advisory routing scores across all (provider_id, model) pairs that
 * have either eval history or recent live traffic.
 *
 * Reads the latest completed eval score per (provider, model, suite kind) and
 * the live throughput/latency averages over the last LIVE_WINDOW_HOURS hours,
 * merges them per `${providerId}/${model}`, marks each entry healthy when its
 * host's liveness is 'connected', sorts by composite id, and assigns badges.
 *
 * @param sql - Tagged-template query function.
 * @param fleet - Current fleet state; only `hosts.get(id)?.liveness` is read.
 * @returns One ModelScore per pair, sorted by compositeId, badges assigned.
 */
export async function computeRoutingScores(
  sql: Sql,
  fleet: FleetState,
): Promise<ModelScore[]> {
  // 1. Eval scores — latest completed run per (provider, model, kind).
  // Take the most recent finished run's aggregate avgScore per kind so a
  // fresh run supersedes stale numbers.
  const evalRows = await sql<EvalRow[]>`
SELECT er.provider_id,
er.model,
es.kind AS suite_kind,
(er.aggregate::jsonb ->> 'avgScore')::float AS avg_score
FROM eval_runs er
JOIN eval_suites es ON er.suite_id = es.id
WHERE er.status = 'completed'
AND er.aggregate IS NOT NULL
AND er.finished_at = (
SELECT MAX(er2.finished_at)
FROM eval_runs er2
JOIN eval_suites es2 ON er2.suite_id = es2.id
WHERE er2.provider_id = er.provider_id
AND er2.model = er.model
AND es2.kind = es.kind
AND er2.status = 'completed'
)
`;
  // 2. Live latency/throughput — recent control_requests per (provider, model).
  // The averages are cast to float like the other computed columns in this
  // file: AVG over an integer or numeric column yields `numeric`, which the
  // driver may hand back as a string, and string metrics would be compared
  // lexicographically when badges are awarded.
  const cutoff = new Date(Date.now() - LIVE_WINDOW_HOURS * 3600_000).toISOString();
  const latencyRows = await sql<LatencyRow[]>`
SELECT provider_id,
model,
(AVG(gen_tps) FILTER (WHERE gen_tps > 0))::float AS avg_gen_tps,
(AVG(duration_ms) FILTER (WHERE duration_ms > 0))::float AS avg_duration_ms,
COUNT(*)::int AS sample_count
FROM control_requests
WHERE ts >= ${cutoff}
AND model IS NOT NULL
GROUP BY provider_id, model
`;
  // 3. Merge signals keyed by compositeId.
  const byKey = new Map<string, ModelScore>();
  const keyOf = (providerId: string, model: string) => `${providerId}/${model}`;
  // Fetch the entry for a pair, creating an all-null one on first sight.
  const ensure = (providerId: string, model: string): ModelScore => {
    const compositeId = keyOf(providerId, model);
    let s = byKey.get(compositeId);
    if (!s) {
      s = {
        compositeId,
        providerId,
        model,
        codeScore: null,
        chatScore: null,
        evalScore: null,
        avgGenTps: null,
        avgLatencyMs: null,
        sampleCount: 0,
        healthy: fleet.hosts.get(providerId)?.liveness === 'connected',
        badges: [],
      };
      byKey.set(compositeId, s);
    }
    return s;
  };
  for (const row of evalRows) {
    const s = ensure(row.provider_id, row.model);
    if (row.suite_kind === 'code') s.codeScore = row.avg_score;
    else if (row.suite_kind === 'chat') s.chatScore = row.avg_score;
    // evalScore is the better of the two kinds seen so far, null if neither.
    const best = Math.max(s.codeScore ?? -Infinity, s.chatScore ?? -Infinity);
    s.evalScore = best > -Infinity ? best : null;
  }
  for (const row of latencyRows) {
    const s = ensure(row.provider_id, row.model);
    s.avgGenTps = row.avg_gen_tps;
    s.avgLatencyMs = row.avg_duration_ms;
    s.sampleCount = row.sample_count;
  }
  // Deterministic order before badge assignment so ties are stable.
  const scores = Array.from(byKey.values()).sort((a, b) =>
    a.compositeId < b.compositeId ? -1 : a.compositeId > b.compositeId ? 1 : 0,
  );
  assignBadges(scores);
  return scores;
}

View File

@@ -0,0 +1,410 @@
import { spawn, type ChildProcess } from 'node:child_process';
import { randomUUID } from 'node:crypto';
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import { recordEvalResult } from './eval-suites.js';
// ─── types ──────────────────────────────────────────────────────────────────
/** Inputs for one code-sandbox eval run. */
export interface SandboxEvalParams {
/** Id of the eval run the tasks belong to. */
runId: string;
/** Provider passed to code generation for each task. */
providerId: string;
/** Model passed to code generation for each task. */
model: string;
quant: string | null;
/**
 * Loosely typed task records. runCodeEval reads `id`, `prompt`, `test_code`,
 * `expected_output` and `language` from each one, with defaults when absent.
 */
tasks: Array<Record<string, unknown>>;
}
/** Payload handed to the progress callback. */
export interface SandboxProgress {
completedTasks: number;
}
/** Outcome of a sandbox eval; `error` is null unless one was recorded. */
export interface SandboxResult {
error: string | null;
}
/**
 * Handle for one sandbox container.
 * NOTE(review): presumably `process` is the spawned docker child process and
 * `timeoutHandle` its kill-on-timeout timer -- confirm against the code that
 * creates it.
 */
export interface SandboxContainer {
id: string;
process: ChildProcess;
timeoutHandle: NodeJS.Timeout | null;
}
// ─── hardening constants (LAW, not suggestions) ─────────────────────────────
/**
 * Parse a positive integer from an environment value.
 *
 * Returns `fallback` when the value is unset, not a finite number, or below 1;
 * fractional values are floored.
 */
function positiveIntFromEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : fallback;
}

// Each setting can be overridden through the environment variable of the same name.
const SANDBOX_IMAGE = process.env.SANDBOX_IMAGE ?? 'node:20-bookworm-slim';
const SANDBOX_MEMORY = process.env.SANDBOX_MEMORY ?? '512m';
const SANDBOX_CPU = process.env.SANDBOX_CPU ?? '0.5';
const SANDBOX_PIDS = process.env.SANDBOX_PIDS ?? '100';
// Numeric settings are validated: a plain Number(...) turns a typo into NaN
// and accepts 0 as-is.
const SANDBOX_TIMEOUT_MS = positiveIntFromEnv(process.env.SANDBOX_TIMEOUT_MS, 30_000);
// Used as the batch step in runCodeEval; 0 would keep that loop from ever
// advancing, and NaN would silently run no tasks.
const SANDBOX_CONCURRENCY = positiveIntFromEnv(process.env.SANDBOX_CONCURRENCY, 4);
const SANDBOX_LABEL = 'boocontrol-eval';
// ─── sandbox runner ─────────────────────────────────────────────────────────
/**
 * Run a code sandbox eval: for every task, generate code via the LLM, execute
 * it in an ephemeral Docker container, score pass@1 (normalized stdout equals
 * the normalized expected output) and record the result.
 *
 * HARDENING FLAGS (LAW), applied by executeInSandbox:
 * - --network none: NO network access
 * - --user 1000:1000: non-root user
 * - --memory, --cpus, --pids-limit: resource caps
 * - --tmpfs /workspace: tmpfs workdir
 * - --rm: auto-remove on exit
 * - --label boocontrol-eval: orphan findability
 * - --security-opt=no-new-privileges: no privilege escalation
 * - --cap-drop=ALL: drop all capabilities
 *
 * NO volume mounts from the repo.
 * NO docker socket inside containers.
 *
 * Tasks run in batches of at most SANDBOX_CONCURRENCY, each batch awaited with
 * Promise.allSettled so one failing task never abandons its siblings. A task
 * that throws is recorded as a result row with a null score and its error
 * message; it does not abort the run.
 *
 * @param params - Run id, provider/model and the task list.
 * @param sql - Database handle forwarded to recordEvalResult.
 * @param emitter - Receives one `control_job` delta per successfully scored task.
 * @param seq - Sequence number stamped on every published delta.
 * @param onProgress - Called after each task finishes, pass or fail.
 * @returns A result whose `error` is currently always null: per-task failures
 *   are recorded individually rather than surfaced as a run-level error.
 */
export async function runCodeEval(
  params: SandboxEvalParams,
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  onProgress: (progress: SandboxProgress) => void,
): Promise<SandboxResult> {
  const { runId, tasks } = params;
  // Orphan prune at engine start.
  await pruneOrphanContainers();
  let completedTasks = 0;
  // Guard the step: a concurrency of 0 would never advance the loop below and
  // NaN would silently skip every task. Always process at least one per batch.
  const batchSize = Math.max(1, Math.floor(SANDBOX_CONCURRENCY) || 1);
  // Bounded concurrency: process tasks in batches.
  for (let i = 0; i < tasks.length; i += batchSize) {
    const batch = tasks.slice(i, i + batchSize);
    // Promise.allSettled: a single task failure never abandons in-flight containers.
    const results = await Promise.allSettled(
      batch.map(async (task, batchIdx) => {
        const globalIdx = i + batchIdx;
        const taskId = (task.id as string | undefined) ?? `task_${globalIdx}`;
        const prompt = (task.prompt as string | undefined) ?? '';
        const testCode = (task.test_code as string | undefined) ?? '';
        const expectedOutput = (task.expected_output as string | undefined) ?? '';
        const language = (task.language as string | undefined) ?? 'typescript';
        const startTime = Date.now();
        // NOTE(review): nothing assigns this handle, so the cleanup in `finally`
        // is currently a no-op; executeInSandbox owns the container lifecycle.
        let container: SandboxContainer | null = null;
        try {
          // Generate code from LLM.
          const generatedCode = await generateCode(params.providerId, params.model, prompt, language);
          // Execute in sandbox.
          const execResult = await executeInSandbox(generatedCode, testCode, language);
          const executionMs = Date.now() - startTime;
          // pass@1 scoring: output matches expected.
          const passed = normalizeOutput(execResult.stdout) === normalizeOutput(expectedOutput);
          const score = passed ? 1 : 0;
          await recordEvalResult(
            sql,
            runId,
            taskId,
            globalIdx,
            score,
            1,
            passed ? 'Output matches expected' : `Expected: ${expectedOutput}, Got: ${execResult.stdout}`,
            execResult.exitCode,
            execResult.stderr,
            execResult.stdout,
            executionMs,
            null,
          );
          emitter.publish({
            type: 'control_job' as const,
            seq,
            jobType: 'eval' as const,
            jobId: runId,
            status: 'running' as const,
            detail: {
              taskId,
              taskIndex: globalIdx,
              passed,
              score,
            },
          });
          return { taskId, passed, score };
        } catch (err) {
          // Narrow instead of casting: a thrown null/undefined must not make
          // the error path itself throw.
          const msg = err instanceof Error ? err.message : String(err);
          const executionMs = Date.now() - startTime;
          // Best-effort: a failure to persist the error row must not mask the
          // original task failure.
          await recordEvalResult(
            sql,
            runId,
            taskId,
            globalIdx,
            null,
            1,
            null,
            null,
            msg,
            null,
            executionMs,
            msg,
          ).catch(() => {});
          return { taskId, passed: false, score: 0, error: msg };
        } finally {
          // Per-task finally cleanup: kill container + remove.
          if (container) {
            await cleanupContainer(container);
          }
          completedTasks++;
          onProgress({ completedTasks });
        }
      }),
    );
    // Log batch results.
    for (const result of results) {
      if (result.status === 'rejected') {
        console.error('sandbox: batch task rejected:', result.reason);
      }
    }
  }
  return { error: null };
}
/**
 * Ask the target model for a solution to `prompt`.
 *
 * Sends a deterministic (temperature 0) chat-completions request to the
 * provider's base URL and returns the first choice's content, trimmed. If the
 * reply contains a markdown code fence, only the fence body is returned.
 *
 * @throws Error when the provider has no base URL or the HTTP response is not OK.
 */
async function generateCode(
  providerId: string,
  model: string,
  prompt: string,
  language: string,
): Promise<string> {
  const baseUrl = resolveProviderBaseUrlInternal(providerId);
  if (!baseUrl) {
    throw new Error(`no base URL for provider ${providerId}`);
  }

  const systemPrompt = `You are a code generator. Write ${language} code that solves the given task.
Output ONLY the code, no explanations, no markdown fences. The code will be executed directly.`;
  const payload = {
    model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: prompt },
    ],
    temperature: 0,
    max_tokens: 2048,
  };

  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(120_000),
  });
  if (!response.ok) {
    // The body is only used for diagnostics, so a failed read degrades to ''.
    const detail = await response.text().catch(() => '');
    throw new Error(`code generation failed: ${response.status} ${detail.slice(0, 200)}`);
  }

  const completion = (await response.json()) as {
    choices?: Array<{ message?: { content?: string } }>;
  };
  const raw = completion.choices?.[0]?.message?.content ?? '';
  // Models often ignore the "no fences" instruction; keep only the fenced body.
  const fenced = raw.match(/```[\w]*\n([\s\S]*?)```/)?.[1];
  return (fenced ? fenced : raw).trim();
}
/**
 * Execute generated code plus its test in a hardened, ephemeral Docker container.
 *
 * Resolves with the trimmed stdout/stderr and the exit code once `docker run`
 * closes. Rejects when docker cannot be spawned or when the run exceeds
 * SANDBOX_TIMEOUT_MS.
 *
 * On timeout the container is force-removed by name: SIGKILLing the docker
 * CLI only kills the client (SIGKILL is not proxied to the container), so
 * without the explicit `docker rm -f` the container would keep running until
 * the next orphan prune.
 */
async function executeInSandbox(
  generatedCode: string,
  testCode: string,
  language: string,
): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
  return new Promise((resolve, reject) => {
    const containerId = `eval_${randomUUID().slice(0, 12)}`;
    // Build the combined script: generated code + test code.
    const script = buildExecutionScript(generatedCode, testCode, language);
    // SECURITY: Hardened Docker run command.
    // --network none: NO network access.
    // --user 1000:1000: non-root user.
    // --memory, --cpus, --pids-limit: resource caps.
    // --tmpfs /workspace: tmpfs workdir, no persistent storage.
    // --rm: auto-remove on exit.
    // --label boocontrol-eval: orphan findability.
    // --security-opt=no-new-privileges: no privilege escalation.
    // --cap-drop=ALL: drop all capabilities.
    const dockerArgs = [
      'run',
      '--network', 'none',
      '--user', '1000:1000',
      '--memory', SANDBOX_MEMORY,
      '--cpus', String(SANDBOX_CPU),
      '--pids-limit', String(SANDBOX_PIDS),
      '--tmpfs', '/workspace:rw,noexec,size=64m',
      '--rm',
      '--label', SANDBOX_LABEL,
      '--security-opt', 'no-new-privileges',
      '--cap-drop', 'ALL',
      '--name', containerId,
      '-e', 'NODE_ENV=production',
      SANDBOX_IMAGE,
      'sh', '-c', script,
    ];
    // The timer below is the single timeout mechanism; spawn's own `timeout`
    // option is not used because it would race it with a second signal.
    const dockerProcess = spawn('docker', dockerArgs, {
      env: { ...process.env },
    });
    let stdout = '';
    let stderr = '';
    dockerProcess.stdout.on('data', (chunk: Buffer) => {
      stdout += chunk.toString();
    });
    dockerProcess.stderr.on('data', (chunk: Buffer) => {
      stderr += chunk.toString();
    });
    // Kill-on-timeout: stop the client AND the container it started.
    const timeoutHandle = setTimeout(() => {
      dockerProcess.kill('SIGKILL');
      const reaper = spawn('docker', ['rm', '-f', containerId], { stdio: 'ignore' });
      // Best-effort: the container may already be gone.
      reaper.on('error', () => {});
      reject(new Error(`sandbox execution timeout (${SANDBOX_TIMEOUT_MS}ms)`));
    }, SANDBOX_TIMEOUT_MS);
    dockerProcess.on('close', (code) => {
      clearTimeout(timeoutHandle);
      // After a timeout this resolve is a no-op: the promise is already rejected.
      resolve({
        stdout: stdout.trim(),
        stderr: stderr.trim(),
        exitCode: code,
      });
    });
    dockerProcess.on('error', (err) => {
      // Spawn failures emit 'error' (not necessarily 'close'), so the timer
      // must be cleared here too or it lingers for the full timeout.
      clearTimeout(timeoutHandle);
      reject(new Error(`docker spawn failed: ${err.message}`));
    });
  });
}
/**
 * Build the `sh -c` script that materializes the generated code and the test
 * in /workspace and then runs the test.
 *
 * Files are written with `printf '%s\n'` rather than `echo`: POSIX leaves
 * `echo` with backslashes implementation-defined, and dash (the /bin/sh of
 * Debian-based images) expands sequences such as `\n` and `\t`, which would
 * silently corrupt any code containing escape sequences.
 *
 * @param generatedCode - Model output, written to `output.js` / `output.sh`.
 * @param testCode - Test harness, written to `test.js` / `test.sh` and executed.
 * @param language - 'typescript' or 'javascript' run via tsx; anything else via bash.
 * @returns A single command line with the steps joined by `&&`.
 */
function buildExecutionScript(
  generatedCode: string,
  testCode: string,
  language: string,
): string {
  // One shell step that writes `content` (plus a trailing newline) verbatim.
  const writeFile = (content: string, fileName: string): string =>
    `printf '%s\\n' '${escapeShell(content)}' > ${fileName}`;

  if (language === 'typescript' || language === 'javascript') {
    return [
      'cd /workspace',
      writeFile(generatedCode, 'output.js'),
      writeFile(testCode, 'test.js'),
      'npx --yes tsx test.js 2>&1',
    ].join(' && ');
  }
  // Fallback: generic shell execution.
  return [
    'cd /workspace',
    writeFile(generatedCode, 'output.sh'),
    writeFile(testCode, 'test.sh'),
    'chmod +x output.sh test.sh',
    'bash test.sh 2>&1',
  ].join(' && ');
}
/**
 * Escape a string for embedding inside a single-quoted shell word: each `'`
 * becomes `'\''` (close the quote, emit an escaped quote, reopen).
 * The caller supplies the surrounding quotes.
 */
function escapeShell(str: string): string {
  return str.replace(/'/g, "'\\''");
}
/**
 * Canonicalize program output for comparison: leading/trailing whitespace is
 * dropped and every internal whitespace run becomes a single space.
 */
function normalizeOutput(output: string): string {
  const tokens = output.split(/\s+/).filter((token) => token.length > 0);
  return tokens.join(' ');
}
/**
 * Kill running containers that carry the sandbox label, e.g. leftovers from a
 * crashed run. Never rejects: any docker failure simply ends the prune.
 */
async function pruneOrphanContainers(): Promise<void> {
  const listArgs = ['ps', '-q', '--filter', `label=${SANDBOX_LABEL}`];
  await new Promise<void>((done) => {
    const lister = spawn('docker', listArgs);
    const chunks: string[] = [];
    lister.stdout.on('data', (chunk: Buffer) => {
      chunks.push(chunk.toString());
    });
    // docker missing / not runnable: nothing to prune.
    lister.on('error', () => done());
    lister.on('close', () => {
      const orphanIds = chunks
        .join('')
        .trim()
        .split('\n')
        .filter((id) => id !== '');
      if (orphanIds.length === 0) {
        done();
        return;
      }
      console.log({ count: orphanIds.length }, 'sandbox: pruning orphan containers');
      const killer = spawn('docker', ['kill', ...orphanIds]);
      killer.on('close', () => done());
      killer.on('error', () => done());
    });
  });
}
/**
 * Tear down a sandbox container: cancel its pending timer, SIGKILL the
 * process if it has not exited, then force-remove the container by id.
 * Never rejects.
 */
async function cleanupContainer(container: SandboxContainer): Promise<void> {
  const { timeoutHandle, process: child, id } = container;
  if (timeoutHandle) {
    clearTimeout(timeoutHandle);
  }
  // exitCode stays null until the process has exited.
  if (child.exitCode === null) {
    child.kill('SIGKILL');
  }
  // Container is --rm, so it auto-removes. But force-remove as safety net.
  const removal = new Promise<void>((settle) => {
    const remover = spawn('docker', ['rm', '-f', id]);
    remover.on('close', () => settle());
    remover.on('error', () => settle());
  });
  await removal.catch(() => {});
}
/**
 * Resolve a provider's base URL via llama-providers, or null when it cannot
 * be resolved.
 *
 * Uses the statically imported `resolveProviderBaseUrl`: this file is an ES
 * module, where a bare `require` is undefined. The previous lazy `require`
 * therefore always threw, was swallowed by the catch, and made every lookup
 * return null.
 */
function resolveProviderBaseUrlInternal(providerId: string): string | null {
  try {
    return resolveProviderBaseUrl(providerId) ?? null;
  } catch {
    // A failing lookup is reported as "no base URL" by the caller.
    return null;
  }
}

View File

@@ -0,0 +1,361 @@
/**
* P9.1: SSH config editor for llama-swap hosts.
*
* Pipeline (design §5, stackctl flow with the tests stackctl never had):
* SFTP/SSH read -> schema-validated edit (config-schema.json from the fork)
* -> diff preview -> timestamped backup -> write -> restart -> health-wait.
*
* SSH I/O is shelled out via `ssh` (matching the booterm precedent — no ssh2
* dependency, key from `secrets/`), injected as `SshExec` so every failure path
* is unit-testable without a live host. The pure helpers (validate, diff,
* backup filename) carry the logic and are tested directly.
*/
import { spawn } from 'node:child_process';
import { createRequire } from 'node:module';
import { load as loadYaml } from 'js-yaml';
import type { ValidateFunction } from 'ajv';
// ajv + ajv-formats are CJS. Under NodeNext ESM the default-import interop binds
// the namespace, not the constructable class, so load them via createRequire to
// get the real module.exports (class / plugin fn) at both type and runtime.
const require = createRequire(import.meta.url);
const Ajv = require('ajv') as typeof import('ajv').default;
const addFormats = require('ajv-formats') as typeof import('ajv-formats').default;
// ─── host SSH target ─────────────────────────────────────────────────────────
/** Connection details for one llama-swap host. */
export interface SshTarget {
/** Host name or address; combined with `user` into the ssh destination. */
host: string;
/** Remote login user. */
user: string;
/** Path to the private key handed to `ssh -i`. */
keyPath: string;
}
/** Outcome of one remote command. */
export interface ExecResult {
/** Exit code of the command; callers treat any non-zero value as failure. */
code: number;
/** Everything the command wrote to stdout. */
stdout: string;
/** Everything the command wrote to stderr. */
stderr: string;
}
/** Injectable SSH executor. `stdin`, when present, is piped to the remote command. */
export type SshExec = (target: SshTarget, command: string, stdin?: string) => Promise<ExecResult>;
// ─── pure: schema validation ─────────────────────────────────────────────────
/** Result of validating a config YAML string against the schema. */
export interface ValidationResult {
/** True only when the YAML parsed and satisfied the schema. */
valid: boolean;
/** Human-readable problems; empty when `valid` is true. */
errors: string[];
/** Parsed YAML value; absent only when YAML parsing itself failed. */
parsed?: unknown;
}
// Single-entry cache: the compiled validator and the schema object it was built from.
let cachedValidator: ValidateFunction | null = null;
let cachedSchemaRef: object | null = null;

/**
 * Return a compiled Ajv validator for `schema`, recompiling only when a
 * different schema object (compared by identity) is passed in.
 */
function getValidator(schema: object): ValidateFunction {
  if (cachedSchemaRef !== schema || cachedValidator === null) {
    const compiler = new Ajv({ allErrors: true, strict: false });
    addFormats(compiler);
    // Assign only after a successful compile so a throwing schema leaves the cache intact.
    cachedValidator = compiler.compile(schema);
    cachedSchemaRef = schema;
  }
  return cachedValidator;
}
/**
 * Validate a llama-swap config YAML string against the fork's
 * config-schema.json. Catches YAML syntax errors first, then rejects any
 * top-level value that is not a mapping, then reports schema errors.
 * Pure — no I/O; the schema object is passed in.
 *
 * @param yamlText - Raw YAML text of the config.
 * @param schema - JSON schema object to validate against.
 * @returns `valid: true` with the parsed config, or `valid: false` with one
 *   message per problem. `parsed` is omitted only when YAML parsing failed.
 */
export function validateLlamaConfig(yamlText: string, schema: object): ValidationResult {
  let parsed: unknown;
  try {
    parsed = loadYaml(yamlText);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { valid: false, errors: [`YAML parse error: ${reason}`] };
  }
  // `typeof [] === 'object'`, so a top-level sequence needs its own check to
  // be rejected here instead of depending on what the schema happens to say.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { valid: false, errors: ['config must be a YAML mapping'], parsed };
  }
  const validate = getValidator(schema);
  if (validate(parsed)) return { valid: true, errors: [], parsed };
  const errors = (validate.errors ?? []).map((e) => {
    const path = e.instancePath || '(root)';
    return `${path} ${e.message ?? 'invalid'}`;
  });
  return { valid: false, errors: errors.length ? errors : ['schema validation failed'], parsed };
}
// ─── pure: unified-ish diff ──────────────────────────────────────────────────
/**
 * Build a compact preview diff of two texts.
 *
 * The shared leading and trailing lines are skipped; whatever remains in the
 * middle is emitted as one hunk: a header, then every old line prefixed with
 * `- `, then every new line prefixed with `+ `. Returns '' for identical
 * texts. This is a preview aid, not a minimal-edit (Myers) diff.
 */
export function computeDiff(oldText: string, newText: string): string {
  const before = oldText.split('\n');
  const after = newText.split('\n');
  const shortest = Math.min(before.length, after.length);

  // Length of the common prefix.
  let prefix = 0;
  while (prefix < shortest && before[prefix] === after[prefix]) {
    prefix += 1;
  }

  // Length of the common suffix, never overlapping the prefix.
  let suffix = 0;
  while (
    suffix < shortest - prefix &&
    before[before.length - 1 - suffix] === after[after.length - 1 - suffix]
  ) {
    suffix += 1;
  }

  const lastOld = before.length - 1 - suffix;
  const lastNew = after.length - 1 - suffix;
  if (lastOld < prefix && lastNew < prefix) {
    return ''; // identical
  }

  const header = `@@ lines ${prefix + 1}..${lastOld + 1} -> ${prefix + 1}..${lastNew + 1} @@`;
  const removed = before.slice(prefix, lastOld + 1).map((line) => `- ${line}`);
  const added = after.slice(prefix, lastNew + 1).map((line) => `+ ${line}`);
  return [header, ...removed, ...added].join('\n');
}
// ─── pure: backup filename ───────────────────────────────────────────────────
/**
 * Derive the backup path for a config file: `<configPath>.bak-YYYYMMDDTHHMMSSZ`,
 * where the stamp is `now` in UTC with the fractional seconds removed.
 */
export function backupFilename(configPath: string, now: Date): string {
  const iso = now.toISOString();
  // Drop ".sssZ", then strip the date/time separators.
  const compact = iso.slice(0, iso.lastIndexOf('.')).replace(/[-:]/g, '');
  return `${configPath}.bak-${compact}Z`;
}
// ─── RemoteOps seam (shell vs wrapper) ───────────────────────────────────────
//
// 'shell' mode issues raw shell commands (P9.1 behavior). 'wrapper' mode issues
// fixed verbs so the key can be bound to an authorized_keys forced command that
// hardcodes the paths. Both drive the same apply pipeline.
/** Wire-protocol selector: 'shell' sends raw shell commands, 'wrapper' sends fixed verbs. */
export type SshMode = 'shell' | 'wrapper';
/**
* The remote operations the apply pipeline is built on. Both implementations
* in this file throw an Error when the underlying command exits non-zero.
*/
export interface RemoteOps {
/** Return the current remote config text. */
read(): Promise<string>;
/** Copy the current config aside. Shell mode derives the path from `now`; wrapper mode lets the host choose it. */
backup(now: Date): Promise<string>; // returns the backup path
/** Replace the remote config with `content`. */
write(content: string): Promise<void>;
/** Restart the service. Shell mode runs `restartCmd`; wrapper mode ignores it and sends a fixed verb. */
restart(restartCmd: string): Promise<void>;
}
/**
 * Throw an Error describing a failed remote step: its label, the exit code
 * and at most the first 300 characters of stderr.
 */
function fail(label: string, res: ExecResult): never {
  const { code, stderr } = res;
  const excerpt = stderr.slice(0, 300);
  throw new Error(`${label} failed (exit ${code}): ${excerpt}`);
}
/**
 * RemoteOps that issue raw shell commands (no wrapper on the host). Every path
 * is single-quoted before being embedded in a command; new content is piped to
 * `cat >` over stdin. Each operation throws when its command exits non-zero.
 */
export function shellOps(target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
  const quotedConfig = shellQuote(configPath);

  const read = async (): Promise<string> => {
    const result = await exec(target, `cat ${quotedConfig}`);
    if (result.code !== 0) fail('read', result);
    return result.stdout;
  };

  const backup = async (now: Date): Promise<string> => {
    const destination = backupFilename(configPath, now);
    const result = await exec(target, `cp ${quotedConfig} ${shellQuote(destination)}`);
    if (result.code !== 0) fail('backup', result);
    return destination;
  };

  const write = async (content: string): Promise<void> => {
    const result = await exec(target, `cat > ${quotedConfig}`, content);
    if (result.code !== 0) fail('write', result);
  };

  const restart = async (restartCmd: string): Promise<void> => {
    // The restart command is run as given, unquoted.
    const result = await exec(target, restartCmd);
    if (result.code !== 0) fail('restart', result);
  };

  return { read, backup, write, restart };
}
/**
 * RemoteOps for a key locked to a forced command. Only the fixed verbs
 * `read`, `backup`, `write` and `restart` are sent; the host-side wrapper owns
 * the paths. The backup verb prints the backup path on stdout, which is
 * returned trimmed. Each operation throws when its verb exits non-zero.
 */
export function wrapperOps(target: SshTarget, exec: SshExec): RemoteOps {
  type Verb = 'read' | 'backup' | 'write' | 'restart';

  // Send one verb (with optional stdin) and fail on a non-zero exit.
  const run = async (verb: Verb, stdin?: string): Promise<ExecResult> => {
    const result =
      stdin === undefined ? await exec(target, verb) : await exec(target, verb, stdin);
    if (result.code !== 0) fail(verb, result);
    return result;
  };

  const read = async (): Promise<string> => (await run('read')).stdout;
  const backup = async (): Promise<string> => (await run('backup')).stdout.trim();
  const write = async (content: string): Promise<void> => {
    await run('write', content);
  };
  const restart = async (): Promise<void> => {
    await run('restart');
  };

  return { read, backup, write, restart };
}
/**
 * Pick the RemoteOps implementation for `mode`: verb-based ops for 'wrapper',
 * raw shell ops (which need `configPath`) for everything else.
 */
export function makeRemoteOps(mode: SshMode, target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
  if (mode === 'wrapper') {
    return wrapperOps(target, exec);
  }
  return shellOps(target, configPath, exec);
}
// ─── orchestration (injectable exec) ─────────────────────────────────────────
/**
 * Fetch the remote config text using the ops for `mode` (shell when omitted,
 * for compatibility). Rejects when the remote read exits non-zero.
 */
export async function readRemoteConfig(
  target: SshTarget,
  configPath: string,
  exec: SshExec,
  mode: SshMode = 'shell',
): Promise<string> {
  const ops = makeRemoteOps(mode, target, configPath, exec);
  return ops.read();
}
/** Outcome of applyRemoteConfig. */
export interface ApplyResult {
/** True only when every step, including the health check, succeeded. */
ok: boolean;
/**
* Step at which the pipeline stopped, or 'done' on success. A failure to read
* the current config is reported as 'validate'.
*/
step: 'validate' | 'backup' | 'write' | 'restart' | 'health' | 'done';
/** Backup path; present once the backup step has succeeded. */
backupPath?: string;
/** Diff of current vs new config; present once the current config was read. */
diff?: string;
/** Failure description; absent on success. */
error?: string;
}
/** Inputs for applyRemoteConfig. */
export interface ApplyOptions {
/** Host to operate on. */
target: SshTarget;
/** Remote config file path (used by shell mode). */
configPath: string;
/** Command that restarts the service (used by shell mode). */
restartCmd: string;
/** Full YAML text of the new config. */
newConfig: string;
/** JSON schema object the new config is validated against. */
schema: object;
/** Provider base URL polled at `/v1/models` after the restart. */
baseUrl: string;
/** SSH executor; injected so the pipeline can be tested without a host. */
exec: SshExec;
/** 'shell' (default) or 'wrapper'. */
mode?: SshMode;
/** HTTP client for the health check; defaults to the global `fetch`. */
fetcher?: typeof fetch;
/** Timestamp used for the backup filename; defaults to the current time. */
now?: Date;
/** Maximum number of health polls; defaults to 10. */
healthAttempts?: number;
/** Delay between health polls in milliseconds; defaults to 2000. */
healthDelayMs?: number;
}
/**
 * Apply a new config to a remote host: validate, read current (for the diff),
 * back up, write, restart, then wait for the provider to become healthy.
 *
 * The pipeline stops at the first failing step and names it in the result; it
 * never throws for a failed step. The backup is always taken before the write,
 * so a failed write or restart leaves the timestamped backup available for
 * manual recovery. `mode` only changes the commands sent over SSH (raw shell
 * vs forced-command verbs); the sequence of steps is the same.
 */
export async function applyRemoteConfig(opts: ApplyOptions): Promise<ApplyResult> {
  const { target, configPath, restartCmd, newConfig, schema, baseUrl, exec } = opts;
  const {
    mode = 'shell',
    fetcher = fetch,
    now = new Date(),
    healthAttempts = 10,
    healthDelayMs = 2000,
  } = opts;
  const remote = makeRemoteOps(mode, target, configPath, exec);
  const messageOf = (err: unknown): string => (err as Error).message;

  // Step 1: reject an invalid config before the host is contacted at all.
  const check = validateLlamaConfig(newConfig, schema);
  if (!check.valid) {
    return { ok: false, step: 'validate', error: check.errors.join('; ') };
  }

  // Fetch the live config: needed for the diff, and an unreadable host must
  // fail before anything is written.
  let existing: string;
  try {
    existing = await remote.read();
  } catch (err) {
    return { ok: false, step: 'validate', error: `read current failed: ${messageOf(err)}` };
  }
  const diff = computeDiff(existing, newConfig);

  // Step 2: timestamped backup, strictly before the write.
  let backupPath: string;
  try {
    backupPath = await remote.backup(now);
  } catch (err) {
    return { ok: false, step: 'backup', diff, error: messageOf(err) };
  }

  // Step 3: write the new config.
  try {
    await remote.write(newConfig);
  } catch (err) {
    return { ok: false, step: 'write', backupPath, diff, error: messageOf(err) };
  }

  // Step 4: restart the service.
  try {
    await remote.restart(restartCmd);
  } catch (err) {
    return { ok: false, step: 'restart', backupPath, diff, error: messageOf(err) };
  }

  // Step 5: poll the provider until it serves /v1/models.
  const healthy = await healthWait(baseUrl, fetcher, healthAttempts, healthDelayMs);
  if (healthy) {
    return { ok: true, step: 'done', backupPath, diff };
  }
  return {
    ok: false,
    step: 'health',
    backupPath,
    diff,
    error: 'health check did not pass after restart; backup retained',
  };
}
/**
 * Poll `<baseUrl>/v1/models` (trailing slashes on `baseUrl` are ignored) until
 * a response is OK or `attempts` polls have been made. Each poll is aborted
 * after 5 seconds; a failed poll counts as "not up yet". Waits `delayMs`
 * between polls, but not after the last one.
 *
 * @returns true as soon as one poll succeeds, false when all attempts fail.
 */
export async function healthWait(
  baseUrl: string,
  fetcher: typeof fetch,
  attempts: number,
  delayMs: number,
): Promise<boolean> {
  const modelsUrl = `${baseUrl.replace(/\/+$/, '')}/v1/models`;
  for (let attempt = 0; attempt < attempts; attempt += 1) {
    let up = false;
    try {
      const response = await fetcher(modelsUrl, { signal: AbortSignal.timeout(5_000) });
      up = response.ok;
    } catch {
      // not up yet
    }
    if (up) {
      return true;
    }
    const isLastAttempt = attempt + 1 >= attempts;
    if (!isLastAttempt) {
      await sleep(delayMs);
    }
  }
  return false;
}
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
// Wrap a value in single quotes for the remote POSIX shell. An embedded single
// quote is emitted as '\'' (close the quote, escaped quote, reopen).
function shellQuote(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return "'" + escaped + "'";
}
// ─── real SSH executor (spawn) ───────────────────────────────────────────────
/**
 * Default SSH executor. Uses the system `ssh` with an explicit identity file and
 * IdentitiesOnly so the agent's default key is never offered (the boocode Gitea
 * lesson). BatchMode avoids interactive prompts hanging the service.
 *
 * Never rejects: a spawn failure resolves with code 127 and the error message
 * appended to stderr; a process that ends without an exit code (killed by a
 * signal) resolves with code 1.
 */
export const sshExec: SshExec = (target, command, stdin) => {
  return new Promise<ExecResult>((resolve) => {
    const args = [
      '-i', target.keyPath,
      '-o', 'IdentitiesOnly=yes',
      '-o', 'BatchMode=yes',
      '-o', 'StrictHostKeyChecking=accept-new',
      '-o', 'ConnectTimeout=10',
      `${target.user}@${target.host}`,
      command,
    ];
    const child = spawn('ssh', args, { stdio: ['pipe', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    child.stdout.on('data', (d) => { stdout += d.toString(); });
    child.stderr.on('data', (d) => { stderr += d.toString(); });
    child.on('error', (err) => resolve({ code: 127, stdout, stderr: `${stderr}${(err as Error).message}` }));
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
    // If ssh exits before draining stdin (auth failure, unreachable host), the
    // pending write fails with EPIPE. Without a listener that stream error is
    // an uncaught exception that takes the service down; the real failure is
    // already reported through the exit code and stderr above.
    child.stdin.on('error', () => {});
    if (stdin !== undefined) {
      child.stdin.write(stdin);
    }
    // Always close stdin so the remote command sees EOF.
    child.stdin.end();
  });
};