chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
29
apps/control/src/config.ts
Normal file
29
apps/control/src/config.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Environment schema for the control service.
 *
 * Numeric settings are read with `z.coerce.number()`, which converts an empty
 * string to `0` (`Number('') === 0`). Each numeric field is therefore
 * range-checked so that a blank or nonsensical value (for example
 * `RETENTION_RAW_HOURS=` or a negative size) fails validation at boot instead
 * of silently becoming a zero/negative setting.
 */
const schema = z.object({
  NODE_ENV: z.enum(['development', 'production']).default('production'),
  // Must be a valid TCP port; 0 stays allowed (OS-assigned ephemeral port).
  PORT: z.coerce.number().int().min(0).max(65535).default(9503),
  HOST: z.string().default('100.114.205.53'),
  DATABASE_URL: z.string(),
  LOG_LEVEL: z.enum(['fatal', 'error', 'warn', 'info', 'debug', 'trace']).default('info'),
  // Retention windows must be strictly positive.
  RETENTION_RAW_HOURS: z.coerce.number().positive().default(48),
  RETENTION_ROLLUP_DAYS: z.coerce.number().positive().default(90),
  // Capture limits may be zero but never negative.
  CAPTURE_SIZE_KB: z.coerce.number().nonnegative().default(256),
  CAPTURE_BUDGET_MB: z.coerce.number().nonnegative().default(50),
  LLAMA_PROVIDERS_PATH: z.string().optional(),
  LLAMA_SWAP_URL: z.string().default('http://localhost:8080'),
  // P9.1: path to the llama-swap config-schema.json (fork). Defaults to the
  // copy bundled under dist/data; override to point at the live fork schema.
  LLAMA_CONFIG_SCHEMA_PATH: z.string().optional(),
});

/** Validated, defaulted view of the process environment. */
export type Config = z.infer<typeof schema>;

/**
 * Parse `process.env` against the schema.
 *
 * @returns The validated configuration with defaults applied.
 *
 * On validation failure this logs the zod error message to stderr and
 * terminates the process with exit code 1; it does not throw.
 */
export function loadConfig(): Config {
  const result = schema.safeParse(process.env);
  if (!result.success) {
    console.error('Invalid env:', result.error.message);
    process.exit(1);
  }
  return result.data;
}
|
||||
67
apps/control/src/db.ts
Normal file
67
apps/control/src/db.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import postgres from 'postgres';
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import type { Config } from './config.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
export type Sql = ReturnType<typeof postgres>;
|
||||
|
||||
let sqlInstance: Sql | null = null;
|
||||
|
||||
/**
 * Return the process-wide postgres client, creating it on first use.
 *
 * The client is built from `config.DATABASE_URL` only on the first call;
 * later calls return the cached instance and ignore `config`.
 */
export function getSql(config: Config): Sql {
  if (sqlInstance === null) {
    const poolOptions = {
      max: 10,
      idle_timeout: 30,
      connect_timeout: 10,
      // Server NOTICE messages are intentionally discarded.
      onnotice: () => {},
    };
    sqlInstance = postgres(config.DATABASE_URL, poolOptions);
  }
  return sqlInstance;
}
|
||||
|
||||
/**
 * Poll information_schema.tables for a table name with exponential backoff.
 * Throws on timeout so systemd Restart=on-failure retries.
 *
 * The delay between polls starts at 100 ms, doubles for every full second
 * elapsed, and is capped at 2 s. Each sleep is additionally clamped to the
 * time remaining, so the final poll happens at the deadline and the call does
 * not overrun `timeoutMs` by a whole back-off interval.
 *
 * @param sql       Postgres client used as a tagged template.
 * @param tableName Table to look for in the `public` schema.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @throws Error when the table has not appeared once `timeoutMs` has elapsed.
 *         Errors raised by the query itself propagate unchanged.
 */
export async function waitForTable(sql: Sql, tableName: string, timeoutMs: number): Promise<void> {
  const start = Date.now();
  const baseDelay = 100;
  const cap = 2000;
  while (true) {
    const rows = await sql<{ table_name: string }[]>`
      SELECT table_name FROM information_schema.tables
      WHERE table_schema = 'public' AND table_name = ${tableName}
    `;
    if (rows.length > 0) return;

    const elapsed = Date.now() - start;
    const remaining = timeoutMs - elapsed;
    if (remaining <= 0) {
      throw new Error(`timeout waiting for table '${tableName}' after ${timeoutMs}ms`);
    }

    const backoff = Math.min(cap, baseDelay * 2 ** Math.floor(elapsed / 1000));
    // Never sleep past the deadline.
    const delay = Math.min(backoff, remaining);
    await new Promise((r) => setTimeout(r, delay));
  }
}
|
||||
|
||||
/**
 * Read `schema.sql` from the directory of this module and execute its
 * contents against the database via `sql.unsafe` (raw, unparameterized DDL).
 */
export async function applySchema(sql: Sql): Promise<void> {
  const ddl = await readFile(resolve(__dirname, 'schema.sql'), 'utf8');
  await sql.unsafe(ddl);
}
|
||||
|
||||
/**
 * Liveness probe for the database: runs `SELECT 1`.
 *
 * @returns `true` when the query completes, `false` when it throws or rejects.
 *          The error itself is discarded.
 */
export async function pingDb(sql: Sql): Promise<boolean> {
  let reachable = true;
  try {
    await sql`SELECT 1`;
  } catch {
    reachable = false;
  }
  return reachable;
}
|
||||
|
||||
/**
 * Close the cached postgres client, if one exists, with a 5 second timeout
 * passed to `end()`.
 *
 * The cached reference is cleared even when `end()` rejects, so a failed close
 * does not leave a dead client behind for the next `getSql` call to return.
 * A rejection from `end()` still propagates to the caller.
 */
export async function closeDb(): Promise<void> {
  const instance = sqlInstance;
  if (!instance) return;
  try {
    await instance.end({ timeout: 5 });
  } finally {
    // Only clear the slot if it still points at the instance we closed.
    if (sqlInstance === instance) sqlInstance = null;
  }
}
|
||||
624
apps/control/src/index.ts
Normal file
624
apps/control/src/index.ts
Normal file
@@ -0,0 +1,624 @@
|
||||
import Fastify from 'fastify';
|
||||
import fastifyWebsocket from '@fastify/websocket';
|
||||
import { loadConfig } from './config.js';
|
||||
import { getSql, applySchema, pingDb, waitForTable } from './db.js';
|
||||
import type { FleetState, HostState } from './services/fleet-state.js';
|
||||
import { createFleetState, ensureHostState, stampLastSeen, incrementSeq } from './services/fleet-state.js';
|
||||
import { registerControlWebSocket } from './routes/ws.js';
|
||||
import type { LlamaSweepSSEEvent, MetricsEntry } from './services/fleet-connector.js';
|
||||
import { startFleetConnector } from './services/fleet-connector.js';
|
||||
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents, trimCapture, parseCaptureJson } from './services/retention.js';
|
||||
import { detectGap } from './services/reconcile.js';
|
||||
import { jsonbObject } from './services/jsonb.js';
|
||||
import { ActionQueue } from './services/action-queue.js';
|
||||
import { LogRelay } from './services/log-relay.js';
|
||||
import { registerActionRoutes } from './routes/actions.js';
|
||||
import { registerCaptureRoutes } from './routes/captures.js';
|
||||
import { registerBenchRoutes, setBenchApp } from './routes/bench.js';
|
||||
import { registerPlaygroundRoutes } from './routes/playground.js';
|
||||
import { registerEvalRoutes } from './routes/evals.js';
|
||||
import { registerRoutingRoutes } from './routes/routing.js';
|
||||
import { registerReportRoutes, startReportScheduler } from './routes/reports.js';
|
||||
import { registerGatewayRoutes } from './routes/gateway.js';
|
||||
import { registerPolicyRoutes } from './routes/policies.js';
|
||||
import { registerSshConfigRoutes } from './routes/ssh-config.js';
|
||||
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from './services/llama-providers.js';
|
||||
|
||||
// ─── delta emitter (B3 fix) ─────────────────────────────────────────────────
|
||||
|
||||
/** Listener invoked with every published delta. */
export type DeltaCallback = (delta: unknown) => void;

/** Minimal synchronous publish/subscribe channel for deltas. */
export type DeltaEmitter = {
  /** Register a listener; the returned function removes it again. */
  subscribe(cb: DeltaCallback): () => void;
  /** Deliver `delta` to every currently registered listener. */
  publish(delta: unknown): void;
};

/**
 * Create an in-memory delta emitter.
 *
 * `publish` delivers to a snapshot of the listeners registered when it was
 * called:
 * - a listener subscribed from inside a callback first receives the NEXT
 *   delta, so a callback that re-subscribes cannot make `publish` loop forever;
 * - a listener unsubscribed by an earlier callback during the same publish is
 *   skipped;
 * - an exception thrown by one listener is swallowed so the remaining
 *   listeners still run.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const listeners = new Set<DeltaCallback>();
  return {
    subscribe(cb: DeltaCallback): () => void {
      listeners.add(cb);
      return () => { listeners.delete(cb); };
    },
    publish(delta: unknown): void {
      // Iterate a copy: iterating the live Set would also visit entries added
      // during iteration.
      for (const cb of [...listeners]) {
        if (!listeners.has(cb)) continue; // removed earlier in this publish
        try { cb(delta); } catch { /* ignore emitter errors */ }
      }
    },
  };
}
|
||||
|
||||
// ─── metrics entry field-name mapper ─────────────────────────────────────────
|
||||
// Real /api/metrics shape has nested tokens and different field names:
|
||||
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
|
||||
// Map to the column names used in control_requests.
|
||||
|
||||
/** Flat row shape matching the column names used in control_requests. */
interface MappedMetricsEntry {
  id: number;
  ts: string;
  model: string;
  req_path: string;
  status_code: number;
  duration_ms: number;
  cache_tokens: number;
  input_tokens: number;
  output_tokens: number;
  prompt_tps: number;
  gen_tps: number;
  has_capture: boolean;
  /** P4: NULL for ring data — ActivityLogEntry does not carry request headers. */
  source: string | null;
}

/**
 * Flatten one metrics entry into the control_requests column layout:
 * `timestamp` -> `ts`, `resp_status_code` -> `status_code`, and the nested
 * `tokens.*` counters are lifted to top-level fields.
 */
function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;
  const mapped: MappedMetricsEntry = {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    // P4: NULL — ActivityLogEntry does not carry request headers.
    source: null,
  };
  return mapped;
}
|
||||
|
||||
// ─── SSE event handlers (B5 fix: await onEvent; B2 fix: incrementSeq) ───────
|
||||
|
||||
/**
 * Apply one llama-swap SSE event to the in-memory fleet state, persist what
 * needs persisting, and publish the resulting deltas.
 *
 * Per event type:
 * - `modelStatus`: diffs the full-fleet model list against the current state,
 *   inserts one control_model_events row per changed model, and publishes a
 *   `control_fleet` delta (nothing is published when no model changed).
 * - `logData`: appends to `logRelay` (when given) and publishes `control_log`;
 *   nothing is persisted.
 * - `metrics`: runs `handleReconcile`, then stores each entry's trimmed capture
 *   and publishes one `control_activity` delta per entry.
 * - `inflight`: records the host-level in-flight total; publishes nothing.
 *
 * @param fleet      Fleet state to update.
 * @param sql        Postgres client.
 * @param config     Service config (`CAPTURE_SIZE_KB` is read here).
 * @param providerId Provider the event came from.
 * @param emitter    Delta sink for WebSocket subscribers.
 * @param event      The SSE event.
 * @param logRelay   Optional log relay for `logData` events.
 */
export async function handleLlamaSweepEvent(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  event: LlamaSweepSSEEvent,
  logRelay: LogRelay | null = null,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);

  switch (event.type) {
    case 'modelStatus': {
      // Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
      // Derive transitions by diffing against current state; persist only changes.
      state.liveness = 'connected';
      const changed: Array<{ model: string; state: string }> = [];
      for (const m of event.data) {
        const prev = state.models.get(m.id);
        if (!prev || prev.state !== m.state) {
          changed.push({ model: m.id, state: m.state });
        }
        state.models.set(m.id, {
          model: m.id,
          state: m.state,
          ts: new Date(),
          ttlDeadline: prev?.ttlDeadline ?? null,
          inflight: prev?.inflight ?? 0,
        });
      }
      if (changed.length === 0) break;
      const seq = incrementSeq(state);
      for (const c of changed) {
        await sql`
          INSERT INTO control_model_events (provider_id, model, state, ts, detail)
          VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
          ON CONFLICT (provider_id, model, state, ts) DO NOTHING
        `;
      }
      // Publish delta to WS subscribers (B3 fix).
      emitter.publish({
        type: 'control_fleet' as const,
        seq,
        hosts: [{
          providerId: state.providerId,
          liveness: state.liveness,
          lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
          seq: state.seq,
          models: Array.from(state.models.values()).map((m) => ({
            model: m.model,
            state: m.state,
            ts: m.ts.toISOString(),
            ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
            inflight: m.inflight,
          })),
        }],
      });
      break;
    }
    case 'logData': {
      // Logs are relay-only; no persistence by default.
      const source = event.data.source as 'proxy' | 'upstream' | 'model';
      // Real payload field is 'data' (fork sendLogData), may contain multiple lines.
      const text = event.data.data;
      if (logRelay) {
        logRelay.append(providerId, source, text);
      }
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_log' as const,
        seq,
        providerId,
        source,
        line: text,
      });
      break;
    }
    case 'metrics': {
      // Real payload: BARE array of ActivityLogEntry (fork sendMetrics).
      const entries = event.data;
      // Advance the host sequence once per metrics batch; the activity deltas
      // below all carry the resulting state.seq.
      incrementSeq(state);
      // B5 fix: await onEvent (handleReconcile is async).
      await handleReconcile(fleet, sql, config, providerId, emitter, entries).catch((err) => {
        // A1: log the error instead of swallowing silently.
        const msg = err instanceof Error ? err.message : String(err);
        console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
      });
      // Publish activity deltas.
      for (const entry of entries) {
        const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
        const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
        // Map real field names: resp_status_code -> status_code, tokens.* nested, timestamp -> ts.
        const mapped = mapMetricsEntry(entry);
        // handleReconcile has normally already inserted this row WITHOUT its
        // capture, so a plain DO NOTHING here would drop every capture. On
        // conflict, fill in the capture when the stored row has none and this
        // entry carries one; otherwise the existing row is left untouched.
        await sql`
          INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
          VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
          ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
            SET capture = EXCLUDED.capture
            WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
        `;
        emitter.publish({
          type: 'control_activity' as const,
          seq: state.seq,
          providerId,
          entry: {
            id: mapped.id,
            ts: mapped.ts,
            model: mapped.model,
            reqPath: mapped.req_path,
            statusCode: mapped.status_code,
            durationMs: mapped.duration_ms,
          },
        });
      }
      break;
    }
    case 'inflight': {
      // Real payload: {total} -- host-level total (fork sendInFlight); the fork
      // does not publish per-model inflight over SSE.
      state.inflightTotal = event.data.total;
      break;
    }
  }
}
|
||||
|
||||
// ─── reconcile handler (B7 fix: called from metrics event) ───────────────────
|
||||
|
||||
/**
 * Ingest a batch of metrics entries for one provider.
 *
 * Marks the host as seen and `connected`, records a `gap_suspected` model
 * event when `detectGap` flags the batch against the newest persisted request,
 * then inserts every entry into control_requests (duplicates are ignored via
 * ON CONFLICT).
 *
 * `config` and `emitter` are accepted but not used by this function.
 *
 * @returns Always `true` when the function completes without throwing.
 */
async function handleReconcile(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  metrics: MetricsEntry[],
): Promise<boolean> {
  const host = ensureHostState(fleet, providerId);
  stampLastSeen(host);
  host.liveness = 'connected';

  // Detect gap: if oldest reconcile entry is newer than newest persisted entry
  // for that provider, the ring wrapped past our tail.
  // The LAST element of the batch is taken as the oldest entry.
  const batch = metrics ?? [];
  const oldestReconcileTs = batch.length === 0 ? null : batch[batch.length - 1]!.timestamp;

  if (oldestReconcileTs) {
    const [newestRow] = await sql<{ ts: string }[]>`
      SELECT ts FROM control_requests
      WHERE provider_id = ${providerId}
      ORDER BY ts DESC LIMIT 1
    `;

    if (newestRow !== undefined && detectGap(oldestReconcileTs, newestRow.ts)) {
      const gapDetail = {
        oldestReconcile: oldestReconcileTs,
        newestPersisted: newestRow.ts,
      };
      await sql`
        INSERT INTO control_model_events (provider_id, model, state, ts, detail)
        VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json(gapDetail as never)})
        ON CONFLICT (provider_id, model, state, ts) DO NOTHING
      `;
    }
  }

  // Ingest reconcile entries (dedup via UNIQUE constraint).
  for (const item of batch) {
    const row = mapMetricsEntry(item);
    await sql`
      INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
      VALUES (${providerId}, ${row.id}, ${row.ts}, ${row.model}, ${row.req_path}, ${row.status_code}, ${row.duration_ms}, ${row.cache_tokens}, ${row.input_tokens}, ${row.output_tokens}, ${row.prompt_tps}, ${row.gen_tps}, ${row.has_capture}, ${row.source})
      ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
    `;
  }

  return true;
}
|
||||
|
||||
// ─── perf poller (A7 fix: add timeout; A8 fix: log errors) ───────────────────
|
||||
|
||||
/**
 * Fetch new performance samples from one provider's `/api/performance`
 * endpoint, persist them, and publish a `control_perf` delta per sample.
 *
 * The request is made incrementally: the newest stored sample timestamp for
 * the provider is sent as `?after=<ISO timestamp>` when one exists.
 *
 * This function never rejects. Every failure, including the watermark query,
 * is caught and logged, because it is driven from a timer callback where a
 * rejection would go unhandled.
 *
 * `config` is accepted but not used by this function.
 */
async function pollPerformance(
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  baseUrl: string,
  fleet: FleetState,
  emitter: DeltaEmitter,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);

  try {
    // Recover watermark from MAX(ts) per provider.
    const watermark = await sql<{ ts: string | null }[]>`
      SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
    `;

    // porsager returns timestamptz as a Date object; interpolating it raw yields
    // Date.toString() ("Thu Jun 12 2026 ...") which llama-swap rejects with 400.
    const afterParam = watermark[0]?.ts
      ? `?after=${encodeURIComponent(new Date(watermark[0].ts).toISOString())}`
      : '';
    const url = `${baseUrl}/api/performance${afterParam}`;

    // A7 fix: bound the request with a 10 s timeout signal.
    const fetchSignal = AbortSignal.timeout(10_000);
    const res = await fetch(url, { signal: fetchSignal });
    if (!res.ok) {
      // A8: surface non-2xx responses instead of dropping them silently.
      console.warn({ providerId, status: res.status }, 'fleet: perf poll returned non-OK status');
      return;
    }

    // Real shape: { gpu_stats: GpuStat[], sys_stats: SysStat[] }
    const data = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
    if (!data) return;

    // Pair gpu_stats and sys_stats by timestamp; entries without a timestamp
    // are skipped.
    const gpuMap = new Map<string, unknown>();
    for (const g of data.gpu_stats ?? []) {
      const gpu = g as { timestamp?: string };
      if (gpu.timestamp) {
        gpuMap.set(gpu.timestamp, g);
      }
    }

    const sysMap = new Map<string, unknown>();
    for (const s of data.sys_stats ?? []) {
      const sys = s as { timestamp?: string };
      if (sys.timestamp) {
        sysMap.set(sys.timestamp, s);
      }
    }

    // Collect all unique timestamps.
    const allTimestamps = new Set([...gpuMap.keys(), ...sysMap.keys()]);
    if (allTimestamps.size === 0) return;

    stampLastSeen(state);

    for (const ts of allTimestamps) {
      // Either side may be missing for a given timestamp; it is stored as null.
      const gpu = gpuMap.get(ts) ?? null;
      const sys = sysMap.get(ts) ?? null;

      await sql`
        INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
        VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
        ON CONFLICT (provider_id, ts) DO NOTHING
      `;

      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_perf' as const,
        seq,
        providerId,
        ts,
        gpu,
        sys,
      });
    }
  } catch (err) {
    // A8 fix: log the error instead of swallowing silently.
    const msg = err instanceof Error ? err.message : String(err);
    console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
  }
}
|
||||
|
||||
// ─── fleet-state rebuild from DB (A1/F2 fix) ─────────────────────────────────
|
||||
|
||||
/**
 * Seed the in-memory fleet state from persisted rows at startup.
 *
 * - control_model_events: the newest event per model becomes that model's
 *   state; every host found this way starts with liveness `down`.
 * - control_requests / control_perf_samples: any provider with rows gets a
 *   host entry and is stamped as seen.
 *
 * Rows with model `'*'` are excluded: they are synthetic `gap_suspected`
 * markers written by the reconcile path, not real models, and must not show
 * up in a host's model map.
 */
async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
  // Query control_model_events for latest model state per provider.
  // B3: ORDER BY ASC so iteration processes oldest first; Map.set() overwrites
  // with the latest state for each model, so the newest event wins.
  const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
    SELECT provider_id, model, state, ts, detail
    FROM control_model_events
    WHERE model <> '*'
      AND ts IN (
        SELECT MAX(ts) FROM control_model_events
        GROUP BY provider_id, model, state
      )
    ORDER BY ts ASC
  `;

  for (const row of modelEvents) {
    const state = ensureHostState(fleet, row.provider_id);
    state.liveness = 'down';
    // NOTE(review): stampLastSeen is called without the row timestamp; if it
    // stamps the current time, lastSeenAt reflects rebuild time rather than the
    // persisted event time — confirm this is intended.
    stampLastSeen(state);
    // row.detail is jsonb (porsager returns it parsed); jsonbObject tolerates
    // both a parsed object and a JSON string.
    const detail: unknown = jsonbObject(row.detail);
    // B4: derive ttlDeadline from the EVENT timestamp plus detail.ttl seconds,
    // so the deadline is relative to when the event was recorded, not to when
    // this rebuild runs. A missing or zero ttl yields no deadline.
    const ttl = (detail as { ttl?: number })?.ttl;
    const eventTs = new Date(row.ts).getTime();
    const ttlDeadline = ttl ? new Date(eventTs + ttl * 1000) : null;
    state.models.set(row.model, {
      model: row.model,
      state: row.state,
      ts: new Date(row.ts),
      ttlDeadline,
      inflight: 0,
    });
  }

  // Query control_requests for last activity.
  const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_requests
    WHERE ts IN (
      SELECT MAX(ts) FROM control_requests GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;

  for (const row of lastRequests) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }

  // Query control_perf_samples for latest perf sample.
  const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_perf_samples
    WHERE ts IN (
      SELECT MAX(ts) FROM control_perf_samples GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;

  for (const row of lastPerf) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }
}
|
||||
|
||||
// ─── main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Service entry point: load config, prepare the database, register routes,
 * start one fleet connector per enabled provider, start the background
 * timers, and begin listening.
 *
 * Background work is written so a failure cannot escape as an unhandled
 * promise rejection:
 * - the perf poller and the retention job catch and log errors per provider;
 * - perf poll ticks never overlap (a tick is skipped while the previous one
 *   is still running);
 * - shutdown runs at most once and exits non-zero if closing fails.
 */
async function main() {
  const config = loadConfig();
  const app = Fastify({ logger: { level: config.LOG_LEVEL } });
  const errText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Replace the default JSON parser so an empty/whitespace body parses as {}.
  app.removeContentTypeParser(['application/json']);
  app.addContentTypeParser('application/json', { parseAs: 'string' }, (_req: unknown, body: unknown, done: (err: Error | null, body: unknown) => void) => {
    const str = (body as string) ?? '';
    if (str.trim().length === 0) {
      done(null, {});
      return;
    }
    try {
      done(null, JSON.parse(str));
    } catch (err) {
      done(err as Error, undefined);
    }
  });

  const sql = getSql(config);

  // Startup ordering guard: wait for server-owned tables before applying schema.
  await waitForTable(sql, 'sessions', 30_000);
  await applySchema(sql);
  app.log.info('database schema applied');

  // Register WebSocket endpoint.
  const fleet = createFleetState();
  const emitter = createDeltaEmitter();

  // P2: Action queue + log relay
  const actionQueue = new ActionQueue();
  const logRelay = new LogRelay();
  registerControlWebSocket(app, fleet, emitter, logRelay);
  registerActionRoutes(app, actionQueue, fleet, emitter);
  registerCaptureRoutes(app, sql);
  setBenchApp(app.log);
  registerBenchRoutes(app, sql, fleet, emitter);
  registerPlaygroundRoutes(app);
  registerEvalRoutes(app, sql, fleet, emitter);
  registerRoutingRoutes(app, sql, fleet);
  registerReportRoutes(app, sql);
  registerGatewayRoutes(app, sql, fleet, emitter);
  registerPolicyRoutes(app, sql);
  registerSshConfigRoutes(app, sql, config, fleet, emitter);

  // Health endpoint: 200 when the DB answers, 503 otherwise.
  app.get('/api/health', async (_req: unknown, reply: import('fastify').FastifyReply) => {
    const dbOk = await pingDb(sql);
    const status = dbOk ? 200 : 503;
    return reply.status(status).send({
      ok: dbOk,
      db: dbOk,
    });
  });

  // Rebuild fleet state from DB on startup (A1/F2 fix). Best-effort: a failure
  // is logged and startup continues with an empty fleet state.
  await rebuildFleetFromDB(fleet, sql).catch((err) => {
    app.log.warn({ err: (err as Error).message }, 'fleet: rebuild from DB failed');
  });

  // Load the provider registry — baseUrl comes from the registry, never from ssh_host.
  const registry = loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
  app.log.info({ count: registry.providers.length }, 'fleet: provider registry loaded');

  // P7.2: the auto:* gateway is itself a registry entry (kind boocontrol-gateway)
  // so BooChat adopts it as a provider. BooControl must NOT treat it as a fleet
  // host — it has no llama-swap SSE/perf surface and its baseUrl points back at
  // this service. Filter it out of every fleet operation.
  const fleetProviders = registry.providers.filter((p) => p.kind !== 'boocontrol-gateway');

  // JOIN registry providers with control_hosts for the enabled flag.
  // Insert a control_hosts row ON CONFLICT DO NOTHING for any registry provider
  // missing one, so the fleet state has a row to key off.
  const enabledHosts = await sql<{ provider_id: string; enabled: boolean }[]>`
    SELECT provider_id, enabled FROM control_hosts
    WHERE provider_id = ANY(${fleetProviders.map((p) => p.id)}::text[])
  `;
  const enabledMap = new Map<string, boolean>();
  for (const row of enabledHosts) {
    enabledMap.set(row.provider_id, row.enabled);
  }

  // Seed missing control_hosts rows so the registry is the source of truth.
  for (const provider of fleetProviders) {
    if (!enabledMap.has(provider.id)) {
      await sql`
        INSERT INTO control_hosts (provider_id, enabled)
        VALUES (${provider.id}, true)
        ON CONFLICT (provider_id) DO NOTHING
      `;
      enabledMap.set(provider.id, true);
    }
  }

  const abortControllers = new Map<string, AbortController>();

  for (const provider of fleetProviders) {
    const enabled = enabledMap.get(provider.id) ?? true;
    if (!enabled) continue;

    const baseUrl = provider.baseUrl;

    // P2: Register host with action queue
    actionQueue.registerHost(provider.id, {
      baseUrl,
      isLivenessUp: () => {
        const hs = fleet.hosts.get(provider.id);
        return hs?.liveness !== 'down';
      },
      isInflightRequests: () => {
        // Host-level total from the SSE inflight event (per-model is not published).
        return fleet.hosts.get(provider.id)?.inflightTotal ?? 0;
      },
      log: app.log,
    });

    const abort = startFleetConnector(provider.id, baseUrl, {
      isUp: () => true,
      sql,
      log: app.log,
      onEvent: (pid, event) => handleLlamaSweepEvent(fleet, sql, config, pid, emitter, event, logRelay),
      onReconcile: (pid, metrics) => handleReconcile(fleet, sql, config, pid, emitter, metrics),
      onReconnectGiveUp: async (pid) => {
        const state = ensureHostState(fleet, pid);
        state.liveness = 'down';
      },
      sleep: (ms) => new Promise((r) => setTimeout(r, ms)),
    });
    abortControllers.set(provider.id, abort);
  }

  // Perf poller: 5s interval per enabled provider — baseUrl from registry.
  // Providers are polled sequentially, so one tick can outlast the interval;
  // the flag below skips ticks until the running one has finished.
  let pollInFlight = false;
  const pollTimer = setInterval(() => {
    if (pollInFlight) return;
    pollInFlight = true;
    void (async () => {
      try {
        for (const provider of fleetProviders) {
          const enabled = enabledMap.get(provider.id) ?? true;
          if (!enabled) continue;
          try {
            await pollPerformance(sql, config, provider.id, provider.baseUrl, fleet, emitter);
          } catch (err) {
            // Keep polling the remaining providers.
            app.log.warn({ providerId: provider.id, err: errText(err) }, 'fleet: perf poll tick failed');
          }
        }
      } finally {
        pollInFlight = false;
      }
    })();
  }, 5_000);

  // Retention job: daily timer — iterate registry providers.
  const retentionConfig = buildRetentionConfig(config);
  const retentionTimer = setInterval(() => {
    void (async () => {
      for (const provider of fleetProviders) {
        const enabled = enabledMap.get(provider.id) ?? true;
        if (!enabled) continue;
        try {
          await runRollup(sql, provider.id, retentionConfig.rawHours);
          // A2 fix: chunk pruneRawSamples (already chunked), also chunk pruneActivity and pruneModelEvents.
          await pruneRawSamples(sql, provider.id, retentionConfig.rawHours);
          await pruneActivity(sql, retentionConfig.rawHours);
          await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
        } catch (err) {
          // One provider's failure must not stop retention for the others or
          // escape as an unhandled rejection.
          app.log.warn({ providerId: provider.id, err: errText(err) }, 'retention: job failed');
        }
      }
    })();
  }, 24 * 3600_000); // daily

  // P6.2: Report digest scheduler (catch-up on boot, then hourly).
  const stopReportScheduler = startReportScheduler(sql, app.log);

  app.addHook('onClose', async () => {
    clearInterval(pollTimer);
    clearInterval(retentionTimer);
    stopReportScheduler();
    for (const abort of abortControllers.values()) {
      abort.abort();
    }
  });

  // Graceful shutdown. Runs at most once, even if both signals arrive.
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return;
    shuttingDown = true;
    app.log.info('shutting down');
    try {
      await app.close();
      await sql.end({ timeout: 5 });
      process.exit(0);
    } catch (err) {
      app.log.error({ err: errText(err) }, 'shutdown failed');
      process.exit(1);
    }
  };
  process.on('SIGTERM', () => { void shutdown(); });
  process.on('SIGINT', () => { void shutdown(); });

  await app.listen({ port: config.PORT, host: config.HOST });
  app.log.info(`BooControl listening on ${config.HOST}:${config.PORT}`);
}
|
||||
|
||||
// P2 exports for tests
|
||||
export { ActionQueue } from './services/action-queue.js';
|
||||
export { LogRelay } from './services/log-relay.js';
|
||||
|
||||
// P3 exports for tests
|
||||
export { runSingleBenchRequest, parseLlamaTimings, computeAggregates } from './services/bench-engine.js';
|
||||
export { computeRegressionFlag } from './services/bench-engine.js';
|
||||
|
||||
// P5 exports for tests
|
||||
export { loadEvalSuitesFromData } from './services/eval-suites.js';
|
||||
export { runCodeEval } from './services/sandbox-runner.js';
|
||||
|
||||
// Start the service unless the VITEST environment variable is set, in which
// case this module is only imported for its exports.
if (!process.env.VITEST) {
  // Any error escaping main() is fatal: report it and exit non-zero.
  const onFatal = (err: unknown): never => {
    console.error('fatal:', err);
    process.exit(1);
  };
  main().catch(onFatal);
}
|
||||
108
apps/control/src/routes/actions.ts
Normal file
108
apps/control/src/routes/actions.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { ActionQueue } from '../services/action-queue.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
|
||||
/**
 * Register action submission routes.
 *
 * POST /api/action/submit — enqueue a warm or unload action
 * GET /api/action/queue/:providerId — get current queue state
 *
 * POST responses:
 * - 400 when `type` is not `warm`/`unload`, `providerId` is missing or not a
 *   string, or `model` is present but not a string;
 * - 409 when the host is unknown or its liveness is `down`, when the queue
 *   asks for confirmation, or when the queue rejects for another reason;
 * - 429 when the queue reports a pending action;
 * - 202 with `{ actionId, status: 'queued' }` on success.
 *
 * The request body is untrusted: it is checked to be an object before any
 * field is read, so a missing or non-object body yields a 400 rather than a
 * TypeError (500).
 */
export function registerActionRoutes(
  app: FastifyInstance,
  actionQueue: ActionQueue,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  app.post('/api/action/submit', async (req: FastifyRequest, reply: FastifyReply) => {
    const rawBody: unknown = req.body;
    const body: Record<string, unknown> =
      typeof rawBody === 'object' && rawBody !== null ? (rawBody as Record<string, unknown>) : {};
    const type = body.type;
    const providerId = body.providerId;
    const rawModel = body.model;
    const confirmed = body.confirmed === true;

    if (type !== 'warm' && type !== 'unload') {
      return reply.status(400).send({ error: 'type must be warm or unload' });
    }
    if (typeof providerId !== 'string' || providerId.length === 0) {
      return reply.status(400).send({ error: 'providerId is required' });
    }
    // null is treated like an omitted model.
    if (rawModel != null && typeof rawModel !== 'string') {
      return reply.status(400).send({ error: 'model must be a string' });
    }
    const model: string | undefined = rawModel == null ? undefined : rawModel;

    // Check host liveness
    const hostState = fleet.hosts.get(providerId);
    if (!hostState || hostState.liveness === 'down') {
      return reply.status(409).send({ error: 'host offline' });
    }

    const action = {
      actionId: randomUUID(),
      type,
      providerId,
      model,
      confirmed,
      createdAt: new Date(),
    };

    const result = actionQueue.submit(action);

    if (!result.ok) {
      if (result.requiresConfirmation) {
        return reply.status(409).send({
          error: result.error,
          requiresConfirmation: true,
        });
      }
      if (result.pending) {
        return reply.status(429).send({
          error: result.error,
          pending: result.pending,
        });
      }
      return reply.status(409).send({ error: result.error });
    }

    // Publish action queued event
    emitter.publish({
      type: 'control_job' as const,
      seq: hostState.seq,
      jobType: 'action' as const,
      jobId: action.actionId,
      status: 'queued' as const,
      detail: {
        actionType: action.type,
        providerId: action.providerId,
        model: action.model ?? null,
      },
    });

    return reply.status(202).send({
      actionId: action.actionId,
      status: 'queued',
    });
  });

  app.get('/api/action/queue/:providerId', async (req: FastifyRequest, reply: FastifyReply) => {
    const { providerId } = req.params as { providerId: string };
    const state = actionQueue.getState(providerId);

    if (!state) {
      return reply.status(404).send({ error: 'host not found' });
    }

    return reply.send({
      providerId,
      depth: state.queue.length,
      running: state.running,
      entries: state.queue.map((e) => ({
        actionId: e.action.actionId,
        type: e.action.type,
        model: e.action.model ?? null,
        status: e.status,
        error: e.error ?? null,
        enqueuedAt: e.enqueuedAt.toISOString(),
      })),
    });
  });
}
|
||||
492
apps/control/src/routes/bench.ts
Normal file
492
apps/control/src/routes/bench.ts
Normal file
@@ -0,0 +1,492 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { acquireHostAccess } from '../services/host-access.js';
|
||||
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
|
||||
import { runBenchSuite } from '../services/bench-engine.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
|
||||
|
||||
/**
 * Register bench routes.
 *
 * POST /api/bench/suite — create (or overwrite by id) a suite definition
 * GET /api/bench/suites — list suites
 * GET /api/bench/suites/:id — get suite
 * POST /api/bench/run — start a bench run (gated through acquireHostAccess)
 * GET /api/bench/runs — list runs (optionally filtered by ?suiteId=)
 * GET /api/bench/runs/:id — get run + samples
 * GET /api/bench/baselines — get baselines per (provider_id, model)
 *
 * @param app     Fastify instance the routes are attached to.
 * @param sql     Tagged-template SQL client used for all persistence.
 * @param fleet   Fleet state; `fleet.hosts` supplies inflight counts and `seq`.
 * @param emitter Event sink handed to the async bench runner.
 */
export function registerBenchRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  /** Columns selected from bench_suites (without created_at). */
  type BenchSuiteRow = {
    id: string;
    name: string;
    provider_id: string;
    model: string;
    prompt_tokens: string;
    gen_tokens: string;
    concurrency: string;
    repetitions: number;
    metadata: string | null;
  };
  type BenchSuiteListRow = BenchSuiteRow & { created_at: string };

  /** Columns selected from bench_runs. */
  type BenchRunRow = {
    id: string;
    suite_id: string;
    job_type: string;
    status: string;
    started_at: string | null;
    finished_at: string | null;
    total_samples: number;
    completed_samples: number;
    concurrent_foreign_requests: number;
    regression_flag: string | null;
    aggregate: string | null;
    error: string | null;
    created_at: string;
  };

  /** Treat a missing or non-object request body as an empty object so validation yields a 400. */
  const asRecord = (value: unknown): Record<string, unknown> =>
    typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : {};

  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === 'string' && value.length > 0;

  /** A sweep axis must be a non-empty array of finite numbers. */
  const isNumberList = (value: unknown): value is number[] =>
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((n) => typeof n === 'number' && Number.isFinite(n));

  /** Convert a bench_suites row into the camelCase suite shape. */
  const suiteFields = (s: BenchSuiteRow) => ({
    id: s.id,
    name: s.name,
    providerId: s.provider_id,
    model: s.model,
    promptTokens: jsonbNumberArray(s.prompt_tokens),
    genTokens: jsonbNumberArray(s.gen_tokens),
    concurrency: jsonbNumberArray(s.concurrency),
    repetitions: s.repetitions,
    metadata: jsonbObject(s.metadata) ?? undefined,
  });

  const toSuiteDto = (s: BenchSuiteListRow) => ({ ...suiteFields(s), createdAt: s.created_at });

  /** Convert a bench_runs row into the camelCase response shape. */
  const toRunDto = (r: BenchRunRow) => ({
    id: r.id,
    suiteId: r.suite_id,
    jobType: r.job_type,
    status: r.status,
    startedAt: r.started_at,
    finishedAt: r.finished_at,
    totalSamples: r.total_samples,
    completedSamples: r.completed_samples,
    concurrentForeignRequests: r.concurrent_foreign_requests,
    regressionFlag: r.regression_flag,
    aggregate: jsonbObject(r.aggregate),
    error: r.error,
    createdAt: r.created_at,
  });

  // ─── suite CRUD ──────────────────────────────────────────────────────────

  app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = asRecord(req.body);
    const rawId = body.id;
    const name = body.name;
    const providerId = body.providerId;
    const model = body.model;
    const promptTokens = body.promptTokens;
    const genTokens = body.genTokens;
    const concurrency = body.concurrency;
    const rawRepetitions = body.repetitions;
    const rawMetadata = body.metadata;

    if (!isNonEmptyString(name) || !isNonEmptyString(providerId) || !isNonEmptyString(model)) {
      return reply.status(400).send({ error: 'name, providerId, and model are required' });
    }
    // Array.isArray is required here: a bare `.length` check would also accept strings.
    if (!isNumberList(promptTokens) || !isNumberList(genTokens) || !isNumberList(concurrency)) {
      return reply.status(400).send({ error: 'promptTokens, genTokens, and concurrency must each have at least one value' });
    }
    if (rawId !== undefined && rawId !== null && !isNonEmptyString(rawId)) {
      return reply.status(400).send({ error: 'id must be a non-empty string' });
    }

    const repetitions = rawRepetitions ?? 1;
    if (typeof repetitions !== 'number' || !Number.isInteger(repetitions) || repetitions < 1) {
      return reply.status(400).send({ error: 'repetitions must be a positive integer' });
    }

    let metadata: Record<string, unknown> | undefined;
    if (rawMetadata !== undefined && rawMetadata !== null) {
      if (typeof rawMetadata !== 'object' || Array.isArray(rawMetadata)) {
        return reply.status(400).send({ error: 'metadata must be an object' });
      }
      metadata = rawMetadata as Record<string, unknown>;
    }

    const id = isNonEmptyString(rawId) ? rawId : randomUUID();

    // Upsert: re-posting an existing id overwrites every column of that suite.
    await sql`
      INSERT INTO bench_suites (id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata)
      VALUES (${id}, ${name}, ${providerId}, ${model}, ${sql.json(promptTokens as never)}, ${sql.json(genTokens as never)}, ${sql.json(concurrency as never)}, ${repetitions}, ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`})
      ON CONFLICT (id) DO UPDATE SET
        name = EXCLUDED.name,
        provider_id = EXCLUDED.provider_id,
        model = EXCLUDED.model,
        prompt_tokens = EXCLUDED.prompt_tokens,
        gen_tokens = EXCLUDED.gen_tokens,
        concurrency = EXCLUDED.concurrency,
        repetitions = EXCLUDED.repetitions,
        metadata = EXCLUDED.metadata
    `;

    return reply.status(201).send({ id });
  });

  app.get('/api/bench/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites
      ORDER BY created_at DESC
    `;

    return reply.send({ suites: suites.map(toSuiteDto) });
  });

  app.get('/api/bench/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const rows = await sql<BenchSuiteListRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata, created_at
      FROM bench_suites WHERE id = ${id}
    `;

    const row = rows[0];
    if (!row) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    return reply.send(toSuiteDto(row));
  });

  // ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────

  app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = asRecord(req.body);
    const suiteId = body.suiteId;
    const temperature = body.temperature ?? 0.7;
    const topP = body.topP ?? 0.9;

    if (!isNonEmptyString(suiteId)) {
      return reply.status(400).send({ error: 'suiteId is required' });
    }
    if (typeof temperature !== 'number' || !Number.isFinite(temperature)) {
      return reply.status(400).send({ error: 'temperature must be a number' });
    }
    if (typeof topP !== 'number' || !Number.isFinite(topP)) {
      return reply.status(400).send({ error: 'topP must be a number' });
    }

    // Load suite.
    const suiteRows = await sql<BenchSuiteRow[]>`
      SELECT id, name, provider_id, model, prompt_tokens, gen_tokens, concurrency, repetitions, metadata
      FROM bench_suites WHERE id = ${suiteId}
    `;

    const suiteRow = suiteRows[0];
    if (!suiteRow) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    const suite: BenchSuite = suiteFields(suiteRow);

    // P3.3: Safety check — report inflight traffic on the target host to the caller.
    const hostState = fleet.hosts.get(suite.providerId);
    const recentTraffic = checkRecentTraffic(hostState);

    // Resolve the base URL before asking for host access, so a request that
    // can never run does not take a grant it will not use.
    const baseUrl = resolveBaseUrl(suite.providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
    }

    // P3.4: Gate through acquireHostAccess seam.
    const grant = await acquireHostAccess(suite.providerId, 'bench');
    if (!grant.ok) {
      return reply.status(409).send({
        error: 'host access denied',
        reason: grant.reason,
      });
    }

    // Get seq for the host (0 when the host is not present in fleet state).
    const seq = hostState?.seq ?? 0;

    // Run the bench suite asynchronously (non-blocking HTTP response). The
    // promise is detached, so attach a handler: a rejection must be logged
    // rather than surface as an unhandled rejection.
    void runBenchAsync(
      { suite, baseUrl, temperature, topP },
      sql,
      emitter,
      seq,
      suite.providerId,
    ).catch((err: unknown) => {
      app.log.error(
        { err: err instanceof Error ? err.message : String(err) },
        'bench: async runner rejected',
      );
    });

    return reply.status(202).send({
      status: 'queued',
      suiteId: suite.id,
      recentTraffic,
    });
  });

  // ─── runs listing ────────────────────────────────────────────────────────

  app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const suiteId = query.suiteId;

    // Filtered listing returns every run of the suite; the unfiltered listing
    // is capped at the 100 most recent runs.
    const runs = suiteId
      ? await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs WHERE suite_id = ${suiteId}
          ORDER BY created_at DESC
        `
      : await sql<BenchRunRow[]>`
          SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
          FROM bench_runs
          ORDER BY created_at DESC
          LIMIT 100
        `;

    return reply.send({ runs: runs.map(toRunDto) });
  });

  app.get('/api/bench/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };

    const runRows = await sql<BenchRunRow[]>`
      SELECT id, suite_id, job_type, status, started_at, finished_at, total_samples, completed_samples, concurrent_foreign_requests, regression_flag, aggregate, error, created_at
      FROM bench_runs WHERE id = ${id}
    `;

    const runRow = runRows[0];
    if (!runRow) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const samples = await sql<{
      id: number;
      prompt_tokens: number;
      gen_tokens: number;
      concurrency: number;
      repetition: number;
      ttft_ms: number | null;
      total_ms: number | null;
      prompt_tps: number | null;
      gen_tps: number | null;
      cache_n: number | null;
      error: string | null;
    }[]>`
      SELECT id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error
      FROM bench_samples WHERE run_id = ${id}
      ORDER BY prompt_tokens, gen_tokens, concurrency, repetition
    `;

    return reply.send({
      run: toRunDto(runRow),
      samples: samples.map((sample) => ({
        id: sample.id,
        promptTokens: sample.prompt_tokens,
        genTokens: sample.gen_tokens,
        concurrency: sample.concurrency,
        repetition: sample.repetition,
        ttftMs: sample.ttft_ms,
        totalMs: sample.total_ms,
        promptTps: sample.prompt_tps,
        genTps: sample.gen_tps,
        cacheN: sample.cache_n,
        error: sample.error,
      })),
    });
  });

  // ─── baselines ───────────────────────────────────────────────────────────

  app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      provider_id: string;
      model: string;
      run_id: string;
      aggregate: string;
      created_at: string;
    }[]>`
      SELECT provider_id, model, run_id, aggregate, created_at
      FROM bench_baselines
      ORDER BY provider_id, model
    `;

    return reply.send({
      baselines: rows.map((row) => ({
        providerId: row.provider_id,
        model: row.model,
        runId: row.run_id,
        aggregate: jsonbObject(row.aggregate),
        createdAt: row.created_at,
      })),
    });
  });
}
|
||||
|
||||
/**
 * P3.3: Summarise in-flight traffic on the target host (for takeover confirmation).
 *
 * Adds up the `inflight` counter of every model tracked for the host.
 *
 * @param hostState Host entry holding the per-model map, or `undefined` when the host is unknown.
 * @returns `inflightCount` is the summed counter; `hasRecentTraffic` is true when that sum is positive.
 *          An unknown host reports no traffic.
 */
function checkRecentTraffic(hostState: { models: Map<string, { inflight: number }> } | undefined): { hasRecentTraffic: boolean; inflightCount: number } {
  const inflightCount =
    hostState === undefined
      ? 0
      : [...hostState.models.values()].reduce((sum, entry) => sum + entry.inflight, 0);

  return { hasRecentTraffic: inflightCount > 0, inflightCount };
}
|
||||
|
||||
/**
 * Look up a provider's base URL by delegating to `resolveProviderBaseUrl`.
 * The value comes from the provider registry (LlamaProvider.baseUrl), never from ssh_host.
 *
 * @param providerId Provider whose base URL is wanted.
 * @returns The base URL, or `null` when the lookup yields none.
 */
function resolveBaseUrl(providerId: string): string | null {
  const registryUrl = resolveProviderBaseUrl(providerId);
  return registryUrl;
}
|
||||
|
||||
/**
 * Async bench runner: fire-and-forget, records concurrent_foreign_requests.
 * A6: sources from activity stream during [started_at, finished_at] window,
 * minus the bench's own samples count.
 *
 * Every await is inside the try block, so a failure anywhere is logged and,
 * once the run id is known, recorded on the run row and published as a
 * `failed` control_job event. The caller detaches this promise.
 *
 * NOTE(review): the run row is looked up (status = 'running') BEFORE
 * runBenchSuite is invoked. If runBenchSuite is what inserts that row, this
 * lookup finds nothing on a fresh run and the function returns without
 * benchmarking — confirm where bench_runs rows are created.
 *
 * @param params     Suite plus target base URL and sampling parameters, forwarded to runBenchSuite.
 * @param sql        Tagged-template SQL client.
 * @param emitter    Event sink; forwarded to runBenchSuite and used for the failure event.
 * @param seq        Sequence number stamped on published events.
 * @param providerId Provider whose control_requests rows are counted as foreign traffic.
 */
async function runBenchAsync(
  params: { suite: BenchSuite; baseUrl: string; temperature?: number; topP?: number },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  providerId: string,
): Promise<void> {
  const { suite } = params;
  let runId: string | undefined;

  try {
    // Find the latest running run for this suite.
    const latestRun = await sql<{ id: string; started_at: string | null }[]>`
      SELECT id, started_at FROM bench_runs
      WHERE suite_id = ${suite.id} AND status = 'running'
      ORDER BY created_at DESC LIMIT 1
    `;

    const current = latestRun[0];
    if (!current) {
      benchLogger?.error?.({}, 'bench: no running run found');
      return;
    }
    runId = current.id;

    // Progress is published via emitter in runBenchSuite; nothing to do here.
    const progressHandler = (_progress: BenchRunProgress): void => {};

    await runBenchSuite(params, sql, emitter, seq, progressHandler);

    // A6: Record concurrent_foreign_requests from activity stream during run window.
    // Count control_requests for this provider in [started_at, finished_at],
    // minus the bench's own sample count.
    const runData = await sql<{ started_at: string | null; finished_at: string | null; completed_samples: number }[]>`
      SELECT started_at, finished_at, completed_samples FROM bench_runs WHERE id = ${runId}
    `;
    const window = runData[0];

    // The row may be gone or still missing timestamps; skip the bookkeeping then.
    if (window?.started_at && window.finished_at) {
      const foreignCount = await sql<{ count: number }[]>`
        SELECT COUNT(*)::INT AS count FROM control_requests
        WHERE provider_id = ${providerId}
          AND ts >= ${window.started_at}::timestamptz
          AND ts <= ${window.finished_at}::timestamptz
      `;
      const totalForeign = (foreignCount[0]?.count ?? 0) - window.completed_samples;
      // Clamp at zero: the subtraction can go negative.
      await sql`
        UPDATE bench_runs SET concurrent_foreign_requests = ${Math.max(0, totalForeign)}
        WHERE id = ${runId}
      `;
    }
  } catch (err: unknown) {
    // Anything can be thrown, including null/undefined; never assume an Error.
    const msg = err instanceof Error ? err.message : String(err);
    benchLogger?.error?.({ err: msg }, 'bench: run failed');

    // Failure before a run row was identified: there is nothing to mark failed.
    if (runId === undefined) {
      return;
    }

    // Best-effort status update; if the database is the thing that failed,
    // this must not turn into an unhandled rejection.
    await sql`
      UPDATE bench_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `.catch((updateErr: unknown) => {
      benchLogger?.error?.(
        { err: updateErr instanceof Error ? updateErr.message : String(updateErr) },
        'bench: could not record run failure',
      );
    });

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'bench' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  }
}
|
||||
|
||||
/**
 * Logger used by the detached bench runner. It stays `undefined` until
 * `setBenchApp` is called, which is why call sites use optional chaining.
 */
let benchLogger: FastifyBaseLogger | undefined;

/**
 * Install the Fastify logger the async bench runner reports through.
 *
 * @param logger Logger to store; replaces any previously installed one.
 */
export function setBenchApp(logger: FastifyBaseLogger): void {
  benchLogger = logger;
}
|
||||
52
apps/control/src/routes/captures.ts
Normal file
52
apps/control/src/routes/captures.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import { fetchCapture, persistCapture } from '../services/capture-fetch.js';
|
||||
|
||||
/**
 * Register capture inspection routes.
 *
 * GET /api/capture/:providerId/:swapEntryId — fetch capture from host, persist trimmed copy
 *
 * @param app Fastify instance the route is attached to.
 * @param sql Tagged-template SQL client used to resolve the host and persist the capture.
 */
export function registerCaptureRoutes(
  app: FastifyInstance,
  sql: Sql,
): void {
  app.get(
    '/api/capture/:providerId/:swapEntryId',
    async (req: FastifyRequest, reply: FastifyReply) => {
      const params = req.params as { providerId: string; swapEntryId: string };

      // parseInt alone accepts trailing garbage ("12abc" → 12, "1e3" → 1), so
      // require the whole segment to be a decimal integer before converting.
      const swapEntryId = /^-?\d+$/.test(params.swapEntryId)
        ? Number.parseInt(params.swapEntryId, 10)
        : Number.NaN;

      if (!Number.isSafeInteger(swapEntryId)) {
        return reply.status(400).send({ error: 'invalid swapEntryId' });
      }

      // Resolve host URL from control_hosts
      const hosts = await sql<{ ssh_host: string }[]>`
        SELECT ssh_host FROM control_hosts WHERE provider_id = ${params.providerId}
      `;

      const sshHost = hosts[0]?.ssh_host;
      if (!sshHost) {
        return reply.status(404).send({ error: 'host not found or no SSH host configured' });
      }

      // The capture endpoint is addressed on port 8401 of the host's ssh_host.
      const baseUrl = `http://${sshHost}:8401`;

      const result = await fetchCapture(baseUrl, params.providerId, swapEntryId);

      if (!result.ok) {
        return reply.status(404).send({ error: result.error });
      }

      // Persist trimmed copy
      try {
        await persistCapture(sql, result.capture!);
      } catch (err: unknown) {
        // Persistence failure is non-fatal — still return the capture
        app.log.warn(
          { err: err instanceof Error ? err.message : String(err) },
          'capture: persist failed',
        );
      }

      return reply.send(result.capture);
    },
  );
}
|
||||
366
apps/control/src/routes/evals.ts
Normal file
366
apps/control/src/routes/evals.ts
Normal file
@@ -0,0 +1,366 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import {
|
||||
listEvalSuites,
|
||||
getEvalSuite,
|
||||
upsertEvalSuite,
|
||||
listEvalRuns,
|
||||
getEvalResults,
|
||||
seedEvalSuites,
|
||||
} from '../services/eval-suites.js';
|
||||
import { jsonbArray, jsonbObject } from '../services/jsonb.js';
|
||||
|
||||
/**
 * Register eval routes.
 *
 * POST /api/eval/suite — create/update an eval suite
 * GET /api/eval/suites — list suites
 * GET /api/eval/suites/:id — get suite
 * POST /api/eval/seed — seed suites from data/ YAML
 * POST /api/eval/run — start an eval run
 * GET /api/eval/runs — list runs
 * GET /api/eval/runs/:id — get run + results
 * GET /api/eval/leaderboard — per (provider_id, model) aggregate scores
 *
 * Also installs an `onReady` hook that seeds suites once at startup.
 *
 * @param app     Fastify instance the routes and hook are attached to.
 * @param sql     Tagged-template SQL client.
 * @param fleet   Fleet state; `fleet.hosts` supplies the `seq` for a provider.
 * @param emitter Event sink handed to the async eval runner.
 */
export function registerEvalRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  emitter: DeltaEmitter,
): void {
  type EvalRunRow = Awaited<ReturnType<typeof listEvalRuns>>[number];

  /** Treat a missing or non-object request body as an empty object so validation yields a 400. */
  const asRecord = (value: unknown): Record<string, unknown> =>
    typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : {};

  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === 'string' && value.length > 0;

  const isOptionalString = (value: unknown): value is string | null | undefined =>
    value === undefined || value === null || typeof value === 'string';

  const errorMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  /** Convert an eval run row into the camelCase response shape. */
  const toRunDto = (r: EvalRunRow) => ({
    id: r.id,
    suiteId: r.suite_id,
    jobType: r.job_type,
    providerId: r.provider_id,
    model: r.model,
    quant: r.quant,
    status: r.status,
    judgeModel: r.judge_model,
    startedAt: r.started_at,
    finishedAt: r.finished_at,
    totalTasks: r.total_tasks,
    completedTasks: r.completed_tasks,
    aggregate: jsonbObject(r.aggregate),
    error: r.error,
    createdAt: r.created_at,
  });

  // Seed suites from data/ YAML on startup (idempotent). A seed failure is
  // logged as a warning and does not prevent the server from becoming ready.
  app.addHook('onReady', async () => {
    await seedEvalSuites(sql).catch((err: unknown) => {
      app.log.warn({ err: errorMessage(err) }, 'eval: seed failed');
    });
  });

  // ─── suite CRUD ──────────────────────────────────────────────────────────

  app.post('/api/eval/suite', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = asRecord(req.body);
    const rawId = body.id;
    const name = body.name;
    const kind = body.kind;
    const tasks = body.tasks;
    const rawJudgeModel = body.judgeModel;
    const rawMetadata = body.metadata;

    if (!isNonEmptyString(name) || !kind || !Array.isArray(tasks) || tasks.length === 0) {
      return reply.status(400).send({ error: 'name, kind, and tasks are required' });
    }
    // Only these two kinds are part of the declared type; anything else was
    // previously stored unchecked.
    if (kind !== 'chat' && kind !== 'code') {
      return reply.status(400).send({ error: 'kind must be chat or code' });
    }
    if (!isOptionalString(rawId) || !isOptionalString(rawJudgeModel)) {
      return reply.status(400).send({ error: 'id and judgeModel must be strings when provided' });
    }

    let metadata: Record<string, unknown> | undefined;
    if (rawMetadata !== undefined && rawMetadata !== null) {
      if (typeof rawMetadata !== 'object' || Array.isArray(rawMetadata)) {
        return reply.status(400).send({ error: 'metadata must be an object' });
      }
      metadata = rawMetadata as Record<string, unknown>;
    }

    const suiteKind: 'chat' | 'code' = kind;
    const suiteTasks: unknown[] = tasks;
    const suiteId = await upsertEvalSuite(
      sql,
      rawId ?? null,
      name,
      suiteKind,
      suiteTasks,
      rawJudgeModel ?? null,
      metadata,
    );
    return reply.status(201).send({ id: suiteId });
  });

  app.get('/api/eval/suites', async (_req: FastifyRequest, reply: FastifyReply) => {
    const suites = await listEvalSuites(sql);
    return reply.send({
      suites: suites.map((s) => ({
        id: s.id,
        name: s.name,
        kind: s.kind,
        version: s.version,
        tasks: jsonbArray(s.tasks),
        judgeModel: s.judge_model,
        judgeModelVersion: s.judge_model_version,
        metadata: jsonbObject(s.metadata) ?? undefined,
        createdAt: s.created_at,
      })),
    });
  });

  app.get('/api/eval/suites/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const suite = await getEvalSuite(sql, id);
    if (!suite) {
      return reply.status(404).send({ error: 'suite not found' });
    }
    return reply.send({
      id: suite.id,
      name: suite.name,
      kind: suite.kind,
      version: suite.version,
      tasks: jsonbArray(suite.tasks),
      judgeModel: suite.judge_model,
      judgeModelVersion: suite.judge_model_version,
      metadata: jsonbObject(suite.metadata) ?? undefined,
      createdAt: suite.created_at,
    });
  });

  // ─── seed from data/ ─────────────────────────────────────────────────────

  app.post('/api/eval/seed', async (_req: FastifyRequest, reply: FastifyReply) => {
    // Unlike the startup hook, a failure here propagates to the client.
    await seedEvalSuites(sql);
    return reply.send({ ok: true });
  });

  // ─── run launcher ────────────────────────────────────────────────────────

  app.post('/api/eval/run', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = asRecord(req.body);
    const suiteId = body.suiteId;
    const providerId = body.providerId;
    const model = body.model;
    const rawQuant = body.quant;

    if (!isNonEmptyString(suiteId) || !isNonEmptyString(providerId) || !isNonEmptyString(model)) {
      return reply.status(400).send({ error: 'suiteId, providerId, and model are required' });
    }
    if (!isOptionalString(rawQuant)) {
      return reply.status(400).send({ error: 'quant must be a string when provided' });
    }
    const quant = rawQuant ?? null;

    const suite = await getEvalSuite(sql, suiteId);
    if (!suite) {
      return reply.status(404).send({ error: 'suite not found' });
    }

    const tasks = jsonbArray(suite.tasks);
    const judgeModel = suite.judge_model;
    // seq falls back to 0 when the provider is not present in fleet state.
    const seq = fleet.hosts.get(providerId)?.seq ?? 0;

    // Start the eval run asynchronously. The promise is detached, so attach a
    // handler: a rejection must be logged rather than go unhandled.
    void runEvalAsync(
      { suiteId, providerId, model, quant, tasks, judgeModel },
      sql,
      emitter,
      seq,
      app.log,
    ).catch((err: unknown) => {
      app.log.error({ err: errorMessage(err) }, 'eval: async runner rejected');
    });

    return reply.status(202).send({ status: 'queued', suiteId, providerId, model });
  });

  // ─── runs listing ────────────────────────────────────────────────────────

  app.get('/api/eval/runs', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const runs = await listEvalRuns(sql, query.suiteId, query.providerId);
    return reply.send({ runs: runs.map(toRunDto) });
  });

  app.get('/api/eval/runs/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    // NOTE(review): this loads the unfiltered run list and searches it in
    // memory. If listEvalRuns caps its result, runs outside that window will
    // report 404 — a by-id lookup in the service would be safer. Confirm.
    const runs = await listEvalRuns(sql);
    const run = runs.find((candidate) => candidate.id === id);
    if (!run) {
      return reply.status(404).send({ error: 'run not found' });
    }

    const results = await getEvalResults(sql, id);

    return reply.send({
      run: toRunDto(run),
      results: results.map((result) => ({
        id: result.id,
        taskId: result.task_id,
        taskIndex: result.task_index,
        score: result.score,
        maxScore: result.max_score,
        rationale: result.rationale,
        sandboxExitCode: result.sandbox_exit_code,
        sandboxStderr: result.sandbox_stderr,
        sandboxStdout: result.sandbox_stdout,
        executionMs: result.execution_ms,
        error: result.error,
      })),
    });
  });

  // ─── leaderboard ─────────────────────────────────────────────────────────

  app.get('/api/eval/leaderboard', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const kind = query.kind as 'chat' | 'code' | undefined;

    // Aggregate scores per (provider_id, model, quant, suite kind) from
    // completed eval_runs. COUNT(...) is a bigint in Postgres; the ::INT cast
    // makes run_count arrive as a JS number, matching the declared row type.
    const rows = await sql<{
      provider_id: string;
      model: string;
      quant: string | null;
      suite_kind: string;
      avg_score: number;
      run_count: number;
      latest_run_at: string;
    }[]>`
      SELECT
        er.provider_id,
        er.model,
        er.quant,
        es.kind AS suite_kind,
        AVG(CASE WHEN er.aggregate IS NOT NULL THEN (er.aggregate::jsonb ->> 'avgScore')::float ELSE NULL END) AS avg_score,
        COUNT(DISTINCT er.id)::INT AS run_count,
        MAX(er.finished_at) AS latest_run_at
      FROM eval_runs er
      JOIN eval_suites es ON er.suite_id = es.id
      WHERE er.status = 'completed'
      ${kind ? sql`AND es.kind = ${kind}` : sql`AND 1=1`}
      GROUP BY er.provider_id, er.model, er.quant, es.kind
      ORDER BY avg_score DESC NULLS LAST
    `;

    return reply.send({
      leaderboard: rows.map((row) => ({
        providerId: row.provider_id,
        model: row.model,
        quant: row.quant,
        suiteKind: row.suite_kind,
        avgScore: row.avg_score,
        runCount: row.run_count,
        latestRunAt: row.latest_run_at,
      })),
    });
  });
}
|
||||
|
||||
/**
|
||||
* Async eval runner: fire-and-forget.
|
||||
* Delegates to judge runner (chat) or sandbox runner (code).
|
||||
*/
|
||||
async function runEvalAsync(
  params: {
    suiteId: string;
    providerId: string;
    model: string;
    quant: string | null;
    tasks: unknown[];
    judgeModel: string | null;
  },
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  logger: import('fastify').FastifyBaseLogger,
): Promise<void> {
  // Lifecycle: insert a 'running' eval_runs row, publish a 'running' control_job
  // delta, delegate to the code or judge runner, aggregate eval_results, then
  // persist and publish the final status. Any thrown error marks the run failed.
  const { suiteId, providerId, model, quant, tasks, judgeModel } = params;
  const runId = `eval_${Date.now()}_${crypto.randomUUID().slice(0, 8)}`;

  try {
    await sql`
      INSERT INTO eval_runs (id, suite_id, job_type, provider_id, model, quant, status, judge_model, started_at, total_tasks)
      VALUES (${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant}, 'running', ${judgeModel}, clock_timestamp(), ${tasks.length})
    `;

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: 'running' as const,
      detail: { suiteId, providerId, model, totalTasks: tasks.length },
    });

    // The suite kind is inferred from the first task: a truthy `test_code`
    // field selects the sandbox (code) runner, anything else the judge runner.
    const firstTask = tasks[0] as Record<string, unknown> | undefined;
    const isCodeSuite = Boolean(firstTask?.test_code);

    let completed = 0;
    let error: string | null = null;

    // Import runners dynamically to avoid circular deps.
    if (isCodeSuite) {
      const { runCodeEval } = await import('../services/sandbox-runner.js');
      const result = await runCodeEval(
        { runId, providerId, model, tasks: tasks as Array<Record<string, unknown>>, quant },
        sql,
        emitter,
        seq,
        (progress) => {
          completed = progress.completedTasks;
        },
      );
      if (result.error) error = result.error;
    } else {
      const { runJudgeEval } = await import('../services/judge-runner.js');
      const result = await runJudgeEval(
        { runId, providerId, model, tasks: tasks as Array<Record<string, unknown>>, judgeModel, quant },
        sql,
        emitter,
        seq,
        logger,
        (progress) => {
          completed = progress.completedTasks;
        },
      );
      if (result.error) error = result.error;
    }

    // Compute aggregate. Score and max_score are read from the SAME row so a
    // row with a null score cannot shift later scores onto the wrong max_score.
    const results = await sql<{ score: number | null; max_score: number | null }[]>`
      SELECT score, max_score FROM eval_results WHERE run_id = ${runId}
    `;
    let scoreSum = 0;
    let scoredCount = 0;
    let passedTasks = 0;
    for (const row of results) {
      if (row.score == null) continue;
      scoreSum += row.score;
      scoredCount += 1;
      // A task passes at >= 70% of max_score; when max_score is missing or 0
      // any scored task counts as passed.
      if (!row.max_score || row.score / row.max_score >= 0.7) passedTasks += 1;
    }
    const avgScore = scoredCount > 0 ? scoreSum / scoredCount : null;

    await sql`
      UPDATE eval_runs
      SET status = ${error ? 'failed' : 'completed'},
          finished_at = clock_timestamp(),
          completed_tasks = ${completed},
          aggregate = ${avgScore != null ? sql.json({ avgScore, totalTasks: tasks.length, passedTasks } as never) : sql`NULL::jsonb`},
          error = ${error}
      WHERE id = ${runId}
    `;

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: error ? 'failed' as const : 'completed' as const,
      detail: { avgScore, error },
    });
  } catch (err) {
    // `err` may be any thrown value (including null), so never dereference it blindly.
    const msg = err instanceof Error ? err.message : String(err);
    logger.error({ err: msg }, 'eval: run failed');

    // Best effort: the row may not exist if the initial INSERT itself failed.
    await sql`
      UPDATE eval_runs
      SET status = 'failed', finished_at = clock_timestamp(), error = ${msg}
      WHERE id = ${runId}
    `.catch(() => {});

    emitter.publish({
      type: 'control_job' as const,
      seq,
      jobType: 'eval' as const,
      jobId: runId,
      status: 'failed' as const,
      detail: { error: msg },
    });
  }
}
|
||||
205
apps/control/src/routes/gateway.ts
Normal file
205
apps/control/src/routes/gateway.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import {
|
||||
VIRTUAL_MODELS,
|
||||
resolveCandidates,
|
||||
splitComposite,
|
||||
} from '../services/gateway.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
|
||||
/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 *   GET  /v1/models               — advertise the virtual models
 *   POST /v1/chat/completions     — resolve a policy, dispatch with failover
 *   GET  /upstream/:model/props   — props for getModelContext (best candidate)
 *
 * Every dispatch forwards X-Boo-Source to the chosen target so attribution
 * survives the extra hop, and is recorded in route_dispatch_log.
 *
 * Failover only happens BEFORE a response has been started: once a candidate
 * answers with a 2xx status it is the chosen target, and a failure while
 * relaying its stream ends the response instead of trying another candidate.
 *
 * @param app      Fastify instance the routes are registered on.
 * @param sql      Database handle used for candidate resolution and dispatch logging.
 * @param fleet    Fleet state passed through to `resolveCandidates`.
 * @param _emitter Unused; kept so the registration signature matches its siblings.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  _emitter: DeltaEmitter,
): void {
  const SSE_HEADERS = {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  };

  /** Joins a provider base URL and an absolute path without doubling slashes. */
  const upstreamUrl = (baseUrl: string, path: string): string =>
    `${baseUrl.replace(/\/+$/, '')}${path}`;

  /** Releases the body of an upstream response that will not be read. */
  const discardBody = async (res: Response): Promise<void> => {
    await res.body?.cancel().catch(() => { /* stream already closed */ });
  };

  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  // ─── props (for getModelContext) ─────────────────────────────────────────
  // Resolve candidates and proxy the first healthy candidate's props so the
  // caller can read default_generation_settings.n_ctx.

  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      try {
        const url = upstreamUrl(baseUrl, `/upstream/${encodeURIComponent(split.model)}/props`);
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          await discardBody(res);
          continue;
        }
        const body = await res.json();
        return reply.send(body);
      } catch {
        // Unreachable candidate or unparsable props — try the next one.
        continue;
      }
    }
    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  // ─── chat completions (dispatch with failover) ───────────────────────────

  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as Record<string, unknown> | null | undefined;
    const requestedModel = body?.model;
    if (!body || typeof requestedModel !== 'string' || requestedModel === '') {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    // A repeated header arrives as an array; attribute to the first value.
    const sourceHeader = req.headers['x-boo-source'];
    const source = (Array.isArray(sourceHeader) ? sourceHeader[0] : sourceHeader) ?? null;
    const stream = body.stream === true;
    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);

    if (candidates.length === 0) {
      await logDispatch(sql, { virtualModel, chosen: null, tried: [], status: 'no_candidates', source, error: 'no healthy candidates', durationMs: 0 });
      return reply.status(503).send({
        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
      });
    }

    const tried: string[] = [];
    const startedAt = Date.now();

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      tried.push(compositeId);

      const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
      if (source) upstreamHeaders['X-Boo-Source'] = source;

      // The virtual model name is replaced by the candidate's concrete model.
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      let res: Response;
      try {
        res = await fetch(upstreamUrl(baseUrl, '/v1/chat/completions'), {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });
      } catch {
        // Connection error — failover to the next candidate.
        continue;
      }

      if (!res.ok) {
        // HTTP error before body — eligible for failover to the next candidate.
        await discardBody(res);
        continue;
      }

      // Success: dispatch chosen. Log and stream/return through.
      await logDispatch(sql, {
        virtualModel,
        chosen: compositeId,
        tried,
        status: 'dispatched',
        source,
        error: null,
        durationMs: Date.now() - startedAt,
      });

      if (stream) {
        // Headers set through reply.header() are only emitted by reply.send(),
        // so they are passed to writeHead directly; hijack() tells Fastify the
        // raw response is now owned by this handler.
        reply.hijack();
        reply.raw.writeHead(200, SSE_HEADERS);

        const reader = res.body?.getReader();
        if (reader) {
          try {
            while (true) {
              const { done, value } = await reader.read();
              if (done) break;
              // Relay the bytes untouched; no decode/re-encode round trip.
              reply.raw.write(value);
            }
          } catch (err) {
            // The response has already started, so failover is no longer
            // possible: stop reading and close the stream.
            req.log.warn(
              { err: err instanceof Error ? err.message : String(err), compositeId },
              'gateway: upstream stream aborted',
            );
            await reader.cancel().catch(() => { /* already closed */ });
          }
        }
        reply.raw.end();
        return;
      }

      // Non-streaming: pass JSON through.
      try {
        const json = await res.json();
        return reply.send(json);
      } catch {
        // Unparsable body: nothing has been sent yet, so try the next candidate.
        continue;
      }
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
    });
  });
}
|
||||
|
||||
/**
 * Appends one row to route_dispatch_log describing a gateway dispatch attempt.
 *
 * The chosen composite id (when present) is split into provider id and model;
 * both columns are NULL when nothing was chosen or the id cannot be split.
 * A failing INSERT is swallowed on purpose so logging can never break dispatch.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;

  const target = chosen ? splitComposite(chosen) : null;
  const chosenProviderId = target?.providerId ?? null;
  const chosenModel = target?.model ?? null;

  const insert = sql`
    INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
    VALUES (
      ${virtualModel},
      ${chosenProviderId},
      ${chosenModel},
      ${sql.json(tried as never)},
      ${status},
      ${source},
      ${error},
      ${durationMs}
    )
  `;
  await insert.catch(() => { /* logging must never break dispatch */ });
}
|
||||
235
apps/control/src/routes/playground.ts
Normal file
235
apps/control/src/routes/playground.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
|
||||
/**
 * Playground routes: model select, param controls, streaming chat.
 *
 *   GET  /api/playground/models    — list available models from providers
 *   POST /api/playground/chat      — streaming chat against a model
 *   POST /api/playground/chat-ab   — side-by-side A/B compare
 *
 * Both chat routes answer with a Server-Sent Events stream. Each stream emits
 * exactly one terminal marker (`data: [DONE]`, or `{lane, done: true}` per lane
 * for A/B) whether or not the upstream sent its own `[DONE]` sentinel.
 *
 * @param app Fastify instance the routes are registered on.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  const SSE_HEADERS = {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  };
  const UPSTREAM_DONE = 'data: [DONE]';

  /** Strips trailing slashes so joined URLs never contain `//`. */
  const trimBase = (baseUrl: string): string => baseUrl.replace(/\/+$/, '');

  /** Normalizes any thrown value into a message string. */
  const errorMessage = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  /**
   * Switches the reply to a raw SSE stream. Headers set via reply.header() are
   * only emitted by reply.send(), so they are handed to writeHead directly.
   */
  const beginSse = (reply: FastifyReply): void => {
    reply.hijack();
    reply.raw.writeHead(200, SSE_HEADERS);
  };

  /** POSTs a streaming chat-completion request to a provider. */
  const postChat = (baseUrl: string, payload: Record<string, unknown>): Promise<Response> =>
    fetch(`${trimBase(baseUrl)}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ...payload, stream: true }),
      signal: AbortSignal.timeout(120_000),
    });

  /**
   * Reads an upstream body and calls `onLine` for every non-empty, trimmed
   * line, including a final line that is not newline-terminated.
   * Returns false when the response has no body to read.
   */
  const forEachLine = async (res: Response, onLine: (line: string) => void): Promise<boolean> => {
    const reader = res.body?.getReader();
    if (!reader) return false;

    const decoder = new TextDecoder();
    let pending = '';
    const emit = (raw: string): void => {
      const line = raw.trim();
      if (line) onLine(line);
    };

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split('\n');
      // The last element is an incomplete line; keep it for the next chunk.
      pending = lines.pop() ?? '';
      lines.forEach(emit);
    }
    // Flush the decoder and whatever was left without a trailing newline.
    emit(pending + decoder.decode());
    return true;
  };

  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    // Resolve provider URLs from the loaded registry.
    const { providers } = getLlamaProviders();

    // Each lookup resolves to null on failure, so one dead provider never
    // hides the others.
    const listings = await Promise.all(
      providers.map(async (p) => {
        try {
          const res = await fetch(`${trimBase(p.baseUrl)}/v1/models`, {
            signal: AbortSignal.timeout(5_000),
          });
          if (!res.ok) {
            await res.body?.cancel().catch(() => { /* already closed */ });
            return null;
          }
          const data = await res.json() as { data?: Array<{ id: string }> };
          return {
            providerId: p.id,
            models: data?.data?.map((m) => m.id) ?? [],
          };
        } catch {
          return null;
        }
      }),
    );

    const models: Array<{ providerId: string; models: string[] }> = [];
    for (const listing of listings) {
      if (listing) models.push(listing);
    }

    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────

  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body as Record<string, unknown> | null | undefined) ?? {};
    const providerId = body.providerId as string;
    const model = body.model as string;
    const messages = body.messages as Array<{ role: string; content: string }>;
    const temperature = (body.temperature as number) ?? 0.7;
    const topP = (body.topP as number) ?? 0.9;
    const maxTokens = (body.maxTokens as number) ?? 1024;

    if (!providerId || !model || !messages?.length) {
      return reply.status(400).send({ error: 'providerId, model, and messages are required' });
    }

    const baseUrl = resolveProviderBaseUrl(providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${providerId}` });
    }

    // Stream the response back to the client via SSE.
    beginSse(reply);

    try {
      const res = await postChat(baseUrl, {
        model,
        messages,
        temperature,
        top_p: topP,
        max_tokens: maxTokens,
      });

      if (!res.ok) {
        const errBody = await res.text().catch(() => '');
        reply.raw.write(`data: ${JSON.stringify({ error: `Request failed: ${res.status} ${errBody.slice(0, 200)}` })}\n\n`);
        return;
      }

      let doneSent = false;
      const hadBody = await forEachLine(res, (line) => {
        if (line === UPSTREAM_DONE) {
          if (!doneSent) {
            reply.raw.write('data: [DONE]\n\n');
            doneSent = true;
          }
          return;
        }
        // N3: pass through the raw SSE line from upstream as-is.
        // If it already has 'data: ' prefix, don't double-prefix.
        const payload = line.startsWith('data: ') ? line : `data: ${line}`;
        reply.raw.write(`${payload}\n\n`);
      });

      if (!hadBody) {
        reply.raw.write('data: {"error": "No response body"}\n\n');
        return;
      }
      // Only add the terminal marker when upstream did not send one itself.
      if (!doneSent) reply.raw.write('data: [DONE]\n\n');
    } catch (err) {
      reply.raw.write(`data: ${JSON.stringify({ error: errorMessage(err) })}\n\n`);
    } finally {
      reply.raw.end();
    }
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────

  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = (req.body as Record<string, unknown> | null | undefined) ?? {};
    const providerIdA = body.providerIdA as string;
    const modelA = body.modelA as string;
    const providerIdB = body.providerIdB as string;
    const modelB = body.modelB as string;
    const messages = body.messages as Array<{ role: string; content: string }>;
    const temperature = (body.temperature as number) ?? 0.7;
    const topP = (body.topP as number) ?? 0.9;
    const maxTokens = (body.maxTokens as number) ?? 1024;

    if (!providerIdA || !modelA || !providerIdB || !modelB || !messages?.length) {
      return reply.status(400).send({ error: 'Both models and messages are required' });
    }

    const baseUrlA = resolveProviderBaseUrl(providerIdA);
    const baseUrlB = resolveProviderBaseUrl(providerIdB);

    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    // Stream both responses via SSE with lane identifiers.
    beginSse(reply);

    const streamModel = async (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> => {
      // Every event carries its lane first so the client can demultiplex.
      const send = (event: Record<string, unknown>): void => {
        reply.raw.write(`data: ${JSON.stringify({ lane, ...event })}\n\n`);
      };

      try {
        const res = await postChat(baseUrl, {
          model,
          messages,
          temperature,
          top_p: topP,
          max_tokens: maxTokens,
        });

        if (!res.ok) {
          // Drain the body so the upstream connection is released.
          await res.text().catch(() => '');
          send({ error: `Request failed: ${res.status}` });
          return;
        }

        let doneSent = false;
        const hadBody = await forEachLine(res, (line) => {
          if (line === UPSTREAM_DONE) {
            if (!doneSent) {
              send({ done: true });
              doneSent = true;
            }
            return;
          }
          // N3: strip 'data: ' prefix from upstream before re-wrapping with lane info.
          send({ raw: line.startsWith('data: ') ? line.slice(6) : line });
        });

        if (!hadBody) {
          // Tell the client this lane ended instead of leaving it waiting.
          send({ error: 'No response body' });
          return;
        }
        if (!doneSent) send({ done: true });
      } catch (err) {
        send({ error: errorMessage(err) });
      }
    };

    // Run both streams concurrently; each lane reports its own errors, so
    // neither promise rejects.
    await Promise.all([
      streamModel('A', baseUrlA, modelA),
      streamModel('B', baseUrlB, modelB),
    ]);

    reply.raw.end();
  });
}
|
||||
136
apps/control/src/routes/policies.ts
Normal file
136
apps/control/src/routes/policies.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import { VIRTUAL_MODELS } from '../services/gateway.js';
|
||||
import { jsonbStringArray } from '../services/jsonb.js';
|
||||
|
||||
/**
 * P7.4: Route policy CRUD + dispatch log.
 *
 *   GET    /api/policies                 — list policies
 *   POST   /api/policies                 — create/update a policy (upsert by virtual_model)
 *   DELETE /api/policies/:id             — delete a policy
 *   GET    /api/policies/dispatch-log    — recent gateway dispatches
 *   GET    /api/policies/virtual-models  — the available virtual model tokens
 *
 * @param app Fastify instance the routes are registered on.
 * @param sql Database handle for route_policies and route_dispatch_log.
 */
export function registerPolicyRoutes(app: FastifyInstance, sql: Sql): void {
  app.get('/api/policies/virtual-models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({ virtualModels: VIRTUAL_MODELS });
  });

  app.get('/api/policies', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<{
      id: string;
      name: string;
      virtual_model: string;
      candidates: string;
      fallback: string | null;
      enabled: boolean;
      created_at: string;
      updated_at: string;
    }[]>`
      SELECT id, name, virtual_model, candidates, fallback, enabled, created_at, updated_at
      FROM route_policies
      ORDER BY virtual_model
    `;
    return reply.send({
      policies: rows.map((r) => ({
        id: r.id,
        name: r.name,
        virtualModel: r.virtual_model,
        candidates: safeParseArray(r.candidates),
        fallback: r.fallback,
        enabled: r.enabled,
        createdAt: r.created_at,
        updatedAt: r.updated_at,
      })),
    });
  });

  app.post('/api/policies', async (req: FastifyRequest, reply: FastifyReply) => {
    // A missing body is treated as empty so validation answers 400, not 500.
    const body = (req.body as Record<string, unknown> | null | undefined) ?? {};
    const id = (body.id as string) ?? randomUUID();
    const name = body.name as string;
    const virtualModel = body.virtualModel as string;
    const candidates = body.candidates as unknown;
    const fallback = (body.fallback as string) ?? null;
    const enabled = body.enabled !== false;

    if (!name || !virtualModel) {
      return reply.status(400).send({ error: 'name and virtualModel are required' });
    }
    if (!(VIRTUAL_MODELS as readonly string[]).includes(virtualModel)) {
      return reply.status(400).send({ error: `virtualModel must be one of ${VIRTUAL_MODELS.join(', ')}` });
    }
    // Non-string entries are dropped; a non-array value becomes an empty list.
    const candidateList = Array.isArray(candidates)
      ? candidates.filter((c): c is string => typeof c === 'string')
      : [];

    // Upsert by virtual_model (UNIQUE) so there is one policy per virtual model.
    // On conflict the existing row keeps its own id, so the id is read back
    // from the statement instead of echoing the one generated above.
    const rows = await sql<{ id: string }[]>`
      INSERT INTO route_policies (id, name, virtual_model, candidates, fallback, enabled, updated_at)
      VALUES (${id}, ${name}, ${virtualModel}, ${sql.json(candidateList as never)}, ${fallback}, ${enabled}, clock_timestamp())
      ON CONFLICT (virtual_model) DO UPDATE SET
        name = EXCLUDED.name,
        candidates = EXCLUDED.candidates,
        fallback = EXCLUDED.fallback,
        enabled = EXCLUDED.enabled,
        updated_at = clock_timestamp()
      RETURNING id
    `;
    return reply.status(201).send({ id: rows[0]?.id ?? id });
  });

  app.delete('/api/policies/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    // Answers ok even when no row matched.
    await sql`DELETE FROM route_policies WHERE id = ${id}`;
    return reply.send({ ok: true });
  });

  app.get('/api/policies/dispatch-log', async (req: FastifyRequest, reply: FastifyReply) => {
    const query = req.query as Record<string, string | undefined>;
    const virtualModel = query.virtualModel;

    // Newest 200 dispatches, optionally restricted to one virtual model.
    const rows = virtualModel
      ? await sql<DispatchLogRow[]>`
          SELECT id, ts, virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms
          FROM route_dispatch_log WHERE virtual_model = ${virtualModel}
          ORDER BY ts DESC LIMIT 200
        `
      : await sql<DispatchLogRow[]>`
          SELECT id, ts, virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms
          FROM route_dispatch_log
          ORDER BY ts DESC LIMIT 200
        `;

    return reply.send({
      dispatches: rows.map((r) => ({
        id: r.id,
        ts: r.ts,
        virtualModel: r.virtual_model,
        chosenProviderId: r.chosen_provider_id,
        chosenModel: r.chosen_model,
        candidatesTried: safeParseArray(r.candidates_tried),
        status: r.status,
        source: r.source,
        error: r.error,
        durationMs: r.duration_ms,
      })),
    });
  });
}
|
||||
|
||||
/** Row shape selected from route_dispatch_log by the dispatch-log route. */
interface DispatchLogRow {
  id: number;
  ts: string;
  virtual_model: string;

  // Target of the dispatch; both are nullable.
  chosen_provider_id: string | null;
  chosen_model: string | null;

  // Left untyped here and normalized with safeParseArray before being returned.
  candidates_tried: unknown;

  status: string;
  source: string | null;
  error: string | null;
  duration_ms: number | null;
}
|
||||
|
||||
// Local alias for jsonbStringArray, used on the jsonb columns read in this file.
// Those columns come back already parsed from porsager; the helper tolerates both forms.
const safeParseArray: typeof jsonbStringArray = jsonbStringArray;
|
||||
122
apps/control/src/routes/reports.ts
Normal file
122
apps/control/src/routes/reports.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply, FastifyBaseLogger } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import { generateReport, runReportSchedulerTick } from '../services/reports.js';
|
||||
import { jsonbObject } from '../services/jsonb.js';
|
||||
|
||||
/**
 * P6.2: Reports tab API + scheduled digest.
 *
 *   GET  /api/reports            — list generated reports (newest first)
 *   GET  /api/reports/:id        — single report (markdown + stats)
 *   POST /api/reports/generate   — manually trigger a digest now
 *   GET  /api/reports/schedule   — current schedule meta
 *   POST /api/reports/schedule   — update schedule meta {interval, enabled}
 *
 * @param app Fastify instance the routes are registered on.
 * @param sql Database handle for control_reports and control_schedule_meta.
 */
export function registerReportRoutes(app: FastifyInstance, sql: Sql): void {
  /** Columns shared by the list and detail queries. */
  type ReportHead = {
    id: string;
    kind: string;
    interval: string;
    period_start: string;
    period_end: string;
    created_at: string;
  };

  /** Converts the shared columns to the camelCase wire shape. */
  const toWire = (row: ReportHead) => ({
    id: row.id,
    kind: row.kind,
    interval: row.interval,
    periodStart: row.period_start,
    periodEnd: row.period_end,
    createdAt: row.created_at,
  });

  /** Request body, or an empty object when none was sent. */
  const bodyOf = (req: FastifyRequest): Record<string, unknown> =>
    (req.body as Record<string, unknown>) ?? {};

  /** Anything other than the literal 'weekly' falls back to 'daily'. */
  const intervalOf = (body: Record<string, unknown>): 'daily' | 'weekly' =>
    body.interval === 'weekly' ? 'weekly' : 'daily';

  app.get('/api/reports', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<ReportHead[]>`
      SELECT id, kind, interval, period_start, period_end, created_at
      FROM control_reports
      ORDER BY created_at DESC
      LIMIT 100
    `;
    return reply.send({ reports: rows.map(toWire) });
  });

  app.get('/api/reports/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const [report] = await sql<(ReportHead & { markdown: string; stats: unknown })[]>`
      SELECT id, kind, interval, period_start, period_end, markdown, stats, created_at
      FROM control_reports WHERE id = ${id}
    `;
    if (!report) {
      return reply.status(404).send({ error: 'report not found' });
    }
    return reply.send({
      id: report.id,
      kind: report.kind,
      interval: report.interval,
      periodStart: report.period_start,
      periodEnd: report.period_end,
      markdown: report.markdown,
      stats: jsonbObject(report.stats),
      createdAt: report.created_at,
    });
  });

  app.post('/api/reports/generate', async (req: FastifyRequest, reply: FastifyReply) => {
    const id = await generateReport(sql, intervalOf(bodyOf(req)));
    return reply.status(201).send({ id });
  });

  app.get('/api/reports/schedule', async (_req: FastifyRequest, reply: FastifyReply) => {
    const [meta] = await sql<{ interval: string; enabled: boolean; last_run_at: string | null }[]>`
      SELECT interval, enabled, last_run_at FROM control_schedule_meta WHERE name = 'report-digest'
    `;
    // With no schedule row, report the defaults: daily and enabled.
    return reply.send({
      interval: meta?.interval ?? 'daily',
      enabled: meta?.enabled ?? true,
      lastRunAt: meta?.last_run_at ?? null,
    });
  });

  app.post('/api/reports/schedule', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = bodyOf(req);
    const interval = intervalOf(body);
    // Enabled unless the caller explicitly sends `false`.
    const enabled = body.enabled !== false;
    await sql`
      UPDATE control_schedule_meta
      SET interval = ${interval}, enabled = ${enabled}
      WHERE name = 'report-digest'
    `;
    return reply.send({ interval, enabled });
  });
}
|
||||
|
||||
/**
 * Runs the report scheduler inside this process: one tick fires immediately
 * (boot catch-up) and further ticks follow every hour.
 *
 * A failing tick is logged as a warning and does not stop later ticks.
 *
 * @param sql Database handle passed to `runReportSchedulerTick`.
 * @param log Logger receiving the "digest generated" / "tick failed" entries.
 * @returns A stop function that cancels the hourly timer (for onClose).
 */
export function startReportScheduler(sql: Sql, log: FastifyBaseLogger): () => void {
  const HOUR_MS = 3600_000;

  async function runOnce(): Promise<void> {
    try {
      const outcome = await runReportSchedulerTick(sql);
      if (outcome.ran) {
        log.info({ reportId: outcome.reportId }, 'reports: digest generated');
      }
    } catch (err) {
      log.warn({ err: (err as Error).message }, 'reports: scheduler tick failed');
    }
  }

  // Catch-up on boot; fire-and-forget because runOnce handles its own errors.
  void runOnce();

  const timer = setInterval(() => {
    void runOnce();
  }, HOUR_MS);

  return function stop(): void {
    clearInterval(timer);
  };
}
|
||||
32
apps/control/src/routes/routing.ts
Normal file
32
apps/control/src/routes/routing.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import { computeRoutingScores, BADGE_LABELS } from '../services/routing-scores.js';
|
||||
|
||||
/**
 * P6.1: Advisory routing scores.
 *
 *   GET /api/routing/scores — per (provider_id, model) advisory scores + badges.
 *
 * Surfaced as model-picker badges in BooChat. Advisory only; no enforcement.
 *
 * @param app   Fastify instance the route is registered on.
 * @param sql   Database handle passed to `computeRoutingScores`.
 * @param fleet Fleet state passed to `computeRoutingScores`.
 */
export function registerRoutingRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
): void {
  app.get('/api/routing/scores', async (_req: FastifyRequest, reply: FastifyReply) => {
    const scores = await computeRoutingScores(sql, fleet);

    // compositeId -> badge kinds, so the picker can do a cheap lookup.
    // Entries without any badge are left out of the map.
    const badges = scores.reduce<Record<string, string[]>>((byComposite, entry) => {
      if (entry.badges.length > 0) {
        byComposite[entry.compositeId] = entry.badges;
      }
      return byComposite;
    }, {});

    return reply.send({ scores, badges, badgeLabels: BADGE_LABELS });
  });
}
|
||||
262
apps/control/src/routes/ssh-config.ts
Normal file
262
apps/control/src/routes/ssh-config.ts
Normal file
@@ -0,0 +1,262 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import {
|
||||
validateLlamaConfig,
|
||||
computeDiff,
|
||||
readRemoteConfig,
|
||||
applyRemoteConfig,
|
||||
sshExec,
|
||||
type SshTarget,
|
||||
type SshExec,
|
||||
type SshMode,
|
||||
} from '../services/ssh-config.js';
|
||||
import { runModelPull, validateRepoId } from '../services/model-pull.js';
|
||||
|
||||
/**
 * P9.1: SSH config editor for llama-swap hosts.
 *
 *   GET   /api/hosts                      — list control_hosts with SSH config status
 *   PATCH /api/hosts/:id                  — set ssh_host/ssh_user/ssh_key_path/config_path/restart_cmd/ssh_mode
 *   GET   /api/hosts/:id/config           — SSH read the remote config
 *   POST  /api/hosts/:id/config/validate  — validate a candidate config (no host touch)
 *   POST  /api/hosts/:id/config/diff      — diff a candidate vs the live remote config
 *   POST  /api/hosts/:id/config/apply     — validate -> backup -> write -> restart -> health-wait
 *   POST  /api/hosts/:id/pull             — pull a HuggingFace model (non-blocking job)
 *
 * Status conventions used by the handlers: 404 for an unknown host, 400 for
 * malformed input or a host whose SSH settings are incomplete, 502 when reading
 * the remote config throws or an apply step after validation fails, 500 when no
 * config schema could be loaded, 409 when apply is not confirmed, and 202 when
 * a pull job has been started.
 *
 * @param app     Fastify instance the routes are registered on.
 * @param sql     Tagged-template SQL client used for `control_hosts` reads/writes.
 * @param config  Service config; consulted once to locate the config schema.
 * @param fleet   In-memory fleet state; supplies the host `seq` passed to pull jobs.
 * @param emitter Delta emitter passed to `runModelPull`.
 * @param exec    SSH executor. Injectable for tests; production uses the real
 *                `sshExec` (spawn ssh).
 */
export function registerSshConfigRoutes(
  app: FastifyInstance,
  sql: Sql,
  config: Config,
  fleet: FleetState,
  emitter: DeltaEmitter,
  exec: SshExec = sshExec,
): void {
  // Loaded once at registration time; null means no schema file could be read.
  const schema = loadConfigSchema(config);

  /** Request bodies arrive untyped; a missing body is treated as an empty object. */
  const bodyOf = (req: FastifyRequest): Record<string, unknown> =>
    (req.body as Record<string, unknown>) ?? {};

  /** Extract a printable message from whatever was thrown (not always an Error). */
  const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  app.get('/api/hosts', async (_req: FastifyRequest, reply: FastifyReply) => {
    const rows = await sql<HostRow[]>`
      SELECT provider_id, ssh_host, ssh_user, ssh_key_path, config_path, restart_cmd, ssh_mode, os, gpu_label, enabled
      FROM control_hosts ORDER BY provider_id
    `;
    return reply.send({
      hosts: rows.map((r) => ({
        providerId: r.provider_id,
        sshHost: r.ssh_host,
        sshUser: r.ssh_user,
        sshKeyPath: r.ssh_key_path,
        configPath: r.config_path,
        restartCmd: r.restart_cmd,
        sshMode: r.ssh_mode ?? 'shell',
        os: r.os,
        gpuLabel: r.gpu_label,
        enabled: r.enabled,
        // restart_cmd is deliberately not part of this flag: it is only needed for apply in shell mode.
        sshConfigured: !!(r.ssh_host && r.ssh_user && r.ssh_key_path && r.config_path),
      })),
    });
  });

  // NOTE: this PATCH replaces every editable column. A field omitted from the
  // body is written as NULL, and a missing/unknown sshMode is written as 'shell'.
  app.patch('/api/hosts/:id', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = bodyOf(req);

    // The body is untrusted JSON: refuse anything that is not a string (or
    // null/absent) before it reaches the TEXT columns.
    const textFields = ['sshHost', 'sshUser', 'sshKeyPath', 'configPath', 'restartCmd'] as const;
    const invalid = textFields.filter((key) => body[key] != null && typeof body[key] !== 'string');
    if (invalid.length > 0) {
      return reply.status(400).send({ error: `${invalid.join(', ')} must be a string or null` });
    }
    const text = (key: (typeof textFields)[number]): string | null =>
      (body[key] as string | null | undefined) ?? null;
    const sshMode: SshMode = body.sshMode === 'wrapper' ? 'wrapper' : 'shell';

    const rows = await sql`
      UPDATE control_hosts
      SET ssh_host = ${text('sshHost')}, ssh_user = ${text('sshUser')}, ssh_key_path = ${text('sshKeyPath')},
          config_path = ${text('configPath')}, restart_cmd = ${text('restartCmd')}, ssh_mode = ${sshMode}
      WHERE provider_id = ${id}
      RETURNING provider_id
    `;
    if (rows.length === 0) {
      return reply.status(404).send({ error: 'host not found' });
    }
    return reply.send({ ok: true });
  });

  app.get('/api/hosts/:id/config', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target || !host.config_path) {
      return reply.status(400).send({ error: 'host has no SSH config configured (set ssh_host/ssh_user/ssh_key_path/config_path first)' });
    }
    try {
      const content = await readRemoteConfig(target, host.config_path, exec, hostMode(host));
      return reply.send({ configPath: host.config_path, content });
    } catch (err) {
      return reply.status(502).send({ error: messageOf(err) });
    }
  });

  // Pure validation against the loaded schema; the host id in the URL is not looked up.
  app.post('/api/hosts/:id/config/validate', async (req: FastifyRequest, reply: FastifyReply) => {
    const { content } = bodyOf(req);
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    if (!schema) {
      return reply.status(500).send({ error: 'config schema not available on this host' });
    }
    const result = validateLlamaConfig(content, schema);
    return reply.send({ valid: result.valid, errors: result.errors });
  });

  app.post('/api/hosts/:id/config/diff', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const { content } = bodyOf(req);
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target || !host.config_path) {
      return reply.status(400).send({ error: 'host has no SSH config configured' });
    }
    try {
      const current = await readRemoteConfig(target, host.config_path, exec, hostMode(host));
      return reply.send({ diff: computeDiff(current, content) });
    } catch (err) {
      return reply.status(502).send({ error: messageOf(err) });
    }
  });

  app.post('/api/hosts/:id/config/apply', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = bodyOf(req);
    const content = body.content;
    // Only the literal boolean `true` counts as confirmation.
    const confirm = body.confirm === true;
    if (typeof content !== 'string') {
      return reply.status(400).send({ error: 'content (string) is required' });
    }
    if (!confirm) {
      return reply.status(409).send({ error: 'apply requires confirmation', requiresConfirmation: true });
    }
    if (!schema) {
      return reply.status(500).send({ error: 'config schema not available on this host' });
    }
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    const mode = hostMode(host);
    // restart_cmd is only used in shell mode; in wrapper mode the wrapper's
    // `restart` verb hardcodes the service, so restart_cmd is not required.
    if (!target || !host.config_path || (mode === 'shell' && !host.restart_cmd)) {
      return reply.status(400).send({ error: 'host needs ssh_host/ssh_user/ssh_key_path/config_path (+ restart_cmd in shell mode) set first' });
    }
    const baseUrl = resolveProviderBaseUrl(id);
    if (!baseUrl) {
      return reply.status(400).send({ error: `no base URL in registry for provider ${id}` });
    }

    const result = await applyRemoteConfig({
      target,
      configPath: host.config_path,
      restartCmd: host.restart_cmd ?? '',
      newConfig: content,
      schema,
      baseUrl,
      exec,
      mode,
    });

    // A validation failure is the caller's fault (400); any other failed step is an upstream problem (502).
    const status = result.ok ? 200 : (result.step === 'validate' ? 400 : 502);
    return reply.status(status).send(result);
  });

  // ─── model pull (non-blocking job) ─────────────────────────────────────────
  app.post('/api/hosts/:id/pull', async (req: FastifyRequest, reply: FastifyReply) => {
    const { id } = req.params as { id: string };
    const body = bodyOf(req);
    const repo = body.repo;
    // A non-string modelsDir is ignored rather than passed on to the pull job.
    const modelsDir = typeof body.modelsDir === 'string' ? body.modelsDir : undefined;

    if (typeof repo !== 'string' || !validateRepoId(repo)) {
      return reply.status(400).send({ error: 'repo must be a valid HuggingFace id (org/name)' });
    }
    const host = await loadHost(sql, id);
    if (!host) return reply.status(404).send({ error: 'host not found' });
    const target = sshTargetOf(host);
    if (!target) {
      return reply.status(400).send({ error: 'host has no SSH configured' });
    }
    const mode = hostMode(host);
    if (mode === 'shell' && !modelsDir) {
      return reply.status(400).send({ error: 'shell-mode host requires a modelsDir in the request body' });
    }

    const jobId = `pull_${Date.now()}_${randomUUID().slice(0, 8)}`;
    const seq = fleet.hosts.get(id)?.seq ?? 0;

    // Fire and forget; progress streams over control_job frames. The job runs
    // inside a guarded wrapper so a failure is logged instead of becoming an
    // unhandled promise rejection after the 202 has already been sent.
    void (async () => {
      try {
        await runModelPull({ jobId, target, repo, mode, modelsDir }, exec, emitter, seq);
      } catch (err) {
        app.log.error({ err, jobId, providerId: id }, 'model pull job failed');
      }
    })();

    return reply.status(202).send({ status: 'queued', jobId, repo });
  });
}
|
||||
|
||||
/**
 * Normalise a host row's stored `ssh_mode` into an `SshMode`.
 *
 * Only the exact string 'wrapper' selects wrapper mode; every other value,
 * including NULL, resolves to 'shell'.
 */
function hostMode(host: HostRow): SshMode {
  if (host.ssh_mode === 'wrapper') {
    return 'wrapper';
  }
  return 'shell';
}
|
||||
|
||||
/**
 * One row of `control_hosts`, exactly as selected by the queries in this file
 * (snake_case column names, NULL for anything not configured yet).
 */
interface HostRow {
  /** Primary key; the same id is used in the `/api/hosts/:id` routes. */
  provider_id: string;

  // SSH connection triple. All three must be set before a target can be built.
  ssh_host: string | null;
  ssh_user: string | null;
  ssh_key_path: string | null;

  /** Path of the config file on the remote host. */
  config_path: string | null;
  /** Restart command; only required for apply when the host is in shell mode. */
  restart_cmd: string | null;
  /** 'wrapper' selects wrapper mode; any other value is treated as 'shell'. */
  ssh_mode: string | null;

  // Descriptive columns returned as-is by the host listing.
  os: string | null;
  gpu_label: string | null;
  enabled: boolean;
}
|
||||
|
||||
/**
 * Fetch a single `control_hosts` row by provider id.
 *
 * @returns the first matching row, or `null` when the query returns no rows.
 */
async function loadHost(sql: Sql, id: string): Promise<HostRow | null> {
  const [match] = await sql<HostRow[]>`
    SELECT provider_id, ssh_host, ssh_user, ssh_key_path, config_path, restart_cmd, ssh_mode, os, gpu_label, enabled
    FROM control_hosts WHERE provider_id = ${id}
  `;
  return match ?? null;
}
|
||||
|
||||
/**
 * Build the SSH target for a host row.
 *
 * @returns `{ host, user, keyPath }` when ssh_host, ssh_user and ssh_key_path
 *          are all truthy; `null` if any of them is NULL or an empty string.
 */
function sshTargetOf(host: HostRow): SshTarget | null {
  const { ssh_host: sshHost, ssh_user: user, ssh_key_path: keyPath } = host;
  if (sshHost && user && keyPath) {
    return { host: sshHost, user, keyPath };
  }
  return null;
}
|
||||
|
||||
/**
 * Load the config schema from the configured path or the bundled copy.
 *
 * Resolution order:
 *   1. `config.LLAMA_CONFIG_SCHEMA_PATH`, when set;
 *   2. the bundled `../data/config-schema.json`, resolved relative to this module.
 *
 * A candidate file is accepted only if it can be read, parses as JSON, and the
 * parsed value is a plain (non-null, non-array) object. An override that fails
 * any of these checks falls back to the bundled copy.
 *
 * @returns the parsed schema object, or `null` when no candidate is usable.
 */
function loadConfigSchema(config: Config): object | null {
  const here = dirname(fileURLToPath(import.meta.url));
  // dist/routes/ssh-config.js -> dist/data/config-schema.json
  const bundled = resolve(here, '../data/config-schema.json');
  const preferred = config.LLAMA_CONFIG_SCHEMA_PATH ?? bundled;

  /** Read and parse one candidate; null on any read/parse failure or a non-object document. */
  const readSchema = (path: string): object | null => {
    try {
      const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'));
      // JSON.parse also accepts scalars, null and arrays; none of those is a usable schema.
      return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed) ? parsed : null;
    } catch {
      // Best-effort by design: an unreadable or malformed file is "not available", not a crash.
      return null;
    }
  };

  const fromPreferred = readSchema(preferred);
  if (fromPreferred !== null || preferred === bundled) {
    return fromPreferred;
  }
  return readSchema(bundled);
}
|
||||
109
apps/control/src/routes/ws.ts
Normal file
109
apps/control/src/routes/ws.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import WebSocket from 'ws';
|
||||
import type { FleetState, HostState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { LogRelay } from '../services/log-relay.js';
|
||||
|
||||
/**
 * Registers the control WebSocket endpoint at /api/ws/control.
 *
 * Per connection, in order:
 *   1. send a `control_fleet` snapshot of the current fleet state (B4: fields
 *      sit at the top level of the frame to match the ControlFleetFrame schema);
 *   2. B6: replay the in-memory log tail so late joiners see recent lines;
 *   3. B3: subscribe to the delta emitter and forward every delta as JSON;
 *   4. send a `ping` frame every 30 seconds while the socket stays open.
 *
 * Client rule: buffer pre-snapshot deltas, replay after snapshot applying only
 * seq > snapshot_seq. On service restart, rebuild fleet state from DB before
 * serving snapshots.
 *
 * NOTE(review): the frame-level `seq` is the maximum of the per-host seqs;
 * whether clients compare deltas against it or against each host's own `seq`
 * is not visible here — confirm against the client.
 *
 * @param logRelay Optional source of log tails; when null the replay step is skipped.
 */
export function registerControlWebSocket(
  app: FastifyInstance,
  fleet: FleetState,
  emitter: DeltaEmitter,
  logRelay: LogRelay | null = null,
): void {
  app.get('/api/ws/control', { websocket: true }, (socket, _req) => {
    const { hosts } = buildSnapshot(fleet);

    // Highest per-host seq in the snapshot (0 when there are no hosts).
    let snapshotSeq = 0;
    for (const host of hosts) {
      snapshotSeq = Math.max(snapshotSeq, host.seq);
    }

    const snapshotFrame = {
      type: 'control_fleet' as const,
      seq: snapshotSeq,
      hosts,
    };
    socket.send(JSON.stringify(snapshotFrame));

    // Replay buffered log lines for late joiners.
    if (logRelay && socket.readyState === WebSocket.OPEN) {
      for (const { providerId, source, line } of logRelay.getAllTails()) {
        const logFrame = {
          type: 'control_log' as const,
          seq: snapshotSeq, // tail lines don't carry per-host seq; use snapshot seq
          providerId,
          source,
          line,
        };
        socket.send(JSON.stringify(logFrame));
      }
    }

    // Live updates: forward each emitted delta while the socket is open.
    const unsubscribe = emitter.subscribe((delta: unknown) => {
      if (socket.readyState !== WebSocket.OPEN) return;
      socket.send(JSON.stringify(delta));
    });

    const pingTimer = setInterval(() => {
      if (socket.readyState === WebSocket.OPEN) {
        socket.send(JSON.stringify({ type: 'ping' as const }));
        return;
      }
      // Socket is no longer open: stop pinging.
      clearInterval(pingTimer);
    }, 30_000);

    // Shared teardown for both 'close' and 'error'.
    const teardown = (): void => {
      clearInterval(pingTimer);
      unsubscribe();
    };
    socket.on('close', teardown);
    socket.on('error', teardown);
  });
}
|
||||
|
||||
/**
 * Serialise the in-memory fleet state into the snapshot payload.
 *
 * Every host in `fleet.hosts` becomes one entry, and every model in the host's
 * `models` map becomes one nested entry. Dates are emitted as ISO-8601 strings;
 * a missing `lastSeenAt` / `ttlDeadline` is emitted as `null`.
 * On restart, the fleet state is rebuilt from DB before snapshots are served.
 */
function buildSnapshot(fleet: FleetState): { hosts: Array<{
  providerId: string;
  liveness: 'connected' | 'reconnecting' | 'down';
  lastSeenAt: string | null;
  seq: number;
  models: Array<{
    model: string;
    state: string;
    ts: string;
    ttlDeadline: string | null;
    inflight: number;
  }>;
}> } {
  // Optional timestamp -> ISO string, or null when absent.
  const isoOrNull = (when: Date | null | undefined): string | null => when?.toISOString() ?? null;

  return {
    hosts: [...fleet.hosts.values()].map(({ providerId, liveness, lastSeenAt, seq, models }) => ({
      providerId,
      liveness,
      lastSeenAt: isoOrNull(lastSeenAt),
      seq,
      models: [...models.values()].map(({ model, state, ts, ttlDeadline, inflight }) => ({
        model,
        state,
        ts: ts.toISOString(),
        ttlDeadline: isoOrNull(ttlDeadline),
        inflight,
      })),
    })),
  };
}
|
||||
291
apps/control/src/schema.sql
Normal file
291
apps/control/src/schema.sql
Normal file
@@ -0,0 +1,291 @@
|
||||
-- P1: BooControl schema -- read-only fleet cockpit tables.
|
||||
-- Applied on startup by apps/control/src/db.ts:applySchema().
|
||||
-- Lives in the same 'boochat' database as BooChat's tables.
|
||||
|
||||
-- Host registry for llama-swap instances, keyed by provider id.
-- The ssh_* / config_path / restart_cmd columns stay NULL until configured (P9).
CREATE TABLE IF NOT EXISTS control_hosts (
  provider_id   TEXT PRIMARY KEY,
  ssh_host      TEXT,
  ssh_user      TEXT,
  ssh_key_path  TEXT,
  config_path   TEXT,
  restart_cmd   TEXT,
  os            TEXT,
  gpu_label     TEXT,
  enabled       BOOLEAN NOT NULL DEFAULT TRUE
);

-- P9 verb-mode: per-host SSH command mode.
--   'shell'   = raw commands (default, backward compatible)
--   'wrapper' = fixed verbs for a forced-command-locked key
-- Added via idempotent ALTER so existing databases pick it up.
ALTER TABLE control_hosts
  ADD COLUMN IF NOT EXISTS ssh_mode TEXT NOT NULL DEFAULT 'shell';

-- Seed display metadata only; existing rows are left untouched.
INSERT INTO control_hosts (provider_id, os, gpu_label)
VALUES
  ('sam-desktop', 'Windows', 'RTX 5090 32GB'),
  ('embedding',   'Linux',   'P104-100 8GB')
ON CONFLICT (provider_id) DO NOTHING;
|
||||
|
||||
-- Request log, ingested from the llama-swap /api/metrics ring.
CREATE TABLE IF NOT EXISTS control_requests (
  id             BIGSERIAL PRIMARY KEY,
  provider_id    TEXT NOT NULL,
  swap_entry_id  INTEGER NOT NULL,
  ts             TIMESTAMPTZ NOT NULL,
  model          TEXT,
  req_path       TEXT,
  status_code    INTEGER,
  duration_ms    INTEGER,
  cache_tokens   INTEGER,
  input_tokens   INTEGER,
  output_tokens  INTEGER,
  prompt_tps     REAL,
  gen_tps        REAL,
  has_capture    BOOLEAN NOT NULL DEFAULT FALSE,
  capture        JSONB,
  UNIQUE (provider_id, swap_entry_id, ts)
);

-- P4: per-consumer attribution column. Added via idempotent ALTER so existing
-- DBs pick it up on next restart. See design §7 "Implementation notes" for the
-- llama-swap ActivityLogEntry discrepancy.
ALTER TABLE control_requests
  ADD COLUMN IF NOT EXISTS source TEXT;

CREATE INDEX IF NOT EXISTS idx_control_requests_provider_ts
  ON control_requests (provider_id, ts DESC);
|
||||
|
||||
-- Raw performance samples from llama-swap /api/performance.
-- At most one sample per (provider_id, ts).
CREATE TABLE IF NOT EXISTS control_perf_samples (
  provider_id  TEXT NOT NULL,
  ts           TIMESTAMPTZ NOT NULL,
  gpu          JSONB,
  sys          JSONB,
  UNIQUE (provider_id, ts)
);

CREATE INDEX IF NOT EXISTS idx_control_perf_samples_provider_ts
  ON control_perf_samples (provider_id, ts DESC);
|
||||
|
||||
-- 5-minute rollup aggregates: at most one row per (provider_id, bucket).
CREATE TABLE IF NOT EXISTS control_perf_rollup_5m (
  provider_id  TEXT NOT NULL,
  bucket       TIMESTAMPTZ NOT NULL,
  gpu_agg      JSONB,
  sys_agg      JSONB,
  UNIQUE (provider_id, bucket)
);
|
||||
|
||||
-- Model state transitions + gap events.
-- The UNIQUE key rejects an exact duplicate of (provider_id, model, state, ts).
CREATE TABLE IF NOT EXISTS control_model_events (
  provider_id  TEXT NOT NULL,
  model        TEXT NOT NULL,
  state        TEXT NOT NULL,
  ts           TIMESTAMPTZ NOT NULL,
  detail       JSONB,
  UNIQUE (provider_id, model, state, ts)
);

CREATE INDEX IF NOT EXISTS idx_control_model_events_provider_ts
  ON control_model_events (provider_id, ts DESC);
|
||||
|
||||
-- P3: Bench engine tables -- additive schema change.

-- Suite definitions: a grid of prompt_tokens x gen_tokens x concurrency,
-- each cell repeated `repetitions` times.
CREATE TABLE IF NOT EXISTS bench_suites (
  id             TEXT PRIMARY KEY,
  name           TEXT NOT NULL,
  provider_id    TEXT NOT NULL,
  model          TEXT NOT NULL,
  prompt_tokens  INTEGER[] NOT NULL,
  gen_tokens     INTEGER[] NOT NULL,
  concurrency    INTEGER[] NOT NULL,
  repetitions    INTEGER NOT NULL DEFAULT 1,
  metadata       JSONB,
  created_at     TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
|
||||
|
||||
-- Individual bench runs (one per suite execution). New rows start as 'queued'.
CREATE TABLE IF NOT EXISTS bench_runs (
  id                           TEXT PRIMARY KEY,
  suite_id                     TEXT NOT NULL REFERENCES bench_suites(id),
  job_type                     TEXT NOT NULL DEFAULT 'bench',
  status                       TEXT NOT NULL DEFAULT 'queued',
  started_at                   TIMESTAMPTZ,
  finished_at                  TIMESTAMPTZ,
  total_samples                INTEGER NOT NULL DEFAULT 0,
  completed_samples            INTEGER NOT NULL DEFAULT 0,
  concurrent_foreign_requests  INTEGER NOT NULL DEFAULT 0,
  temperature                  REAL,
  top_p                        REAL,
  aggregate                    JSONB,
  regression_flag              TEXT,
  error                        TEXT,
  created_at                   TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);

CREATE INDEX IF NOT EXISTS idx_bench_runs_suite_id
  ON bench_runs (suite_id);

CREATE INDEX IF NOT EXISTS idx_bench_runs_status
  ON bench_runs (status);
|
||||
|
||||
-- Raw per-request samples from a bench run. The measurement columns are
-- nullable, and `error` is available for a failed request.
CREATE TABLE IF NOT EXISTS bench_samples (
  id             BIGSERIAL PRIMARY KEY,
  run_id         TEXT NOT NULL REFERENCES bench_runs(id),
  prompt_tokens  INTEGER NOT NULL,
  gen_tokens     INTEGER NOT NULL,
  concurrency    INTEGER NOT NULL,
  repetition     INTEGER NOT NULL,
  ttft_ms        REAL,
  total_ms       REAL,
  prompt_tps     REAL,
  gen_tps        REAL,
  cache_n        INTEGER,
  error          TEXT
);

CREATE INDEX IF NOT EXISTS idx_bench_samples_run_id
  ON bench_samples (run_id);
|
||||
|
||||
-- P3: Baseline aggregates, one row per (provider_id, model).
-- First completed run seeds the baseline; subsequent runs compare against it.
CREATE TABLE IF NOT EXISTS bench_baselines (
  provider_id  TEXT NOT NULL,
  model        TEXT NOT NULL,
  aggregate    JSONB NOT NULL,
  run_id       TEXT NOT NULL,
  created_at   TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
  PRIMARY KEY (provider_id, model)
);
|
||||
|
||||
-- P5: Quality evals + sandbox tables.

-- Eval suite definitions: kind (chat|code), tasks JSONB, judge_model.
-- A suite name may exist in several versions; (name, version) is unique.
CREATE TABLE IF NOT EXISTS eval_suites (
  id                   TEXT PRIMARY KEY,
  name                 TEXT NOT NULL,
  kind                 TEXT NOT NULL,
  version              INTEGER NOT NULL DEFAULT 1,
  tasks                JSONB NOT NULL,
  judge_model          TEXT,
  judge_model_version  TEXT,
  metadata             JSONB,
  created_at           TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
  UNIQUE (name, version)
);

CREATE INDEX IF NOT EXISTS idx_eval_suites_kind
  ON eval_suites (kind);
|
||||
|
||||
-- Individual eval runs (one per suite execution against a model).
-- New rows start as 'queued'.
CREATE TABLE IF NOT EXISTS eval_runs (
  id                   TEXT PRIMARY KEY,
  suite_id             TEXT NOT NULL REFERENCES eval_suites(id),
  job_type             TEXT NOT NULL DEFAULT 'eval',
  provider_id          TEXT NOT NULL,
  model                TEXT NOT NULL,
  quant                TEXT,
  status               TEXT NOT NULL DEFAULT 'queued',
  judge_model          TEXT,
  judge_model_version  TEXT,
  started_at           TIMESTAMPTZ,
  finished_at          TIMESTAMPTZ,
  total_tasks          INTEGER NOT NULL DEFAULT 0,
  completed_tasks      INTEGER NOT NULL DEFAULT 0,
  aggregate            JSONB,
  error                TEXT,
  created_at           TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);

CREATE INDEX IF NOT EXISTS idx_eval_runs_suite_id
  ON eval_runs (suite_id);

CREATE INDEX IF NOT EXISTS idx_eval_runs_status
  ON eval_runs (status);

CREATE INDEX IF NOT EXISTS idx_eval_runs_provider_model
  ON eval_runs (provider_id, model);
|
||||
|
||||
-- Per-task eval results: score, judge rationale, sandbox exit info.
CREATE TABLE IF NOT EXISTS eval_results (
  id                 BIGSERIAL PRIMARY KEY,
  run_id             TEXT NOT NULL REFERENCES eval_runs(id),
  task_id            TEXT NOT NULL,
  task_index         INTEGER NOT NULL,
  score              REAL,
  max_score          REAL,
  rationale          TEXT,
  sandbox_exit_code  INTEGER,
  sandbox_stderr     TEXT,
  sandbox_stdout     TEXT,
  execution_ms       INTEGER,
  error              TEXT
);

CREATE INDEX IF NOT EXISTS idx_eval_results_run_id
  ON eval_results (run_id);
|
||||
|
||||
-- P6.2: Generated fleet reports (markdown digest + JSONB stats).
-- Each report covers the window [period_start, period_end].
CREATE TABLE IF NOT EXISTS control_reports (
  id            TEXT PRIMARY KEY,
  kind          TEXT NOT NULL DEFAULT 'digest',
  interval      TEXT NOT NULL DEFAULT 'daily',
  period_start  TIMESTAMPTZ NOT NULL,
  period_end    TIMESTAMPTZ NOT NULL,
  markdown      TEXT NOT NULL,
  stats         JSONB,
  created_at    TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);

CREATE INDEX IF NOT EXISTS idx_control_reports_created
  ON control_reports (created_at DESC);
|
||||
|
||||
-- P6.2: Scheduler metadata for the in-process report timer. One row per
-- schedule name; last_run_at drives catch-up-on-boot (same pattern as retention).
CREATE TABLE IF NOT EXISTS control_schedule_meta (
  name         TEXT PRIMARY KEY,
  interval     TEXT NOT NULL DEFAULT 'daily',
  enabled      BOOLEAN NOT NULL DEFAULT TRUE,
  last_run_at  TIMESTAMPTZ
);

-- Seed the digest schedule; an existing row is left untouched.
INSERT INTO control_schedule_meta (name, interval, enabled)
VALUES ('report-digest', 'daily', TRUE)
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- P7.1: Routing policies for the auto:* gateway.
--   virtual_model  which virtual model the policy serves (e.g. 'auto:code');
--                  unique, so there is at most one policy per virtual model
--   candidates     ordered list of composite ids ('provider/model')
--   fallback       last-resort composite id
CREATE TABLE IF NOT EXISTS route_policies (
  id             TEXT PRIMARY KEY,
  name           TEXT NOT NULL,
  virtual_model  TEXT NOT NULL,
  candidates     JSONB NOT NULL,
  fallback       TEXT,
  enabled        BOOLEAN NOT NULL DEFAULT TRUE,
  created_at     TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
  updated_at     TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
  UNIQUE (virtual_model)
);
|
||||
|
||||
-- P7.1/P7.4: Per-dispatch log for the gateway. One row per resolved completion
-- routed through a virtual model, recording the chosen target + outcome.
CREATE TABLE IF NOT EXISTS route_dispatch_log (
  id                  BIGSERIAL PRIMARY KEY,
  ts                  TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
  virtual_model       TEXT NOT NULL,
  chosen_provider_id  TEXT,
  chosen_model        TEXT,
  candidates_tried    JSONB,
  status              TEXT NOT NULL,
  source              TEXT,
  error               TEXT,
  duration_ms         INTEGER
);

CREATE INDEX IF NOT EXISTS idx_route_dispatch_log_ts
  ON route_dispatch_log (ts DESC);

CREATE INDEX IF NOT EXISTS idx_route_dispatch_log_virtual
  ON route_dispatch_log (virtual_model, ts DESC);
|
||||
194
apps/control/src/services/__tests__/action-queue.test.ts
Normal file
194
apps/control/src/services/__tests__/action-queue.test.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { ActionQueue } from '../action-queue.js';
|
||||
import type { ActionQueueDeps, QueuedAction } from '../action-queue.js';
|
||||
|
||||
describe('ActionQueue', () => {
  let queue: ActionQueue;
  let deps: ActionQueueDeps;

  /**
   * Build an action literal for `submit`. `confirmed` defaults to false and a
   * fresh `createdAt` is stamped per call; `extra` carries the optional `model`
   * and/or a `confirmed` override.
   */
  const makeAction = <T extends 'warm' | 'unload'>(
    type: T,
    actionId: string,
    providerId: string,
    extra: { model?: string; confirmed?: boolean } = {},
  ) => ({
    actionId,
    type,
    providerId,
    confirmed: false,
    createdAt: new Date(),
    ...extra,
  });

  /** Fresh queue with one registered host whose deps are the defaults plus `overrides`. */
  const queueWithHost = (hostId: string, overrides: Partial<ActionQueueDeps>): ActionQueue => {
    const fresh = new ActionQueue();
    fresh.registerHost(hostId, { ...deps, ...overrides });
    return fresh;
  };

  beforeEach(() => {
    const silent = () => {};
    queue = new ActionQueue();
    deps = {
      baseUrl: 'http://test-host:8401',
      isLivenessUp: () => true,
      isInflightRequests: () => 0,
      // Minimal no-op logger; `child` hands back the same logger.
      log: {
        error: silent,
        warn: silent,
        info: silent,
        debug: silent,
        trace: silent,
        fatal: silent,
        child: () => deps.log,
      } as any,
    };
    queue.registerHost('host1', deps);
  });

  describe('submit', () => {
    it('rejects submission when host is down', () => {
      const downQueue = queueWithHost('down-host', { isLivenessUp: () => false });

      const outcome = downQueue.submit(makeAction('warm', 'a1', 'down-host'));

      expect(outcome.ok).toBe(false);
      if (!outcome.ok) {
        expect(outcome.error).toBe('host offline');
      }
    });

    it('rejects submission when queue is full (depth 4)', () => {
      // Fill the queue to capacity
      for (const i of [0, 1, 2, 3]) {
        const accepted = queue.submit(makeAction('warm', `fill-${i}`, 'host1', { model: 'model1' }));
        expect(accepted.ok).toBe(true);
      }

      // 5th submission should be rejected
      const overflow = queue.submit(makeAction('warm', 'overflow', 'host1', { model: 'model1' }));

      expect(overflow.ok).toBe(false);
      if (!overflow.ok) {
        expect(overflow.error).toContain('queue full');
        expect(overflow.pending).toHaveLength(4);
      }
    });

    it('returns 409 with requiresConfirmation for unload during inflight', () => {
      const busyQueue = queueWithHost('busy-host', { isInflightRequests: () => 5 });

      const outcome = busyQueue.submit(makeAction('unload', 'unload-1', 'busy-host'));

      expect(outcome.ok).toBe(false);
      if (!outcome.ok) {
        expect(outcome.error).toBe('bench in progress');
        expect(outcome.requiresConfirmation).toBe(true);
      }
    });

    it('allows confirmed unload during inflight', () => {
      const busyQueue = queueWithHost('busy-host', { isInflightRequests: () => 5 });

      const outcome = busyQueue.submit(
        makeAction('unload', 'unload-confirmed', 'busy-host', { confirmed: true }),
      );

      expect(outcome.ok).toBe(true);
    });

    it('accepts a warm action when queue has capacity', () => {
      const outcome = queue.submit(makeAction('warm', 'warm-1', 'host1', { model: 'llama3' }));

      expect(outcome.ok).toBe(true);
    });
  });

  describe('getState', () => {
    it('returns null for unknown host', () => {
      expect(queue.getState('unknown')).toBeNull();
    });

    it('returns state with entries after submission', () => {
      queue.submit(makeAction('warm', 'test-1', 'host1', { model: 'llama3' }));

      const snapshot = queue.getState('host1');
      expect(snapshot).not.toBeNull();
      expect(snapshot!.queue.length).toBe(1);

      const [head] = snapshot!.queue;
      expect(head.action.actionId).toBe('test-1');
      // Status transitions to 'running' as processNext kicks off asynchronously
      expect(['pending', 'running']).toContain(head.status);
    });
  });

  describe('processNext (stale action skip)', () => {
    // NOTE(review): this test never awaits processing and only asserts that the
    // entry is still queued; the skip itself is not verified here.
    it('skips an action when host goes down during processing', async () => {
      let livenessUp = true;
      const flakyQueue = queueWithHost('flaky-host', { isLivenessUp: () => livenessUp });

      // Submit an action
      flakyQueue.submit(makeAction('warm', 'stale-1', 'flaky-host', { model: 'llama3' }));

      // Turn host down before processing
      livenessUp = false;

      // The queue processor will skip the action.
      // We can't easily test the async processNext directly, but we can verify
      // the state reflects the skip logic by checking the queue state.
      const snapshot = flakyQueue.getState('flaky-host');
      expect(snapshot).not.toBeNull();
      expect(snapshot!.queue.length).toBe(1);
      // The entry is still pending; processNext would mark it skipped
    });
  });
});
|
||||
300
apps/control/src/services/__tests__/bench-engine.test.ts
Normal file
300
apps/control/src/services/__tests__/bench-engine.test.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { parseLlamaTimings, computeAggregates, runSingleBenchRequest } from '../../index.js';
|
||||
import { computeRegressionFlag } from '../bench-engine.js';
|
||||
import { createFleetState, ensureHostState } from '../fleet-state.js';
|
||||
import { createDeltaEmitter } from '../../index.js';
|
||||
import type { Sql } from '../../db.js';
|
||||
import type { Config } from '../../config.js';
|
||||
import type { BenchSuite } from '../bench-engine.js';
|
||||
|
||||
// ─── parseLlamaTimings tests ────────────────────────────────────────────────
|
||||
|
||||
describe('parseLlamaTimings', () => {
  // Happy path: an SSE-style line ("data: " prefix) carrying a timings object.
  it('parses timings from a standard llama.cpp chunk', () => {
    const timings = parseLlamaTimings(
      'data: {"choices":[],"timings":{"prompt_per_second":150,"predicted_per_second":80,"cache_n":50}}',
    );

    expect(timings).not.toBeNull();
    expect(timings!.promptPerSecond).toBe(150);
    expect(timings!.predictedPerSecond).toBe(80);
    expect(timings!.cacheN).toBe(50);
  });

  // The bare JSON payload is accepted too.
  it('parses timings without data: prefix', () => {
    const timings = parseLlamaTimings(
      '{"timings":{"prompt_per_second":200,"predicted_per_second":100,"cache_n":0}}',
    );

    expect(timings).not.toBeNull();
    expect(timings!.promptPerSecond).toBe(200);
  });

  // Everything below must yield null rather than throw.
  it('returns null for [DONE] chunk', () => {
    const timings = parseLlamaTimings('data: [DONE]');
    expect(timings).toBeNull();
  });

  it('returns null for chunk without timings', () => {
    const timings = parseLlamaTimings('data: {"choices":[{"delta":{"content":"hello"}}]}');
    expect(timings).toBeNull();
  });

  it('returns null for malformed JSON', () => {
    const timings = parseLlamaTimings('data: not-json');
    expect(timings).toBeNull();
  });
});
|
||||
|
||||
// ─── computeAggregates tests ────────────────────────────────────────────────
|
||||
|
||||
// computeAggregates: averages, medians, p95 and error counting over bench samples.
describe('computeAggregates', () => {
  /**
   * Builds a minimal bench sample carrying only the four fields these tests set.
   * Cast to `any` because the tests deliberately provide a partial sample.
   */
  const sample = (
    ttftMs: number | null,
    genTps: number | null,
    promptTps: number | null,
    error: string | null = null,
  ) => ({ ttftMs, genTps, promptTps, error }) as any;

  /** Three evenly spaced, error-free samples reused by the mean and odd-median tests. */
  const threeSamples = () => [sample(100, 50, 100), sample(200, 100, 200), sample(300, 150, 300)];

  it('returns nulls for empty samples', () => {
    const aggregates = computeAggregates([]);

    expect(aggregates.totalSamples).toBe(0);
    expect(aggregates.avgTtftMs).toBeNull();
    expect(aggregates.avgGenTps).toBeNull();
  });

  it('computes averages correctly', () => {
    const aggregates = computeAggregates(threeSamples());

    expect(aggregates.avgTtftMs).toBe(200);
    expect(aggregates.avgGenTps).toBe(100);
    expect(aggregates.avgPromptTps).toBe(200);
    expect(aggregates.totalSamples).toBe(3);
    expect(aggregates.errorSamples).toBe(0);
  });

  it('computes median correctly for odd count', () => {
    const aggregates = computeAggregates(threeSamples());

    expect(aggregates.medianTtftMs).toBe(200);
    expect(aggregates.medianGenTps).toBe(100);
  });

  it('computes median correctly for even count', () => {
    // With four samples the expected median is the mean of the two middle values.
    const aggregates = computeAggregates([...threeSamples(), sample(400, 200, 400)]);

    expect(aggregates.medianTtftMs).toBe(250);
    expect(aggregates.medianGenTps).toBe(125);
  });

  it('computes p95 TTFT', () => {
    // TTFT values 10, 20, ..., 200 ms with constant throughput numbers.
    const ramp = Array.from({ length: 20 }, (_, idx) => sample((idx + 1) * 10, 50, 100));

    // numDigits = -1 gives a tolerance of +/-5 around 190.
    expect(computeAggregates(ramp).p95TtftMs).toBeCloseTo(190, -1);
  });

  it('filters out null values', () => {
    const aggregates = computeAggregates([sample(100, 50, 100), sample(null, null, null, 'timeout')]);

    expect(aggregates.avgTtftMs).toBe(100);
    expect(aggregates.errorSamples).toBe(1);
  });
});
|
||||
|
||||
// ─── bench runner pipeline test (mock fetch + real functions) ────────────────
|
||||
|
||||
// Exercises runSingleBenchRequest against a stubbed global `fetch`.
describe('bench runner pipeline', () => {
  let mockSql: Sql;
  let executedQueries: Array<{ query: string; values: unknown[] }>;

  beforeEach(() => {
    executedQueries = [];
    // Tagged-template stand-in for the SQL client: it records each query
    // (with interpolated values inlined) instead of executing it.
    mockSql = Object.assign(
      (strings: TemplateStringsArray, ...values: unknown[]) => {
        const query = strings.reduce((acc: string, s: string, i: number) => acc + s + (values[i] ?? ''), '');
        executedQueries.push({ query, values });
        return Promise.resolve([]);
      },
      {
        json: (v: unknown) => v,
        unsafe: async (q: string) => {
          executedQueries.push({ query: q, values: [] });
          return [];
        },
      },
    ) as unknown as Sql;
  });

  it('runSingleBenchRequest captures TTFT and timings on successful stream', async () => {
    const fakeStream = createFakeStreamResponse([
      'data: {"choices":[{"delta":{"content":"H"}}]}',
      'data: {"choices":[{"delta":{"content":"ello"}}]}',
      'data: {"choices":[],"timings":{"prompt_per_second":150,"predicted_per_second":80,"cache_n":10}}',
      'data: [DONE]',
    ]);

    vi.spyOn(global, 'fetch').mockResolvedValueOnce(fakeStream);

    // Restore in `finally` so a failing assertion cannot leak the fetch spy
    // into later tests.
    try {
      const sample = await runSingleBenchRequest(
        'http://localhost:8401',
        'test-model',
        10,
        20,
        0,
        0.7,
        0.9,
      );

      expect(sample.error).toBeNull();
      expect(sample.ttftMs).toBeGreaterThanOrEqual(0);
      expect(sample.ttftMs).toBeLessThan(5000);
      expect(sample.totalMs).toBeGreaterThanOrEqual(0);
      expect(sample.promptTps).toBe(150);
      expect(sample.genTps).toBe(80);
      expect(sample.cacheN).toBe(10);
      expect(sample.promptTokens).toBe(10);
      expect(sample.genTokens).toBe(20);
      expect(sample.repetition).toBe(0);
    } finally {
      vi.restoreAllMocks();
    }
  });

  it('runSingleBenchRequest captures error on HTTP failure', async () => {
    // A real Response (ok === false, status === 500) rather than a partial
    // object cast to Response, so any member the runner reads actually exists.
    vi.spyOn(global, 'fetch').mockResolvedValueOnce(
      new Response('Internal Server Error', { status: 500 }),
    );

    try {
      const sample = await runSingleBenchRequest(
        'http://localhost:8401',
        'test-model',
        10,
        20,
        0,
      );

      expect(sample.error).toContain('500');
      expect(sample.ttftMs).toBeNull();
    } finally {
      vi.restoreAllMocks();
    }
  });

  it('runSingleBenchRequest captures error on fetch exception', async () => {
    vi.spyOn(global, 'fetch').mockRejectedValueOnce(new Error('ECONNREFUSED'));

    try {
      const sample = await runSingleBenchRequest(
        'http://localhost:8401',
        'test-model',
        10,
        20,
        0,
      );

      expect(sample.error).toContain('ECONNREFUSED');
    } finally {
      vi.restoreAllMocks();
    }
  });
});
|
||||
|
||||
// ─── helper: create a fake streaming Response ────────────────────────────────
|
||||
|
||||
/**
 * Builds a 200 `text/event-stream` Response whose body emits each entry of
 * `lines` as its own chunk, terminated by a blank line ("\n\n").
 *
 * @param lines - Chunk payloads, emitted in order, one per `pull`.
 * @returns A Response backed by a pull-driven ReadableStream.
 */
function createFakeStreamResponse(lines: string[]): Response {
  const utf8 = new TextEncoder();
  let cursor = 0;

  const body = new ReadableStream({
    async pull(ctrl) {
      if (cursor < lines.length) {
        ctrl.enqueue(utf8.encode(`${lines[cursor]!}\n\n`));
        cursor += 1;
        // Small delay to simulate network latency for TTFT measurement
        await new Promise((resolve) => setTimeout(resolve, 5));
      } else {
        ctrl.close();
      }
    },
  });

  const init = {
    status: 200,
    headers: { 'Content-Type': 'text/event-stream' },
  };
  return new Response(body, init);
}
|
||||
|
||||
// ─── computeRegressionFlag tests (A1) ────────────────────────────────────────
|
||||
|
||||
// computeRegressionFlag: classifies current aggregates against a stored JSON baseline.
describe('computeRegressionFlag', () => {
  /** Aggregates computed from a single error-free sample. */
  const currentFor = (ttftMs: number, genTps: number, promptTps: number) =>
    computeAggregates([{ ttftMs, genTps, promptTps, error: null } as any]);

  /** Serialized baseline with the given average generation throughput. */
  const storedBaseline = (avgGenTps: number): string =>
    JSON.stringify({
      avgGenTps,
      avgTtftMs: 100,
      totalSamples: 1,
    });

  it('returns baseline for first run (no baseline)', () => {
    expect(computeRegressionFlag(currentFor(100, 80, 150), undefined)).toBe('baseline');
  });

  it('returns regression when gen tok/s drops below -10%', () => {
    // 70 tok/s against a baseline of 100 tok/s.
    const flag = computeRegressionFlag(currentFor(200, 70, 100), storedBaseline(100));
    expect(flag).toBe('regression');
  });

  it('returns improvement when gen tok/s rises above +5%', () => {
    // 120 tok/s against a baseline of 100 tok/s.
    const flag = computeRegressionFlag(currentFor(80, 120, 200), storedBaseline(100));
    expect(flag).toBe('improvement');
  });

  it('returns baseline when within threshold', () => {
    // 98 tok/s against a baseline of 100 tok/s.
    const flag = computeRegressionFlag(currentFor(100, 98, 150), storedBaseline(100));
    expect(flag).toBe('baseline');
  });

  it('returns null for divide-by-zero (N5: baseline avgGenTps is 0)', () => {
    const flag = computeRegressionFlag(currentFor(100, 50, 100), storedBaseline(0));
    expect(flag).toBeNull();
  });

  it('returns null for null current avgGenTps', () => {
    // No samples at all, so the current aggregates have no throughput value.
    const emptyRun = computeAggregates([]);
    expect(computeRegressionFlag(emptyRun, JSON.stringify({ avgGenTps: 100 }))).toBeNull();
  });

  it('returns null for malformed baseline JSON', () => {
    expect(computeRegressionFlag(currentFor(100, 80, 150), 'not-json')).toBeNull();
  });
});
|
||||
60
apps/control/src/services/__tests__/capture-fetch.test.ts
Normal file
60
apps/control/src/services/__tests__/capture-fetch.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseCapture } from '../capture-fetch.js';
|
||||
|
||||
// parseCapture: decodes base64 capture bodies and enforces the capture size cap.
describe('parseCapture', () => {
  /** Encodes text the way the raw capture payload carries bodies. */
  const toBase64 = (text: string): string => Buffer.from(text).toString('base64');

  it('trims response body when total exceeds 256KB cap', () => {
    const largeBody = 'y'.repeat(300_000);
    const capture = parseCapture({
      request_headers: { 'Content-Type': 'application/json' },
      response_headers: {},
      request_body: toBase64('x'.repeat(100_000)),
      response_body: toBase64(largeBody),
      timestamp: '2024-01-01T00:00:00Z',
      model: 'test-model',
      duration_ms: 100,
    }, 'host1', 1);

    expect(capture.responseBody).toContain('[truncated: capture exceeds 256KB cap]');
    // 100 bytes of slack on top of the cap for the truncation marker.
    const totalBytes = Buffer.byteLength(capture.requestBody + capture.responseBody);
    expect(totalBytes).toBeLessThanOrEqual(256 * 1024 + 100);
  });

  it('does not trim when under cap', () => {
    const capture = parseCapture({
      request_headers: {},
      response_headers: {},
      request_body: toBase64('small request'),
      response_body: toBase64('small response'),
      timestamp: '2024-01-01T00:00:00Z',
      model: 'test-model',
      duration_ms: 50,
    }, 'host1', 2);

    expect(capture.requestBody).toBe('small request');
    expect(capture.responseBody).toBe('small response');
    expect(capture.responseBody).not.toContain('[truncated');
  });

  it('handles missing base64 bodies gracefully', () => {
    const capture = parseCapture({
      timestamp: '2024-01-01T00:00:00Z',
    }, 'host1', 3);

    expect(capture.requestBody).toBe('');
    expect(capture.responseBody).toBe('');
  });

  // Previously titled as an invalid-base64 test, but both inputs below are
  // valid base64, so the title now states what is actually asserted.
  it('decodes base64-encoded request and response bodies', () => {
    // Buffer.from(str, 'base64') does not throw on invalid base64 —
    // it decodes what it can. That invalid-input path is NOT exercised here;
    // this test covers the valid round-trip only.
    const capture = parseCapture({
      request_body: toBase64('valid json'),
      response_body: toBase64('{"result": true}'),
      timestamp: '2024-01-01T00:00:00Z',
    }, 'host1', 4);

    expect(capture.requestBody).toBe('valid json');
    expect(capture.responseBody).toBe('{"result": true}');
  });
});
|
||||
50
apps/control/src/services/__tests__/eval-suites.test.ts
Normal file
50
apps/control/src/services/__tests__/eval-suites.test.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { loadEvalSuitesFromData } from '../../index.js';
|
||||
|
||||
// ─── loadEvalSuitesFromData tests ───────────────────────────────────────────
|
||||
|
||||
// loadEvalSuitesFromData: loads the eval suite definitions and checks their shape.
describe('loadEvalSuitesFromData', () => {
  it('loads suites from data/ YAML files', () => {
    const loaded = loadEvalSuitesFromData();
    expect(loaded.length).toBeGreaterThanOrEqual(4);

    const loadedIds = loaded.map((suite) => suite.id);
    for (const expectedId of ['agent-coding', 'chat-quality', 'long-context-retrieval', 'utility-calls']) {
      expect(loadedIds).toContain(expectedId);
    }
  });

  it('loads code suite with correct structure', () => {
    const codeSuite = loadEvalSuitesFromData().find((suite) => suite.id === 'agent-coding');
    expect(codeSuite).not.toBeUndefined();
    expect(codeSuite!.kind).toBe('code');
    expect(codeSuite!.tasks.length).toBeGreaterThan(0);

    // Only the first task is inspected.
    const [firstTask] = codeSuite!.tasks;
    const task = firstTask as Record<string, unknown>;
    expect(task.id).toBeDefined();
    expect(task.prompt).toBeDefined();
    expect(task.test_code).toBeDefined();
    expect(task.expected_output).toBeDefined();
    expect(task.language).toBe('typescript');
  });

  it('loads chat suite with rubric structure', () => {
    const chatSuite = loadEvalSuitesFromData().find((suite) => suite.id === 'chat-quality');
    expect(chatSuite).not.toBeUndefined();
    expect(chatSuite!.kind).toBe('chat');

    const [firstTask] = chatSuite!.tasks;
    const task = firstTask as Record<string, unknown>;
    expect(task.rubric).toBeDefined();
    expect((task.rubric as Record<string, unknown>).max_score).toBeGreaterThan(0);
  });

  it('handles missing data/ directory gracefully', () => {
    // The function catches errors and returns empty array.
    // We can't easily test this without mocking fs, but the try-catch is there.
    // So this only verifies that the return value is an array.
    const loaded = loadEvalSuitesFromData();
    expect(Array.isArray(loaded)).toBe(true);
  });
});
|
||||
82
apps/control/src/services/__tests__/fleet-connector.test.ts
Normal file
82
apps/control/src/services/__tests__/fleet-connector.test.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { addJitter, reconnectDecision, DEFAULT_RECONNECT_POLICY } from '../fleet-connector.js';
|
||||
|
||||
// addJitter: the jittered delay must stay within [delay, 1.5 * delay].
describe('addJitter', () => {
  const BASE_DELAY_MS = 1000;

  it('returns a value >= the input delay', () => {
    expect(addJitter(BASE_DELAY_MS)).toBeGreaterThanOrEqual(1000);
  });

  it('returns a value <= 1.5x the input delay', () => {
    expect(addJitter(BASE_DELAY_MS)).toBeLessThanOrEqual(1500);
  });

  it('0ms delay stays 0ms', () => {
    const jittered = addJitter(0);
    expect(jittered).toBe(0);
  });

  it('returns different values on repeated calls (stochastic)', () => {
    // Twenty draws collapsed into a Set: more than one distinct value means jitter varies.
    const distinct = new Set<number>(Array.from({ length: 20 }, () => addJitter(BASE_DELAY_MS)));
    expect(distinct.size).toBeGreaterThan(1);
  });
});
|
||||
|
||||
// reconnectDecision: exponential backoff with jitter, capped, then give-up.
describe('reconnectDecision', () => {
  /** Asserts a `reconnect` decision whose delay lies within [minMs, maxMs]. */
  const expectReconnectBetween = (
    decision: ReturnType<typeof reconnectDecision>,
    minMs: number,
    maxMs: number,
  ): void => {
    expect(decision.action).toBe('reconnect');
    expect(decision.delayMs).toBeGreaterThanOrEqual(minMs);
    expect(decision.delayMs).toBeLessThanOrEqual(maxMs);
  };

  it('first failure returns baseMs with jitter', () => {
    const { baseMs } = DEFAULT_RECONNECT_POLICY;
    expectReconnectBetween(reconnectDecision(1), baseMs, baseMs * 1.5);
  });

  it('exponential growth: failure 2 returns 2x baseMs with jitter', () => {
    const { baseMs } = DEFAULT_RECONNECT_POLICY;
    expectReconnectBetween(reconnectDecision(2), baseMs * 2, baseMs * 3);
  });

  it('exponential growth: failure 3 returns 4x baseMs with jitter', () => {
    const { baseMs } = DEFAULT_RECONNECT_POLICY;
    expectReconnectBetween(reconnectDecision(3), baseMs * 4, baseMs * 6);
  });

  it('capped at maxMs with jitter', () => {
    const { maxMs } = DEFAULT_RECONNECT_POLICY;
    expectReconnectBetween(reconnectDecision(6), maxMs, maxMs * 1.5);
  });

  it('gives up after maxAttempts', () => {
    const pastLimit = DEFAULT_RECONNECT_POLICY.maxAttempts + 1;
    expect(reconnectDecision(pastLimit)).toEqual({ action: 'give-up' });
  });

  it('custom policy works with jitter', () => {
    const policy = { baseMs: 500, maxMs: 5000, maxAttempts: 3 };

    // Attempts 1..3 double the base delay each time; each bound is 1.5x the floor.
    expectReconnectBetween(reconnectDecision(1, policy), 500, 750);
    expectReconnectBetween(reconnectDecision(2, policy), 1000, 1500);
    expectReconnectBetween(reconnectDecision(3, policy), 2000, 3000);

    // Attempt 4 exceeds maxAttempts.
    expect(reconnectDecision(4, policy)).toEqual({ action: 'give-up' });
  });
});
|
||||
42
apps/control/src/services/__tests__/fleet-state.test.ts
Normal file
42
apps/control/src/services/__tests__/fleet-state.test.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { createFleetState, ensureHostState, stampLastSeen } from '../fleet-state.js';
|
||||
|
||||
// createFleetState: a fresh fleet tracks no hosts.
describe('createFleetState', () => {
  it('creates an empty fleet', () => {
    const { hosts } = createFleetState();
    expect(hosts.size).toBe(0);
  });
});
|
||||
|
||||
// ensureHostState: get-or-create semantics for per-host state, plus stampLastSeen.
describe('ensureHostState', () => {
  const HOST_ID = 'test-host';

  it('creates a new host state if none exists', () => {
    const host = ensureHostState(createFleetState(), HOST_ID);

    expect(host.providerId).toBe('test-host');
    expect(host.liveness).toBe('down');
    expect(host.lastSeenAt).toBeNull();
    expect(host.seq).toBe(0);
    expect(host.models.size).toBe(0);
  });

  it('returns existing host state', () => {
    const fleet = createFleetState();
    const first = ensureHostState(fleet, HOST_ID);
    const second = ensureHostState(fleet, HOST_ID);

    // Same object identity, not merely an equal copy.
    expect(first).toBe(second);
  });

  it('seq is 0 on first call', () => {
    const host = ensureHostState(createFleetState(), HOST_ID);
    expect(host.seq).toBe(0);
  });

  it('stamps lastSeenAt on connection', () => {
    const host = ensureHostState(createFleetState(), HOST_ID);
    expect(host.lastSeenAt).toBeNull();

    stampLastSeen(host);
    expect(host.lastSeenAt).not.toBeNull();
  });
});
|
||||
92
apps/control/src/services/__tests__/gateway.test.ts
Normal file
92
apps/control/src/services/__tests__/gateway.test.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
isGatewayVirtualModel,
|
||||
parseVirtualModel,
|
||||
orderCandidates,
|
||||
splitComposite,
|
||||
} from '../gateway.js';
|
||||
import type { ModelScore } from '../routing-scores.js';
|
||||
|
||||
/**
 * Test factory for a ModelScore.
 *
 * `providerId` is the text before the first '/' of `compositeId`; `model` is
 * everything after it (further slashes are kept). All metric fields default to
 * null, `sampleCount` to 0, `healthy` to true and `badges` to an empty array.
 *
 * @param compositeId - Identifier in "provider/model" form.
 * @param partial - Field overrides applied on top of the defaults.
 * @returns The assembled score object.
 */
function score(compositeId: string, partial: Partial<ModelScore> = {}): ModelScore {
  const [providerId, ...modelSegments] = compositeId.split('/');

  const defaults: ModelScore = {
    compositeId,
    providerId: providerId!,
    model: modelSegments.join('/'),
    codeScore: null,
    chatScore: null,
    evalScore: null,
    avgGenTps: null,
    avgLatencyMs: null,
    sampleCount: 0,
    healthy: true,
    badges: [],
  };

  return { ...defaults, ...partial };
}
|
||||
|
||||
// isGatewayVirtualModel: recognises the `auto` / `auto:*` routing tokens only.
describe('isGatewayVirtualModel', () => {
  it('matches auto and auto:* tokens', () => {
    for (const token of ['auto', 'auto:code', 'auto:fast']) {
      expect(isGatewayVirtualModel(token)).toBe(true);
    }
  });

  it('does not match ordinary models', () => {
    // 'autobahn' merely starts with "auto" and must not be treated as virtual.
    for (const modelId of ['qwopus-35b', 'autobahn']) {
      expect(isGatewayVirtualModel(modelId)).toBe(false);
    }
  });
});
|
||||
|
||||
// parseVirtualModel: normalises a requested model id to its virtual-model token.
describe('parseVirtualModel', () => {
  it('strips a gateway provider prefix', () => {
    const parsed = parseVirtualModel('auto/auto:code');
    expect(parsed).toBe('auto:code');
  });

  it('passes a bare virtual model through', () => {
    const parsed = parseVirtualModel('auto:fast');
    expect(parsed).toBe('auto:fast');
  });
});
|
||||
|
||||
// splitComposite: "provider/model" -> parts, or null when there is no provider part.
describe('splitComposite', () => {
  it('splits provider/model', () => {
    const parts = splitComposite('sam-desktop/qwopus-35b');
    expect(parts).toEqual({ providerId: 'sam-desktop', model: 'qwopus-35b' });
  });

  it('returns null for a bare id', () => {
    const parts = splitComposite('qwopus-35b');
    expect(parts).toBeNull();
  });
});
|
||||
|
||||
// orderCandidates: ranks dispatch candidates for a virtual model, with or without a policy.
describe('orderCandidates', () => {
  it('orders auto:code by code score among healthy hosts', () => {
    // a/m3 scores between the other two but is unhealthy, so it must be dropped.
    const ranked = orderCandidates('auto:code', null, [
      score('a/m1', { codeScore: 0.6 }),
      score('a/m2', { codeScore: 0.9 }),
      score('a/m3', { codeScore: 0.7, healthy: false }),
    ]);

    expect(ranked).toEqual(['a/m2', 'a/m1']);
  });

  it('orders auto:fast by throughput', () => {
    const ranked = orderCandidates('auto:fast', null, [
      score('a/slow', { avgGenTps: 10 }),
      score('a/fast', { avgGenTps: 50 }),
    ]);

    expect(ranked).toEqual(['a/fast', 'a/slow']);
  });

  it('honors an explicit policy order and appends the fallback', () => {
    const policy = { candidates: ['a/m2', 'a/m1'], fallback: 'a/fb' };
    const known = ['a/m1', 'a/m2', 'a/fb'].map((id) => score(id));

    expect(orderCandidates('auto:code', policy, known)).toEqual(['a/m2', 'a/m1', 'a/fb']);
  });

  it('drops policy candidates whose host is unhealthy', () => {
    const policy = { candidates: ['a/m1', 'a/m2'], fallback: null };
    const known = [score('a/m1', { healthy: false }), score('a/m2', { healthy: true })];

    expect(orderCandidates('auto:code', policy, known)).toEqual(['a/m2']);
  });

  it('keeps a never-seen policy candidate (unknown health) for dispatch to try', () => {
    // 'a/never-seen' has no score entry at all, so its health is unknown.
    const policy = { candidates: ['a/never-seen', 'a/known'], fallback: null };
    const known = [score('a/known', { healthy: true })];

    expect(orderCandidates('auto:code', policy, known)).toEqual(['a/never-seen', 'a/known']);
  });
});
|
||||
60
apps/control/src/services/__tests__/jsonb.test.ts
Normal file
60
apps/control/src/services/__tests__/jsonb.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { jsonbStringArray, jsonbArray, jsonbNumberArray, jsonbObject } from '../jsonb.js';
|
||||
|
||||
// jsonbStringArray: coerces a jsonb column value (parsed or still a string) to string[].
describe('jsonbStringArray', () => {
  it('passes through an already-parsed array (porsager behavior)', () => {
    const alreadyParsed = ['a', 'b'];
    expect(jsonbStringArray(alreadyParsed)).toEqual(['a', 'b']);
  });

  it('parses a JSON string array', () => {
    const serialized = '["a","b"]';
    expect(jsonbStringArray(serialized)).toEqual(['a', 'b']);
  });

  it('filters non-strings out of a parsed array', () => {
    const mixed = ['a', 1, null, 'b'];
    expect(jsonbStringArray(mixed)).toEqual(['a', 'b']);
  });

  it('returns [] for null / invalid', () => {
    // null, unparseable text, and a non-array object, in that order.
    for (const unusable of [null, 'not json', {}]) {
      expect(jsonbStringArray(unusable)).toEqual([]);
    }
  });
});
|
||||
|
||||
// jsonbArray: coerces a jsonb column value to an array of arbitrary elements.
describe('jsonbArray', () => {
  it('passes through an already-parsed array of objects (eval tasks)', () => {
    const alreadyParsed = [{ id: 't1' }];
    expect(jsonbArray(alreadyParsed)).toEqual([{ id: 't1' }]);
  });

  it('parses a JSON string array', () => {
    const serialized = '[{"id":"t1"}]';
    expect(jsonbArray(serialized)).toEqual([{ id: 't1' }]);
  });

  it('returns [] for null / invalid / non-array', () => {
    for (const unusable of [null, 'nope', {}]) {
      expect(jsonbArray(unusable)).toEqual([]);
    }
  });
});
|
||||
|
||||
// jsonbNumberArray: coerces a jsonb column value to number[].
describe('jsonbNumberArray', () => {
  it('passes through an already-parsed number array (bench token grids)', () => {
    const alreadyParsed = [128, 512];
    expect(jsonbNumberArray(alreadyParsed)).toEqual([128, 512]);
  });

  it('parses a JSON string array and filters non-numbers', () => {
    // The "x" element is not a number and must be dropped.
    const serialized = '[128,"x",512]';
    expect(jsonbNumberArray(serialized)).toEqual([128, 512]);
  });

  it('returns [] for null / invalid', () => {
    for (const unusable of [null, 'nope']) {
      expect(jsonbNumberArray(unusable)).toEqual([]);
    }
  });
});
|
||||
|
||||
// jsonbObject: coerces a jsonb column value to a plain object, or null.
describe('jsonbObject', () => {
  it('passes through an already-parsed object', () => {
    const alreadyParsed = { a: 1 };
    expect(jsonbObject(alreadyParsed)).toEqual({ a: 1 });
  });

  it('parses a JSON string object', () => {
    const serialized = '{"a":1}';
    expect(jsonbObject(serialized)).toEqual({ a: 1 });
  });

  it('returns null for arrays, null, and invalid', () => {
    // An array, null, and unparseable text, in that order.
    for (const unusable of [[1, 2], null, 'nope']) {
      expect(jsonbObject(unusable)).toBeNull();
    }
  });
});
|
||||
55
apps/control/src/services/__tests__/judge-runner.test.ts
Normal file
55
apps/control/src/services/__tests__/judge-runner.test.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// ─── Judge runner tests (mock sql + real functions) ─────────────────────────
|
||||
|
||||
// Smoke tests for the judge runner's public entry point, runJudgeEval.
describe('judge runner', () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  // Renamed from 'runJudgeError': the test only checks that the module loads
  // and exposes the expected function.
  it('exports runJudgeEval as a function', async () => {
    const mod = await import('../judge-runner.js');
    expect(typeof mod.runJudgeEval).toBe('function');
  });

  // Renamed from 'generateResponse rejects on bad URL': generateResponse is
  // internal and never reached here; the assertion is on the "no base URL"
  // error returned through the public API.
  it('returns a "no base URL" error for an unknown provider', async () => {
    const { runJudgeEval } = await import('../judge-runner.js');

    // Mock sql operations. Built with Object.assign because assigning `.tag`
    // directly onto the vi.fn() mock is a type error under strict checking
    // (the Mock type declares no such property).
    const mockSql = Object.assign(vi.fn().mockResolvedValue([]), {
      tag: vi.fn().mockReturnValue({ SQL: '' }),
    });

    const mockEmitter = {
      publish: vi.fn(),
    };

    const mockLogger = {
      info: vi.fn(),
      warn: vi.fn(),
      error: vi.fn(),
    };

    const progressHandler = vi.fn();

    // This will fail because resolveProviderBaseUrl returns null for unknown provider.
    const result = await runJudgeEval(
      {
        runId: 'test_run',
        providerId: 'nonexistent-provider',
        model: 'test-model',
        quant: null,
        tasks: [],
        judgeModel: null,
      },
      mockSql as unknown as import('../../db.js').Sql,
      mockEmitter as unknown as import('../../index.js').DeltaEmitter,
      0,
      mockLogger as unknown as import('fastify').FastifyBaseLogger,
      progressHandler,
    );

    expect(result.error).toContain('no base URL');
  });
});
|
||||
102
apps/control/src/services/__tests__/liveness.test.ts
Normal file
102
apps/control/src/services/__tests__/liveness.test.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import type { HostState } from '../fleet-state.js';
|
||||
|
||||
/** Connection states a host can be in. */
type Liveness = 'connected' | 'reconnecting' | 'down';

/**
 * Test-local model of the liveness state machine.
 *
 * The next state is determined by the event alone; `current` is accepted for
 * call-site readability but never consulted.
 *
 * @param current - State before the event (unused).
 * @param event - What happened to the connection.
 * @returns The state after the event.
 */
function transitionLiveness(current: Liveness, event: 'connect' | 'disconnect' | 'reconnect_attempt' | 'reconnect_success'): Liveness {
  switch (event) {
    // Both a first connect and a successful reconnect end up connected.
    case 'connect':
    case 'reconnect_success':
      return 'connected';
    case 'reconnect_attempt':
      return 'reconnecting';
    case 'disconnect':
      return 'down';
  }
}
|
||||
|
||||
// Walks a HostState through each liveness transition using transitionLiveness.
describe('liveness state machine', () => {
  /** Fresh host state for provider 'test' in the given liveness. */
  const hostIn = (
    liveness: HostState['liveness'],
    lastSeenAt: HostState['lastSeenAt'] = null,
  ): HostState => ({
    providerId: 'test',
    liveness,
    lastSeenAt,
    seq: 0,
    models: new Map(),
  });

  it('starts as down', () => {
    const host = hostIn('down');
    expect(host.liveness).toBe('down');
  });

  it('connect -> connected', () => {
    const host = hostIn('down');
    host.liveness = transitionLiveness(host.liveness, 'connect');
    expect(host.liveness).toBe('connected');
  });

  it('connected -> down on disconnect', () => {
    const host = hostIn('connected', new Date());
    host.liveness = transitionLiveness(host.liveness, 'disconnect');
    expect(host.liveness).toBe('down');
  });

  it('down -> reconnecting on reconnect attempt', () => {
    const host = hostIn('down');
    host.liveness = transitionLiveness(host.liveness, 'reconnect_attempt');
    expect(host.liveness).toBe('reconnecting');
  });

  it('reconnecting -> connected on reconnect success', () => {
    const host = hostIn('reconnecting');
    host.liveness = transitionLiveness(host.liveness, 'reconnect_success');
    expect(host.liveness).toBe('connected');
  });

  it('connected -> reconnecting on reconnect attempt', () => {
    const host = hostIn('connected', new Date());
    host.liveness = transitionLiveness(host.liveness, 'reconnect_attempt');
    expect(host.liveness).toBe('reconnecting');
  });

  it('reconnecting -> down on reconnect failure', () => {
    // A failed reconnect is modelled as a 'disconnect' event.
    const host = hostIn('reconnecting');
    host.liveness = transitionLiveness(host.liveness, 'disconnect');
    expect(host.liveness).toBe('down');
  });
});
|
||||
115
apps/control/src/services/__tests__/llama-providers.test.ts
Normal file
115
apps/control/src/services/__tests__/llama-providers.test.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { writeFileSync, unlinkSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from '../llama-providers.js';
|
||||
|
||||
/**
 * Writes a providers fixture file into the OS temp directory and returns its path.
 *
 * The file is JSON with `defaultProvider` set to the first provider's id and
 * every provider given `kind: 'llama-swap'` unless it specifies its own.
 * The caller is responsible for deleting the file.
 *
 * @param providers - Provider entries to serialize; must contain at least one
 *   entry (an empty array throws when the first id is read).
 * @returns Absolute path of the written fixture file.
 */
function loadFixture(
  providers: Array<{ id: string; label: string; baseUrl: string; kind?: string }>,
): string {
  const withKinds = providers.map((entry) => ({ ...entry, kind: entry.kind ?? 'llama-swap' }));
  const payload = JSON.stringify({
    defaultProvider: providers[0]!.id,
    providers: withKinds,
  });

  // Random suffix keeps fixture files from different tests apart.
  const suffix = Math.random().toString(36).slice(2);
  const target = join(tmpdir(), `llama-providers-test-${suffix}.json`);

  writeFileSync(target, payload, 'utf8');
  return target;
}
|
||||
|
||||
// loadLlamaProviders: one happy path (a valid providers file) and three cases
// where the tests expect a single legacy 'llama-swap' provider to be returned.
describe('loadLlamaProviders', () => {
  // Files written by the running test. They are removed in afterEach so that a
  // failing assertion cannot leave fixtures behind in the OS temp directory.
  const tempFiles: string[] = [];

  afterEach(() => {
    // Restores the console spies installed by individual tests, so the tests
    // themselves do not need to call mockRestore().
    vi.restoreAllMocks();
    for (const file of tempFiles.splice(0)) {
      unlinkSync(file);
    }
  });

  it('loads a valid providers file', () => {
    const path = loadFixture([
      { id: 'sam-desktop', label: 'Sam Desktop', baseUrl: 'http://100.101.41.16:8401' },
      { id: 'embedding', label: 'Embedding', baseUrl: 'http://100.90.172.55:8411' },
    ]);
    tempFiles.push(path);

    const result = loadLlamaProviders(path, 'http://legacy.test:8080');

    expect(result.providers).toHaveLength(2);
    expect(result.providers[0]!.id).toBe('sam-desktop');
    expect(result.providers[0]!.baseUrl).toBe('http://100.101.41.16:8401');
    expect(result.providers[1]!.id).toBe('embedding');
    expect(result.providers[1]!.baseUrl).toBe('http://100.90.172.55:8411');
  });

  it('falls back to legacy when file is missing', () => {
    // Silence the warning output for the duration of this test.
    vi.spyOn(console, 'warn').mockImplementation(() => {});

    const result = loadLlamaProviders('/nonexistent/path.json', 'http://legacy.test:8080');

    expect(result.providers).toHaveLength(1);
    expect(result.providers[0]!.id).toBe('llama-swap');
    expect(result.providers[0]!.baseUrl).toBe('http://legacy.test:8080');
  });

  it('falls back to legacy when path is undefined', () => {
    const result = loadLlamaProviders(undefined, 'http://legacy.test:8080');

    expect(result.providers).toHaveLength(1);
    expect(result.providers[0]!.id).toBe('llama-swap');
    expect(result.providers[0]!.baseUrl).toBe('http://legacy.test:8080');
  });

  it('falls back to legacy when JSON is invalid', () => {
    const path = join(tmpdir(), `llama-providers-bad-${Math.random().toString(36).slice(2)}.json`);
    writeFileSync(path, '{not valid json', 'utf8');
    tempFiles.push(path);
    // Silence the error output for the duration of this test.
    vi.spyOn(console, 'error').mockImplementation(() => {});

    const result = loadLlamaProviders(path, 'http://legacy.test:8080');

    expect(result.providers).toHaveLength(1);
    expect(result.providers[0]!.id).toBe('llama-swap');
  });
});
|
||||
|
||||
// getLlamaProviders: reads back whatever the last loadLlamaProviders call produced.
describe('getLlamaProviders', () => {
  it('returns cached result after load', () => {
    const legacyUrl = 'http://test.example:9999';
    loadLlamaProviders(undefined, legacyUrl);

    const { providers } = getLlamaProviders();

    expect(providers[0]!.baseUrl).toBe(legacyUrl);
  });

  it('returns legacy fallback when nothing loaded', () => {
    // NOTE(review): earlier tests in this file have already called
    // loadLlamaProviders, so the "nothing loaded" state is presumably not what
    // is exercised here. The assertions only require that a non-empty provider
    // list comes back, whichever path produced it.
    const providerSet = getLlamaProviders();

    expect(providerSet).toBeDefined();
    expect(providerSet.providers.length).toBeGreaterThanOrEqual(1);
  });
});
|
||||
|
||||
// resolveProviderBaseUrl: looks up a provider id in the most recently loaded
// provider set; these tests expect null for an id that is not present.
describe('resolveProviderBaseUrl', () => {
  it('resolves baseUrl for a known provider', () => {
    loadLlamaProviders(undefined, 'http://test.example:9999');
    expect(resolveProviderBaseUrl('llama-swap')).toBe('http://test.example:9999');
  });

  it('returns null for unknown provider', () => {
    loadLlamaProviders(undefined, 'http://test.example:9999');
    expect(resolveProviderBaseUrl('nonexistent')).toBeNull();
  });

  it('resolves correct URLs for both seeded providers', () => {
    const path = loadFixture([
      { id: 'sam-desktop', label: 'Sam Desktop', baseUrl: 'http://100.101.41.16:8401' },
      { id: 'embedding', label: 'Embedding', baseUrl: 'http://100.90.172.55:8411' },
    ]);

    try {
      loadLlamaProviders(path, 'http://legacy.test:8080');

      expect(resolveProviderBaseUrl('sam-desktop')).toBe('http://100.101.41.16:8401');
      expect(resolveProviderBaseUrl('embedding')).toBe('http://100.90.172.55:8411');
    } finally {
      // Remove the fixture even when an assertion above throws.
      unlinkSync(path);
    }
  });
});
|
||||
63
apps/control/src/services/__tests__/log-relay.test.ts
Normal file
63
apps/control/src/services/__tests__/log-relay.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { LogRelay } from '../log-relay.js';
|
||||
|
||||
// LogRelay keeps a bounded per-host tail of log lines; each test starts from a
// fresh relay instance.
describe('LogRelay', () => {
  let relay: LogRelay;

  beforeEach(() => {
    relay = new LogRelay();
  });

  it('appends log lines to per-host tail', () => {
    relay.append('host1', 'proxy', 'connection established');
    relay.append('host1', 'upstream', 'request completed');

    const lines = relay.getTail('host1');

    expect(lines).toHaveLength(2);
    expect(lines[0].source).toBe('proxy');
    expect(lines[1].source).toBe('upstream');
  });

  it('trims tail to MAX_LOG_LINES (2000)', () => {
    // Push 2500 lines; the test expects only the newest 2000 (500..2499) to remain.
    let n = 0;
    while (n < 2500) {
      relay.append('host1', 'proxy', `line ${n}`);
      n += 1;
    }

    const lines = relay.getTail('host1');
    const lastIndex = lines.length - 1;

    expect(lines.length).toBe(2000);
    expect(lines[0].line).toBe('line 500');
    expect(lines[lastIndex].line).toBe('line 2499');
  });

  it('returns empty array for unknown host', () => {
    const lines = relay.getTail('unknown');
    expect(lines).toEqual([]);
  });

  it('getAllTails returns lines from all hosts', () => {
    relay.append('host1', 'proxy', 'line1');
    relay.append('host2', 'upstream', 'line2');

    const combined = relay.getAllTails();

    expect(combined).toHaveLength(2);
    expect(combined.map((entry) => entry.providerId)).toContain('host1');
    expect(combined.map((entry) => entry.providerId)).toContain('host2');
  });

  it('getSources returns unique source values', () => {
    relay.append('host1', 'proxy', 'line1');
    relay.append('host1', 'upstream', 'line2');
    relay.append('host2', 'model', 'line3');

    const seen = relay.getSources();

    expect(seen).toContain('proxy');
    expect(seen).toContain('upstream');
    expect(seen).toContain('model');
    expect(seen.length).toBe(3);
  });

  it('timestamps are set on each line', () => {
    relay.append('host1', 'proxy', 'test');

    const [first] = relay.getTail('host1');

    expect(first.ts).toBeInstanceOf(Date);
  });
});
|
||||
83
apps/control/src/services/__tests__/model-pull.test.ts
Normal file
83
apps/control/src/services/__tests__/model-pull.test.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { validateRepoId, buildPullCommand, runModelPull } from '../model-pull.js';
|
||||
import type { SshExec, ExecResult } from '../ssh-config.js';
|
||||
import type { DeltaEmitter } from '../../index.js';
|
||||
|
||||
// validateRepoId: only a two-segment `org/name` id is expected to be accepted.
describe('validateRepoId', () => {
  it('accepts org/name', () => {
    const accepted = ['Qwen/Qwen3.5-9B', 'lmstudio-community/model.gguf-q4'];
    for (const repo of accepted) {
      expect(validateRepoId(repo)).toBe(true);
    }
  });

  it('rejects traversal, spaces, metacharacters, and bare names', () => {
    const rejected = [
      '../etc/passwd', // path traversal
      'a/b; rm -rf /', // shell metacharacters
      'a b/c', // whitespace
      'justname', // no org segment
      'a/b/c', // too many segments
    ];
    for (const repo of rejected) {
      expect(validateRepoId(repo)).toBe(false);
    }
  });
});
|
||||
|
||||
// buildPullCommand: checks the exact command string produced for each mode.
describe('buildPullCommand', () => {
  it('wrapper mode emits the pull verb', () => {
    const command = buildPullCommand('wrapper', 'Qwen/Q3');
    expect(command).toBe('pull Qwen/Q3');
  });

  it('shell mode emits huggingface-cli into a sanitized local dir', () => {
    // The expected target directory replaces the '/' in the repo id with '__'.
    const command = buildPullCommand('shell', 'Qwen/Q3', '/home/u/models/');
    expect(command).toBe("huggingface-cli download Qwen/Q3 --local-dir '/home/u/models/Qwen__Q3'");
  });
});
|
||||
|
||||
/**
 * Builds a DeltaEmitter test double that records everything passed to
 * `publish`.
 *
 * @returns The emitter plus the live `frames` array it appends to. `subscribe`
 *   ignores its listener and hands back a no-op unsubscribe function.
 */
function emitterSpy(): { emitter: DeltaEmitter; frames: Record<string, unknown>[] } {
  const frames: Record<string, unknown>[] = [];

  const emitter: DeltaEmitter = {
    subscribe: () => {
      return () => {};
    },
    publish: (delta) => {
      frames.push(delta as Record<string, unknown>);
    },
  };

  return { emitter, frames };
}
|
||||
|
||||
/**
 * Builds an SshExec test double that always resolves with `result`.
 *
 * @param result - The canned exec result handed back on every invocation.
 * @returns The fake `exec` plus the live `calls` array, which receives the
 *   command string of each invocation in call order.
 */
function execReturning(result: ExecResult): { exec: SshExec; calls: string[] } {
  const calls: string[] = [];

  const exec: SshExec = async (_target, command) => {
    calls.push(command);
    return result;
  };

  return { exec, calls };
}
|
||||
|
||||
// SSH target shared by every runModelPull test; the values are placeholders
// because the exec double never opens a connection.
const target = { host: 'h', user: 'u', keyPath: '/k' };

// runModelPull: validates the request, runs the pull command through the
// injected exec, and reports progress through the injected emitter.
describe('runModelPull', () => {
  it('rejects an invalid repo id before issuing any command', async () => {
    const spy = emitterSpy();
    const ssh = execReturning({ code: 0, stdout: '', stderr: '' });

    const outcome = await runModelPull(
      { jobId: 'j1', target, repo: '../x', mode: 'wrapper' },
      ssh.exec,
      spy.emitter,
    );

    expect(outcome.ok).toBe(false);
    expect(ssh.calls).toHaveLength(0);
    const lastFrame = spy.frames[spy.frames.length - 1];
    expect(lastFrame).toMatchObject({ type: 'control_job', status: 'failed' });
  });

  it('runs the wrapper pull verb and emits running then completed', async () => {
    const spy = emitterSpy();
    const ssh = execReturning({ code: 0, stdout: 'done', stderr: '' });

    const outcome = await runModelPull(
      { jobId: 'j2', target, repo: 'Qwen/Q3', mode: 'wrapper' },
      ssh.exec,
      spy.emitter,
    );

    expect(outcome.ok).toBe(true);
    expect(ssh.calls).toEqual(['pull Qwen/Q3']);
    const statuses = spy.frames.map((frame) => frame.status);
    expect(statuses).toEqual(['running', 'completed']);
    // Every emitted frame is expected to carry detail.kind === 'pull'.
    const allPullFrames = spy.frames.every(
      (frame) => (frame.detail as { kind?: string }).kind === 'pull',
    );
    expect(allPullFrames).toBe(true);
  });

  it('reports a non-zero exit as failed', async () => {
    const spy = emitterSpy();
    const ssh = execReturning({ code: 1, stdout: '', stderr: 'no such repo' });

    const outcome = await runModelPull(
      { jobId: 'j3', target, repo: 'Qwen/Q3', mode: 'wrapper' },
      ssh.exec,
      spy.emitter,
    );

    expect(outcome.ok).toBe(false);
    const lastFrame = spy.frames[spy.frames.length - 1];
    expect(lastFrame).toMatchObject({ status: 'failed' });
  });

  it('shell mode without a models dir fails fast', async () => {
    const spy = emitterSpy();
    const ssh = execReturning({ code: 0, stdout: '', stderr: '' });

    const outcome = await runModelPull(
      { jobId: 'j4', target, repo: 'Qwen/Q3', mode: 'shell' },
      ssh.exec,
      spy.emitter,
    );

    expect(outcome.ok).toBe(false);
    expect(ssh.calls).toHaveLength(0);
  });
});
|
||||
337
apps/control/src/services/__tests__/pipeline.test.ts
Normal file
337
apps/control/src/services/__tests__/pipeline.test.ts
Normal file
@@ -0,0 +1,337 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { parseSseLine } from '../fleet-connector.js';
|
||||
import type { LlamaSweepSSEEvent, MetricsEntry, ModelStatusEntry } from '../fleet-connector.js';
|
||||
import { createFleetState, ensureHostState, incrementSeq } from '../fleet-state.js';
|
||||
import { createDeltaEmitter, handleLlamaSweepEvent } from '../../index.js';
|
||||
import type { DeltaEmitter } from '../../index.js';
|
||||
import type { Sql } from '../../db.js';
|
||||
import type { Config } from '../../config.js';
|
||||
|
||||
// ─── SSE parser tests (REAL wire shapes from apigroup.go) ────────────────────
|
||||
// Real format: event:message / data:{"type":"<TYPE>","data":"<ESCAPED JSON>"}
|
||||
|
||||
// parseSseLine: a `data:` line carries an envelope whose `data` field is itself
// a JSON-encoded string; anything else is expected to yield null.
describe('parseSseLine (real wire shapes)', () => {
  it('parses double-encoded modelStatus (real full-fleet array payload)', () => {
    const models = [
      { id: 'llama3', name: '', description: '', state: 'ready', unlisted: false, peerID: '' },
    ];
    // Encode twice: the inner payload is a string inside the outer envelope.
    const envelope = JSON.stringify({ type: 'modelStatus', data: JSON.stringify(models) });

    const parsed = parseSseLine(`data: ${envelope}`);

    expect(parsed).not.toBeNull();
    expect(parsed!.type).toBe('modelStatus');
    expect(parsed!.data).toEqual([
      { id: 'llama3', name: '', description: '', state: 'ready', unlisted: false, peerID: '' },
    ]);
  });

  it('ignores event: lines (always event:message)', () => {
    const parsed = parseSseLine('event:message');
    expect(parsed).toBeNull();
  });

  it('returns null for data: with missing inner data field', () => {
    const parsed = parseSseLine('data:{"type":"modelStatus"}');
    expect(parsed).toBeNull();
  });

  it('returns null for empty line', () => {
    for (const blank of ['', ' ']) {
      expect(parseSseLine(blank)).toBeNull();
    }
  });

  it('returns null for malformed JSON', () => {
    const parsed = parseSseLine('data: not-json');
    expect(parsed).toBeNull();
  });
});
|
||||
|
||||
// ─── Pipeline integration test (real functions) ──────────────────────────────
|
||||
|
||||
|
||||
/**
 * Builds a model-status entry for tests: only `id` and `state` vary, every
 * other field is an empty string or `false`.
 *
 * @param id - Model identifier.
 * @param state - Model state string, e.g. 'ready'.
 * @returns The populated entry.
 */
function apiModel(id: string, state: string): ModelStatusEntry {
  const entry: ModelStatusEntry = {
    id,
    name: '',
    description: '',
    state,
    unlisted: false,
    peerID: '',
  };
  return entry;
}
|
||||
|
||||
// Drives handleLlamaSweepEvent with a recording SQL double and a real delta
// emitter, then checks both the emitted deltas and the issued queries.
describe('SSE pipeline: parse -> handleLlamaSweepEvent -> emit deltas', () => {
  let mockSql: Sql;
  let mockConfig: Config;
  let executedQueries: string[];

  /** Fresh fleet state plus an emitter whose published deltas are collected. */
  function startPipeline() {
    const fleet = createFleetState();
    const emitter = createDeltaEmitter();
    const deltas: unknown[] = [];
    emitter.subscribe((d) => deltas.push(d));
    return { fleet, emitter, deltas };
  }

  /** One chat-completions metrics entry for model 'llama3' with status 200. */
  const chatCompletion = (
    id: number,
    timestamp: string,
    duration_ms: number,
    tokens: {
      cache_tokens: number;
      input_tokens: number;
      output_tokens: number;
      prompt_per_second: number;
      tokens_per_second: number;
    },
  ) => ({
    id,
    timestamp,
    model: 'llama3',
    req_path: '/v1/chat/completions',
    resp_status_code: 200,
    duration_ms,
    tokens,
    has_capture: false,
  });

  beforeEach(() => {
    executedQueries = [];

    // Tagged-template stand-in for the SQL client: it flattens the template and
    // its interpolated values into one string, records it, and resolves to no rows.
    const tag = (strings: TemplateStringsArray, ...values: unknown[]) => {
      const query = strings.map((chunk, i) => chunk + (values[i] ?? '')).join('');
      executedQueries.push(query);
      return Promise.resolve([]);
    };
    const helpers = {
      json: (v: unknown) => v,
      unsafe: async (q: string) => {
        executedQueries.push(q);
        return [];
      },
    };
    mockSql = Object.assign(tag, helpers) as unknown as Sql;

    mockConfig = {
      NODE_ENV: 'production',
      PORT: 9503,
      HOST: '127.0.0.1',
      DATABASE_URL: 'postgres://test',
      LOG_LEVEL: 'info',
      RETENTION_RAW_HOURS: 48,
      RETENTION_ROLLUP_DAYS: 90,
      CAPTURE_SIZE_KB: 256,
      CAPTURE_BUDGET_MB: 50,
    } as unknown as Config;
  });

  it('processes modelStatus SSE event and emits delta with seq=1', async () => {
    const { fleet, emitter, deltas } = startPipeline();
    const event: LlamaSweepSSEEvent = {
      type: 'modelStatus',
      data: [apiModel('llama3', 'ready')],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, event);

    // Exactly one fleet delta, stamped seq=1 at both the delta and host level.
    expect(deltas).toHaveLength(1);
    const delta = deltas[0] as {
      type: string;
      seq: number;
      hosts: Array<{ seq: number; models: Array<{ model: string; state: string }> }>;
    };
    expect(delta.type).toBe('control_fleet');
    expect(delta.seq).toBe(1);
    const [firstHost] = delta.hosts;
    expect(firstHost.seq).toBe(1);
    expect(firstHost.models[0].model).toBe('llama3');
    expect(firstHost.models[0].state).toBe('ready');

    // Exactly one query, and it targets the model-events table for this model.
    expect(executedQueries.length).toBe(1);
    const [onlyQuery] = executedQueries;
    expect(onlyQuery).toContain('control_model_events');
    expect(onlyQuery).toContain('llama3');
  });

  it('increments seq monotonically across multiple events', async () => {
    const { fleet, emitter, deltas } = startPipeline();

    // Each snapshot names a model not seen before, so each one is expected to
    // count as a transition and produce its own delta.
    for (const modelId of ['model0', 'model1', 'model2']) {
      await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, {
        type: 'modelStatus',
        data: [apiModel(modelId, 'ready')],
      });
    }

    expect(deltas).toHaveLength(3);
    const seqs = deltas.map((d) => (d as { seq: number }).seq);
    expect(seqs).toEqual([1, 2, 3]);
  });

  it('processes metrics event with multiple entries and emits activity deltas', async () => {
    const { fleet, emitter, deltas } = startPipeline();
    const metricsEvent: LlamaSweepSSEEvent = {
      type: 'metrics',
      data: [
        chatCompletion(1, '2024-01-01T00:00:00Z', 1500, {
          cache_tokens: 100,
          input_tokens: 50,
          output_tokens: 200,
          prompt_per_second: 30,
          tokens_per_second: 50,
        }),
        chatCompletion(2, '2024-01-01T00:01:00Z', 1200, {
          cache_tokens: 0,
          input_tokens: 100,
          output_tokens: 300,
          prompt_per_second: 25,
          tokens_per_second: 45,
        }),
      ],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, metricsEvent);

    // Per the original author's note, a reconcile (gap detection) query runs in
    // addition to one insert per entry. The assertion is deliberately loose and
    // only requires at least two queries.
    expect(executedQueries.length).toBeGreaterThanOrEqual(2);

    // One activity delta per metrics entry, in input order.
    const activityDeltas = deltas.filter((d) => (d as { type: string }).type === 'control_activity');
    expect(activityDeltas).toHaveLength(2);
    const [first, second] = activityDeltas as Array<{ entry: { id: number } }>;
    expect(first.entry.id).toBe(1);
    expect(second.entry.id).toBe(2);
  });

  it('snapshot seq is max of all host seqs', () => {
    const fleet = createFleetState();

    // host1 ends at seq 2, host2 at seq 3.
    const host1 = ensureHostState(fleet, 'host1');
    incrementSeq(host1);
    incrementSeq(host1);

    const host2 = ensureHostState(fleet, 'host2');
    incrementSeq(host2);
    incrementSeq(host2);
    incrementSeq(host2);

    const hostSeqs = Array.from(fleet.hosts.values()).map((h) => h.seq);
    const snapshotMaxSeq = Math.max(0, ...hostSeqs);

    expect(snapshotMaxSeq).toBe(3);
  });
});
|
||||
|
||||
// ─── 2-host delta merge test (B9): suite follows the P4 block below ──────────
|
||||
|
||||
// ─── P4: source column mapping ──────────────────────────────────────────────
|
||||
|
||||
// P4: the request insert issued for a metrics event is expected to mention the
// `source` column.
describe('P4: source column in metrics ingest', () => {
  let mockSql: Sql;
  let mockConfig: Config;
  let executedQueries: string[];

  beforeEach(() => {
    executedQueries = [];

    // Recording SQL double: flattens each tagged template (with its values)
    // into a string, stores it, and resolves to an empty result set.
    const tag = (strings: TemplateStringsArray, ...values: unknown[]) => {
      const query = strings.map((chunk, i) => chunk + (values[i] ?? '')).join('');
      executedQueries.push(query);
      return Promise.resolve([]);
    };
    const helpers = {
      json: (v: unknown) => v,
      unsafe: async (q: string) => {
        executedQueries.push(q);
        return [];
      },
    };
    mockSql = Object.assign(tag, helpers) as unknown as Sql;

    mockConfig = {
      NODE_ENV: 'production',
      PORT: 9503,
      HOST: '127.0.0.1',
      DATABASE_URL: 'postgres://test',
      LOG_LEVEL: 'info',
      RETENTION_RAW_HOURS: 48,
      RETENTION_ROLLUP_DAYS: 90,
      CAPTURE_SIZE_KB: 256,
      CAPTURE_BUDGET_MB: 50,
    } as unknown as Config;
  });

  it('maps source as NULL for ring data (ActivityLogEntry has no headers)', async () => {
    const fleet = createFleetState();
    const emitter = createDeltaEmitter();
    const deltas: unknown[] = [];
    emitter.subscribe((d) => deltas.push(d));

    const tokens = {
      cache_tokens: 100,
      input_tokens: 50,
      output_tokens: 200,
      prompt_per_second: 30,
      tokens_per_second: 50,
    };
    const metricsEvent: LlamaSweepSSEEvent = {
      type: 'metrics',
      data: [
        {
          id: 1,
          timestamp: '2024-01-01T00:00:00Z',
          model: 'llama3',
          req_path: '/v1/chat/completions',
          resp_status_code: 200,
          duration_ms: 1500,
          tokens,
          has_capture: false,
        },
      ],
    };

    await handleLlamaSweepEvent(fleet, mockSql, mockConfig, 'host1', emitter, metricsEvent);

    // At least two queries are expected to touch control_requests. Per the
    // original author's note, the first comes from reconcile and has no source
    // column, while the second (the SSE handler insert) does.
    const requestQueries = executedQueries.filter((q) => q.includes('control_requests'));
    expect(requestQueries.length).toBeGreaterThanOrEqual(2);
    const handlerInsert = requestQueries[1];
    expect(handlerInsert).toContain('source');
  });
});
|
||||
|
||||
// B9: applying a delta that names only some hosts must leave the other hosts
// in place. The merge below is a local copy of the logic the original author
// attributes to useControlStream.tsx; it is not imported from there.
describe('2-host delta merge (B9)', () => {
  /** A connected host snapshot with no models. */
  const host = (providerId: string, seq: number) => ({
    providerId,
    liveness: 'connected' as const,
    lastSeenAt: '',
    seq,
    models: [],
  });

  /**
   * Returns a copy of `current` in which each delta host replaces the entry
   * with the same providerId, or is appended when no such entry exists.
   */
  function applyDelta<T extends { providerId: string }>(
    current: readonly T[],
    delta: readonly T[],
  ): T[] {
    const merged = [...current];
    for (const incoming of delta) {
      const position = merged.findIndex((h) => h.providerId === incoming.providerId);
      if (position < 0) {
        merged.push(incoming);
      } else {
        merged[position] = incoming;
      }
    }
    return merged;
  }

  it('delta for host2 does not wipe host1 from the hosts array', () => {
    const hosts = [host('host1', 5), host('host2', 3)];
    // Delta arrives for host2 only.
    const deltaHosts = [host('host2', 4)];

    const merged = applyDelta(hosts, deltaHosts);

    expect(merged).toHaveLength(2);
    expect(merged.find((h) => h.providerId === 'host1')).toBeDefined();
    expect(merged.find((h) => h.providerId === 'host2')!.seq).toBe(4);
    expect(merged.find((h) => h.providerId === 'host1')!.seq).toBe(5);
  });

  it('new host is appended when not in existing array', () => {
    const hosts = [host('host1', 5)];
    const deltaHosts = [host('host3', 1)];

    const merged = applyDelta(hosts, deltaHosts);

    expect(merged).toHaveLength(2);
    expect(merged.map((h) => h.providerId)).toEqual(['host1', 'host3']);
  });
});
|
||||
34
apps/control/src/services/__tests__/reconcile.test.ts
Normal file
34
apps/control/src/services/__tests__/reconcile.test.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { detectGap } from '../reconcile.js';
|
||||
|
||||
// detectGap(oldestReconcile, newestPersisted): expected to be true only when
// both timestamps are present and the first is strictly later than the second.
describe('detectGap', () => {
  const JAN_1 = '2024-01-01T00:00:00Z';
  const JAN_2 = '2024-01-02T00:00:00Z';

  it('detects gap when oldest reconcile is newer than newest persisted', () => {
    expect(detectGap(JAN_2, JAN_1)).toBe(true);
  });

  it('does not detect gap when overlap exists', () => {
    expect(detectGap(JAN_1, JAN_2)).toBe(false);
  });

  it('does not detect gap when timestamps are equal', () => {
    expect(detectGap(JAN_1, JAN_1)).toBe(false);
  });

  it('returns false when oldest reconcile is null', () => {
    expect(detectGap(null, JAN_1)).toBe(false);
  });

  it('returns false when newest persisted is null', () => {
    expect(detectGap(JAN_1, null)).toBe(false);
  });

  it('returns false when both are null', () => {
    expect(detectGap(null, null)).toBe(false);
  });

  it('handles timezone offsets correctly', () => {
    // 14:00 at +02:00 is the same instant as 12:00Z.
    const persisted = '2024-01-01T14:00:00+02:00';

    const sameInstant = detectGap('2024-01-01T12:00:00Z', persisted);
    const oneHourLater = detectGap('2024-01-01T13:00:00Z', persisted);

    expect(sameInstant).toBe(false);
    expect(oneHourLater).toBe(true);
  });
});
|
||||
66
apps/control/src/services/__tests__/reports.test.ts
Normal file
66
apps/control/src/services/__tests__/reports.test.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { renderReportMarkdown, isReportDue, type ReportStats } from '../reports.js';
|
||||
|
||||
/**
 * Builds a ReportStats fixture for a one-day period with one source, one
 * provider, one leaderboard row and no regressions.
 *
 * @param partial - Fields that override the defaults, spread last.
 * @returns The defaults merged with `partial`.
 */
function makeStats(partial: Partial<ReportStats> = {}): ReportStats {
  const defaults: ReportStats = {
    periodStart: '2026-06-11T00:00:00.000Z',
    periodEnd: '2026-06-12T00:00:00.000Z',
    interval: 'daily',
    totalRequests: 100,
    priorRequests: 50,
    totalInputTokens: 1000,
    totalOutputTokens: 2000,
    bySource: [{ source: 'boochat', requests: 80, inputTokens: 800, outputTokens: 1600 }],
    byProvider: [{ providerId: 'sam-desktop', requests: 100, swaps: 4 }],
    leaderboard: [{ providerId: 'sam-desktop', model: 'qwopus-35b', kind: 'code', avgScore: 0.82 }],
    regressions: [],
  };
  return { ...defaults, ...partial };
}
|
||||
|
||||
// renderReportMarkdown: checks for specific fragments in the rendered report.
describe('renderReportMarkdown', () => {
  it('renders usage with a trend vs the prior period', () => {
    const md = renderReportMarkdown(makeStats());

    // The default fixture has 100 requests against 50 in the prior period.
    const expectedFragments = [
      '# Fleet daily report',
      'Requests: 100 (+100% vs prior period)',
      '| boochat | 80 |',
      '| sam-desktop | 100 | 4 |',
      'No speed regressions flagged this period.',
    ];
    for (const fragment of expectedFragments) {
      expect(md).toContain(fragment);
    }
  });

  it('renders regression anomalies when present', () => {
    const stats = makeStats({
      regressions: [{ providerId: 'sam-desktop', model: 'qwopus-35b', avgGenTps: 42.5 }],
    });

    const md = renderReportMarkdown(stats);

    expect(md).toContain('Regression: sam-desktop/qwopus-35b');
    expect(md).toContain('42.5 tok/s');
  });

  it('handles a zero prior period without dividing by zero', () => {
    const stats = makeStats({ totalRequests: 5, priorRequests: 0 });

    const md = renderReportMarkdown(stats);

    expect(md).toContain('Requests: 5 (new vs prior period)');
  });
});
|
||||
|
||||
// isReportDue(lastRun, interval, now): every case is evaluated against the
// same fixed "now" so the elapsed time is explicit.
describe('isReportDue', () => {
  const now = new Date('2026-06-12T12:00:00.000Z');

  it('is due when never run', () => {
    const due = isReportDue(null, 'daily', now);
    expect(due).toBe(true);
  });

  it('is not due within the interval', () => {
    // Last run was 6 hours before `now`.
    const sixHoursAgo = new Date('2026-06-12T06:00:00.000Z');
    expect(isReportDue(sixHoursAgo, 'daily', now)).toBe(false);
  });

  it('is due once the interval has elapsed', () => {
    // Last run was 30 hours before `now`.
    const thirtyHoursAgo = new Date('2026-06-11T06:00:00.000Z');
    expect(isReportDue(thirtyHoursAgo, 'daily', now)).toBe(true);
  });

  it('uses a 7-day window for weekly', () => {
    // Last run was 3 days before `now`, well inside a week.
    const threeDaysAgo = new Date('2026-06-09T12:00:00.000Z');
    expect(isReportDue(threeDaysAgo, 'weekly', now)).toBe(false);
  });
});
|
||||
68
apps/control/src/services/__tests__/retention.test.ts
Normal file
68
apps/control/src/services/__tests__/retention.test.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { trimCapture, parseCaptureJson } from '../retention.js';
|
||||
|
||||
// trimCapture(capture, sizeKB): null passes through, small captures are
// returned untouched, oversized ones come back shorter.
describe('trimCapture', () => {
  const CAP_KB = 256;

  it('returns null for null input', () => {
    expect(trimCapture(null, CAP_KB)).toBeNull();
  });

  it('returns unchanged capture when within cap', () => {
    const small = JSON.stringify({ data: 'x'.repeat(100) });

    const trimmed = trimCapture(small, CAP_KB);

    expect(trimmed).toBe(small);
  });

  it('trims capture when over cap', () => {
    // A 300,000-character payload, which exceeds 256 * 1024 characters.
    const oversized = JSON.stringify({ data: 'x'.repeat(300_000) });

    const trimmed = trimCapture(oversized, CAP_KB);

    expect(trimmed).not.toBe(oversized);
    expect(trimmed!.length).toBeLessThan(oversized.length);
  });

  it('trims to roughly the cap size', () => {
    // A 1,000,000-character payload.
    const oversized = JSON.stringify({ data: 'x'.repeat(1_000_000) });

    const trimmed = trimCapture(oversized, CAP_KB);

    // The result must not be longer than sizeKB * 1024.
    const upperBound = CAP_KB * 1024;
    expect(trimmed!.length).toBeLessThanOrEqual(upperBound);
  });
});
|
||||
|
||||
// parseCaptureJson: valid JSON text becomes a value, null and unparsable text
// become null. The B7 cases pin how it combines with trimCapture.
describe('parseCaptureJson', () => {
  it('parses valid JSON string into object', () => {
    const capture = { requestHeaders: {}, requestBody: '{}', responseHeaders: {}, responseBody: '{}' };

    const parsed = parseCaptureJson(JSON.stringify(capture));

    expect(parsed).toEqual({ requestHeaders: {}, requestBody: '{}', responseHeaders: {}, responseBody: '{}' });
  });

  it('returns null for null input', () => {
    expect(parseCaptureJson(null)).toBeNull();
  });

  it('returns null for invalid JSON', () => {
    expect(parseCaptureJson('not json')).toBeNull();
  });

  it('B7: trimmed capture produces a JSONB-ready object, not a string', () => {
    // A capture small enough to survive trimming must parse to an object (or
    // array): sql.json() expects one, and a string would be serialized twice.
    const withinCap = JSON.stringify({ requestHeaders: {}, requestBody: '{}', responseBody: '{}' });

    const parsed = parseCaptureJson(withinCap);

    expect(typeof parsed).toBe('object');
    expect(parsed).not.toBeNull();
    const isObjectOrArray = Array.isArray(parsed) || typeof parsed === 'object';
    expect(isObjectOrArray).toBe(true);
  });

  it('B7: oversized capture trims to invalid JSON -> parseCaptureJson returns null -> stored as NULL', () => {
    // Trimming an oversized capture cuts it mid-object, so what is left is no
    // longer valid JSON. The parser then returns null and, per the original
    // author's note, the insert stores NULL::jsonb. That is acceptable: a
    // truncated capture is not useful anyway.
    const raw = JSON.stringify({ data: 'x'.repeat(300_000) });

    const trimmed = trimCapture(raw, 256);
    expect(trimmed).not.toBeNull();

    const parsed = parseCaptureJson(trimmed!);
    expect(parsed).toBeNull();
  });
});
|
||||
57
apps/control/src/services/__tests__/routing-scores.test.ts
Normal file
57
apps/control/src/services/__tests__/routing-scores.test.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { assignBadges, type ModelScore } from '../routing-scores.js';
|
||||
|
||||
/**
 * Builds a ModelScore fixture from a composite id of the form
 * `provider/model`.
 *
 * The text before the first '/' becomes `providerId` and everything after it
 * (further slashes included) becomes `model`. All scores and metrics default
 * to null, `sampleCount` to 0, `healthy` to true and `badges` to an empty
 * array.
 *
 * @param partial - Overrides spread last; `compositeId` is required.
 * @returns The defaults merged with `partial`.
 */
function makeScore(partial: Partial<ModelScore> & { compositeId: string }): ModelScore {
  const segments = partial.compositeId.split('/');
  const defaults = {
    providerId: segments[0]!,
    model: segments.slice(1).join('/'),
    codeScore: null,
    chatScore: null,
    evalScore: null,
    avgGenTps: null,
    avgLatencyMs: null,
    sampleCount: 0,
    healthy: true,
    badges: [],
  };
  return { ...defaults, ...partial };
}
|
||||
|
||||
// assignBadges mutates the `badges` array of each score in place; the tests
// read the badges back from the same objects they passed in.
describe('assignBadges', () => {
  /** Badges of the score with the given composite id (which must exist). */
  const badgesOf = (scores: ModelScore[], compositeId: string) =>
    scores.find((s) => s.compositeId === compositeId)!.badges;

  it('awards best-code to the highest healthy code score', () => {
    const scores = [
      makeScore({ compositeId: 'a/m1', codeScore: 0.7 }),
      makeScore({ compositeId: 'a/m2', codeScore: 0.9 }),
      makeScore({ compositeId: 'a/m3', codeScore: 0.5 }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/m2')).toContain('best-code');
    expect(badgesOf(scores, 'a/m1')).not.toContain('best-code');
  });

  it('excludes unhealthy hosts from winning any badge', () => {
    // The unhealthy entry has the higher score but must not win.
    const scores = [
      makeScore({ compositeId: 'a/m1', codeScore: 0.95, healthy: false }),
      makeScore({ compositeId: 'a/m2', codeScore: 0.6, healthy: true }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/m1')).toHaveLength(0);
    expect(badgesOf(scores, 'a/m2')).toContain('best-code');
  });

  it('awards best-fast by throughput independently of eval scores', () => {
    const scores = [
      makeScore({ compositeId: 'a/slow', codeScore: 0.9, avgGenTps: 10 }),
      makeScore({ compositeId: 'a/fast', codeScore: 0.4, avgGenTps: 80 }),
    ];

    assignBadges(scores);

    expect(badgesOf(scores, 'a/fast')).toContain('best-fast');
    expect(badgesOf(scores, 'a/slow')).toContain('best-code');
  });

  it('awards nothing for a category when no model has that metric', () => {
    // Only throughput is set, so best-fast is the only badge expected.
    const scores = [makeScore({ compositeId: 'a/m1', avgGenTps: 20 })];

    assignBadges(scores);

    expect(scores[0]!.badges).toEqual(['best-fast']);
  });
});
|
||||
130
apps/control/src/services/__tests__/sandbox-runner.test.ts
Normal file
130
apps/control/src/services/__tests__/sandbox-runner.test.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
|
||||
// ─── Sandbox lifecycle tests (mock docker spawn, test orchestration) ─────────
|
||||
|
||||
describe('sandbox runner lifecycle', () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('runCodeEval is importable', async () => {
    const mod = await import('../sandbox-runner.js');
    expect(typeof mod.runCodeEval).toBe('function');
  });

  it('bounded fan-out via Promise.allSettled', async () => {
    // Exercise the bounded-concurrency pattern with MORE tasks than the limit:
    // `concurrency` workers pull from a shared cursor until the list is drained.
    const tasks = Array.from({ length: 10 }, (_, i) => ({ id: `task_${i}` }));
    const concurrency = 4;
    const activeCount: number[] = [];
    const finished: string[] = [];
    let currentlyActive = 0;
    let cursor = 0;

    const worker = async (): Promise<void> => {
      while (cursor < tasks.length) {
        const idx = cursor++;
        const task = tasks[idx]!;
        currentlyActive++;
        activeCount.push(currentlyActive);
        try {
          await new Promise((r) => setTimeout(r, 2 + (idx % 3)));
          finished.push(task.id);
        } finally {
          currentlyActive--;
        }
      }
    };

    const results = await Promise.allSettled(
      Array.from({ length: concurrency }, () => worker()),
    );

    // Every worker should fulfill.
    expect(results.filter((r) => r.status === 'fulfilled').length).toBe(concurrency);
    // Every task ran exactly once, not just the first `concurrency` of them.
    expect([...finished].sort()).toEqual(tasks.map((t) => t.id).sort());
    // Max concurrent should not exceed concurrency limit.
    expect(Math.max(...activeCount)).toBeLessThanOrEqual(concurrency);
  });

  it('per-task finally cleanup runs on error', async () => {
    const cleanupCalls: string[] = [];

    const tasks = [
      { id: 'task_ok' },
      { id: 'task_fail' },
      { id: 'task_ok2' },
    ];

    const results = await Promise.allSettled(
      tasks.map(async (task) => {
        try {
          if (task.id === 'task_fail') {
            throw new Error('simulated failure');
          }
          return { ok: true };
        } finally {
          cleanupCalls.push(task.id);
        }
      }),
    );

    // All cleanup calls should run, even for the failed task.
    expect(cleanupCalls).toContain('task_ok');
    expect(cleanupCalls).toContain('task_fail');
    expect(cleanupCalls).toContain('task_ok2');

    // One rejection, two fulfillments.
    expect(results.filter((r) => r.status === 'fulfilled').length).toBe(2);
    expect(results.filter((r) => r.status === 'rejected').length).toBe(1);
  });

  it('kill-on-timeout pattern', async () => {
    // Test that spawn with timeout + SIGKILL works.
    // NOTE: relies on a `sleep` binary being available on PATH.
    const { spawn } = await import('node:child_process');
    const child = spawn('sleep', ['300']);
    const timeoutHandle = setTimeout(() => {
      child.kill('SIGKILL');
    }, 100);

    const exitSignal = await new Promise<NodeJS.Signals | null>((resolve) => {
      child.on('close', (_code, signal) => {
        clearTimeout(timeoutHandle);
        resolve(signal);
      });
    });

    // SIGKILL gives signal, not exit code.
    expect(child.killed).toBe(true);
    expect(exitSignal).toBe('SIGKILL');
  });

  it('allSettled isolation: one failure does not abort others', async () => {
    const completed: string[] = [];

    const results = await Promise.allSettled([
      (async () => {
        await new Promise((r) => setTimeout(r, 50));
        completed.push('task1');
        return 'ok1';
      })(),
      (async () => {
        await new Promise((r) => setTimeout(r, 20));
        throw new Error('fail');
      })(),
      (async () => {
        await new Promise((r) => setTimeout(r, 50));
        completed.push('task3');
        return 'ok3';
      })(),
    ]);

    // Both successful tasks completed despite the failure.
    expect(completed).toContain('task1');
    expect(completed).toContain('task3');

    expect(results[0].status).toBe('fulfilled');
    expect(results[1].status).toBe('rejected');
    expect(results[2].status).toBe('fulfilled');
  });

  it('pruneOrphanContainers handles missing docker gracefully', async () => {
    // The pruneOrphanContainers function is internal but handles docker errors gracefully.
    // We verify the module loads without error even if docker is not available.
    const mod = await import('../sandbox-runner.js');
    expect(typeof mod.runCodeEval).toBe('function');
  });
});
|
||||
106
apps/control/src/services/__tests__/seq-logic.test.ts
Normal file
106
apps/control/src/services/__tests__/seq-logic.test.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
// Seq logic test: verify the buffer-then-filter rule.
|
||||
// Client buffers pre-snapshot deltas, discards seq <= snapshot_seq per-host.
|
||||
|
||||
/** Incremental fleet update as modelled by this test. */
interface Delta {
  type: 'control_fleet';
  /** Sequence number of the delta envelope; this is what applyDelta compares. */
  seq: number;
  /** Hosts carried by the delta, each with its own sequence number. */
  hosts: { providerId: string; seq: number }[];
}
|
||||
|
||||
/** Full fleet snapshot as modelled by this test (same shape as Delta). */
interface Snapshot {
  type: 'control_fleet';
  /** Sequence number of the snapshot envelope (not read by applySnapshot). */
  seq: number;
  /** Per-host sequence numbers that applySnapshot records as the filter baseline. */
  hosts: { providerId: string; seq: number }[];
}
|
||||
|
||||
/**
 * Decides whether a delta should be applied on top of the last snapshot.
 *
 * Only the FIRST host in the delta is consulted: the delta's envelope `seq`
 * must be strictly greater than the snapshot seq recorded for that host
 * (0 when none is recorded). A delta with no hosts is never applied.
 */
function applyDelta(delta: Delta, snapshotSeqs: Map<string, number>): boolean {
  const [leadHost] = delta.hosts;
  if (!leadHost) {
    return false;
  }

  const baseline = snapshotSeqs.get(leadHost.providerId) ?? 0;
  return baseline < delta.seq;
}
|
||||
|
||||
/**
 * Records each host's seq from the snapshot into `snapshotSeqs`, overwriting
 * any value already stored for that provider id. Hosts absent from the
 * snapshot keep their previous entry.
 */
function applySnapshot(snapshot: Snapshot, snapshotSeqs: Map<string, number>): void {
  snapshot.hosts.forEach(({ providerId, seq }) => {
    snapshotSeqs.set(providerId, seq);
  });
}
|
||||
|
||||
describe('seq logic: buffer-then-filter', () => {
  // Single-host delta for 'host1' whose envelope seq and host seq are both `seq`.
  const host1Delta = (seq: number): Delta => ({
    type: 'control_fleet',
    seq,
    hosts: [{ providerId: 'host1', seq }],
  });

  // Snapshot with envelope seq 0 carrying the given per-host seqs.
  const snapshotOf = (hosts: Snapshot['hosts']): Snapshot => ({
    type: 'control_fleet',
    seq: 0,
    hosts,
  });

  it('applies delta when seq > snapshot seq', () => {
    const known = new Map([['host1', 5]]);
    expect(applyDelta(host1Delta(10), known)).toBe(true);
  });

  it('discards delta when seq <= snapshot seq', () => {
    const known = new Map([['host1', 10]]);
    expect(applyDelta(host1Delta(5), known)).toBe(false);
  });

  it('discards delta when seq equals snapshot seq', () => {
    const known = new Map([['host1', 10]]);
    expect(applyDelta(host1Delta(10), known)).toBe(false);
  });

  it('updates snapshot seqs on snapshot apply', () => {
    const known = new Map<string, number>();

    applySnapshot(
      snapshotOf([
        { providerId: 'host1', seq: 100 },
        { providerId: 'host2', seq: 50 },
      ]),
      known,
    );

    expect(known.get('host1')).toBe(100);
    expect(known.get('host2')).toBe(50);
  });

  it('handles missing snapshot seq (treats as 0)', () => {
    const known = new Map<string, number>();
    // Without a snapshot, seq 1 > 0, so delta applies.
    expect(applyDelta(host1Delta(1), known)).toBe(true);
  });

  it('discards out-of-order delta after snapshot', () => {
    // Simulate: snapshot arrives at seq 10, then delta at seq 5 arrives.
    const known = new Map<string, number>();
    applySnapshot(snapshotOf([{ providerId: 'host1', seq: 10 }]), known);

    expect(applyDelta(host1Delta(5), known)).toBe(false);
  });
});
|
||||
234
apps/control/src/services/__tests__/ssh-config.test.ts
Normal file
234
apps/control/src/services/__tests__/ssh-config.test.ts
Normal file
@@ -0,0 +1,234 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
validateLlamaConfig,
|
||||
computeDiff,
|
||||
backupFilename,
|
||||
applyRemoteConfig,
|
||||
healthWait,
|
||||
type SshExec,
|
||||
type ExecResult,
|
||||
} from '../ssh-config.js';
|
||||
|
||||
// Schema for a single entry under `models`: an object whose optional `cmd`
// must be a string.
const MODEL_ENTRY_SCHEMA = {
  type: 'object',
  properties: { cmd: { type: 'string' } },
} as const;

// Cut-down stand-in for the llama-swap config schema, just enough for these
// tests: the document is an object that must carry a non-empty `models` map.
const SCHEMA = {
  type: 'object',
  required: ['models'],
  properties: {
    models: {
      type: 'object',
      minProperties: 1,
      additionalProperties: MODEL_ENTRY_SCHEMA,
    },
  },
} as const;

// Config text used wherever a test needs input that passes validation.
const VALID_YAML = `models:\n m1:\n cmd: "llama-server -m m1.gguf"\n`;
|
||||
|
||||
describe('validateLlamaConfig', () => {
  it('accepts a valid config', () => {
    const verdict = validateLlamaConfig(VALID_YAML, SCHEMA);

    expect(verdict.valid).toBe(true);
    expect(verdict.errors).toEqual([]);
  });

  it('rejects broken YAML with a parse error', () => {
    // Unterminated double-quoted scalar followed by stray colons.
    const brokenYaml = 'models:\n m1:\n cmd: "x\n : :';
    const verdict = validateLlamaConfig(brokenYaml, SCHEMA);

    expect(verdict.valid).toBe(false);
    expect(verdict.errors[0]).toMatch(/YAML parse error/);
  });

  it('rejects a config missing required models', () => {
    const verdict = validateLlamaConfig('healthCheckTimeout: 30\n', SCHEMA);

    expect(verdict.valid).toBe(false);
    // The missing key should be named somewhere in the reported errors.
    expect(verdict.errors.join(' ')).toMatch(/models/);
  });

  it('rejects a non-mapping document', () => {
    const verdict = validateLlamaConfig('- just\n- a\n- list\n', SCHEMA);

    expect(verdict.valid).toBe(false);
  });
});
|
||||
|
||||
describe('computeDiff', () => {
  it('returns empty for identical text', () => {
    const same = 'a\nb\n';
    expect(computeDiff(same, same)).toBe('');
  });

  it('marks changed lines with -/+', () => {
    const before = 'a\nb\nc\n';
    const after = 'a\nX\nc\n';

    const diffText = computeDiff(before, after);

    // The replaced line shows up once as removed and once as added.
    expect(diffText).toContain('- b');
    expect(diffText).toContain('+ X');
  });
});
|
||||
|
||||
describe('backupFilename', () => {
  it('produces a timestamped path', () => {
    // Milliseconds in the input are expected to be dropped from the suffix.
    const at = new Date('2026-06-12T03:04:05.678Z');

    const backupPath = backupFilename('/etc/llama/config.yaml', at);

    expect(backupPath).toBe('/etc/llama/config.yaml.bak-20260612T030405Z');
  });
});
|
||||
|
||||
// ─── apply pipeline failure paths ────────────────────────────────────────────
|
||||
|
||||
/**
 * Builds a fake SshExec for the apply-pipeline tests.
 *
 * Every command is appended to `calls`. The returned result is the value of
 * the first `handlers` key (in insertion order) that occurs as a substring of
 * the command; commands matching no key succeed with empty output.
 */
function makeExec(handlers: Record<string, ExecResult>): { exec: SshExec; calls: string[] } {
  const calls: string[] = [];

  const exec: SshExec = async (_target, command) => {
    calls.push(command);

    const matchedPattern = Object.keys(handlers).find((pattern) => command.includes(pattern));
    if (matchedPattern !== undefined) {
      return handlers[matchedPattern]!;
    }
    return { code: 0, stdout: '', stderr: '' };
  };

  return { exec, calls };
}
|
||||
|
||||
// SSH target handed to every applyRemoteConfig call; the fake exec ignores it.
const target = {
  host: 'h',
  user: 'u',
  keyPath: '/k',
};

// Health-check stub: every request resolves to HTTP 200 with body '{}'.
async function respondOk(): Promise<Response> {
  return new Response('{}', { status: 200 });
}
const okFetcher = respondOk as unknown as typeof fetch;
|
||||
|
||||
describe('applyRemoteConfig', () => {
  // Canned exec results, built fresh per use so no test shares an object with another.
  const ok = (): ExecResult => ({ code: 0, stdout: '', stderr: '' });
  const fail = (stderr: string): ExecResult => ({ code: 1, stdout: '', stderr });
  const currentConfig = (): ExecResult => ({ code: 0, stdout: 'models:\n old: {}\n', stderr: '' });

  // Options that are identical in every case below.
  const common = { target, configPath: '/c.yaml', schema: SCHEMA, baseUrl: 'http://h' };

  it('aborts at validate for an invalid config and never touches the host', async () => {
    const { exec, calls } = makeExec({});

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart',
      newConfig: 'not: valid: yaml: here:::',
      exec,
      fetcher: okFetcher,
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('validate');
    expect(calls).toHaveLength(0);
  });

  it('aborts at validate when the host config is unreadable', async () => {
    const { exec } = makeExec({ "cat '": fail('no such file') });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart',
      newConfig: VALID_YAML,
      exec,
      fetcher: okFetcher,
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('validate');
    expect(outcome.error).toMatch(/read current failed/);
  });

  it('backs up BEFORE write and aborts on write failure (backup retained)', async () => {
    const { exec, calls } = makeExec({
      "cat '": currentConfig(), // read current
      'cp ': ok(), // backup
      'cat >': fail('disk full'), // write fails
    });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart',
      newConfig: VALID_YAML,
      exec,
      fetcher: okFetcher,
      now: new Date('2026-06-12T00:00:00Z'),
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('write');
    expect(outcome.backupPath).toBe('/c.yaml.bak-20260612T000000Z');

    // backup (cp) must precede write (cat >)
    const backupAt = calls.findIndex((c) => c.startsWith('cp '));
    const writeAt = calls.findIndex((c) => c.startsWith('cat >'));
    expect(backupAt).toBeGreaterThanOrEqual(0);
    expect(writeAt).toBeGreaterThan(backupAt);
  });

  it('aborts at restart on restart failure', async () => {
    const { exec } = makeExec({
      "cat '": currentConfig(),
      'cp ': ok(),
      'cat >': ok(),
      restart: fail('service not found'),
    });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart-svc',
      newConfig: VALID_YAML,
      exec,
      fetcher: okFetcher,
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('restart');
  });

  it('aborts at health when the service never comes back', async () => {
    const { exec } = makeExec({
      "cat '": currentConfig(),
      'cp ': ok(),
      'cat >': ok(),
      'restart-svc': ok(),
    });
    // Every health probe rejects, as if the port were closed.
    const downFetcher = (async () => { throw new Error('refused'); }) as unknown as typeof fetch;

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart-svc',
      newConfig: VALID_YAML,
      exec,
      fetcher: downFetcher,
      healthAttempts: 2,
      healthDelayMs: 1,
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('health');
  });

  it('succeeds through the full pipeline', async () => {
    const { exec } = makeExec({
      "cat '": currentConfig(),
      'cp ': ok(),
      'cat >': ok(),
      'restart-svc': ok(),
    });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'restart-svc',
      newConfig: VALID_YAML,
      exec,
      fetcher: okFetcher,
      healthAttempts: 1,
      healthDelayMs: 1,
    });

    expect(outcome.ok).toBe(true);
    expect(outcome.step).toBe('done');
    expect(outcome.backupPath).toBeDefined();
  });
});
|
||||
|
||||
describe('healthWait', () => {
  it('returns true on first OK', async () => {
    const healthy = await healthWait('http://h', okFetcher, 3, 1);

    expect(healthy).toBe(true);
  });

  it('returns false after exhausting attempts', async () => {
    // Always answers 503, so every one of the 2 attempts fails.
    const downFetcher = (async () => new Response('', { status: 503 })) as unknown as typeof fetch;

    const healthy = await healthWait('http://h', downFetcher, 2, 1);

    expect(healthy).toBe(false);
  });
});
|
||||
|
||||
// ─── wrapper mode (forced-command verbs) ─────────────────────────────────────
|
||||
|
||||
describe('applyRemoteConfig wrapper mode', () => {
  // Path the fake wrapper reports for its backup (note the trailing newline on stdout).
  const backupReply = (): ExecResult => ({ code: 0, stdout: '/c.yaml.bak-WRAP\n', stderr: '' });
  const ok = (): ExecResult => ({ code: 0, stdout: '', stderr: '' });

  // Options shared by both wrapper-mode cases.
  const common = {
    target,
    configPath: '/c.yaml',
    newConfig: VALID_YAML,
    schema: SCHEMA,
    baseUrl: 'http://h',
    fetcher: okFetcher,
    mode: 'wrapper' as const,
  };

  it('sends verbs (not raw shell) and reads the backup path from the backup verb', async () => {
    const { exec, calls } = makeExec({
      read: { code: 0, stdout: 'models:\n old: {}\n', stderr: '' },
      backup: backupReply(),
      write: ok(),
      restart: ok(),
    });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'ignored-in-wrapper',
      exec,
      healthAttempts: 1,
      healthDelayMs: 1,
    });

    expect(outcome.ok).toBe(true);
    // backup path comes from the wrapper's stdout, not a client-computed name
    expect(outcome.backupPath).toBe('/c.yaml.bak-WRAP');
    // verbs only — no cat/cp/cat > shell commands
    expect(calls).toEqual(['read', 'backup', 'write', 'restart']);
    expect(calls.some((c) => c.includes('cat') || c.includes('cp '))).toBe(false);
  });

  it('aborts at write when the wrapper write verb fails (backup retained)', async () => {
    const { exec } = makeExec({
      read: { code: 0, stdout: 'old\n', stderr: '' },
      backup: backupReply(),
      write: { code: 1, stdout: '', stderr: 'denied' },
    });

    const outcome = await applyRemoteConfig({
      ...common,
      restartCmd: 'x',
      exec,
    });

    expect(outcome.ok).toBe(false);
    expect(outcome.step).toBe('write');
    expect(outcome.backupPath).toBe('/c.yaml.bak-WRAP');
  });
});
|
||||
236
apps/control/src/services/action-queue.ts
Normal file
236
apps/control/src/services/action-queue.ts
Normal file
@@ -0,0 +1,236 @@
|
||||
/**
|
||||
* Per-host FIFO action queue.
|
||||
*
|
||||
* All host-mutating actions (warm, unload) from BooControl serialize through
|
||||
* a single FIFO queue per provider_id. Queue discipline:
|
||||
*
|
||||
* - Submissions rejected immediately while host liveness is 'down'
|
||||
* - Queue depth capped at 4; reject-on-full includes pending queue contents
|
||||
* - Each action re-checks liveness on dequeue and skips if stale
|
||||
* - Unload-during-bench returns 409 {error: 'bench in progress', requiresConfirmation: true}
|
||||
*
|
||||
* Pattern: arena-runner.ts advanceChain promise-chain + read-fresh-state-or-skip.
|
||||
*/
|
||||
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
/** Kinds of host-mutating action the queue accepts: load ("warm") or unload a model. */
export type ActionType = 'warm' | 'unload';
|
||||
|
||||
/** A host-mutating action as submitted to the queue. */
export interface QueuedAction {
  /** Caller-supplied identifier; included in the failure log entry. */
  actionId: string;
  type: ActionType;
  /** Host whose queue the action is placed on. */
  providerId: string;
  /**
   * For 'warm': the model to load (required at execution time).
   * For 'unload': a specific model, or undefined to unload all.
   */
  model?: string;
  /** True if the client confirmed takeover; lets an unload bypass the in-flight check. */
  confirmed: boolean;
  createdAt: Date;
}
|
||||
|
||||
/** A queued action together with its processing state. */
export interface ActionQueueEntry {
  action: QueuedAction;
  /** Lifecycle: 'pending' on enqueue, 'running' when picked up, then a terminal state. */
  status: 'pending' | 'running' | 'completed' | 'failed' | 'skipped';
  /** Failure message, or the reason the entry was skipped. */
  error?: string;
  /** When the entry was accepted by submit(). */
  enqueuedAt: Date;
}
|
||||
|
||||
/** Per-host queue state as returned by ActionQueue.getState(). */
export interface ActionQueueState {
  /** Entries in FIFO order; the entry being executed stays at index 0 until it finishes. */
  queue: ActionQueueEntry[];
  /** True while the processor is working through this host's queue. */
  running: boolean;
}
|
||||
|
||||
/** Per-host collaborators handed to ActionQueue.registerHost(). */
export interface ActionQueueDeps {
  /** Base URL the warm/unload HTTP requests are sent to. */
  baseUrl: string;
  /** Liveness probe; false rejects submissions and skips entries on dequeue. */
  isLivenessUp: () => boolean;
  /** Number of in-flight requests; a value > 0 blocks an unconfirmed unload. */
  isInflightRequests: () => number;
  /** Logger used to report failed actions. */
  log: FastifyBaseLogger;
}
|
||||
|
||||
// Maximum entries per host queue. The entry currently executing still sits in
// the queue, so it counts toward this cap.
const MAX_QUEUE_DEPTH = 4;
|
||||
|
||||
/**
 * Per-host FIFO queue for host-mutating actions (warm / unload).
 *
 * Each registered host gets its own queue; entries run strictly one at a time
 * in submission order. An entry stays at the head of the queue while it is
 * executing and is removed once it completes, fails or is skipped.
 */
export class ActionQueue {
  private queues: Map<string, ActionQueueState> = new Map();
  private depsMap: Map<string, ActionQueueDeps> = new Map();
  /** Stale-warm guard; answers "never loaded" until replaced via setModelLoadedCheck. */
  private modelLoadedCheck: (providerId: string, model: string | undefined) => boolean = () => false;

  /**
   * Register (or re-register) a host. Re-registering replaces the stored deps
   * but keeps an existing queue intact.
   */
  registerHost(providerId: string, deps: ActionQueueDeps): void {
    this.depsMap.set(providerId, deps);
    if (!this.queues.has(providerId)) {
      this.queues.set(providerId, { queue: [], running: false });
    }
  }

  /**
   * Submit an action to the per-host queue.
   *
   * Rejections, checked in this order: unknown host, host offline, unconfirmed
   * unload while requests are in flight (`requiresConfirmation: true`), queue
   * full (`pending` lists the queued actions). On acceptance the processor is
   * kicked and `{ ok: true }` is returned without waiting for execution.
   */
  submit(action: QueuedAction): { ok: true } | { ok: false; error: string; pending?: QueuedAction[]; requiresConfirmation?: boolean } {
    const deps = this.depsMap.get(action.providerId);
    if (!deps) {
      return { ok: false, error: `unknown host: ${action.providerId}` };
    }

    // Reject if host is down
    if (!deps.isLivenessUp()) {
      return { ok: false, error: 'host offline' };
    }

    const state = this.queues.get(action.providerId);
    if (!state) {
      return { ok: false, error: `queue not initialized for ${action.providerId}` };
    }

    // An unconfirmed unload must not pull a model out from under in-flight requests.
    if (action.type === 'unload' && !action.confirmed && deps.isInflightRequests() > 0) {
      return {
        ok: false,
        error: 'bench in progress',
        requiresConfirmation: true,
      };
    }

    // Depth cap
    if (state.queue.length >= MAX_QUEUE_DEPTH) {
      return {
        ok: false,
        error: `queue full (${state.queue.length}/${MAX_QUEUE_DEPTH})`,
        pending: state.queue.map((e) => e.action),
      };
    }

    state.queue.push({
      action,
      status: 'pending',
      enqueuedAt: new Date(),
    });

    // Kick the processor (no-op if it is already draining this queue).
    void this.processNext(action.providerId, deps);
    return { ok: true };
  }

  /**
   * Get the current queue state for a host, or null if the host was never registered.
   */
  getState(providerId: string): ActionQueueState | null {
    return this.queues.get(providerId) ?? null;
  }

  /**
   * Drain the queue for a host, one entry at a time.
   *
   * Returns immediately when the host is unknown, already being drained, or
   * has nothing queued. Entries submitted while an action is executing are
   * picked up by the same drain. `running` is always reset and the head entry
   * always removed, even if handling an entry throws.
   */
  private async processNext(providerId: string, deps: ActionQueueDeps): Promise<void> {
    const state = this.queues.get(providerId);
    if (!state || state.running || state.queue.length === 0) return;

    state.running = true;
    try {
      for (let entry = state.queue[0]; entry !== undefined; entry = state.queue[0]) {
        try {
          await this.runEntry(entry, deps);
        } finally {
          state.queue.shift();
        }
      }
    } finally {
      state.running = false;
    }
  }

  /**
   * Run a single entry and record its terminal status on the entry:
   * 'skipped' (stale), 'completed', or 'failed' (logged via deps.log).
   */
  private async runEntry(entry: ActionQueueEntry, deps: ActionQueueDeps): Promise<void> {
    entry.status = 'running';

    try {
      // Re-check liveness on dequeue — skip stale actions
      if (!deps.isLivenessUp()) {
        entry.status = 'skipped';
        entry.error = 'host went down during queue wait';
        return;
      }

      // Re-check if action is still valid (stale warm after model loaded, etc.)
      if (entry.action.type === 'warm' && this.isModelAlreadyLoaded(entry.action.providerId, entry.action.model)) {
        entry.status = 'skipped';
        entry.error = 'model already loaded';
        return;
      }

      await this.executeAction(entry.action, deps);
      entry.status = 'completed';
    } catch (err) {
      entry.status = 'failed';
      entry.error = err instanceof Error ? err.message : String(err);
      deps.log.error({ actionId: entry.action.actionId, err: entry.error }, 'action: failed');
    }
  }

  /**
   * Perform the HTTP call for an action against `deps.baseUrl`.
   *
   * @throws Error when a warm action has no model, when the request fails or
   *   times out (60s warm, 30s unload), or when the response status is not OK.
   */
  private async executeAction(action: QueuedAction, deps: ActionQueueDeps): Promise<void> {
    const baseUrl = deps.baseUrl;

    switch (action.type) {
      case 'warm': {
        // 1-token POST /v1/chat/completions with bare wire ID
        if (!action.model) {
          throw new Error('warm action requires model');
        }
        const res = await fetch(`${baseUrl}/v1/chat/completions`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            model: action.model,
            // The chat-completions endpoint takes `messages`, not a bare `prompt`.
            messages: [{ role: 'user', content: '.' }],
            max_tokens: 1,
            stream: false,
          }),
          signal: AbortSignal.timeout(60_000),
        });
        if (!res.ok) {
          const body = await res.text().catch(() => '');
          throw new Error(`warm failed: ${res.status} ${body.slice(0, 200)}`);
        }
        break;
      }

      case 'unload': {
        // A specific model gets its own path segment; no model means unload everything.
        const url = action.model
          ? `${baseUrl}/api/models/unload/${encodeURIComponent(action.model)}`
          : `${baseUrl}/api/models/unload`;
        const res = await fetch(url, {
          method: 'POST',
          signal: AbortSignal.timeout(30_000),
        });
        if (!res.ok) {
          const body = await res.text().catch(() => '');
          throw new Error(`unload failed: ${res.status} ${body.slice(0, 200)}`);
        }
        break;
      }
    }
  }

  /**
   * Check if a model is already loaded on the host (stale-action guard).
   * Delegates to the callback installed via setModelLoadedCheck; until one is
   * installed this always answers false.
   */
  private isModelAlreadyLoaded(providerId: string, model: string | undefined): boolean {
    return this.modelLoadedCheck(providerId, model);
  }

  /**
   * Set the model-loaded check callback (wired from index.ts).
   */
  setModelLoadedCheck(fn: (providerId: string, model: string | undefined) => boolean): void {
    this.modelLoadedCheck = fn;
  }
}
|
||||
517
apps/control/src/services/bench-engine.ts
Normal file
517
apps/control/src/services/bench-engine.ts
Normal file
@@ -0,0 +1,517 @@
|
||||
/**
|
||||
* Bench engine: speed benchmark runner.
|
||||
*
|
||||
* Suite = grid of (prompt_tokens x gen_tokens x concurrency) x repetitions.
|
||||
* TTFT measured client-side at first stream delta.
|
||||
* llama.cpp timings parsed from final stream chunk.
|
||||
* Bounded fan-out via Promise.allSettled at suite-declared concurrency.
|
||||
* Warmup excluded from results.
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { jsonbObject } from './jsonb.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * A speed-benchmark suite: the grid of
 * (promptTokens x genTokens x concurrency), each cell repeated `repetitions` times.
 */
export interface BenchSuite {
  id: string;
  name: string;
  /** Host the suite targets. */
  providerId: string;
  /** Model the suite targets. */
  model: string;
  /** Prompt-length axis of the grid, in tokens. */
  promptTokens: number[];
  /** Generation-length axis of the grid, in tokens. */
  genTokens: number[];
  /** Concurrency axis of the grid. */
  concurrency: number[];
  /** How many times each grid cell is run. */
  repetitions: number;
  /** Optional sampling temperature for the suite's requests. */
  temperature?: number;
  /** Optional nucleus-sampling top_p for the suite's requests. */
  topP?: number;
  /** Free-form extra data attached to the suite. */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/** Inputs for running one suite against one endpoint. */
export interface BenchRunParams {
  suite: BenchSuite;
  /** Base URL of the server under test. */
  baseUrl: string;
  // NOTE(review): presumably these override the suite's own temperature/topP — confirm in the runner.
  temperature?: number;
  topP?: number;
}
|
||||
|
||||
/** Server-reported timings, as extracted by parseLlamaTimings. */
export interface BenchTimings {
  /** From `timings.prompt_per_second`; 0 when absent. */
  promptPerSecond: number;
  /** From `timings.predicted_per_second`; 0 when absent. */
  predictedPerSecond: number;
  /** From `timings.cache_n`; 0 when absent. */
  cacheN: number;
}
|
||||
|
||||
/** Result of one bench request. Measurements stay null when they could not be taken. */
export interface BenchSample {
  /** Requested prompt length, in tokens. */
  promptTokens: number;
  /** Requested generation length (sent as max_tokens). */
  genTokens: number;
  /** Concurrency level the sample was taken at. */
  concurrency: number;
  /** Repetition index within the grid cell. */
  repetition: number;
  /** Client-side time to first stream line, in ms. */
  ttftMs: number | null;
  /** Client-side wall time for the whole request, in ms. */
  totalMs: number | null;
  /** Server-reported prompt tokens/second. */
  promptTps: number | null;
  /** Server-reported generated tokens/second. */
  genTps: number | null;
  /** Server-reported cache_n. */
  cacheN: number | null;
  /** Error message when the request failed, otherwise null. */
  error: string | null;
}
|
||||
|
||||
// ─── stream parser ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Extract llama.cpp timings from one chunk of a streaming response.
 *
 * Accepts either a raw JSON string or an SSE line carrying the `data: ` prefix.
 *
 * - A chunk with a truthy `timings` field yields its `prompt_per_second`,
 *   `predicted_per_second` and `cache_n`, each defaulting to 0 when absent.
 * - A chunk with no `timings` but a truthy `usage` field yields all zeros.
 * - `[DONE]`, unparseable input, and chunks with neither field yield null.
 *
 * Never throws.
 */
export function parseLlamaTimings(chunk: string): BenchTimings | null {
  const ssePrefix = 'data: ';

  try {
    const payload = chunk.startsWith(ssePrefix) ? chunk.slice(ssePrefix.length) : chunk;
    if (payload.trim() === '[DONE]') {
      return null;
    }

    const doc = JSON.parse(payload) as Record<string, unknown>;

    // Preferred source: the llama.cpp `timings` object.
    const reported = doc.timings as
      | { prompt_per_second?: number; predicted_per_second?: number; cache_n?: number }
      | undefined;
    if (reported) {
      const { prompt_per_second, predicted_per_second, cache_n } = reported;
      return {
        promptPerSecond: prompt_per_second ?? 0,
        predictedPerSecond: predicted_per_second ?? 0,
        cacheN: cache_n ?? 0,
      };
    }

    // Fallback: a `usage` object carries no rates, so report zeros.
    return doc.usage ? { promptPerSecond: 0, predictedPerSecond: 0, cacheN: 0 } : null;
  } catch {
    // Covers invalid JSON as well as JSON that is not an object (e.g. `null`).
    return null;
  }
}
|
||||
|
||||
// ─── single request runner ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run a single bench request: stream a chat completion, capture TTFT, parse timings.
 *
 * Never rejects: any failure (HTTP error, missing body, timeout after 120s,
 * network error) is reported through `sample.error`, with the measurement
 * fields left null.
 *
 * @param baseUrl      Server base URL; the request goes to `/v1/chat/completions`.
 * @param model        Model id sent in the request body.
 * @param promptTokens Target prompt length, passed to generatePrompt.
 * @param genTokens    Sent as `max_tokens`.
 * @param repetition   Repetition index, copied onto the sample.
 * @param temperature  Sampling temperature (default 0.7).
 * @param topP         Nucleus sampling top_p (default 0.9).
 * @returns A BenchSample. `concurrency` is always 1 here; the fan-out caller sets it.
 */
export async function runSingleBenchRequest(
  baseUrl: string,
  model: string,
  promptTokens: number,
  genTokens: number,
  repetition: number,
  temperature: number = 0.7,
  topP: number = 0.9,
): Promise<BenchSample> {
  const sample: BenchSample = {
    promptTokens,
    genTokens,
    concurrency: 1, // set by the fan-out caller
    repetition,
    ttftMs: null,
    totalMs: null,
    promptTps: null,
    genTps: null,
    cacheN: null,
    error: null,
  };

  // Generate a deterministic prompt of the target length.
  const prompt = generatePrompt(promptTokens);

  const startTime = Date.now();
  let firstDeltaTime: number | null = null;
  let timings: BenchTimings | null = null;

  try {
    const res = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [{ role: 'user', content: prompt }],
        temperature,
        top_p: topP,
        max_tokens: genTokens,
        stream: true,
      }),
      signal: AbortSignal.timeout(120_000),
    });

    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`bench request failed: ${res.status} ${errBody.slice(0, 200)}`);
    }

    const reader = res.body?.getReader();
    if (!reader) {
      throw new Error('no response body');
    }

    const decoder = new TextDecoder();
    let buffer = '';
    let streamEnded = false;

    while (!streamEnded) {
      const { done, value } = await reader.read();
      streamEnded = done;

      // At end of stream, flush the decoder so buffered bytes are not lost.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // Mid-stream the last piece may be a partial line, so hold it back. At
      // end of stream nothing more is coming, so process it too — otherwise a
      // final chunk without a trailing newline (which may carry the timings)
      // would be dropped.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed || trimmed === 'data: [DONE]') continue;
        // SSE comment / keep-alive lines start with ':' and carry no delta.
        if (trimmed.startsWith(':')) continue;

        // TTFT: capture at first delta
        if (firstDeltaTime === null) {
          firstDeltaTime = Date.now();
        }

        // Keep the most recent timings seen; llama.cpp sends them in the final chunk.
        const t = parseLlamaTimings(trimmed);
        if (t) {
          timings = t;
        }
      }
    }

    sample.ttftMs = firstDeltaTime !== null ? firstDeltaTime - startTime : null;
    sample.totalMs = Date.now() - startTime;

    if (timings) {
      sample.promptTps = timings.promptPerSecond;
      sample.genTps = timings.predictedPerSecond;
      sample.cacheN = timings.cacheN;
    }
  } catch (err) {
    sample.error = err instanceof Error ? err.message : String(err);
  }

  return sample;
}
|
||||
|
||||
/**
 * Generate a deterministic prompt with approximately the target token count.
 *
 * Sizing assumes ~4 characters per token (a rough average for English text),
 * so the result is a repeated pangram cut to `targetTokens * 4` characters.
 * Non-positive or NaN targets produce an empty string.
 */
function generatePrompt(targetTokens: number): string {
  const charsPerToken = 4;
  const targetChars = targetTokens * charsPerToken;
  const base = 'The quick brown fox jumps over the lazy dog. ';

  // `!(x > 0)` also catches NaN, which `x <= 0` would let through.
  if (!(targetChars > 0)) return '';

  return base.repeat(Math.ceil(targetChars / base.length)).slice(0, targetChars);
}
|
||||
|
||||
// ─── bench runner ───────────────────────────────────────────────────────────
|
||||
|
||||
/** Progress snapshot passed to the `onProgress` callback of `runBenchSuite`. */
export interface BenchRunProgress {
  /** Identifier of the bench run this snapshot belongs to. */
  jobId: string;
  /** Number of grid entries the run will execute. */
  totalSamples: number;
  /** Number of requests that have settled so far. */
  completedSamples: number;
  /** Prompt size of the grid entry being reported. */
  currentPromptTokens: number;
  /** Generation size of the grid entry being reported. */
  currentGenTokens: number;
  /** Concurrency level of the grid entry being reported. */
  currentConcurrency: number;
  /** Repetition index of the grid entry being reported. */
  currentRepetition: number;
}
|
||||
|
||||
/**
 * Run a full bench suite over the grid of every
 * (promptTokens x genTokens x concurrency x repetition) combination.
 *
 * Flow: insert a `bench_runs` row, publish a `running` event, fire one
 * discarded warmup request per unique (promptTokens, genTokens) cell, run the
 * grid in batches via Promise.allSettled, persist every sample, compute
 * aggregates, compare them with the stored baseline for (providerId, model),
 * overwrite that baseline, mark the run completed and publish the result.
 *
 * @param params - Suite definition plus the target `baseUrl` and optional
 *   fallback sampling params.
 * @param sql - Database handle used for all persistence.
 * @param emitter - Receives `control_job` events for start, progress, completion.
 * @param seq - Sequence value copied onto every published event.
 * @param onProgress - Invoked once per settled request.
 */
export async function runBenchSuite(
  params: BenchRunParams,
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  onProgress: (progress: BenchRunProgress) => void,
): Promise<void> {
  const { suite, baseUrl } = params;

  // A4: suite-defined sampling params with fallback defaults.
  const temperature = suite.temperature ?? params.temperature ?? 0.7;
  const topP = suite.topP ?? params.topP ?? 0.9;
  const jobId = suite.id;

  // Build the full grid of combinations.
  type GridItem = {
    promptTokens: number;
    genTokens: number;
    concurrency: number;
    repetition: number;
  };
  const grid: GridItem[] = [];

  for (const pt of suite.promptTokens) {
    for (const gt of suite.genTokens) {
      for (const conc of suite.concurrency) {
        for (let rep = 0; rep < suite.repetitions; rep++) {
          grid.push({ promptTokens: pt, genTokens: gt, concurrency: conc, repetition: rep });
        }
      }
    }
  }

  const totalSamples = grid.length;

  // Persist the run record with jobType (A2) and sampling params (A4).
  const runId = `${jobId}_${Date.now()}`;
  await sql`
    INSERT INTO bench_runs (id, suite_id, job_type, status, started_at, total_samples, temperature, top_p)
    VALUES (${runId}, ${suite.id}, 'bench', 'running', clock_timestamp(), ${totalSamples}, ${temperature}, ${topP})
  `;

  // Publish run started.
  emitter.publish({
    type: 'control_job' as const,
    seq,
    jobType: 'bench' as const,
    jobId: runId,
    status: 'running' as const,
    detail: {
      suiteId: suite.id,
      providerId: suite.providerId,
      model: suite.model,
      totalSamples,
    },
  });

  // A5: Warmup pass — 1 request per unique (promptTokens, genTokens) cell, discarded.
  const warmupCells = new Map<string, { promptTokens: number; genTokens: number }>();
  for (const item of grid) {
    const cellKey = `${item.promptTokens}_${item.genTokens}`;
    if (!warmupCells.has(cellKey)) {
      warmupCells.set(cellKey, { promptTokens: item.promptTokens, genTokens: item.genTokens });
    }
  }
  await Promise.allSettled(
    Array.from(warmupCells.values(), (cell) =>
      runSingleBenchRequest(baseUrl, suite.model, cell.promptTokens, cell.genTokens, 0, temperature, topP),
    ),
  );

  let completed = 0;
  const samples: BenchSample[] = [];

  // Group by (promptTokens, genTokens, concurrency); the members of a group
  // differ only in repetition index and are run in batches of up to
  // `concurrency` requests.
  // NOTE(review): when repetitions < concurrency the effective parallelism is
  // capped at `repetitions` — confirm this is intended.
  const groups = new Map<string, GridItem[]>();
  for (const item of grid) {
    const key = `${item.promptTokens}_${item.genTokens}_${item.concurrency}`;
    const members = groups.get(key);
    if (members) {
      members.push(item);
    } else {
      groups.set(key, [item]);
    }
  }

  for (const group of groups.values()) {
    const requested = group[0]!.concurrency;
    // Clamp to at least 1: a batch size of 0 (or less) would never advance
    // the loop below, and NaN would silently skip the whole group.
    const batchSize = Math.max(
      1,
      Math.min(Number.isFinite(requested) ? Math.floor(requested) : 1, group.length),
    );

    // Process in batches of 'concurrency' size using Promise.allSettled.
    for (let batchStart = 0; batchStart < group.length; batchStart += batchSize) {
      const batch = group.slice(batchStart, batchStart + batchSize);

      const results = await Promise.allSettled(
        batch.map(async (item) => {
          const sample = await runSingleBenchRequest(
            baseUrl,
            suite.model,
            item.promptTokens,
            item.genTokens,
            item.repetition,
            temperature,
            topP,
          );
          sample.concurrency = item.concurrency;
          return sample;
        }),
      );

      for (let i = 0; i < results.length; i++) {
        const result = results[i]!;
        // Report the item this result belongs to, not always the batch head.
        const current = batch[i]!;

        if (result.status === 'fulfilled') {
          samples.push(result.value);
        } else {
          // Record the failure instead of dropping the sample silently.
          const reason: unknown = result.reason;
          samples.push({
            promptTokens: current.promptTokens,
            genTokens: current.genTokens,
            concurrency: current.concurrency,
            repetition: current.repetition,
            ttftMs: null,
            totalMs: null,
            promptTps: null,
            genTps: null,
            cacheN: null,
            error: reason instanceof Error ? reason.message : String(reason),
          });
        }
        completed++;

        // Progress callback
        onProgress({
          jobId: runId,
          totalSamples,
          completedSamples: completed,
          currentPromptTokens: current.promptTokens,
          currentGenTokens: current.genTokens,
          currentConcurrency: current.concurrency,
          currentRepetition: current.repetition,
        });

        // Publish progress
        emitter.publish({
          type: 'control_job' as const,
          seq,
          jobType: 'bench' as const,
          jobId: runId,
          status: 'running' as const,
          detail: {
            completedSamples: completed,
            totalSamples,
            percent: Math.round((completed / totalSamples) * 100),
          },
        });
      }
    }
  }

  // Persist all samples.
  for (const s of samples) {
    await sql`
      INSERT INTO bench_samples (run_id, prompt_tokens, gen_tokens, concurrency, repetition, ttft_ms, total_ms, prompt_tps, gen_tps, cache_n, error)
      VALUES (${runId}, ${s.promptTokens}, ${s.genTokens}, ${s.concurrency}, ${s.repetition}, ${s.ttftMs ?? null}, ${s.totalMs ?? null}, ${s.promptTps ?? null}, ${s.genTps ?? null}, ${s.cacheN ?? null}, ${s.error ?? null})
    `;
  }

  // Compute aggregates. Statistics use only samples that produced timings,
  // but the counts cover every sample: counting errors on the pre-filtered
  // list would always report zero.
  const validSamples = samples.filter((s) => !s.error && s.genTps != null);
  const aggregate: BenchAggregate = {
    ...computeAggregates(validSamples),
    totalSamples: samples.length,
    errorSamples: samples.filter((s) => s.error).length,
  };

  // A1: Baseline persistence + regression flag.
  // Compare against existing baseline; first run seeds it.
  const baselineRows = await sql<{ aggregate: string }[]>`
    SELECT aggregate FROM bench_baselines
    WHERE provider_id = ${suite.providerId} AND model = ${suite.model}
  `;

  const regressionFlag = computeRegressionFlag(aggregate, baselineRows[0]?.aggregate);

  // Upsert baseline.
  // NOTE(review): every run overwrites the baseline, including runs with no
  // valid samples — confirm that is the intended "baseline" semantics.
  await sql`
    INSERT INTO bench_baselines (provider_id, model, aggregate, run_id)
    VALUES (${suite.providerId}, ${suite.model}, ${sql.json(aggregate as never)}, ${runId})
    ON CONFLICT (provider_id, model) DO UPDATE SET
      aggregate = EXCLUDED.aggregate,
      run_id = EXCLUDED.run_id,
      created_at = clock_timestamp()
  `;

  // Update run record with regression flag.
  await sql`
    UPDATE bench_runs
    SET status = 'completed', finished_at = clock_timestamp(), completed_samples = ${completed},
        aggregate = ${sql.json(aggregate as never)}, regression_flag = ${regressionFlag}
    WHERE id = ${runId}
  `;

  // Publish completion.
  emitter.publish({
    type: 'control_job' as const,
    seq,
    jobType: 'bench' as const,
    jobId: runId,
    status: 'completed' as const,
    detail: { ...aggregate, regressionFlag },
  });
}
|
||||
|
||||
/**
 * A1: Classify the current aggregate against the stored baseline using the
 * relative change in average generation tok/s.
 *
 * - below -10%  => 'regression'
 * - above +5%   => 'improvement'
 * - otherwise   => 'baseline'
 *
 * N5: a missing or zero baseline rate yields `null` rather than dividing by zero.
 *
 * @param current - Aggregate of the run being classified.
 * @param baselineJson - Raw `bench_baselines.aggregate` value. porsager
 *   returns jsonb already parsed (object) while tests pass a JSON string;
 *   `jsonbObject` handles both. A falsy value means no baseline row exists
 *   yet, so this run seeds it.
 * @returns The flag, or `null` when either side has no usable avgGenTps.
 */
export function computeRegressionFlag(
  current: BenchAggregate,
  baselineJson: unknown,
): 'baseline' | 'regression' | 'improvement' | null {
  const currentTps = current.avgGenTps;
  if (!currentTps) return null;
  if (!baselineJson) return 'baseline';

  const baseline = jsonbObject(baselineJson) as BenchAggregate | null;
  const baselineTps = baseline?.avgGenTps;
  // Covers an unparseable baseline as well as a null/zero rate.
  if (!baselineTps) return null;

  const relativeChange = (currentTps - baselineTps) / baselineTps;
  if (relativeChange < -0.1) {
    return 'regression';
  }
  return relativeChange > 0.05 ? 'improvement' : 'baseline';
}
|
||||
|
||||
/**
 * Summary statistics over a set of bench samples, as produced by
 * `computeAggregates`. Each statistic is `null` when no sample carried the
 * underlying value.
 */
export interface BenchAggregate {
  /** Mean time-to-first-token in milliseconds. */
  avgTtftMs: number | null;
  /** Median time-to-first-token in milliseconds. */
  medianTtftMs: number | null;
  /** Mean generation throughput (tokens/second). */
  avgGenTps: number | null;
  /** Median generation throughput (tokens/second). */
  medianGenTps: number | null;
  /** Mean prompt-processing throughput (tokens/second). */
  avgPromptTps: number | null;
  /** Median prompt-processing throughput (tokens/second). */
  medianPromptTps: number | null;
  /** Number of samples the aggregate was computed over. */
  totalSamples: number;
  /** Number of those samples that carry an error. */
  errorSamples: number;
  /** 95th-percentile time-to-first-token in milliseconds. */
  p95TtftMs: number | null;
}
|
||||
|
||||
/**
 * Reduce bench samples to averages, medians and a p95 TTFT.
 *
 * Null measurements are ignored per metric, so a sample with a TTFT but no
 * throughput still contributes to the TTFT statistics. An empty input yields
 * all-null statistics and zero counts.
 *
 * @param samples - Samples to summarise (may include errored samples).
 * @returns The aggregate; `errorSamples` counts samples with a truthy `error`.
 */
export function computeAggregates(samples: BenchSample[]): BenchAggregate {
  if (samples.length === 0) {
    return {
      avgTtftMs: null,
      medianTtftMs: null,
      avgGenTps: null,
      medianGenTps: null,
      avgPromptTps: null,
      medianPromptTps: null,
      totalSamples: 0,
      errorSamples: 0,
      p95TtftMs: null,
    };
  }

  // Collect the non-null values of one metric, ascending.
  const sortedMetric = (pick: (s: BenchSample) => number | null | undefined): number[] => {
    const values: number[] = [];
    for (const s of samples) {
      const v = pick(s);
      if (v != null) values.push(v);
    }
    values.sort((a, b) => a - b);
    return values;
  };

  const mean = (values: number[]): number | null => {
    if (values.length === 0) return null;
    let sum = 0;
    for (const v of values) sum += v;
    return sum / values.length;
  };

  // Expects ascending input; averages the two middle values for even lengths.
  const middle = (values: number[]): number | null => {
    const n = values.length;
    if (n === 0) return null;
    const half = Math.floor(n / 2);
    if (n % 2 === 1) return values[half]!;
    return (values[half - 1]! + values[half]!) / 2;
  };

  // Nearest-rank 95th percentile over ascending input.
  const percentile95 = (values: number[]): number | null => {
    if (values.length === 0) return null;
    const rank = Math.ceil(values.length * 0.95) - 1;
    return values[Math.max(0, rank)] ?? null;
  };

  const ttftValues = sortedMetric((s) => s.ttftMs);
  const genValues = sortedMetric((s) => s.genTps);
  const promptValues = sortedMetric((s) => s.promptTps);

  let failed = 0;
  for (const s of samples) {
    if (s.error) failed++;
  }

  return {
    avgTtftMs: mean(ttftValues),
    medianTtftMs: middle(ttftValues),
    avgGenTps: mean(genValues),
    medianGenTps: middle(genValues),
    avgPromptTps: mean(promptValues),
    medianPromptTps: middle(promptValues),
    totalSamples: samples.length,
    errorSamples: failed,
    p95TtftMs: percentile95(ttftValues),
  };
}
|
||||
142
apps/control/src/services/capture-fetch.ts
Normal file
142
apps/control/src/services/capture-fetch.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Capture fetch: GET /api/captures/:id on llama-swap host, decode base64,
|
||||
* persist trimmed copy (256KB cap app-enforced), render with shiki JSON.
|
||||
*
|
||||
* The 256KB cap is application-enforced in the fetch handler, not a DB constraint.
|
||||
* Total budget: 50MB default, configurable via CAPTURE_BUDGET_MB env var.
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
const MAX_CAPTURE_BYTES = 256 * 1024; // 256KB
|
||||
|
||||
/** A request/response capture in the structured form built by `parseCapture`. */
export interface CaptureData {
  /** The swap entry id the capture was fetched by. */
  id: number;
  /** Provider (host) the capture came from. */
  providerId: string;
  /** Capture timestamp as a string. */
  timestamp: string;
  /** Model name; empty string when the host did not report one. */
  model: string;
  /** Request headers as reported by the host. */
  requestHeaders: Record<string, string>;
  /** Decoded request body text. */
  requestBody: string;
  /** Response headers as reported by the host. */
  responseHeaders: Record<string, string>;
  /** Decoded response body text, possibly truncated. */
  responseBody: string;
  /** Request duration in milliseconds; 0 when not reported. */
  durationMs: number;
  /** UTF-8 byte length of request body plus response body. */
  sizeBytes: number;
}
|
||||
|
||||
/** Outcome of `fetchCapture`. */
export interface CaptureFetchResult {
  /** True when the capture was fetched and parsed. */
  ok: boolean;
  /** The parsed capture; set when `ok` is true. */
  capture?: CaptureData;
  /** Failure description; set when `ok` is false. */
  error?: string;
}
|
||||
|
||||
/**
 * Fetch a capture from a llama-swap host by its swap_entry_id.
 *
 * Issues `GET {baseUrl}/api/captures/{swapEntryId}` with a 10 second timeout.
 * Never rejects for network or HTTP failures: they are returned as
 * `{ ok: false, error }`, with a dedicated message for 404.
 *
 * @param baseUrl - Base URL of the host.
 * @param providerId - Provider id stamped onto the parsed capture.
 * @param swapEntryId - Id of the capture on the host.
 */
export async function fetchCapture(
  baseUrl: string,
  providerId: string,
  swapEntryId: number,
): Promise<CaptureFetchResult> {
  const url = `${baseUrl}/api/captures/${swapEntryId}`;

  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(10_000) });

    if (response.status === 404) {
      return { ok: false, error: 'capture not found on host' };
    }
    if (!response.ok) {
      return { ok: false, error: `fetch failed: ${response.status}` };
    }

    const payload = await response.json() as Record<string, unknown>;
    const capture = parseCapture(payload, providerId, swapEntryId);
    return { ok: true, capture };
  } catch (err) {
    const message = (err as Error).message ?? String(err);
    return { ok: false, error: message };
  }
}
|
||||
|
||||
/**
 * Parse raw capture data from llama-swap into our structured format and
 * enforce the size cap.
 *
 * Bodies are base64-decoded when they look like base64; anything else is kept
 * verbatim. (`Buffer.from(x, 'base64')` never throws on bad input, so the
 * fallback has to be decided up front.)
 *
 * The cap is measured in UTF-8 bytes over request body + response body and
 * includes the truncation marker. The response body is trimmed first; the
 * request body is only trimmed when it alone would exceed the cap. For caps
 * smaller than the marker itself the result can exceed the cap by the marker.
 *
 * @param raw - Capture JSON as returned by the host.
 * @param providerId - Provider id copied onto the result.
 * @param swapEntryId - Capture id copied onto the result as `id`.
 * @param maxBytes - Size cap in bytes; defaults to the 256KB module cap.
 */
export function parseCapture(
  raw: Record<string, unknown>,
  providerId: string,
  swapEntryId: number,
  maxBytes: number = MAX_CAPTURE_BYTES,
): CaptureData {
  const requestHeaders = (raw.request_headers ?? raw.headers ?? {}) as Record<string, string>;
  const responseHeaders = (raw.response_headers ?? {}) as Record<string, string>;

  let requestBody = decodeCaptureBody(raw.request_body);
  let responseBody = decodeCaptureBody(raw.response_body);

  const requestBytes = Buffer.byteLength(requestBody, 'utf8');
  const responseBytes = Buffer.byteLength(responseBody, 'utf8');

  if (requestBytes + responseBytes > maxBytes) {
    const marker = `\n\n[truncated: capture exceeds ${Math.round(maxBytes / 1024)}KB cap]`;
    const markerBytes = Buffer.byteLength(marker, 'utf8');

    // Trim the response first (largest component), leaving room for the marker.
    if (responseBytes > 0) {
      const responseBudget = Math.max(0, maxBytes - requestBytes - markerBytes);
      responseBody = truncateUtf8(responseBody, responseBudget) + marker;
    }

    // Only when the request alone still blows the cap is it trimmed too.
    const responseNow = Buffer.byteLength(responseBody, 'utf8');
    if (requestBytes + responseNow > maxBytes) {
      const requestBudget = Math.max(0, maxBytes - responseNow - markerBytes);
      requestBody = truncateUtf8(requestBody, requestBudget) + marker;
    }
  }

  const sizeBytes = Buffer.byteLength(requestBody + responseBody);

  return {
    id: swapEntryId,
    providerId,
    timestamp: (raw.timestamp ?? raw.ts ?? new Date().toISOString()) as string,
    model: (raw.model ?? '') as string,
    requestHeaders,
    requestBody,
    responseHeaders,
    responseBody,
    durationMs: (raw.duration_ms ?? 0) as number,
    sizeBytes,
  };
}

/** Decode a capture body: base64 text is decoded, any other text is kept as is. */
function decodeCaptureBody(raw: unknown): string {
  if (!raw) return '';
  // A non-string body (e.g. already-parsed JSON) is serialised, not dropped.
  if (typeof raw !== 'string') return JSON.stringify(raw) ?? '';

  const compact = raw.replace(/\s+/g, '');
  // Accept standard and URL-safe alphabets, with or without padding.
  const looksBase64 = compact.length % 4 !== 1 && /^[A-Za-z0-9+/_-]*={0,2}$/.test(compact);
  return looksBase64 ? Buffer.from(compact, 'base64').toString('utf8') : raw;
}

/** Cut `text` to at most `maxBytes` UTF-8 bytes without splitting a character. */
function truncateUtf8(text: string, maxBytes: number): string {
  const bytes = Buffer.from(text, 'utf8');
  if (bytes.length <= maxBytes) return text;

  let end = Math.max(0, Math.floor(maxBytes));
  // A continuation byte (10xxxxxx) at the cut point means we are mid-character;
  // back up to that character's first byte.
  while (end > 0 && (bytes[end]! & 0xc0) === 0x80) end--;
  return bytes.subarray(0, end).toString('utf8');
}
|
||||
|
||||
/**
 * Persist a trimmed capture to the control_requests table.
 *
 * Headers, bodies and duration go into the `capture` JSONB column; on a
 * (provider_id, swap_entry_id, ts) conflict the stored capture is replaced.
 * Uses sql.json(value as never) per convention.
 *
 * @param sql - Database handle.
 * @param capture - Parsed capture to store.
 */
export async function persistCapture(
  sql: Sql,
  capture: CaptureData,
): Promise<void> {
  const { providerId, id, timestamp, model } = capture;
  const { requestHeaders, requestBody, responseHeaders, responseBody, durationMs } = capture;

  // sql.json must receive the OBJECT: wrapping a pre-stringified value would
  // store a JSON string in the JSONB column (the double-serialization gotcha).
  const payload = { requestHeaders, requestBody, responseHeaders, responseBody, durationMs };

  await sql`
    INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, capture)
    VALUES (${providerId}, ${id}, ${timestamp}, ${model}, ${sql.json(payload as never)})
    ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE SET
      capture = EXCLUDED.capture
  `;
}
|
||||
409
apps/control/src/services/eval-suites.ts
Normal file
409
apps/control/src/services/eval-suites.ts
Normal file
@@ -0,0 +1,409 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { readFileSync, readdirSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { load as loadYaml } from 'js-yaml';
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** An eval task identified in suite YAML by the presence of `test_code`. */
export interface CodeTask {
  /** Task identifier. */
  id: string;
  /** Prompt text for the task. */
  prompt: string;
  /** Test code associated with the task. */
  test_code: string;
  /** Expected output for the task. */
  expected_output: string;
  /** Language of the task. */
  language: string;
}
|
||||
|
||||
/** One weighted criterion of a chat-task rubric. */
export interface RubricCriterion {
  /** Criterion name. */
  criterion: string;
  /** Description of the criterion. */
  description: string;
  /** Weight of the criterion. */
  weight: number;
}
|
||||
|
||||
/** An eval task identified in suite YAML by the presence of a `rubric`. */
export interface ChatTask {
  /** Task identifier. */
  id: string;
  /** Prompt text for the task. */
  prompt: string;
  /** Optional prompt template, copied from the YAML when present. */
  prompt_template?: string;
  /** Optional context generator name, copied from the YAML when present. */
  context_generator?: string;
  /** Scoring rubric. */
  rubric: {
    /** Normalised list of criteria. */
    criteria: RubricCriterion[];
    /** Maximum score; the loader defaults it to 7. */
    max_score: number;
  };
}
|
||||
|
||||
/** An eval suite as loaded from a `suite-*.yaml` data file. */
export interface EvalSuiteData {
  /** Suite identifier. */
  id: string;
  /** Suite name. */
  name: string;
  /** Suite kind. */
  kind: 'chat' | 'code';
  /** Suite version; the loader defaults it to 1. */
  version: number;
  /** Optional description. */
  description?: string;
  /** Judge model name, or null when the YAML has none. */
  judge_model: string | null;
  /** Tasks of the suite; the loader lists code tasks before chat tasks. */
  tasks: (CodeTask | ChatTask)[];
}
|
||||
|
||||
/**
 * Row shape selected from the `eval_suites` table.
 *
 * NOTE(review): `tasks` and `metadata` are written with `sql.json`; if the
 * columns are jsonb the driver may hand them back already parsed rather than
 * as strings — confirm against the schema and callers.
 */
export interface EvalSuiteRow {
  /** Suite identifier. */
  id: string;
  /** Suite name. */
  name: string;
  /** Suite kind. */
  kind: string;
  /** Suite version. */
  version: number;
  /** Task list as stored. */
  tasks: string;
  /** Judge model name, if any. */
  judge_model: string | null;
  /** Judge model version, if any. */
  judge_model_version: string | null;
  /** Free-form metadata as stored, if any. */
  metadata: string | null;
  /** Creation timestamp. */
  created_at: string;
}
|
||||
|
||||
// ─── YAML loader ────────────────────────────────────────────────────────────
|
||||
|
||||
const DATA_DIR = resolve(dirname(__filename), '../../data');
|
||||
|
||||
/**
 * Load all eval suite YAML files (`suite-*.yaml`) from the data/ directory.
 *
 * Each file is parsed in isolation: a malformed or empty file is logged and
 * skipped without discarding the suites that loaded fine. Files without a
 * `tasks` array are skipped silently. Never throws; an unreadable directory
 * yields an empty list.
 *
 * @returns The suites that could be loaded.
 */
export function loadEvalSuitesFromData(): EvalSuiteData[] {
  const suites: EvalSuiteData[] = [];

  let files: string[];
  try {
    files = readdirSync(DATA_DIR).filter((f) => f.startsWith('suite-') && f.endsWith('.yaml'));
  } catch (err) {
    console.warn({ err: (err as Error).message }, 'eval: failed to load suites from data/');
    return suites;
  }

  for (const file of files) {
    try {
      const suite = parseEvalSuiteFile(resolve(DATA_DIR, file));
      if (suite) suites.push(suite);
    } catch (err) {
      console.warn({ err: (err as Error).message, file }, 'eval: failed to load suite file');
    }
  }

  return suites;
}

/** Read one suite YAML file; returns null when it has no `tasks` array. */
function parseEvalSuiteFile(path: string): EvalSuiteData | null {
  const parsed = loadYaml(readFileSync(path, 'utf8')) as Record<string, unknown> | null | undefined;
  // An empty document parses to null/undefined; a scalar is not a suite either.
  if (parsed == null || typeof parsed !== 'object') return null;

  const tasks = parsed.tasks as (CodeTask | ChatTask)[] | undefined;
  if (!tasks || !Array.isArray(tasks)) return null;

  const chatTasks: ChatTask[] = [];
  const codeTasks: CodeTask[] = [];

  for (const task of tasks) {
    const t = task as unknown as Record<string, unknown>;
    // A rubric marks a chat task, test_code a code task; anything else is ignored.
    if (t.rubric) {
      const rubric = t.rubric as Record<string, unknown>;
      chatTasks.push({
        id: t.id as string,
        prompt: t.prompt as string,
        prompt_template: (t.prompt_template as string) ?? undefined,
        context_generator: (t.context_generator as string) ?? undefined,
        rubric: {
          criteria: normalizeCriteria(rubric),
          max_score: (rubric.max_score as number) ?? 7,
        },
      });
    } else if (t.test_code) {
      codeTasks.push({
        id: t.id as string,
        prompt: t.prompt as string,
        test_code: t.test_code as string,
        expected_output: t.expected_output as string,
        language: t.language as string,
      });
    }
  }

  return {
    id: parsed.id as string,
    name: parsed.name as string,
    kind: parsed.kind as 'chat' | 'code',
    version: (parsed.version as number) ?? 1,
    description: (parsed.description as string) ?? undefined,
    judge_model: (parsed.judge_model as string) ?? null,
    tasks: [...codeTasks, ...chatTasks],
  };
}
|
||||
|
||||
/**
 * Normalise a rubric from YAML into a list of criteria.
 *
 * Three input shapes are handled, in order:
 * 1. `criteria` is an array: entries lacking a criterion name or weight are dropped.
 * 2. Keyed entries of the form `name: { description, weight }`: collected and,
 *    when `max_score` is set, rescaled so the weights sum to `max_score`
 *    (rounded to one decimal).
 * 3. Otherwise every key becomes a criterion of weight 1 whose description is
 *    the stringified value.
 *
 * Null entries (e.g. a YAML key with no value) are tolerated rather than
 * throwing.
 */
function normalizeCriteria(rubric: Record<string, unknown>): RubricCriterion[] {
  const criteria = rubric.criteria as RubricCriterion[] | undefined;
  if (criteria && Array.isArray(criteria)) {
    return criteria.filter((c) => Boolean(c && c.criterion && c.weight));
  }

  const maxScore = rubric.max_score as number | undefined;
  const entries = Object.entries(rubric).filter(
    ([key]) => key !== 'max_score' && key !== 'criteria',
  );

  const result: RubricCriterion[] = [];
  let totalWeight = 0;
  for (const [key, val] of entries) {
    // Only object entries can carry weight/description; null would throw below.
    if (val === null || typeof val !== 'object') continue;
    const entry = val as { criterion?: string; description?: string; weight?: number };
    if (entry.weight && entry.description) {
      result.push({ criterion: key, description: entry.description, weight: entry.weight });
      totalWeight += entry.weight;
    }
  }

  if (result.length === 0) {
    for (const [key, val] of entries) {
      result.push({ criterion: key, description: val == null ? '' : String(val), weight: 1 });
    }
  }

  // totalWeight stays 0 for the weight-1 fallback, so that shape is never rescaled.
  if (maxScore && totalWeight > 0) {
    const scale = maxScore / totalWeight;
    for (const c of result) {
      c.weight = Math.round(c.weight * scale * 10) / 10;
    }
  }

  return result;
}
|
||||
|
||||
// ─── DB operations ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Seed eval suites from the data/ YAML files into the database.
 *
 * Idempotent: rows are inserted with ON CONFLICT (id) DO NOTHING, so suites
 * that already exist are left untouched. A suite's description, when present,
 * is stored as `{ description }` in the metadata column.
 *
 * @param sql - Database handle.
 */
export async function seedEvalSuites(sql: Sql): Promise<void> {
  for (const { id, name, kind, version, tasks, judge_model, description } of loadEvalSuitesFromData()) {
    const metadata = description
      ? sql.json({ description } as never)
      : sql`NULL::jsonb`;

    await sql`
      INSERT INTO eval_suites (id, name, kind, version, tasks, judge_model, judge_model_version, metadata)
      VALUES (
        ${id},
        ${name},
        ${kind},
        ${version},
        ${sql.json(tasks as never)},
        ${judge_model},
        NULL,
        ${metadata}
      )
      ON CONFLICT (id) DO NOTHING
    `;
  }
}
|
||||
|
||||
/**
 * List all eval suites, newest first.
 *
 * @param sql - Database handle.
 * @returns Every row of `eval_suites`, ordered by `created_at` descending.
 */
export async function listEvalSuites(sql: Sql): Promise<EvalSuiteRow[]> {
  const suites = await sql<EvalSuiteRow[]>`
    SELECT id, name, kind, version, tasks, judge_model, judge_model_version, metadata, created_at
    FROM eval_suites
    ORDER BY created_at DESC
  `;
  return suites;
}
|
||||
|
||||
/**
 * Get a single eval suite by ID.
 *
 * @param sql - Database handle.
 * @param id - Suite id to look up.
 * @returns The matching row, or `null` when no suite has that id.
 */
export async function getEvalSuite(sql: Sql, id: string): Promise<EvalSuiteRow | null> {
  const [suite] = await sql<EvalSuiteRow[]>`
    SELECT id, name, kind, version, tasks, judge_model, judge_model_version, metadata, created_at
    FROM eval_suites WHERE id = ${id}
  `;
  return suite ?? null;
}
|
||||
|
||||
/**
 * Create or update an eval suite.
 *
 * A new suite starts at version 1; updating an existing suite bumps its
 * stored version by one. The bump happens inside the upsert statement itself,
 * so concurrent updates cannot both read the same version and lose a bump.
 * `judge_model_version` is written as NULL on insert and left untouched on
 * update.
 *
 * @param sql - Database handle.
 * @param id - Suite id, or `null` to generate a fresh UUID.
 * @param name - Suite name.
 * @param kind - Suite kind.
 * @param tasks - Task list, stored as JSON.
 * @param judgeModel - Judge model name, or null.
 * @param metadata - Optional metadata, stored as JSON (NULL when omitted).
 * @returns The id of the created or updated suite.
 */
export async function upsertEvalSuite(
  sql: Sql,
  id: string | null,
  name: string,
  kind: 'chat' | 'code',
  tasks: unknown[],
  judgeModel: string | null,
  metadata?: Record<string, unknown>,
): Promise<string> {
  const suiteId = id ?? randomUUID();

  await sql`
    INSERT INTO eval_suites (id, name, kind, version, tasks, judge_model, judge_model_version, metadata)
    VALUES (
      ${suiteId},
      ${name},
      ${kind},
      1,
      ${sql.json(tasks as never)},
      ${judgeModel},
      NULL,
      ${metadata ? sql.json(metadata as never) : sql`NULL::jsonb`}
    )
    ON CONFLICT (id) DO UPDATE SET
      name = EXCLUDED.name,
      kind = EXCLUDED.kind,
      version = eval_suites.version + 1,
      tasks = EXCLUDED.tasks,
      judge_model = EXCLUDED.judge_model,
      metadata = EXCLUDED.metadata
  `;

  return suiteId;
}
|
||||
|
||||
/**
 * Create a new eval run record in state 'running'.
 *
 * The run id is `eval_<epoch ms>_<first 8 chars of a random UUID>`, and
 * `started_at` is set by the database clock.
 *
 * @param sql - Database handle.
 * @param suiteId - Suite being run.
 * @param providerId - Provider the run targets.
 * @param model - Model under evaluation.
 * @param quant - Quantisation label, or null.
 * @param judgeModel - Judge model name, or null.
 * @param judgeModelVersion - Judge model version, or null.
 * @param totalTasks - Number of tasks the run will execute.
 * @returns The generated run id.
 */
export async function createEvalRun(
  sql: Sql,
  suiteId: string,
  providerId: string,
  model: string,
  quant: string | null,
  judgeModel: string | null,
  judgeModelVersion: string | null,
  totalTasks: number,
): Promise<string> {
  const timestamp = Date.now();
  const suffix = randomUUID().slice(0, 8);
  const runId = `eval_${timestamp}_${suffix}`;

  await sql`
    INSERT INTO eval_runs (id, suite_id, job_type, provider_id, model, quant, status, judge_model, judge_model_version, started_at, total_tasks)
    VALUES (
      ${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant},
      'running', ${judgeModel}, ${judgeModelVersion},
      clock_timestamp(), ${totalTasks}
    )
  `;

  return runId;
}
|
||||
|
||||
/**
 * Record a single eval result as one row in `eval_results`.
 *
 * @param sql - Database handle.
 * @param runId - Run the result belongs to.
 * @param taskId - Id of the evaluated task.
 * @param taskIndex - Position of the task within the run.
 * @param score - Achieved score, or null.
 * @param maxScore - Maximum achievable score, or null.
 * @param rationale - Scoring rationale, or null.
 * @param sandboxExitCode - Sandbox exit code, or null.
 * @param sandboxStderr - Sandbox stderr, or null.
 * @param sandboxStdout - Sandbox stdout, or null.
 * @param executionMs - Execution time in milliseconds, or null.
 * @param error - Error message, or null.
 */
export async function recordEvalResult(
  sql: Sql,
  runId: string,
  taskId: string,
  taskIndex: number,
  score: number | null,
  maxScore: number | null,
  rationale: string | null,
  sandboxExitCode: number | null,
  sandboxStderr: string | null,
  sandboxStdout: string | null,
  executionMs: number | null,
  error: string | null,
): Promise<void> {
  await sql`
    INSERT INTO eval_results (run_id, task_id, task_index, score, max_score, rationale, sandbox_exit_code, sandbox_stderr, sandbox_stdout, execution_ms, error)
    VALUES (
      ${runId},
      ${taskId},
      ${taskIndex},
      ${score},
      ${maxScore},
      ${rationale},
      ${sandboxExitCode},
      ${sandboxStderr},
      ${sandboxStdout},
      ${executionMs},
      ${error}
    )
  `;
}
|
||||
|
||||
/**
 * Finalise an eval run.
 *
 * Sets the status to 'failed' when `error` is truthy and 'completed'
 * otherwise, stamps `finished_at` from the database clock, and stores the
 * completed-task count, the aggregate (SQL NULL when absent) and the error.
 *
 * @param sql - Database handle.
 * @param runId - Run to update.
 * @param completedTasks - Number of tasks that finished.
 * @param aggregate - Aggregate result to store as JSON, or null.
 * @param error - Error message, or null on success.
 */
export async function completeEvalRun(
  sql: Sql,
  runId: string,
  completedTasks: number,
  aggregate: Record<string, unknown> | null,
  error: string | null,
): Promise<void> {
  const finalStatus = error ? 'failed' : 'completed';
  const aggregateValue = aggregate ? sql.json(aggregate as never) : sql`NULL::jsonb`;

  await sql`
    UPDATE eval_runs
    SET status = ${finalStatus},
        finished_at = clock_timestamp(),
        completed_tasks = ${completedTasks},
        aggregate = ${aggregateValue},
        error = ${error}
    WHERE id = ${runId}
  `;
}
|
||||
|
||||
/**
 * List eval runs, newest first, capped at 200 rows.
 *
 * Both filters are optional; an omitted or empty filter matches every run.
 *
 * NOTE(review): `aggregate` is typed `string | null`; if the column is jsonb
 * the driver may return it already parsed — confirm against callers.
 *
 * @param sql - Database handle.
 * @param suiteId - Only return runs of this suite.
 * @param providerId - Only return runs against this provider.
 */
export async function listEvalRuns(
  sql: Sql,
  suiteId?: string,
  providerId?: string,
): Promise<Array<{
  id: string;
  suite_id: string;
  job_type: string;
  provider_id: string;
  model: string;
  quant: string | null;
  status: string;
  judge_model: string | null;
  started_at: string | null;
  finished_at: string | null;
  total_tasks: number;
  completed_tasks: number;
  aggregate: string | null;
  error: string | null;
  created_at: string;
}>> {
  // Reuse the declared return type as the row type instead of typing the
  // query as a different row and casting through `unknown`.
  type RunRows = Awaited<ReturnType<typeof listEvalRuns>>;

  // Each filter is a conditional fragment; an empty fragment adds nothing.
  const rows = await sql<RunRows>`
    SELECT id, suite_id, job_type, provider_id, model, quant, status, judge_model,
           started_at, finished_at, total_tasks, completed_tasks, aggregate, error, created_at
    FROM eval_runs
    WHERE 1=1
    ${suiteId ? sql`AND suite_id = ${suiteId}` : sql``}
    ${providerId ? sql`AND provider_id = ${providerId}` : sql``}
    ORDER BY created_at DESC LIMIT 200
  `;
  return rows;
}
|
||||
|
||||
/**
 * Get eval results for a run, ordered by task index.
 *
 * @param sql - Database handle.
 * @param runId - Run whose results to fetch.
 * @returns One entry per recorded result of the run.
 */
export async function getEvalResults(
  sql: Sql,
  runId: string,
): Promise<Array<{
  id: number;
  task_id: string;
  task_index: number;
  score: number | null;
  max_score: number | null;
  rationale: string | null;
  sandbox_exit_code: number | null;
  sandbox_stderr: string | null;
  sandbox_stdout: string | null;
  execution_ms: number | null;
  error: string | null;
}>> {
  // Row type mirrors the declared return type rather than repeating it.
  type ResultRows = Awaited<ReturnType<typeof getEvalResults>>;

  const results = await sql<ResultRows>`
    SELECT id, task_id, task_index, score, max_score, rationale,
           sandbox_exit_code, sandbox_stderr, sandbox_stdout, execution_ms, error
    FROM eval_results WHERE run_id = ${runId}
    ORDER BY task_index
  `;
  return results;
}
|
||||
264
apps/control/src/services/fleet-connector.ts
Normal file
264
apps/control/src/services/fleet-connector.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
/**
|
||||
* Fleet connector: SSE client consuming llama-swap /api/events per enabled host.
|
||||
*
|
||||
* Ports the opencode-sse.ts reconnectDecision pattern (exponential backoff +
|
||||
* circuit-breaker) with one critical addition: **jitter**. The source pattern
|
||||
* has NO jitter, which causes thundering-herd reconnections across N hosts.
|
||||
*
|
||||
* Jitter: random 0-50% of computed delay. Pure function for testability.
|
||||
*
|
||||
* Event parsing is NEW code — llama-swap's SSE envelope (modelStatus | logData |
|
||||
* metrics | inflight) differs from the opencode SDK's Event type.
|
||||
*/
|
||||
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
// ─── jitter (pure) ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Stretch a delay by a random amount between 0% and 50% of its value.
 *
 * @param delayMs - Base delay in milliseconds.
 * @returns `delayMs` plus the random extra; never less than `delayMs` for a
 *   non-negative input.
 */
export function addJitter(delayMs: number): number {
  const extra = delayMs * Math.random() * 0.5;
  const jittered = delayMs + extra;
  return jittered;
}
|
||||
|
||||
// ─── reconnect backoff ──────────────────────────────────────────────────────
|
||||
|
||||
/** Tuning knobs for the backoff computed by `reconnectDecision`. */
export interface ReconnectPolicy {
  /** Delay for the first failure, in milliseconds; doubled for each further failure. */
  baseMs: number;
  /** Cap applied to the exponential delay before jitter is added. */
  maxMs: number;
  /** Once the failure count exceeds this value, `reconnectDecision` gives up. */
  maxAttempts: number;
}
|
||||
|
||||
/**
 * Policy used when the caller supplies none: 1 s base delay, 30 s cap, and
 * give-up once more than 6 failures have accumulated.
 */
export const DEFAULT_RECONNECT_POLICY: ReconnectPolicy = {
  baseMs: 1000,
  maxMs: 30000,
  maxAttempts: 6,
};
|
||||
|
||||
/** Result of a backoff computation: stop retrying, or retry after `delayMs` milliseconds. */
export type ReconnectDecision =
  | { action: 'give-up' }
  | { action: 'reconnect'; delayMs: number };
|
||||
|
||||
/**
 * Decide what to do after the `failures`-th failure.
 *
 * Gives up once `failures` exceeds `policy.maxAttempts`. Otherwise the delay
 * is `baseMs * 2^(failures - 1)`, capped at `maxMs`, then passed through
 * `addJitter` (so the returned delay may exceed `maxMs`).
 *
 * @param failures - Failure count so far.
 * @param policy - Backoff parameters; defaults to `DEFAULT_RECONNECT_POLICY`.
 * @returns Either a give-up decision or a reconnect decision carrying the delay.
 */
export function reconnectDecision(
  failures: number,
  policy: ReconnectPolicy = DEFAULT_RECONNECT_POLICY,
): ReconnectDecision {
  const { baseMs, maxMs, maxAttempts } = policy;
  if (failures > maxAttempts) {
    return { action: 'give-up' };
  }

  const uncapped = baseMs * Math.pow(2, failures - 1);
  const delayMs = addJitter(Math.min(maxMs, uncapped));
  return { action: 'reconnect', delayMs };
}
|
||||
|
||||
// ─── llama-swap SSE envelope types ──────────────────────────────────────────
|
||||
// Real wire shape (apigroup.go):
|
||||
// event:message
|
||||
// data:{"type":"modelStatus|logData|metrics|inflight","data":"<ESCAPED JSON STRING>"}
|
||||
// The SSE event name is ALWAYS 'message'. The discriminator is the outer JSON's
|
||||
// .type field. The payload is DOUBLE-ENCODED: JSON.parse(data) gives {type, data:string},
|
||||
// then JSON.parse(that.data) gives the actual payload.
|
||||
|
||||
// Per-type payload shapes, verified against the fork source
|
||||
// (/opt/forks/llama-swap/internal/server/apigroup.go sendModels/sendLogData/
|
||||
// sendMetrics/sendInFlight, apiModel struct at :20):
|
||||
// modelStatus -> []apiModel (FULL-FLEET snapshot array, not a single transition)
|
||||
// logData -> {source, data} (field is 'data', not 'line')
|
||||
// metrics -> []ActivityLogEntry (BARE array, tokens nested)
|
||||
// inflight -> {total} (host-level total, NOT per-model)
|
||||
/**
 * A fully decoded llama-swap SSE event, discriminated on `type`.
 * `data` is the payload after both JSON decoding steps.
 */
export type LlamaSweepSSEEvent =
  | { type: 'modelStatus'; data: ModelStatusEntry[] }
  | { type: 'logData'; data: LogData }
  | { type: 'metrics'; data: MetricsEntry[] }
  | { type: 'inflight'; data: InflightData };
|
||||
|
||||
/**
 * One element of the `modelStatus` payload array (mirrors the fork's
 * `apiModel` struct). Field meanings beyond their names are defined by
 * llama-swap, not by this file.
 */
export interface ModelStatusEntry {
  id: string;
  name: string;
  description: string;
  state: string;
  unlisted: boolean;
  peerID: string;
  /** Optional; may be absent from the payload. */
  aliases?: string[];
}
|
||||
|
||||
/** Payload of a `logData` event. */
export interface LogData {
  /** Origin of the log text, as reported by llama-swap. */
  source: string;
  /** The log text itself (the wire field is named `data`, not `line`). */
  data: string;
}
|
||||
|
||||
// Real /api/metrics shape: bare JSON array of entries with NESTED tokens.
|
||||
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
|
||||
// NOTE: ActivityLogEntry does NOT carry request headers or source field.
|
||||
// Headers exist only in ReqRespCapture (fetched on-demand via /api/captures/:id).
|
||||
// See design §7 "Implementation notes" for the discrepancy.
|
||||
/** One element of the bare array carried by a `metrics` event. */
export interface MetricsEntry {
  id: number;
  timestamp: string;
  model: string;
  req_path: string;
  resp_status_code: number;
  /** Token counters and rates are nested under this key on the wire. */
  tokens: {
    cache_tokens: number;
    input_tokens: number;
    output_tokens: number;
    prompt_per_second: number;
    tokens_per_second: number;
  };
  duration_ms: number;
  has_capture: boolean;
  /** Optional; may be absent from the payload. */
  capture?: string;
}
|
||||
|
||||
/** Payload of an `inflight` event. */
export interface InflightData {
  /** Host-level in-flight total; the event carries no per-model breakdown. */
  total: number;
}
|
||||
|
||||
// ─── the loop ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Collaborators injected into the fleet connector loop. */
export interface FleetConnectorDeps {
  /** Polled by the loop; once it returns false the connector stops reconnecting. */
  isUp: () => boolean;
  /** Database client. NOTE(review): not referenced by `runFleetConnector` itself — confirm who uses it. */
  sql: Sql;
  /** Logger for connection errors and handler failures. */
  log: FastifyBaseLogger;
  /** Called for every decoded SSE event; a throw or rejection is logged and the stream continues. */
  onEvent: (providerId: string, event: LlamaSweepSSEEvent) => void | Promise<void>;
  /** NOTE(review): not invoked by `runFleetConnector` in this file — presumably used by a caller; confirm. */
  onReconcile: (providerId: string, metrics: MetricsEntry[]) => Promise<boolean>;
  /** Awaited once when the reconnect policy decides to give up. */
  onReconnectGiveUp: (providerId: string) => Promise<void>;
  /** Delay function override; a `setTimeout`-based sleep is used when omitted. */
  sleep?: (ms: number) => Promise<void>;
  /** Backoff policy override; `DEFAULT_RECONNECT_POLICY` is used when omitted. */
  policy?: ReconnectPolicy;
}
|
||||
|
||||
/** Resolve after `ms` milliseconds using `setTimeout`. */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Parse one line of a llama-swap SSE stream.
 *
 * Wire shape (apigroup.go):
 *   event:message
 *   data:{"type":"modelStatus","data":"<ESCAPED JSON STRING>"}
 *
 * The SSE event name is always 'message', so `event:` lines are ignored. The
 * discriminator is the outer JSON's `.type` field and the payload is
 * DOUBLE-ENCODED: the first `JSON.parse` yields `{type, data: string}`, the
 * second `JSON.parse(data)` yields the actual payload.
 *
 * Malformed input never throws: anything that is not a `data:` line holding
 * an object with a non-empty string `type` and a non-empty string `data`
 * containing valid JSON produces `null`. The decoded payload's shape is NOT
 * validated against `type`.
 *
 * @param line - One raw line from the stream (surrounding whitespace, including `\r`, is trimmed).
 * @returns The decoded event, or `null` for non-data or malformed lines.
 */
export function parseSseLine(line: string): LlamaSweepSSEEvent | null {
  const trimmed = line.trim();

  // Only "data:" lines carry a payload; blank lines, "event:" lines and any
  // other field are skipped.
  if (!trimmed.startsWith('data:')) return null;

  const dataStr = trimmed.slice(5).trimStart();
  if (!dataStr) return null;

  // First decode: { type: "...", data: "<escaped json>" }.
  let outer: unknown;
  try {
    outer = JSON.parse(dataStr);
  } catch {
    return null;
  }

  // JSON.parse may legally return null or a primitive ("data: null"); reading
  // a property off null would throw, so reject non-objects up front.
  if (typeof outer !== 'object' || outer === null) return null;

  const { type, data } = outer as { type?: unknown; data?: unknown };
  if (typeof type !== 'string' || !type) return null;
  if (typeof data !== 'string' || !data) return null;

  // Second decode: the actual payload.
  let inner: unknown;
  try {
    inner = JSON.parse(data);
  } catch {
    return null;
  }

  return { type, data: inner } as LlamaSweepSSEEvent;
}
|
||||
|
||||
/**
 * Start the connector loop for one host in the background.
 *
 * The loop runs fire-and-forget. A rejection from it is logged here, so it
 * cannot surface as an unhandled promise rejection.
 *
 * @param providerId - Identifier passed through to callbacks and log entries.
 * @param baseUrl - Host base URL; the loop connects to `${baseUrl}/api/events`.
 * @param deps - Injected collaborators.
 * @returns The controller whose `abort()` stops the loop.
 */
export function startFleetConnector(providerId: string, baseUrl: string, deps: FleetConnectorDeps): AbortController {
  const abort = new AbortController();
  void runFleetConnector(providerId, baseUrl, abort, deps).catch((err: unknown) => {
    deps.log.error(
      { providerId, err: err instanceof Error ? err.message : String(err) },
      'fleet: connector loop crashed',
    );
  });
  return abort;
}
|
||||
|
||||
/**
 * Connector loop for one host: connect to `${baseUrl}/api/events`, feed every
 * decoded event to `deps.onEvent`, and reconnect with backoff on failure.
 *
 * - The loop ends when `deps.isUp()` returns false, the controller is
 *   aborted, or the reconnect policy gives up (after awaiting
 *   `deps.onReconnectGiveUp`).
 * - `failures` counts CONSECUTIVE failures: it is reset as soon as a
 *   connection delivers data and after a clean stream end, so occasional
 *   drops of an otherwise healthy stream do not add up to a give-up.
 * - The response body is cancelled on every exit path so a failed or
 *   abandoned stream does not keep its HTTP connection open.
 * - An error thrown or rejected by `deps.onEvent` is logged and does not
 *   interrupt the stream.
 *
 * @param providerId - Identifier passed through to callbacks and log entries.
 * @param baseUrl - Host base URL.
 * @param abort - Controller whose signal cancels the fetch and stops the loop.
 * @param deps - Injected collaborators (`sleep` and `policy` are optional overrides).
 */
export async function runFleetConnector(
  providerId: string,
  baseUrl: string,
  abort: AbortController,
  deps: FleetConnectorDeps,
): Promise<void> {
  const signal = abort.signal;
  const sleep = deps.sleep ?? defaultSleep;
  const policy = deps.policy ?? DEFAULT_RECONNECT_POLICY;
  const url = `${baseUrl}/api/events`;
  let failures = 0;

  while (deps.isUp() && !signal.aborted) {
    try {
      const res = await fetch(url, { signal });
      if (!res.ok) {
        // Best-effort: drop the unread error body so the connection is released.
        void res.body?.cancel().catch(() => {});
        throw new Error(`SSE connect failed: ${res.status} ${res.statusText}`);
      }

      const reader = res.body?.getReader();
      if (!reader) throw new Error('no response body');

      const decoder = new TextDecoder();
      let buffer = '';

      try {
        while (!signal.aborted) {
          const { done, value } = await reader.read();
          if (done) break;

          // Data arrived, so this connection is healthy: start counting
          // failures from zero again.
          failures = 0;
          buffer += decoder.decode(value, { stream: true });

          // Keep the trailing partial line in the buffer for the next chunk.
          const lines = buffer.split('\n');
          buffer = lines.pop() ?? '';

          for (const line of lines) {
            if (signal.aborted) break;
            const event = parseSseLine(line);
            if (!event) continue;

            try {
              await Promise.resolve(deps.onEvent(providerId, event));
            } catch (err) {
              deps.log.error(
                { providerId, err: err instanceof Error ? err.message : String(err) },
                'fleet: onEvent failed',
              );
            }
          }
        }
      } finally {
        // Best-effort release of the body; cancel() rejects when the stream
        // has already errored, which is safe to ignore here.
        void reader.cancel().catch(() => {});
      }

      // Clean stream end — healthy reconnect at base delay (pre-hardening).
      failures = 0;
      if (deps.isUp() && !signal.aborted) {
        await sleep(policy.baseMs);
      }
    } catch (err) {
      if (!deps.isUp() || signal.aborted) break;
      failures += 1;
      const decision = reconnectDecision(failures, policy);
      deps.log.warn(
        {
          providerId,
          failures,
          action: decision.action,
          err: err instanceof Error ? err.message : String(err),
        },
        'fleet: SSE error; reconnecting',
      );
      if (decision.action === 'give-up') {
        deps.log.warn({ providerId, failures }, 'fleet: SSE reconnect gave up (circuit breaker)');
        await deps.onReconnectGiveUp(providerId);
        break;
      }
      await sleep(decision.delayMs);
    }
  }
}
|
||||
89
apps/control/src/services/fleet-state.ts
Normal file
89
apps/control/src/services/fleet-state.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
/** Configuration record for one fleet host. */
export interface HostConfig {
  /** Identifier of the host's provider. */
  providerId: string;
  /** Base URL of the host. */
  baseUrl: string;
  /** Whether the host is enabled. */
  enabled: boolean;
}
|
||||
|
||||
/** In-memory state of the whole fleet. */
export interface FleetState {
  /** Per-host state, keyed by provider id. */
  hosts: Map<string, HostState>;
}
|
||||
|
||||
/** In-memory state tracked for one host. */
export interface HostState {
  providerId: string;
  /** Connection status; a freshly created state starts as 'down'. */
  liveness: 'connected' | 'reconnecting' | 'down';
  /** Set by `stampLastSeen`; `null` until the first stamp. */
  lastSeenAt: Date | null;
  /** Counter advanced by `incrementSeq`; starts at 0. */
  seq: number;
  /** Host-level inflight total (the fork's SSE publishes only a total, not per-model). */
  inflightTotal: number;
  /** Per-model state. NOTE(review): presumably keyed by model id — confirm against the writer. */
  models: Map<string, ModelState>;
}
|
||||
|
||||
/** State tracked for one model on a host. */
export interface ModelState {
  model: string;
  state: string;
  /** Timestamp associated with this state. */
  ts: Date;
  /** TTL deadline, or `null` when there is none. */
  ttlDeadline: Date | null;
  inflight: number;
}
|
||||
|
||||
/**
 * Snapshot shape of the fleet. Mirrors `HostState` / `ModelState`, but with
 * arrays in place of maps and string timestamps in place of `Date` values.
 */
export interface SnapshotData {
  hosts: Array<{
    providerId: string;
    liveness: 'connected' | 'reconnecting' | 'down';
    lastSeenAt: string | null;
    seq: number;
    models: Array<{
      model: string;
      state: string;
      ts: string;
      ttlDeadline: string | null;
      inflight: number;
    }>;
  }>;
  /** Optional request records. */
  requests?: Array<{
    id: number;
    providerId: string;
    ts: string;
    model: string | null;
    reqPath: string | null;
    statusCode: number | null;
    durationMs: number | null;
  }>;
  /** Optional performance samples; `gpu` and `sys` are left untyped here. */
  perfSamples?: Array<{
    providerId: string;
    ts: string;
    gpu: unknown;
    sys: unknown;
  }>;
}
|
||||
|
||||
// ─── helpers for tests ──────────────────────────────────────────────────────
|
||||
|
||||
/** Build an empty fleet state (no hosts registered). */
export function createFleetState(): FleetState {
  const hosts = new Map<string, HostState>();
  return { hosts };
}
|
||||
|
||||
/**
 * Return the state for `providerId`, creating and registering it on first use.
 *
 * A new state starts as 'down', never seen, with `seq` 0, inflight total 0
 * and no models.
 *
 * @param fleet - Fleet state to look up and, if needed, mutate.
 * @param providerId - Host key.
 * @returns The existing or newly created state (the same object stored in the map).
 */
export function ensureHostState(fleet: FleetState, providerId: string): HostState {
  const existing = fleet.hosts.get(providerId);
  if (existing) return existing;

  const created: HostState = {
    providerId,
    liveness: 'down',
    lastSeenAt: null,
    seq: 0,
    inflightTotal: 0,
    models: new Map(),
  };
  fleet.hosts.set(providerId, created);
  return created;
}
|
||||
|
||||
/** Record the current time as the host's last-seen moment (mutates `state`). */
export function stampLastSeen(state: HostState): void {
  const now = new Date();
  state.lastSeenAt = now;
}
|
||||
|
||||
/**
 * Advance the host's sequence counter by one (mutates `state`).
 *
 * @returns The counter value after the increment.
 */
export function incrementSeq(state: HostState): number {
  const next = state.seq + 1;
  state.seq = next;
  return next;
}
|
||||
140
apps/control/src/services/gateway.ts
Normal file
140
apps/control/src/services/gateway.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
* P7.1: auto:* gateway candidate resolution.
|
||||
*
|
||||
* The gateway exposes OpenAI-compatible virtual models. A completion against
|
||||
* `auto:code` (etc.) is resolved to an ordered list of concrete candidate
|
||||
* composite ids ('provider/model'), then dispatched with failover.
|
||||
*
|
||||
* Ordering source:
|
||||
* - An explicit route_policy for the virtual model (admin-curated candidates).
|
||||
* - Otherwise, advisory routing scores ranked by the category metric.
|
||||
*
|
||||
* Health filtering (only connected hosts are eligible) is applied last so a
|
||||
* curated policy never dispatches to a down host.
|
||||
*
|
||||
* Pure helpers (orderCandidates, parseVirtualModel) are unit-tested; the DB
|
||||
* read lives in resolveCandidates().
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
import { computeRoutingScores, type ModelScore } from './routing-scores.js';
|
||||
import { jsonbStringArray } from './jsonb.js';
|
||||
|
||||
/** The virtual model ids listed by the gateway. */
export const VIRTUAL_MODELS = [
  'auto',
  'auto:code',
  'auto:fast',
  'auto:cheap',
] as const;

/** Union of the literal ids in `VIRTUAL_MODELS`. */
export type VirtualModel = (typeof VIRTUAL_MODELS)[number];
|
||||
|
||||
/**
 * Whether `id` names a gateway virtual model: exactly 'auto', or anything
 * starting with 'auto:' (not limited to the entries of `VIRTUAL_MODELS`).
 */
export function isGatewayVirtualModel(id: string): boolean {
  if (id === 'auto') return true;
  return id.startsWith('auto:');
}
|
||||
|
||||
/**
 * Drop the provider prefix a picker may prepend to a virtual model id.
 *
 * The gateway registry provider id is 'auto', so BooChat may send
 * 'auto/auto:code'. Everything up to and including the FIRST '/' is removed;
 * an id without '/' is returned unchanged.
 *
 * @param modelId - Bare or composite ('<provider>/<virtual>') model id.
 * @returns The part after the first '/', or `modelId` itself when there is none.
 */
export function parseVirtualModel(modelId: string): string {
  const slashAt = modelId.indexOf('/');
  if (slashAt < 0) return modelId;
  return modelId.slice(slashAt + 1);
}
|
||||
|
||||
/** Row shape read from `route_policies`. */
export interface RoutePolicyRow {
  virtual_model: string;
  /** jsonb column: porsager returns a parsed array (see jsonb.ts), hence `unknown` here. */
  candidates: unknown;
  /** Composite id appended after the candidates, or `null` when there is none. */
  fallback: string | null;
  enabled: boolean;
}
|
||||
|
||||
/**
 * Order concrete candidates for a virtual model. Pure.
 *
 * With an explicit policy, its candidate list defines the order and the
 * fallback is appended last unless already listed. A curated id is dropped
 * only when it appears in `scores` and no entry for it is healthy; an id that
 * is absent from `scores` is kept (health unknown — let dispatch try).
 *
 * Without a policy, candidates are the healthy entries of `scores` that have
 * a value for the virtual model's metric, sorted by that metric descending:
 *   - 'auto:code'                → `codeScore`
 *   - 'auto:fast' / 'auto:cheap' → `avgGenTps`
 *   - anything else ('auto')     → `evalScore`, else `avgGenTps / 1000`
 *
 * Health and "known" lookups use sets built in one pass over `scores`, and
 * each entry's metric is computed once rather than on every comparison.
 *
 * @param virtualModel - Bare virtual model id (e.g. 'auto:code').
 * @param policy - Curated candidates and optional fallback, or `null`.
 * @param scores - Advisory scores, one entry per composite id.
 * @returns Ordered composite ids ('provider/model').
 */
export function orderCandidates(
  virtualModel: string,
  policy: { candidates: string[]; fallback: string | null } | null,
  scores: ModelScore[],
): string[] {
  if (policy) {
    const known = new Set<string>();
    const healthy = new Set<string>();
    for (const s of scores) {
      known.add(s.compositeId);
      if (s.healthy) healthy.add(s.compositeId);
    }

    const ordered = [...policy.candidates];
    if (policy.fallback && !ordered.includes(policy.fallback)) ordered.push(policy.fallback);

    // Keep curated order; drop only ids that are known AND unhealthy.
    return ordered.filter((id) => !known.has(id) || healthy.has(id));
  }

  // Derive from advisory scores by category metric.
  const metric = (s: ModelScore): number | null => {
    switch (virtualModel) {
      case 'auto:code':
        return s.codeScore;
      case 'auto:fast':
      case 'auto:cheap':
        return s.avgGenTps;
      case 'auto':
      default:
        // Overall: prefer eval score, then throughput.
        return s.evalScore ?? (s.avgGenTps != null ? s.avgGenTps / 1000 : null);
    }
  };

  const ranked: Array<{ id: string; value: number }> = [];
  for (const s of scores) {
    if (!s.healthy) continue;
    const value = metric(s);
    if (value != null) ranked.push({ id: s.compositeId, value });
  }
  ranked.sort((a, b) => b.value - a.value);
  return ranked.map((r) => r.id);
}
|
||||
|
||||
/** Result of `resolveCandidates`. */
export interface ResolvedCandidates {
  /** The virtual model id after any provider prefix was stripped. */
  virtualModel: string;
  /** Ordered composite ids to try. */
  candidates: string[];
  /** Name of the route policy that was applied, or `null` when none matched. */
  policyName: string | null;
}
|
||||
|
||||
/**
 * Resolve the ordered candidate list for a virtual model.
 *
 * Strips any provider prefix from `modelId`, reads at most one enabled route
 * policy for that virtual model, computes the advisory routing scores, and
 * delegates ordering and health filtering to `orderCandidates`.
 *
 * @param sql - Database client.
 * @param fleet - Live fleet state handed to the score computation.
 * @param modelId - Bare or composite virtual model id.
 * @returns The virtual model, its ordered candidates, and the applied policy name (if any).
 */
export async function resolveCandidates(
  sql: Sql,
  fleet: FleetState,
  modelId: string,
): Promise<ResolvedCandidates> {
  const virtualModel = parseVirtualModel(modelId);

  const [row] = await sql<(RoutePolicyRow & { name: string })[]>`
    SELECT name, virtual_model, candidates, fallback, enabled
    FROM route_policies
    WHERE virtual_model = ${virtualModel} AND enabled = true
    LIMIT 1
  `;

  const scores = await computeRoutingScores(sql, fleet);

  const policy = row
    ? { candidates: jsonbStringArray(row.candidates), fallback: row.fallback }
    : null;

  return {
    virtualModel,
    candidates: orderCandidates(virtualModel, policy, scores),
    policyName: row ? row.name : null,
  };
}
|
||||
|
||||
/**
 * Split a composite id 'provider/model' at its FIRST '/'.
 *
 * @returns The two parts, or `null` when there is no '/' or the provider part
 *   would be empty. The model part may itself contain '/' and may be empty.
 */
export function splitComposite(compositeId: string): { providerId: string; model: string } | null {
  const cut = compositeId.indexOf('/');
  return cut > 0
    ? { providerId: compositeId.slice(0, cut), model: compositeId.slice(cut + 1) }
    : null;
}
|
||||
19
apps/control/src/services/host-access.ts
Normal file
19
apps/control/src/services/host-access.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* Host-access seam: acquire exclusive access to a host for a purpose.
|
||||
*
|
||||
* V1 body: no-op returning {ok: true}. This is the P8 seam — P8 swaps the
|
||||
* body for a DB lease without touching the bench engine.
|
||||
*/
|
||||
|
||||
/** Outcome of a host-access request. */
export interface HostGrant {
  /** Whether access was granted. */
  ok: boolean;
  /** Optional explanation accompanying the outcome. */
  reason?: string;
}
|
||||
|
||||
/**
 * Request access to a host for a purpose.
 *
 * V1 is a no-op: both arguments are ignored and access is always granted.
 *
 * @param providerId - Host to acquire (unused in V1).
 * @param purpose - Why access is wanted (unused in V1).
 * @returns A grant with `ok: true`.
 */
export async function acquireHostAccess(
  providerId: string,
  purpose: string,
): Promise<HostGrant> {
  const grant: HostGrant = { ok: true };
  return grant;
}
|
||||
41
apps/control/src/services/jsonb.ts
Normal file
41
apps/control/src/services/jsonb.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* JSONB read helpers.
|
||||
*
|
||||
* porsager/postgres returns `jsonb` columns already parsed into JS values (an
|
||||
* object/array), NOT a JSON string. Calling JSON.parse on that throws
|
||||
* ("[object Object] is not valid JSON"). These helpers accept either shape so a
|
||||
* read works whether the driver parsed the column or handed back a string.
|
||||
*/
|
||||
|
||||
/**
 * Coerce a JSONB column value to a string array.
 *
 * Accepts an already-parsed value or a JSON string. Non-string elements are
 * dropped; unparsable strings and non-array values yield `[]`.
 */
export function jsonbStringArray(value: unknown): string[] {
  let decoded: unknown = value;
  if (typeof value === 'string') {
    try {
      decoded = JSON.parse(value);
    } catch {
      return [];
    }
  }
  if (!Array.isArray(decoded)) return [];

  const strings: string[] = [];
  for (const item of decoded) {
    if (typeof item === 'string') strings.push(item);
  }
  return strings;
}
|
||||
|
||||
/**
 * Coerce a JSONB column value to an array (elements untyped).
 *
 * An array input is returned as-is (same reference); a JSON string is parsed
 * first. Unparsable strings and non-array values yield `[]`.
 */
export function jsonbArray(value: unknown): unknown[] {
  let decoded: unknown = value;
  if (typeof value === 'string') {
    try {
      decoded = JSON.parse(value);
    } catch {
      return [];
    }
  }
  if (Array.isArray(decoded)) return decoded;
  return [];
}
|
||||
|
||||
/**
 * Coerce a JSONB column value to a number array.
 *
 * Delegates to `jsonbArray`, then keeps only elements whose `typeof` is 'number'.
 */
export function jsonbNumberArray(value: unknown): number[] {
  const isNumber = (item: unknown): item is number => typeof item === 'number';
  return jsonbArray(value).filter(isNumber);
}
|
||||
|
||||
/**
 * Coerce a JSONB column value to a plain object, or `null`.
 *
 * An object input is returned as-is (same reference); a JSON string is parsed
 * first. `null`/`undefined`, unparsable strings, arrays and primitives all
 * yield `null`.
 */
export function jsonbObject(value: unknown): Record<string, unknown> | null {
  if (value == null) return null;

  let decoded: unknown = value;
  if (typeof decoded === 'string') {
    try {
      decoded = JSON.parse(decoded);
    } catch {
      return null;
    }
  }

  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null;
  }
  return decoded as Record<string, unknown>;
}
|
||||
288
apps/control/src/services/judge-runner.ts
Normal file
288
apps/control/src/services/judge-runner.ts
Normal file
@@ -0,0 +1,288 @@
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { recordEvalResult, completeEvalRun } from './eval-suites.js';
|
||||
import { resolveProviderBaseUrl } from './llama-providers.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Inputs for one judge-based eval run. */
export interface JudgeEvalParams {
  /** Eval run the results are recorded under. */
  runId: string;
  /** Provider whose base URL the requests are sent to. */
  providerId: string;
  /** Target model id sent in the completion request. */
  model: string;
  /** Quantization label, or `null`. */
  quant: string | null;
  /** Task definitions; `runJudgeEval` reads `id`, `prompt` and `rubric` from each. */
  tasks: Array<Record<string, unknown>>;
  /** Judge model id; `null` selects the default judge model. */
  judgeModel: string | null;
}
|
||||
|
||||
/** Progress notification passed to the `onProgress` callback. */
export interface JudgeProgress {
  /** Number of tasks processed so far, counting failed tasks too. */
  completedTasks: number;
}
|
||||
|
||||
/** Outcome of a judge run. */
export interface JudgeResult {
  /** Run-level error message, or `null` when the run itself did not fail. */
  error: string | null;
}
|
||||
|
||||
// ─── judge runner ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run a judge-based eval (chat quality, rubric scoring).
 *
 * For each task, in order:
 *   1. generate a response from the target model,
 *   2. score it — via the judge model when the task has a `rubric`,
 *      otherwise 1/1 for a non-empty response and 0/1 for an empty one,
 *   3. record the result, report progress and publish a `control_job` delta.
 *
 * A task that throws is logged, recorded with its error message and counted
 * as completed; later tasks still run. Requests go through llama-swap with
 * temperature 0, `X-Boo-Source: control-eval` and the bare wire model id.
 * Both target and judge requests are sent to the target provider's base URL.
 *
 * NOTE(review): an earlier revision computed a `judgeModel@timestamp`
 * version string that was never persisted or sent; it has been removed as
 * dead code. Reintroduce it where the run is stored if per-run version
 * pinning is required.
 *
 * @param params - Run parameters (`quant` is not used by this function).
 * @param sql - Database client used to record results.
 * @param emitter - Receives a `control_job` delta after each successful task.
 * @param seq - Sequence number copied into every published delta.
 * @param logger - Logger for run and task diagnostics.
 * @param onProgress - Called after every task, successful or failed.
 * @returns `{ error }` — set only when the provider has no base URL; per-task
 *   failures are recorded on their result rows, not here.
 */
export async function runJudgeEval(
  params: JudgeEvalParams,
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  logger: import('fastify').FastifyBaseLogger,
  onProgress: (progress: JudgeProgress) => void,
): Promise<JudgeResult> {
  type Rubric = {
    criteria: Array<{ criterion: string; description: string; weight: number }>;
    max_score: number;
  };
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const { runId, providerId, model, tasks, judgeModel } = params;

  // Resolve the target model's base URL.
  const baseUrl = resolveProviderBaseUrl(providerId);
  if (!baseUrl) {
    const err = `no base URL for provider ${providerId}`;
    // Best-effort: the run is reported as failed to the caller either way.
    await completeEvalRun(sql, runId, 0, null, err).catch((dbErr: unknown) => {
      logger.warn({ runId, err: describe(dbErr) }, 'eval: failed to mark judge run as failed');
    });
    return { error: err };
  }

  // Determine judge model: suite default -> strongest local model.
  const judgeModelId = judgeModel ?? resolveDefaultJudgeModel();

  logger.info(
    { runId, judgeModel: judgeModelId, targetModel: model, taskCount: tasks.length },
    'eval: judge run started',
  );

  let completedTasks = 0;

  for (let i = 0; i < tasks.length; i++) {
    const task = tasks[i];
    if (!task) continue;

    const taskId = (task.id as string) ?? `task_${i}`;
    const prompt = (task.prompt as string) ?? '';
    const rubric = (task.rubric as Rubric) ?? null;

    const startTime = Date.now();

    try {
      // Generate the response from the target model.
      const response = await generateResponse(baseUrl, model, prompt);

      // Score the response.
      let score: number | null;
      let maxScore: number | null;
      let rationale: string | null;

      if (rubric) {
        const scoring = await scoreWithRubric(baseUrl, judgeModelId, prompt, response, rubric);
        score = scoring.score;
        maxScore = scoring.maxScore;
        rationale = scoring.rationale;
      } else {
        // Simple pass/fail for tasks without rubric.
        const hasContent = response.trim().length > 0;
        score = hasContent ? 1 : 0;
        maxScore = 1;
        rationale = hasContent ? 'Response generated' : 'Empty response';
      }

      const executionMs = Date.now() - startTime;

      await recordEvalResult(
        sql,
        runId,
        taskId,
        i,
        score,
        maxScore,
        rationale,
        null,
        null,
        null,
        executionMs,
        null,
      );

      completedTasks++;
      onProgress({ completedTasks });

      emitter.publish({
        type: 'control_job' as const,
        seq,
        jobType: 'eval' as const,
        jobId: runId,
        status: 'running' as const,
        detail: {
          completedTasks,
          totalTasks: tasks.length,
          taskId,
          score,
        },
      });
    } catch (err) {
      const msg = describe(err);
      logger.warn({ taskId, err: msg }, 'eval: judge task failed');

      // Best-effort: a failing DB write must not abort the remaining tasks,
      // but it should be visible in the logs.
      await recordEvalResult(
        sql,
        runId,
        taskId,
        i,
        null,
        null,
        null,
        null,
        null,
        null,
        Date.now() - startTime,
        msg,
      ).catch((dbErr: unknown) => {
        logger.warn({ taskId, err: describe(dbErr) }, 'eval: failed to record judge task failure');
      });

      completedTasks++;
      onProgress({ completedTasks });
    }
  }

  return { error: null };
}
|
||||
|
||||
/**
 * Ask the target model for a completion through llama-swap.
 *
 * Sends a single-user-message chat completion to
 * `${baseUrl}/v1/chat/completions` with a 120 s timeout.
 *
 * @param baseUrl - Provider base URL.
 * @param model - Model id placed in the request body.
 * @param prompt - Content of the user message.
 * @returns The first choice's message content, or '' when it is absent.
 * @throws Error when the HTTP status is not OK; the message carries the
 *   status and the first 200 characters of the response body.
 */
async function generateResponse(
  baseUrl: string,
  model: string,
  prompt: string,
): Promise<string> {
  const payload = {
    model,
    messages: [{ role: 'user', content: prompt }],
    // Design S8: temperature 0 everywhere in the eval pipeline -- response
    // generation must be as reproducible as the judging (audit B1).
    temperature: 0,
    max_tokens: 2048,
  };

  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(120_000),
  });

  if (!res.ok) {
    const errorBody = await res.text().catch(() => '');
    throw new Error(`model response failed: ${res.status} ${errorBody.slice(0, 200)}`);
  }

  const completion = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
  const firstChoice = completion.choices?.[0];
  return firstChoice?.message?.content ?? '';
}
|
||||
|
||||
/**
 * Extract the verdict object from the judge's reply text.
 *
 * Tries the whole text as JSON first, then the contents of the first fenced
 * code block. Returns `{}` when neither yields a JSON object, so a malformed
 * reply never throws.
 */
function parseJudgeVerdict(content: string): Record<string, unknown> {
  const fenced = content.match(/```(?:json)?\s*([\s\S]*?)```/);
  const attempts = fenced && fenced[1] ? [content, fenced[1]] : [content];

  for (const text of attempts) {
    try {
      const value: unknown = JSON.parse(text);
      if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
        return value as Record<string, unknown>;
      }
    } catch {
      // Not JSON in this form -- fall through to the next attempt.
    }
  }
  return {};
}

/**
 * Score a response using a rubric via LLM-as-judge.
 *
 * Builds a judge prompt from the task prompt, the response and the rubric
 * criteria, posts it to `${baseUrl}/v1/chat/completions` (temperature 0,
 * JSON response format, 120 s timeout) and reads `weighted_score` and
 * `overall_rationale` from the reply.
 *
 * The reply is treated as untrusted. A missing, non-numeric or non-finite
 * score counts as 0 (numeric strings are accepted), and the score is clamped
 * to `[0, rubric.max_score]`. A missing or non-string rationale becomes
 * 'No rationale provided'. An unparsable reply yields score 0, not an error.
 *
 * @param baseUrl - Provider base URL.
 * @param judgeModelId - Model id of the judge.
 * @param prompt - The task prompt shown to the target model.
 * @param response - The target model's response being judged.
 * @param rubric - Criteria (with weights) and the maximum score.
 * @returns The clamped score, the rubric's max score and the rationale.
 * @throws Error when the HTTP status is not OK; the message carries the
 *   status and the first 200 characters of the response body.
 */
async function scoreWithRubric(
  baseUrl: string,
  judgeModelId: string,
  prompt: string,
  response: string,
  rubric: { criteria: Array<{ criterion: string; description: string; weight: number }>; max_score: number },
): Promise<{ score: number; maxScore: number; rationale: string }> {
  const criteriaText = rubric.criteria
    .map((c, i) => `${i + 1}. **${c.criterion}** (weight: ${c.weight}): ${c.description}`)
    .join('\n');

  const judgePrompt = `You are an evaluation judge. Score the following response against the given prompt using the rubric criteria.

**Prompt:**
${prompt}

**Response:**
${response}

**Rubric Criteria (score each 0-3, then compute weighted average):**
${criteriaText}

**Max Score:** ${rubric.max_score}

Return your evaluation in JSON format:
{
  "criterion_scores": {
    "criterion_name": { "score": 0-3, "rationale": "explanation" }
  },
  "weighted_score": <number>,
  "overall_rationale": "<summary>"
}`;

  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify({
      model: judgeModelId,
      messages: [{ role: 'user', content: judgePrompt }],
      temperature: 0,
      max_tokens: 1024,
      response_format: { type: 'json_object' },
    }),
    signal: AbortSignal.timeout(120_000),
  });

  if (!res.ok) {
    const body = await res.text().catch(() => '');
    throw new Error(`judge failed: ${res.status} ${body.slice(0, 200)}`);
  }

  const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> };
  const content = data.choices?.[0]?.message?.content ?? '{}';
  const verdict = parseJudgeVerdict(content);

  // Accept a number or a numeric string; anything else counts as 0.
  const rawScore = verdict.weighted_score;
  const numeric = typeof rawScore === 'string' ? Number(rawScore) : rawScore;
  const score = typeof numeric === 'number' && Number.isFinite(numeric) ? numeric : 0;

  const rationale =
    typeof verdict.overall_rationale === 'string'
      ? verdict.overall_rationale
      : 'No rationale provided';

  return {
    score: Math.min(Math.max(score, 0), rubric.max_score),
    maxScore: rubric.max_score,
    rationale,
  };
}
|
||||
|
||||
/**
 * Resolve the default judge model.
 *
 * Reads the `EVAL_JUDGE_MODEL` environment variable and falls back to
 * 'qwen2.5-72b-instruct' when it is unset.
 */
function resolveDefaultJudgeModel(): string {
  const configured = process.env.EVAL_JUDGE_MODEL;
  return configured ?? 'qwen2.5-72b-instruct';
}
|
||||
101
apps/control/src/services/llama-providers.ts
Normal file
101
apps/control/src/services/llama-providers.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* Local provider registry loader (control-side).
|
||||
*
|
||||
* Reads the shared llama-providers config file at startup and caches the
|
||||
* parsed result. When the file is absent or invalid, synthesizes a single
|
||||
* legacy provider from LLAMA_SWAP_URL so the service starts with only
|
||||
* legacy env vars (D-1).
|
||||
*
|
||||
* Schema and pure helpers live in @boocode/contracts/llama-providers.
|
||||
* File I/O stays app-local per D-1.
|
||||
*/
|
||||
import { readFileSync } from 'node:fs';
|
||||
import {
|
||||
LlamaProvidersFileSchema,
|
||||
type LlamaProvidersFile,
|
||||
type LlamaProvider,
|
||||
} from '@boocode/contracts/llama-providers';
|
||||
|
||||
export type { LlamaProvidersFile, LlamaProvider };
|
||||
|
||||
/**
 * Synthesize a registry holding one legacy 'llama-swap' provider.
 *
 * @param llamaSwapUrl - Base URL assigned to the synthesized provider.
 * @returns A registry whose default (and only) provider is 'llama-swap'.
 */
function buildLegacyProvider(llamaSwapUrl: string): LlamaProvidersFile {
  const legacy: LlamaProvider = {
    id: 'llama-swap',
    label: 'llama-swap',
    baseUrl: llamaSwapUrl,
    kind: 'llama-swap',
  };
  return { defaultProvider: 'llama-swap', providers: [legacy] };
}
|
||||
|
||||
/** Registry produced by the most recent `loadLlamaProviders` call; `null` until the first load. */
let cached: LlamaProvidersFile | null = null;
|
||||
|
||||
/**
 * Load (or re-load) the local provider config and cache the result.
 *
 * Never throws on bad input. When no path is given, the file cannot be read,
 * its content is not valid JSON, or it fails schema validation, the function
 * falls back to the single legacy provider built from `llamaSwapUrl`. A read
 * failure is reported with the underlying error, so a permission problem is
 * not mislabelled as a missing file.
 *
 * @param providersPath - Path of the providers file; falsy means "use legacy".
 * @param llamaSwapUrl - Base URL for the legacy fallback provider.
 * @returns The registry that is now cached.
 */
export function loadLlamaProviders(
  providersPath: string | undefined,
  llamaSwapUrl: string,
): LlamaProvidersFile {
  // Every failure path ends the same way: cache and return the legacy shape.
  const useLegacy = (): LlamaProvidersFile => {
    const legacy = buildLegacyProvider(llamaSwapUrl);
    cached = legacy;
    return legacy;
  };

  if (!providersPath) return useLegacy();

  let raw: string;
  try {
    raw = readFileSync(providersPath, 'utf8');
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `llama-providers: could not read ${providersPath} (${reason}) -- falling back to legacy single-provider`,
    );
    return useLegacy();
  }

  let json: unknown;
  try {
    json = JSON.parse(raw);
  } catch (err) {
    console.error(
      `llama-providers: invalid JSON in ${providersPath} -- falling back to legacy single-provider`,
      err,
    );
    return useLegacy();
  }

  const parsed = LlamaProvidersFileSchema.safeParse(json);
  if (!parsed.success) {
    console.error(
      `llama-providers: schema validation failed for ${providersPath} -- falling back to legacy single-provider`,
      parsed.error.flatten(),
    );
    return useLegacy();
  }

  cached = parsed.data;
  return parsed.data;
}
|
||||
|
||||
/**
 * Return the cached provider registry.
 *
 * When nothing has been loaded yet, a fresh legacy registry pointing at
 * `http://localhost:8080` is returned instead (and is not cached).
 */
export function getLlamaProviders(): LlamaProvidersFile {
  if (cached != null) {
    return cached;
  }
  return buildLegacyProvider('http://localhost:8080');
}
|
||||
|
||||
/**
 * Look up a provider by id in the current registry and return its base URL.
 *
 * @param providerId Provider id to search for.
 * @returns The first matching provider's `baseUrl`, or null when no provider
 *   matches or the match has no base URL.
 */
export function resolveProviderBaseUrl(providerId: string): string | null {
  const { providers } = getLlamaProviders();
  for (const candidate of providers) {
    if (candidate.id === providerId) {
      return candidate.baseUrl ?? null;
    }
  }
  return null;
}
|
||||
67
apps/control/src/services/log-relay.ts
Normal file
67
apps/control/src/services/log-relay.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Log relay: in-memory tail buffer per host for logData SSE events.
|
||||
*
|
||||
* - 2k-line tail per host for late joiners
|
||||
* - Relays /api/events logData into control_log frames
|
||||
* - Source filter: proxy | upstream | model
|
||||
*/
|
||||
|
||||
/** Maximum number of lines kept per provider; the oldest lines are dropped first. */
const MAX_LOG_LINES = 2000;

/** One relayed log line. */
export interface LogLine {
  providerId: string;
  source: 'proxy' | 'upstream' | 'model';
  line: string;
  /** Time the line was appended to the relay. */
  ts: Date;
}

/** In-memory, bounded tail of log lines, kept separately per provider id. */
export class LogRelay {
  private readonly tails = new Map<string, LogLine[]>();

  /**
   * Append a log line to the provider's tail buffer, stamping it with the
   * current time and discarding the oldest lines beyond MAX_LOG_LINES.
   */
  append(providerId: string, source: 'proxy' | 'upstream' | 'model', line: string): void {
    let tail = this.tails.get(providerId);
    if (!tail) {
      tail = [];
      this.tails.set(providerId, tail);
    }
    tail.push({ providerId, source, line, ts: new Date() });

    const overflow = tail.length - MAX_LOG_LINES;
    if (overflow > 0) {
      tail.splice(0, overflow);
    }
  }

  /**
   * Get the tail buffer for a provider (for late joiners).
   *
   * Returns a copy, so callers cannot mutate the relay's internal buffer and
   * the result does not change when more lines are appended later.
   */
  getTail(providerId: string): LogLine[] {
    const tail = this.tails.get(providerId);
    return tail ? [...tail] : [];
  }

  /**
   * Get every buffered line across all providers (for snapshot-on-join).
   * Lines are grouped by provider in first-seen order, oldest first.
   */
  getAllTails(): LogLine[] {
    const all: LogLine[] = [];
    for (const tail of this.tails.values()) {
      all.push(...tail);
    }
    return all;
  }

  /** Get the distinct `source` values present in the buffered lines. */
  getSources(): string[] {
    const sources = new Set<string>();
    for (const tail of this.tails.values()) {
      for (const entry of tail) {
        sources.add(entry.source);
      }
    }
    return Array.from(sources);
  }
}
|
||||
105
apps/control/src/services/model-pull.ts
Normal file
105
apps/control/src/services/model-pull.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
* P9 model pull: download a HuggingFace repo onto a host into its models dir.
|
||||
*
|
||||
* Non-blocking job (fire-and-forget like bench/eval), progress over the existing
|
||||
* control_job frame (jobType 'action', detail.kind = 'pull'). The repo id is
|
||||
* validated server-side as defense in depth on top of the wrapper's own check,
|
||||
* then passed as a single token (never interpolated into a shell string in
|
||||
* wrapper mode; in shell mode it is the only argument and is regex-clean).
|
||||
*/
|
||||
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { SshExec, SshTarget, SshMode } from './ssh-config.js';
|
||||
|
||||
/**
 * Pattern for a HuggingFace repo id of the form `org/name`.
 *
 * Both segments must begin with an alphanumeric character, which rules out
 * `.` and `..` traversal segments such as `../x`. After the first character a
 * segment may contain alphanumerics, `.`, `_` and `-`. Exactly one slash is
 * allowed, and no whitespace or shell metacharacters.
 */
export const REPO_ID_RE = /^[A-Za-z0-9][A-Za-z0-9._-]*\/[A-Za-z0-9][A-Za-z0-9._-]*$/;

/** Return true when `repo` is a well-formed `org/name` repo id per REPO_ID_RE. */
export function validateRepoId(repo: string): boolean {
  const matched = REPO_ID_RE.exec(repo);
  return matched !== null;
}
|
||||
|
||||
/**
 * Build the pull command for a host. Pure helper for testing.
 *
 * - wrapper mode: the `pull <repo>` verb; `modelsDir` is ignored.
 * - shell mode: a direct `huggingface-cli download` into
 *   `<modelsDir>/<repo with "/" replaced by "__">`.
 *
 * The local directory is emitted as a single-quoted shell word, and any
 * single quote inside it is escaped so the path cannot terminate the quoting.
 * `repo` is inserted verbatim; callers are expected to have validated it.
 *
 * @param mode `'wrapper'` selects the wrapper verb; any other mode builds the shell command.
 * @param repo HuggingFace repo id (`org/name`).
 * @param modelsDir Models directory; trailing slashes are ignored. When
 *   omitted, the target path is rooted at `/`.
 * @returns The command string to execute on the host.
 */
export function buildPullCommand(mode: SshMode, repo: string, modelsDir?: string): string {
  if (mode === 'wrapper') return `pull ${repo}`;

  const dir = (modelsDir ?? '').replace(/\/+$/, '');
  const local = `${dir}/${repo.replace(/\//g, '__')}`;
  // POSIX quoting: close the quote, emit an escaped quote, reopen the quote.
  const quotedLocal = `'${local.replace(/'/g, `'\\''`)}'`;
  return `huggingface-cli download ${repo} --local-dir ${quotedLocal}`;
}
|
||||
|
||||
/** Inputs for a single model pull job. */
export interface PullParams {
  /** Job id echoed in every control_job frame published for this pull. */
  jobId: string;
  /** Host on which the pull command is executed. */
  target: SshTarget;
  /** HuggingFace repo id (`org/name`). */
  repo: string;
  /** Selects the wrapper verb or a direct shell command. */
  mode: SshMode;
  /** Destination models directory; required for shell mode. */
  modelsDir?: string;
}

/** Outcome of a model pull. */
export interface PullResult {
  /** True when the pull command exited with code 0. */
  ok: boolean;
  /** Failure description; set when `ok` is false. */
  error?: string;
}
|
||||
|
||||
/**
 * Run a model pull as a control_job. Resolves when the pull finishes; callers
 * invoke it fire-and-forget so the HTTP response can return 202 immediately.
 *
 * Never rejects: validation failures, a non-zero exit code, and anything
 * thrown by `exec` are all reported as a `failed` frame and an `ok: false`
 * result.
 *
 * @param params Job id, target host, repo id, mode and optional models dir.
 * @param exec Runs a command on the target and yields `code`, `stdout`, `stderr`.
 * @param emitter Receives the `control_job` progress frames.
 * @param seq Sequence number stamped on every published frame.
 * @returns `{ ok: true }` on exit code 0, otherwise `{ ok: false, error }`.
 */
export async function runModelPull(
  params: PullParams,
  exec: SshExec,
  emitter: DeltaEmitter,
  seq: number = 0,
): Promise<PullResult> {
  const { jobId, target, repo, mode, modelsDir } = params;

  // Every frame shares the same envelope; only status and detail extras vary.
  const publish = (
    status: 'running' | 'completed' | 'failed',
    extra: { error?: string; output?: string } = {},
  ): void => {
    emitter.publish({
      type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
      status, detail: { kind: 'pull', repo, ...extra },
    });
  };

  // Publish a failed frame and build the matching result in one step.
  const fail = (error: string): PullResult => {
    publish('failed', { error });
    return { ok: false, error };
  };

  if (!validateRepoId(repo)) return fail('invalid repo id');
  if (mode === 'shell' && !modelsDir) return fail('shell mode requires a models directory');

  publish('running');

  try {
    const res = await exec(target, buildPullCommand(mode, repo, modelsDir));
    if (res.code !== 0) {
      // Keep the frame small: only the head of stderr is reported.
      return fail(`pull failed (exit ${res.code}): ${res.stderr.slice(0, 500)}`);
    }
    // Only the tail of stdout is reported on success.
    publish('completed', { output: res.stdout.slice(-500) });
    return { ok: true };
  } catch (err: unknown) {
    // Anything can be thrown (including null); only trust `.message` when it
    // is actually a string, otherwise stringify the thrown value.
    const message =
      typeof err === 'object' && err !== null && 'message' in err
        ? (err as { message?: unknown }).message
        : undefined;
    return fail(typeof message === 'string' ? message : String(err));
  }
}
|
||||
12
apps/control/src/services/reconcile.ts
Normal file
12
apps/control/src/services/reconcile.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
 * Reconcile gap detection.
 *
 * A gap exists when the oldest entry returned by a reconcile fetch is
 * strictly newer than the newest entry already persisted for that provider:
 * the upstream ring wrapped past our tail and entries in between were lost.
 *
 * @param oldestReconcileTs Timestamp of the oldest fetched entry, or null.
 * @param newestPersistedTs Timestamp of the newest persisted entry, or null.
 * @returns True only when both timestamps are present and the fetched one is
 *   later. Missing, empty, or unparseable timestamps yield false.
 */
export function detectGap(
  oldestReconcileTs: string | null,
  newestPersistedTs: string | null,
): boolean {
  if (oldestReconcileTs && newestPersistedTs) {
    const oldestFetchedMs = new Date(oldestReconcileTs).getTime();
    const newestStoredMs = new Date(newestPersistedTs).getTime();
    // Comparisons against NaN are false, so bad timestamps never report a gap.
    return oldestFetchedMs > newestStoredMs;
  }
  return false;
}
|
||||
299
apps/control/src/services/reports.ts
Normal file
299
apps/control/src/services/reports.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* P6.2: Scheduled fleet digest reports.
|
||||
*
|
||||
* Same in-process timer pattern as the retention job (design §3/§6): an hourly
|
||||
* tick reads control_schedule_meta.last_run_at and runs the digest when due,
|
||||
* so a boot after a missed window catches up immediately. No cron dependency,
|
||||
* no new scheduler abstraction.
|
||||
*
|
||||
* The report gathers usage, trends vs the prior period, swap counts, the eval
|
||||
* leaderboard, and bench regression anomalies, renders a markdown digest, and
|
||||
* persists both the markdown and the structured stats to control_reports.
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
/** How often the digest runs; also the length of the window it covers. */
export type ReportInterval = 'daily' | 'weekly';

/** Structured stats backing one digest report. */
export interface ReportStats {
  /** ISO timestamp of the start of the window (inclusive). */
  periodStart: string;
  /** ISO timestamp of the end of the window (exclusive). */
  periodEnd: string;
  interval: ReportInterval;
  /** Request count inside the window. */
  totalRequests: number;
  /** Request count in the equally long window immediately before. */
  priorRequests: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  /** Usage per request source, busiest first. */
  bySource: Array<{ source: string; requests: number; inputTokens: number; outputTokens: number }>;
  /** Requests and model swaps per provider, sorted by provider id. */
  byProvider: Array<{ providerId: string; requests: number; swaps: number }>;
  /** Latest completed eval score per (provider, model, suite kind). */
  leaderboard: Array<{ providerId: string; model: string; kind: string; avgScore: number | null }>;
  /** Bench runs flagged as regressions inside the window. */
  regressions: Array<{ providerId: string; model: string; avgGenTps: number | null }>;
}
|
||||
|
||||
/** Length of a report window in hours: 168 for `weekly`, 24 for anything else. */
function intervalHours(interval: ReportInterval): number {
  const hoursPerDay = 24;
  if (interval === 'weekly') {
    return hoursPerDay * 7;
  }
  return hoursPerDay;
}
|
||||
|
||||
/**
 * Gather the structured stats for the report window that ends at `now`.
 * Read-only: only SELECT statements are issued.
 *
 * The window is `[now - interval, now)`. The trend comparison uses the
 * equally long window immediately before it.
 *
 * @param sql Tagged-template SQL client.
 * @param interval Window length (`daily` = 24h, `weekly` = 168h).
 * @param now End of the window.
 * @returns Totals, per-source and per-provider breakdowns, the eval
 *   leaderboard (top 20), and bench regressions for the window.
 */
export async function gatherReportStats(
  sql: Sql,
  interval: ReportInterval,
  now: Date,
): Promise<ReportStats> {
  const windowMs = intervalHours(interval) * 3600_000;
  const endMs = now.getTime();
  const startIso = new Date(endMs - windowMs).toISOString();
  const endIso = now.toISOString();
  const priorIso = new Date(endMs - 2 * windowMs).toISOString();

  const [totalsRow] = await sql<{ requests: number; in_tokens: number; out_tokens: number }[]>`
    SELECT COUNT(*)::int AS requests,
           COALESCE(SUM(input_tokens), 0)::int AS in_tokens,
           COALESCE(SUM(output_tokens), 0)::int AS out_tokens
    FROM control_requests
    WHERE ts >= ${startIso} AND ts < ${endIso}
  `;

  const [priorRow] = await sql<{ requests: number }[]>`
    SELECT COUNT(*)::int AS requests
    FROM control_requests
    WHERE ts >= ${priorIso} AND ts < ${startIso}
  `;

  const sourceRows = await sql<{ source: string | null; requests: number; in_tokens: number; out_tokens: number }[]>`
    SELECT source,
           COUNT(*)::int AS requests,
           COALESCE(SUM(input_tokens), 0)::int AS in_tokens,
           COALESCE(SUM(output_tokens), 0)::int AS out_tokens
    FROM control_requests
    WHERE ts >= ${startIso} AND ts < ${endIso}
    GROUP BY source
    ORDER BY requests DESC
  `;

  const requestRows = await sql<{ provider_id: string; requests: number }[]>`
    SELECT provider_id, COUNT(*)::int AS requests
    FROM control_requests
    WHERE ts >= ${startIso} AND ts < ${endIso}
    GROUP BY provider_id
  `;

  // Swap counts: a model entering 'ready' / 'starting' marks a load/swap.
  const swapRows = await sql<{ provider_id: string; swaps: number }[]>`
    SELECT provider_id, COUNT(*)::int AS swaps
    FROM control_model_events
    WHERE ts >= ${startIso} AND ts < ${endIso}
      AND state IN ('ready', 'starting')
    GROUP BY provider_id
  `;

  // Merge the two per-provider result sets; a provider may appear in only one.
  const perProvider = new Map<string, { requests: number; swaps: number }>();
  for (const row of requestRows) {
    perProvider.set(row.provider_id, { requests: row.requests, swaps: 0 });
  }
  for (const row of swapRows) {
    const entry = perProvider.get(row.provider_id);
    if (entry) {
      entry.swaps = row.swaps;
    } else {
      perProvider.set(row.provider_id, { requests: 0, swaps: row.swaps });
    }
  }
  const byProvider: ReportStats['byProvider'] = [];
  for (const providerId of [...perProvider.keys()].sort()) {
    const counts = perProvider.get(providerId);
    byProvider.push({
      providerId,
      requests: counts?.requests ?? 0,
      swaps: counts?.swaps ?? 0,
    });
  }

  // Leaderboard: latest completed eval avgScore per (provider, model, kind).
  const leaderboardRows = await sql<{ provider_id: string; model: string; kind: string; avg_score: number | null }[]>`
    SELECT er.provider_id, er.model, es.kind,
           (er.aggregate::jsonb ->> 'avgScore')::float AS avg_score
    FROM eval_runs er
    JOIN eval_suites es ON er.suite_id = es.id
    WHERE er.status = 'completed' AND er.aggregate IS NOT NULL
      AND er.finished_at = (
        SELECT MAX(er2.finished_at) FROM eval_runs er2
        JOIN eval_suites es2 ON er2.suite_id = es2.id
        WHERE er2.provider_id = er.provider_id AND er2.model = er.model
          AND es2.kind = es.kind AND er2.status = 'completed'
      )
    ORDER BY avg_score DESC NULLS LAST
    LIMIT 20
  `;

  // Regression anomalies: bench runs flagged 'regression' in the window.
  const regressionRows = await sql<{ provider_id: string; model: string; avg_gen_tps: number | null }[]>`
    SELECT bs.provider_id, bs.model,
           (br.aggregate::jsonb ->> 'avgGenTps')::float AS avg_gen_tps
    FROM bench_runs br
    JOIN bench_suites bs ON br.suite_id = bs.id
    WHERE br.regression_flag = 'regression'
      AND br.finished_at >= ${startIso} AND br.finished_at < ${endIso}
    ORDER BY br.finished_at DESC
  `;

  const bySource = sourceRows.map((row) => ({
    // Requests with no source are grouped under a placeholder label.
    source: row.source ?? '(unattributed)',
    requests: row.requests,
    inputTokens: row.in_tokens,
    outputTokens: row.out_tokens,
  }));
  const leaderboard = leaderboardRows.map((row) => ({
    providerId: row.provider_id,
    model: row.model,
    kind: row.kind,
    avgScore: row.avg_score,
  }));
  const regressions = regressionRows.map((row) => ({
    providerId: row.provider_id,
    model: row.model,
    avgGenTps: row.avg_gen_tps,
  }));

  return {
    periodStart: startIso,
    periodEnd: endIso,
    interval,
    totalRequests: totalsRow?.requests ?? 0,
    priorRequests: priorRow?.requests ?? 0,
    totalInputTokens: totalsRow?.in_tokens ?? 0,
    totalOutputTokens: totalsRow?.out_tokens ?? 0,
    bySource,
    byProvider,
    leaderboard,
    regressions,
  };
}
|
||||
|
||||
/**
 * Render a markdown digest from gathered stats. Pure — unit-testable.
 *
 * Sections: title and period, usage (with trend vs the prior period), then
 * "By source", "By host" and "Leaderboard" tables (each omitted when empty),
 * and an "Anomalies" section that is always present.
 *
 * @param stats Stats produced for one report window.
 * @returns The markdown text, ending with a newline.
 */
export function renderReportMarkdown(stats: ReportStats): string {
  const lines: string[] = [];

  // Percent change of `cur` relative to `prev`, rounded to a whole percent.
  const pct = (cur: number, prev: number): string => {
    if (prev === 0) return cur === 0 ? '0%' : 'new';
    const d = ((cur - prev) / prev) * 100;
    const rounded = d.toFixed(0);
    // A small decrease rounds to "-0"; show it like an exact zero change
    // instead of emitting the nonsensical "-0%".
    if (rounded === '-0') return '+0%';
    return `${d >= 0 ? '+' : ''}${rounded}%`;
  };

  lines.push(`# Fleet ${stats.interval} report`);
  lines.push('');
  lines.push(`Period: ${stats.periodStart} to ${stats.periodEnd}`);
  lines.push('');

  lines.push('## Usage');
  lines.push('');
  lines.push(`- Requests: ${stats.totalRequests} (${pct(stats.totalRequests, stats.priorRequests)} vs prior period)`);
  lines.push(`- Input tokens: ${stats.totalInputTokens}`);
  lines.push(`- Output tokens: ${stats.totalOutputTokens}`);
  lines.push('');

  if (stats.bySource.length > 0) {
    lines.push('## By source');
    lines.push('');
    lines.push('| Source | Requests | Input tok | Output tok |');
    lines.push('| --- | ---: | ---: | ---: |');
    for (const s of stats.bySource) {
      lines.push(`| ${s.source} | ${s.requests} | ${s.inputTokens} | ${s.outputTokens} |`);
    }
    lines.push('');
  }

  if (stats.byProvider.length > 0) {
    lines.push('## By host');
    lines.push('');
    lines.push('| Host | Requests | Swaps |');
    lines.push('| --- | ---: | ---: |');
    for (const p of stats.byProvider) {
      lines.push(`| ${p.providerId} | ${p.requests} | ${p.swaps} |`);
    }
    lines.push('');
  }

  if (stats.leaderboard.length > 0) {
    lines.push('## Leaderboard');
    lines.push('');
    lines.push('| Model | Kind | Score |');
    lines.push('| --- | --- | ---: |');
    for (const l of stats.leaderboard) {
      lines.push(`| ${l.providerId}/${l.model} | ${l.kind} | ${l.avgScore != null ? l.avgScore.toFixed(3) : 'n/a'} |`);
    }
    lines.push('');
  }

  lines.push('## Anomalies');
  lines.push('');
  if (stats.regressions.length === 0) {
    lines.push('No speed regressions flagged this period.');
  } else {
    for (const r of stats.regressions) {
      lines.push(`- Regression: ${r.providerId}/${r.model} (avg gen ${r.avgGenTps != null ? r.avgGenTps.toFixed(1) : 'n/a'} tok/s)`);
    }
  }
  lines.push('');

  return lines.join('\n');
}
|
||||
|
||||
/**
 * Generate a digest for the given interval and persist it.
 *
 * The id is derived from `now` and the interval, and the insert uses
 * `ON CONFLICT (id) DO NOTHING`, so repeating a call with the same `now` and
 * interval leaves the stored row untouched.
 *
 * @param sql Tagged-template SQL client.
 * @param interval Report window length.
 * @param now End of the report window; defaults to the current time.
 * @returns The report id (`report_<epoch ms>_<interval>`).
 */
export async function generateReport(
  sql: Sql,
  interval: ReportInterval,
  now: Date = new Date(),
): Promise<string> {
  const reportId = `report_${now.getTime()}_${interval}`;
  const stats = await gatherReportStats(sql, interval, now);
  const body = renderReportMarkdown(stats);

  await sql`
    INSERT INTO control_reports (id, kind, interval, period_start, period_end, markdown, stats)
    VALUES (${reportId}, 'digest', ${interval}, ${stats.periodStart}, ${stats.periodEnd}, ${body}, ${sql.json(stats as never)})
    ON CONFLICT (id) DO NOTHING
  `;

  return reportId;
}
|
||||
|
||||
/**
 * Decide whether a scheduled report is due. Pure helper for testing.
 *
 * @param lastRunAt Time of the previous run, or null when it has never run.
 * @param interval Schedule interval.
 * @param now Current time.
 * @returns True when there is no previous run, or when at least one full
 *   interval has passed since it.
 */
export function isReportDue(
  lastRunAt: Date | null,
  interval: ReportInterval,
  now: Date,
): boolean {
  if (!lastRunAt) {
    return true;
  }
  const intervalMs = intervalHours(interval) * 3600_000;
  const sinceLastRunMs = now.getTime() - lastRunAt.getTime();
  return sinceLastRunMs >= intervalMs;
}
|
||||
|
||||
/**
 * Run one scheduler tick for the `report-digest` schedule.
 *
 * Reads the schedule row from control_schedule_meta. When the row exists, is
 * enabled, and a report is due, the tick generates the digest and then
 * stamps `last_run_at` with `now`. Calling this once at startup and then
 * hourly gives catch-up-on-boot.
 *
 * @param sql Tagged-template SQL client.
 * @param now Time of the tick; defaults to the current time.
 * @returns `{ ran: false }` when nothing was generated, otherwise
 *   `{ ran: true, reportId }`.
 */
export async function runReportSchedulerTick(
  sql: Sql,
  now: Date = new Date(),
): Promise<{ ran: boolean; reportId?: string }> {
  const [schedule] = await sql<{ interval: string; enabled: boolean; last_run_at: string | null }[]>`
    SELECT interval, enabled, last_run_at
    FROM control_schedule_meta WHERE name = 'report-digest'
  `;
  if (!schedule?.enabled) {
    return { ran: false };
  }

  // Any stored value other than 'weekly' is treated as daily.
  const interval: ReportInterval = schedule.interval === 'weekly' ? 'weekly' : 'daily';
  const previousRun = schedule.last_run_at ? new Date(schedule.last_run_at) : null;
  if (!isReportDue(previousRun, interval, now)) {
    return { ran: false };
  }

  const reportId = await generateReport(sql, interval, now);
  await sql`
    UPDATE control_schedule_meta SET last_run_at = ${now.toISOString()}
    WHERE name = 'report-digest'
  `;
  return { ran: true, reportId };
}
|
||||
159
apps/control/src/services/retention.ts
Normal file
159
apps/control/src/services/retention.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Retention job: daily in-process timer that rolls up raw perf samples and
|
||||
* prunes old data.
|
||||
*
|
||||
* Crash-safe by construction:
|
||||
* 1. Rollup is an idempotent upsert (INSERT ... ON CONFLICT DO UPDATE).
|
||||
* 2. Delete raw only AFTER covering buckets are committed.
|
||||
* 3. Chunked deletes: old rows are pruned in batches of 1000 rows.
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
|
||||
/** Retention settings, as copied from the service config. */
export interface RetentionConfig {
  /** From RETENTION_RAW_HOURS. */
  rawHours: number;
  /** From RETENTION_ROLLUP_DAYS. */
  rollupDays: number;
  /** From CAPTURE_SIZE_KB. */
  captureSizeKB: number;
  /** From CAPTURE_BUDGET_MB. */
  captureBudgetMB: number;
}

/** Project the retention-related fields of the service config into a RetentionConfig. */
export function buildRetentionConfig(cfg: Config): RetentionConfig {
  const {
    RETENTION_RAW_HOURS: rawHours,
    RETENTION_ROLLUP_DAYS: rollupDays,
    CAPTURE_SIZE_KB: captureSizeKB,
    CAPTURE_BUDGET_MB: captureBudgetMB,
  } = cfg;
  return { rawHours, rollupDays, captureSizeKB, captureBudgetMB };
}
|
||||
|
||||
/**
 * Roll up one provider's raw perf samples from the last `hours` hours into
 * 5-minute buckets in control_perf_rollup_5m.
 *
 * Idempotent: each bucket is recomputed from the raw rows and upserted, so
 * re-running the same window overwrites rather than double-counts.
 *
 * @param sql Tagged-template SQL client.
 * @param providerId Provider whose samples are rolled up.
 * @param hours How far back from now to look for samples.
 */
export async function runRollup(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoff = new Date(Date.now() - hours * 3600_000);

  // date_trunc() only accepts single units ('minute', 'hour', ...), so the
  // 5-minute bucket start is derived by flooring the epoch to 300 seconds.
  const buckets = await sql<{ bucket: Date }[]>`
    SELECT to_timestamp(floor(extract(epoch FROM ts) / 300) * 300) AS bucket
    FROM control_perf_samples
    WHERE provider_id = ${providerId}
      AND ts >= ${cutoff.toISOString()}
    GROUP BY bucket
    ORDER BY bucket
  `;

  for (const { bucket } of buckets) {
    const bucketStart = new Date(bucket);
    const bucketEnd = new Date(bucketStart.getTime() + 5 * 60_000);

    // Idempotent upsert: re-run recomputes the same buckets, never double-counts.
    await sql`
      INSERT INTO control_perf_rollup_5m (provider_id, bucket, gpu_agg, sys_agg)
      SELECT
        ${providerId},
        ${bucketStart.toISOString()},
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'gpu', gpu)) AS gpu_agg,
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'sys', sys)) AS sys_agg
      FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts >= ${bucketStart.toISOString()}
        AND ts < ${bucketEnd.toISOString()}
      GROUP BY provider_id
      ON CONFLICT (provider_id, bucket) DO UPDATE SET
        gpu_agg = EXCLUDED.gpu_agg,
        sys_agg = EXCLUDED.sys_agg
    `;
  }
}
|
||||
|
||||
/**
 * Delete one provider's raw perf samples older than `hours` hours.
 *
 * Deletes in batches of about 1000 rows, newest-expired first, until a batch
 * removes nothing. Rows are selected and deleted in one statement, so the
 * timestamps never pass through a millisecond-precision JS Date; a stored
 * value with finer precision therefore still matches and the loop always
 * makes progress or stops.
 *
 * @param sql Tagged-template SQL client.
 * @param providerId Provider whose samples are pruned.
 * @param hours Age threshold in hours.
 */
export async function pruneRawSamples(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  for (;;) {
    const deleted = await sql<{ ts: Date }[]>`
      DELETE FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts IN (
          SELECT ts FROM control_perf_samples
          WHERE provider_id = ${providerId}
            AND ts < ${cutoffIso}
          ORDER BY ts DESC
          LIMIT ${chunkSize}
        )
      RETURNING ts
    `;
    // Nothing removed means nothing left to prune.
    if (deleted.length === 0) break;
  }
}
|
||||
|
||||
/**
 * Delete activity rows (control_requests) older than `hours` hours.
 *
 * Deletes in batches of about 1000 rows, newest-expired first, until a batch
 * removes nothing. Rows are selected and deleted in one statement, so the
 * timestamps never pass through a millisecond-precision JS Date; a stored
 * value with finer precision therefore still matches and the loop always
 * makes progress or stops.
 *
 * @param sql Tagged-template SQL client.
 * @param hours Age threshold in hours.
 */
export async function pruneActivity(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  for (;;) {
    const deleted = await sql<{ ts: Date }[]>`
      DELETE FROM control_requests
      WHERE ts IN (
        SELECT ts FROM control_requests
        WHERE ts < ${cutoffIso}
        ORDER BY ts DESC
        LIMIT ${chunkSize}
      )
      RETURNING ts
    `;
    // Nothing removed means nothing left to prune.
    if (deleted.length === 0) break;
  }
}
|
||||
|
||||
/**
 * Delete model events (control_model_events) older than `hours` hours.
 *
 * Deletes in batches of about 1000 rows, newest-expired first, until a batch
 * removes nothing. Rows are selected and deleted in one statement, so the
 * timestamps never pass through a millisecond-precision JS Date; a stored
 * value with finer precision therefore still matches and the loop always
 * makes progress or stops.
 *
 * @param sql Tagged-template SQL client.
 * @param hours Age threshold in hours.
 */
export async function pruneModelEvents(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  for (;;) {
    const deleted = await sql<{ ts: Date }[]>`
      DELETE FROM control_model_events
      WHERE ts IN (
        SELECT ts FROM control_model_events
        WHERE ts < ${cutoffIso}
        ORDER BY ts DESC
        LIMIT ${chunkSize}
      )
      RETURNING ts
    `;
    // Nothing removed means nothing left to prune.
    if (deleted.length === 0) break;
  }
}
|
||||
|
||||
/**
 * Trim a capture JSON string to the configured per-row size cap.
 *
 * The cap is measured and enforced in UTF-8 bytes. The cut never splits a
 * multi-byte character, so the result is always at most `sizeKB` KiB.
 *
 * NOTE: truncation is textual; a trimmed capture is generally no longer
 * valid JSON.
 *
 * @param captureJson Serialized capture, or null.
 * @param sizeKB Size cap in KiB.
 * @returns Null for null/empty input, the input unchanged when it fits,
 *   otherwise the longest prefix that fits within the cap.
 */
export function trimCapture(captureJson: string | null, sizeKB: number): string | null {
  if (!captureJson) return null;

  const maxBytes = sizeKB * 1024;
  if (Buffer.byteLength(captureJson, 'utf8') <= maxBytes) return captureJson;

  const bytes = Buffer.from(captureJson, 'utf8');
  let end = Math.max(0, Math.floor(maxBytes));
  // If the first excluded byte is a UTF-8 continuation byte (10xxxxxx), the
  // cut lands inside a character: back up to that character's lead byte.
  while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) {
    end--;
  }
  return bytes.toString('utf8', 0, end);
}
|
||||
|
||||
/**
 * Parse a capture JSON string into a value suitable for sql.json().
 *
 * @param captureJson Serialized capture, or null.
 * @returns The parsed value, or null when the input is null, empty, or not
 *   valid JSON. The parsed value is not checked to be an object.
 */
export function parseCaptureJson(captureJson: string | null): Record<string, unknown> | null {
  if (!captureJson) {
    return null;
  }
  let decoded: unknown;
  try {
    decoded = JSON.parse(captureJson);
  } catch {
    return null;
  }
  return decoded as Record<string, unknown>;
}
|
||||
194
apps/control/src/services/routing-scores.ts
Normal file
194
apps/control/src/services/routing-scores.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
/**
|
||||
* P6.1: Advisory routing scores.
|
||||
*
|
||||
* Combines three signals per (provider_id, model) into an advisory score and
|
||||
* a set of category badges surfaced in the BooChat model picker:
|
||||
* - eval results (eval_runs.aggregate.avgScore, split by suite kind)
|
||||
* - live latency (control_requests gen_tps + duration over a recent window)
|
||||
* - host health (fleet liveness — an unhealthy host can win no badge)
|
||||
*
|
||||
* Advisory only: this never enforces routing. It powers display badges
|
||||
* ("best code model right now") and the P7 gateway candidate ordering.
|
||||
*
|
||||
* The pure scoring/badge helpers are extracted for unit testing per the
|
||||
* turn-guard.ts pattern; the DB read lives in computeRoutingScores().
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
|
||||
/** How far back (in hours) live latency and throughput signals are sampled. */
const LIVE_WINDOW_HOURS = 24;

/** Advisory signals and badges for one (provider, model) pair. */
export interface ModelScore {
  /** Composite picker id: `${providerId}/${model}` (matches /api/models). */
  compositeId: string;
  providerId: string;
  model: string;
  /** Avg score from the latest completed code-suite eval run, or null. */
  codeScore: number | null;
  /** Avg score from the latest completed chat-suite eval run, or null. */
  chatScore: number | null;
  /** Higher of codeScore and chatScore; null when neither is set. */
  evalScore: number | null;
  /** Mean generation tok/s over the live window; null without recent traffic. */
  avgGenTps: number | null;
  /** Mean request duration in ms over the live window, or null. */
  avgLatencyMs: number | null;
  /** Number of requests seen in the live window. */
  sampleCount: number;
  /** True when the owning host is currently connected. */
  healthy: boolean;
  /** Category badges this model currently holds. */
  badges: BadgeKind[];
}
|
||||
|
||||
/** Categories in which a model can be the current winner. */
export type BadgeKind =
  | 'best-code'
  | 'best-chat'
  | 'best-fast';

/** Display text for each badge kind. */
export const BADGE_LABELS: Record<BadgeKind, string> = {
  'best-code': 'Best code model now',
  'best-chat': 'Best chat model now',
  'best-fast': 'Fastest model now',
};
|
||||
|
||||
/** Row shape of the eval-score query: one row per (provider, model, suite kind). */
interface EvalRow {
  provider_id: string;
  model: string;
  /** Kind of the eval suite the run belongs to (e.g. 'code', 'chat'). */
  suite_kind: string;
  /** `avgScore` read from the run's aggregate JSON; null when absent. */
  avg_score: number | null;
}

/** Row shape of the live-traffic query: one row per (provider, model). */
interface LatencyRow {
  provider_id: string;
  model: string;
  /** Mean gen_tps over rows with gen_tps > 0; null when there are none. */
  avg_gen_tps: number | null;
  /** Mean duration_ms over rows with duration_ms > 0; null when there are none. */
  avg_duration_ms: number | null;
  /** Number of requests in the window. */
  sample_count: number;
}
|
||||
|
||||
/**
 * Award one winner per badge category, mutating `badges` on the winners.
 *
 * Only healthy entries compete. Within a category the highest non-null value
 * wins; on a tie the entry that appears first in `scores` keeps the badge, so
 * callers should pass a deterministically ordered list. A category with no
 * non-null values awards nothing.
 *
 * @param scores Per-model signals; winners get the badge pushed onto `badges`.
 */
export function assignBadges(scores: ModelScore[]): void {
  const contenders = scores.filter((entry) => entry.healthy);

  const categories: Array<[BadgeKind, (entry: ModelScore) => number | null]> = [
    ['best-code', (entry) => entry.codeScore],
    ['best-chat', (entry) => entry.chatScore],
    ['best-fast', (entry) => entry.avgGenTps],
  ];

  for (const [badge, metric] of categories) {
    let winner: ModelScore | undefined;
    let top = -Infinity;
    for (const entry of contenders) {
      const value = metric(entry);
      // Strict ">" keeps the earliest entry on ties.
      if (value != null && value > top) {
        top = value;
        winner = entry;
      }
    }
    if (winner !== undefined) {
      winner.badges.push(badge);
    }
  }
}
|
||||
|
||||
/**
 * Compute advisory routing scores across all (provider_id, model) pairs that
 * have either eval history or recent live traffic.
 *
 * @param sql Tagged-template SQL client.
 * @param fleet Fleet state; a model is healthy when its provider's host has
 *   liveness `'connected'`.
 * @returns One ModelScore per pair, sorted by compositeId, with badges assigned.
 */
export async function computeRoutingScores(
  sql: Sql,
  fleet: FleetState,
): Promise<ModelScore[]> {
  // 1. Eval scores — latest completed run per (provider, model, kind).
  //    Take the most recent finished run's aggregate avgScore per kind so a
  //    fresh run supersedes stale numbers.
  const evalRows = await sql<EvalRow[]>`
    SELECT er.provider_id,
           er.model,
           es.kind AS suite_kind,
           (er.aggregate::jsonb ->> 'avgScore')::float AS avg_score
    FROM eval_runs er
    JOIN eval_suites es ON er.suite_id = es.id
    WHERE er.status = 'completed'
      AND er.aggregate IS NOT NULL
      AND er.finished_at = (
        SELECT MAX(er2.finished_at)
        FROM eval_runs er2
        JOIN eval_suites es2 ON er2.suite_id = es2.id
        WHERE er2.provider_id = er.provider_id
          AND er2.model = er.model
          AND es2.kind = es.kind
          AND er2.status = 'completed'
      )
  `;

  // 2. Live latency/throughput — recent control_requests per (provider, model).
  //    The averages are cast to float like avg_score above: AVG() over an
  //    integer or numeric column yields NUMERIC, which the driver may return
  //    as a string rather than the number LatencyRow declares.
  //    NOTE(review): column types are not visible here — confirm.
  const cutoff = new Date(Date.now() - LIVE_WINDOW_HOURS * 3600_000).toISOString();
  const latencyRows = await sql<LatencyRow[]>`
    SELECT provider_id,
           model,
           (AVG(gen_tps) FILTER (WHERE gen_tps > 0))::float AS avg_gen_tps,
           (AVG(duration_ms) FILTER (WHERE duration_ms > 0))::float AS avg_duration_ms,
           COUNT(*)::int AS sample_count
    FROM control_requests
    WHERE ts >= ${cutoff}
      AND model IS NOT NULL
    GROUP BY provider_id, model
  `;

  // 3. Merge signals keyed by compositeId.
  const byKey = new Map<string, ModelScore>();
  const keyOf = (providerId: string, model: string): string => `${providerId}/${model}`;

  // Fetch the entry for a pair, creating an empty one on first sight.
  const ensure = (providerId: string, model: string): ModelScore => {
    const compositeId = keyOf(providerId, model);
    let s = byKey.get(compositeId);
    if (!s) {
      s = {
        compositeId,
        providerId,
        model,
        codeScore: null,
        chatScore: null,
        evalScore: null,
        avgGenTps: null,
        avgLatencyMs: null,
        sampleCount: 0,
        healthy: fleet.hosts.get(providerId)?.liveness === 'connected',
        badges: [],
      };
      byKey.set(compositeId, s);
    }
    return s;
  };

  for (const row of evalRows) {
    const s = ensure(row.provider_id, row.model);
    // Suite kinds other than 'code' and 'chat' do not contribute a score.
    if (row.suite_kind === 'code') s.codeScore = row.avg_score;
    else if (row.suite_kind === 'chat') s.chatScore = row.avg_score;
    const best = Math.max(s.codeScore ?? -Infinity, s.chatScore ?? -Infinity);
    s.evalScore = best > -Infinity ? best : null;
  }

  for (const row of latencyRows) {
    const s = ensure(row.provider_id, row.model);
    s.avgGenTps = row.avg_gen_tps;
    s.avgLatencyMs = row.avg_duration_ms;
    s.sampleCount = row.sample_count;
  }

  // Deterministic order before badge assignment so ties are stable.
  const scores = Array.from(byKey.values()).sort((a, b) =>
    a.compositeId < b.compositeId ? -1 : a.compositeId > b.compositeId ? 1 : 0,
  );

  assignBadges(scores);
  return scores;
}
|
||||
410
apps/control/src/services/sandbox-runner.ts
Normal file
410
apps/control/src/services/sandbox-runner.ts
Normal file
@@ -0,0 +1,410 @@
|
||||
import { spawn, type ChildProcess } from 'node:child_process';
import { randomUUID } from 'node:crypto';
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import { recordEvalResult } from './eval-suites.js';
import { resolveProviderBaseUrl } from './llama-providers.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Input for one sandboxed code-eval run. */
export interface SandboxEvalParams {
  /** Eval run the per-task results are recorded against. */
  runId: string;
  /** Provider whose base URL is used for code generation. */
  providerId: string;
  /** Model name sent in the chat-completions request. */
  model: string;
  /** Quantization label, or null when not specified. */
  quant: string | null;
  /** Raw task records; `id`, `prompt`, `test_code`, `expected_output` and `language` are read from each. */
  tasks: Record<string, unknown>[];
}
|
||||
|
||||
/** Progress payload handed to the `onProgress` callback of `runCodeEval`. */
export interface SandboxProgress {
  /** Number of tasks that have finished so far (successfully or not). */
  completedTasks: number;
}
|
||||
|
||||
/** Outcome of a whole sandbox eval run. */
export interface SandboxResult {
  /** Run-level error message, or null when none was reported. */
  error: null | string;
}
|
||||
|
||||
/** Handle for a running sandbox container, as consumed by `cleanupContainer`. */
export interface SandboxContainer {
  /** Container name/id passed to `docker rm -f`. */
  id: string;
  /** The spawned child process driving the container. */
  process: ChildProcess;
  /** Pending kill timer, if any; cleared during cleanup. */
  timeoutHandle: null | NodeJS.Timeout;
}
|
||||
|
||||
// ─── hardening constants (LAW, not suggestions) ─────────────────────────────
|
||||
|
||||
/**
 * Parse a positive integer from an environment value.
 *
 * Returns `fallback` when the value is unset, non-numeric, or below 1, so a
 * typo such as `SANDBOX_CONCURRENCY=0` cannot produce a zero batch step
 * (infinite loop) and a bad `SANDBOX_TIMEOUT_MS` cannot become a NaN timer.
 */
function positiveIntFromEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : fallback;
}

// Image and resource caps are passed straight through to `docker run`.
const SANDBOX_IMAGE = process.env.SANDBOX_IMAGE ?? 'node:20-bookworm-slim';
const SANDBOX_MEMORY = process.env.SANDBOX_MEMORY ?? '512m';
const SANDBOX_CPU = process.env.SANDBOX_CPU ?? '0.5';
const SANDBOX_PIDS = process.env.SANDBOX_PIDS ?? '100';
// Numeric knobs are validated; invalid values fall back to the defaults.
const SANDBOX_TIMEOUT_MS = positiveIntFromEnv(process.env.SANDBOX_TIMEOUT_MS, 30000);
const SANDBOX_CONCURRENCY = positiveIntFromEnv(process.env.SANDBOX_CONCURRENCY, 4);
// Label attached to every sandbox container so orphans can be found.
const SANDBOX_LABEL = 'boocontrol-eval';
|
||||
|
||||
// ─── sandbox runner ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run a code sandbox eval: each task generates code via LLM, executes in
 * an ephemeral Docker container with hardening flags, and scores pass@1.
 *
 * HARDENING FLAGS (LAW):
 * - --network none: NO network access
 * - --user 1000:1000: non-root user
 * - --memory, --cpus, --pids-limit: resource caps
 * - --tmpfs /workspace: tmpfs workdir
 * - --rm: auto-remove on exit
 * - --label boocontrol-eval: orphan findability
 * - --security-opt=no-new-privileges: no privilege escalation
 * - --cap-drop=ALL: drop all capabilities
 *
 * NO volume mounts from the repo.
 * NO docker socket inside containers.
 *
 * Tasks are processed in batches of at most SANDBOX_CONCURRENCY via
 * Promise.allSettled, so one failing task never abandons its siblings.
 *
 * @param params     Run id, provider/model and the raw task records.
 * @param sql        Database handle passed through to `recordEvalResult`.
 * @param emitter    Receives one `control_job` delta per successfully scored task.
 * @param seq        Sequence number stamped on every emitted delta.
 * @param onProgress Called after every task finishes (pass, fail or error).
 * @returns `{ error }` — null unless a task promise itself rejected, in which
 *          case the first rejection message is reported.
 */
export async function runCodeEval(
  params: SandboxEvalParams,
  sql: Sql,
  emitter: DeltaEmitter,
  seq: number,
  onProgress: (progress: SandboxProgress) => void,
): Promise<SandboxResult> {
  const { runId, tasks } = params;

  // Orphan prune at engine start.
  await pruneOrphanContainers();

  let completedTasks = 0;
  let error: string | null = null;

  // Guard the batch step: a 0 step would loop forever and a NaN step would
  // silently process nothing.
  const concurrency =
    Number.isFinite(SANDBOX_CONCURRENCY) && SANDBOX_CONCURRENCY >= 1
      ? Math.floor(SANDBOX_CONCURRENCY)
      : 1;

  // Task records are untyped; coerce fields instead of asserting they are strings.
  const textField = (value: unknown, fallback: string): string =>
    value === undefined || value === null ? fallback : String(value);

  // Bounded concurrency: process tasks in batches.
  for (let i = 0; i < tasks.length; i += concurrency) {
    const batch = tasks.slice(i, i + concurrency);

    // Promise.allSettled: a single task failure never abandons in-flight containers.
    const results = await Promise.allSettled(
      batch.map(async (task, batchIdx) => {
        const globalIdx = i + batchIdx;
        const taskId = textField(task.id, `task_${globalIdx}`);
        const prompt = textField(task.prompt, '');
        const testCode = textField(task.test_code, '');
        const expectedOutput = textField(task.expected_output, '');
        const language = textField(task.language, 'typescript');

        const startTime = Date.now();
        // NOTE(review): executeInSandbox does not hand back a container handle,
        // so this stays null and the cleanup below is currently a no-op.
        let container: SandboxContainer | null = null;

        try {
          // Generate code from LLM.
          const generatedCode = await generateCode(params.providerId, params.model, prompt, language);

          // Execute in sandbox.
          const execResult = await executeInSandbox(generatedCode, testCode, language);

          const executionMs = Date.now() - startTime;

          // pass@1 scoring: output matches expected.
          const passed = normalizeOutput(execResult.stdout) === normalizeOutput(expectedOutput);
          const score = passed ? 1 : 0;

          await recordEvalResult(
            sql,
            runId,
            taskId,
            globalIdx,
            score,
            1,
            passed ? 'Output matches expected' : `Expected: ${expectedOutput}, Got: ${execResult.stdout}`,
            execResult.exitCode,
            execResult.stderr,
            execResult.stdout,
            executionMs,
            null,
          );

          emitter.publish({
            type: 'control_job' as const,
            seq,
            jobType: 'eval' as const,
            jobId: runId,
            status: 'running' as const,
            detail: {
              taskId,
              taskIndex: globalIdx,
              passed,
              score,
            },
          });

          return { taskId, passed, score };
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          const executionMs = Date.now() - startTime;

          // Best-effort: a failure to record the failure must not mask it.
          await recordEvalResult(
            sql,
            runId,
            taskId,
            globalIdx,
            null,
            1,
            null,
            null,
            msg,
            null,
            executionMs,
            msg,
          ).catch(() => {});

          return { taskId, passed: false, score: 0, error: msg };
        } finally {
          // Per-task finally cleanup: kill container + remove.
          if (container) {
            await cleanupContainer(container);
          }
          completedTasks++;
          onProgress({ completedTasks });
        }
      }),
    );

    // Log batch results and surface the first rejection to the caller.
    for (const result of results) {
      if (result.status === 'rejected') {
        console.error('sandbox: batch task rejected:', result.reason);
        if (error === null) {
          const reason: unknown = result.reason;
          error = reason instanceof Error ? reason.message : String(reason);
        }
      }
    }
  }

  return { error };
}
|
||||
|
||||
/**
 * Generate code from the target model.
 *
 * Sends a deterministic (temperature 0) chat-completions request to the
 * provider and returns the first choice's content, with a surrounding markdown
 * code fence stripped when the model added one despite the instructions.
 *
 * @param providerId Provider used to resolve the base URL.
 * @param model      Model name sent in the request body.
 * @param prompt     Task prompt, sent as the user message.
 * @param language   Target language named in the system prompt.
 * @returns The generated code, trimmed.
 * @throws Error when no base URL resolves or the provider answers non-2xx;
 *         fetch/abort errors (120 s timeout) propagate unchanged.
 */
async function generateCode(
  providerId: string,
  model: string,
  prompt: string,
  language: string,
): Promise<string> {
  const baseUrl = resolveProviderBaseUrlInternal(providerId);
  if (!baseUrl) {
    throw new Error(`no base URL for provider ${providerId}`);
  }
  // Tolerate a trailing slash on the configured base URL (as healthWait does).
  const endpoint = `${baseUrl.replace(/\/+$/, '')}/v1/chat/completions`;

  const systemPrompt = `You are a code generator. Write ${language} code that solves the given task.
Output ONLY the code, no explanations, no markdown fences. The code will be executed directly.`;

  const res = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Boo-Source': 'control-eval',
    },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: prompt },
      ],
      temperature: 0,
      max_tokens: 2048,
    }),
    signal: AbortSignal.timeout(120_000),
  });

  if (!res.ok) {
    const body = await res.text().catch(() => '');
    throw new Error(`code generation failed: ${res.status} ${body.slice(0, 200)}`);
  }

  const data = (await res.json()) as { choices?: Array<{ message?: { content?: string } }> };
  let code = data.choices?.[0]?.message?.content ?? '';

  // Strip markdown code fences if present. The info string may be anything up
  // to the end of the line (e.g. "c++", "ts "), and the line may end in CRLF.
  const fenceMatch = code.match(/```[^\n`]*\r?\n([\s\S]*?)```/);
  if (fenceMatch && fenceMatch[1]) {
    code = fenceMatch[1];
  }

  return code.trim();
}
|
||||
|
||||
/**
 * Execute code in a hardened Docker container.
 *
 * Resolves with the captured output and exit code once the `docker run`
 * client closes. Rejects when docker cannot be spawned or when the run exceeds
 * SANDBOX_TIMEOUT_MS; on timeout the container itself is stopped with
 * `docker kill`, because killing only the CLI client leaves the container
 * running.
 *
 * @param generatedCode Model-produced code written into the sandbox.
 * @param testCode      Test driver executed inside the sandbox.
 * @param language      Selects the script layout (see buildExecutionScript).
 */
async function executeInSandbox(
  generatedCode: string,
  testCode: string,
  language: string,
): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
  // Untrusted code controls the output volume; cap what is buffered in memory.
  const MAX_CAPTURE_CHARS = 1_000_000;

  return new Promise((resolve, reject) => {
    const containerId = `eval_${randomUUID().slice(0, 12)}`;

    // Build the combined script: generated code + test code.
    const script = buildExecutionScript(generatedCode, testCode, language);

    // SECURITY: Hardened Docker run command.
    // --network none: NO network access.
    // --user 1000:1000: non-root user.
    // --memory, --cpus, --pids-limit: resource caps.
    // --tmpfs /workspace: tmpfs workdir, no persistent storage.
    // --rm: auto-remove on exit.
    // --label boocontrol-eval: orphan findability.
    // --security-opt=no-new-privileges: no privilege escalation.
    // --cap-drop=ALL: drop all capabilities.
    const dockerArgs = [
      'run',
      '--network', 'none',
      '--user', '1000:1000',
      '--memory', SANDBOX_MEMORY,
      '--cpus', String(SANDBOX_CPU),
      '--pids-limit', String(SANDBOX_PIDS),
      '--tmpfs', '/workspace:rw,noexec,size=64m',
      '--rm',
      '--label', SANDBOX_LABEL,
      '--security-opt', 'no-new-privileges',
      '--cap-drop', 'ALL',
      '--name', containerId,
      '-e', 'NODE_ENV=production',
      SANDBOX_IMAGE,
      'sh', '-c', script,
    ];

    // The timeout is enforced by the single timer below rather than spawn's
    // own `timeout` option, so the two cannot race each other.
    const dockerProcess = spawn('docker', dockerArgs, {
      env: { ...process.env },
    });

    let stdout = '';
    let stderr = '';
    let settled = false;

    const append = (current: string, chunk: Buffer): string =>
      current.length >= MAX_CAPTURE_CHARS
        ? current
        : (current + chunk.toString()).slice(0, MAX_CAPTURE_CHARS);

    // Kill-on-timeout: stop the container, then the client.
    const timeoutHandle = setTimeout(() => {
      if (settled) return;
      settled = true;
      const killer = spawn('docker', ['kill', containerId], { stdio: 'ignore' });
      killer.on('error', () => {
        // Best-effort; the orphan prune at the next run is the fallback.
      });
      dockerProcess.kill('SIGKILL');
      reject(new Error(`sandbox execution timeout (${SANDBOX_TIMEOUT_MS}ms)`));
    }, SANDBOX_TIMEOUT_MS);

    dockerProcess.stdout.on('data', (chunk: Buffer) => {
      stdout = append(stdout, chunk);
    });

    dockerProcess.stderr.on('data', (chunk: Buffer) => {
      stderr = append(stderr, chunk);
    });

    dockerProcess.on('close', (code) => {
      clearTimeout(timeoutHandle);
      if (settled) return;
      settled = true;
      resolve({
        stdout: stdout.trim(),
        stderr: stderr.trim(),
        exitCode: code,
      });
    });

    dockerProcess.on('error', (err) => {
      // Spawn failed: there is nothing to time out any more.
      clearTimeout(timeoutHandle);
      if (settled) return;
      settled = true;
      reject(new Error(`docker spawn failed: ${err.message}`));
    });
  });
}
|
||||
|
||||
/**
 * Build the execution script for the sandbox.
 *
 * The script changes into /workspace, writes the generated code and the test
 * code to files, and runs the test file with stderr merged into stdout. Files
 * are written with `printf '%s\n'` rather than `echo`: under `sh`, `echo` may
 * interpret backslash sequences (`\n`, `\t`, `\\`) and corrupt the code.
 *
 * @param generatedCode Code written to `output.js` / `output.sh`.
 * @param testCode      Code written to `test.js` / `test.sh` and executed.
 * @param language      `typescript` or `javascript` select the tsx layout;
 *                      anything else falls back to bash.
 * @returns A single `&&`-joined command line for `sh -c`.
 */
function buildExecutionScript(
  generatedCode: string,
  testCode: string,
  language: string,
): string {
  // POSIX single-quote escaping: close the quote, emit \', reopen.
  const quote = (text: string): string => `'${text.replace(/'/g, "'\\''")}'`;
  const writeFile = (text: string, file: string): string =>
    `printf '%s\\n' ${quote(text)} > ${file}`;

  if (language === 'typescript' || language === 'javascript') {
    // NOTE(review): the container runs with --network none, so `npx tsx` only
    // works if tsx is already present in the image — confirm.
    return [
      'cd /workspace',
      writeFile(generatedCode, 'output.js'),
      writeFile(testCode, 'test.js'),
      'npx --yes tsx test.js 2>&1',
    ].join(' && ');
  }

  // Fallback: generic shell execution.
  return [
    'cd /workspace',
    writeFile(generatedCode, 'output.sh'),
    writeFile(testCode, 'test.sh'),
    'chmod +x output.sh test.sh',
    'bash test.sh 2>&1',
  ].join(' && ');
}
|
||||
|
||||
/**
 * Make a string safe to place between single quotes in a shell command.
 *
 * Each `'` becomes `'\''` (close the quote, escaped quote, reopen). The
 * surrounding quotes are NOT added; the caller supplies them.
 */
function escapeShell(str: string): string {
  const closeEscapeReopen = "'\\''";
  return str.split("'").join(closeEscapeReopen);
}
|
||||
|
||||
/**
 * Canonicalize program output before comparison: leading and trailing
 * whitespace is dropped and every internal whitespace run becomes one space.
 */
function normalizeOutput(output: string): string {
  const words = output.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}
|
||||
|
||||
/**
 * Kill sandbox containers left running by earlier runs.
 *
 * Lists running containers carrying the sandbox label and, when any exist,
 * issues a single `docker kill` for all of them. Never rejects: a failure to
 * spawn docker simply resolves.
 */
async function pruneOrphanContainers(): Promise<void> {
  return new Promise<void>((done) => {
    const lister = spawn('docker', ['ps', '-q', '--filter', `label=${SANDBOX_LABEL}`]);
    const pieces: string[] = [];

    lister.stdout.on('data', (piece: Buffer) => {
      pieces.push(piece.toString());
    });
    lister.on('error', () => done());
    lister.on('close', async () => {
      const orphanIds = pieces
        .join('')
        .trim()
        .split('\n')
        .filter((id) => Boolean(id));

      if (orphanIds.length === 0) {
        done();
        return;
      }

      console.log({ count: orphanIds.length }, 'sandbox: pruning orphan containers');
      const killer = spawn('docker', ['kill', ...orphanIds]);
      // Wait for the kill to finish, whether it succeeds or fails to spawn.
      await new Promise((settle) => {
        killer.on('close', settle);
        killer.on('error', settle);
      });
      done();
    });
  });
}
|
||||
|
||||
/**
 * Tear down one sandbox container: cancel its timer, SIGKILL the child if it
 * has not reported an exit code, then force-remove the container by id.
 * Never rejects.
 */
async function cleanupContainer(container: SandboxContainer): Promise<void> {
  const { id, process: child, timeoutHandle } = container;

  if (timeoutHandle) clearTimeout(timeoutHandle);

  const noExitCodeYet = child.exitCode === null;
  if (noExitCodeYet) child.kill('SIGKILL');

  // Container is --rm, so it auto-removes. But force-remove as safety net.
  const removal = new Promise<void>((settle) => {
    const remover = spawn('docker', ['rm', '-f', id]);
    remover.on('close', settle);
    remover.on('error', settle);
  });
  await removal.catch(() => {});
}
|
||||
|
||||
/**
 * Resolve provider base URL (internal, mirrors llama-providers).
 *
 * Uses the statically imported `resolveProviderBaseUrl`. The previous bare
 * `require(...)` is not defined inside an ES module, so the lookup threw a
 * ReferenceError that the catch turned into `null` for every provider.
 *
 * @returns The provider's base URL, or null when it cannot be resolved or the
 *          resolver throws.
 */
function resolveProviderBaseUrlInternal(providerId: string): string | null {
  try {
    return resolveProviderBaseUrl(providerId) ?? null;
  } catch {
    return null;
  }
}
|
||||
361
apps/control/src/services/ssh-config.ts
Normal file
361
apps/control/src/services/ssh-config.ts
Normal file
@@ -0,0 +1,361 @@
|
||||
/**
|
||||
* P9.1: SSH config editor for llama-swap hosts.
|
||||
*
|
||||
* Pipeline (design §5, stackctl flow with the tests stackctl never had):
|
||||
* SFTP/SSH read -> schema-validated edit (config-schema.json from the fork)
|
||||
* -> diff preview -> timestamped backup -> write -> restart -> health-wait.
|
||||
*
|
||||
* SSH I/O is shelled out via `ssh` (matching the booterm precedent — no ssh2
|
||||
* dependency, key from `secrets/`), injected as `SshExec` so every failure path
|
||||
* is unit-testable without a live host. The pure helpers (validate, diff,
|
||||
* backup filename) carry the logic and are tested directly.
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { createRequire } from 'node:module';
|
||||
import { load as loadYaml } from 'js-yaml';
|
||||
import type { ValidateFunction } from 'ajv';
|
||||
|
||||
// ajv + ajv-formats are CJS. Under NodeNext ESM the default-import interop binds
|
||||
// the namespace, not the constructable class, so load them via createRequire to
|
||||
// get the real module.exports (class / plugin fn) at both type and runtime.
|
||||
const require = createRequire(import.meta.url);
|
||||
const Ajv = require('ajv') as typeof import('ajv').default;
|
||||
const addFormats = require('ajv-formats') as typeof import('ajv-formats').default;
|
||||
|
||||
// ─── host SSH target ─────────────────────────────────────────────────────────
|
||||
|
||||
/** Where and how to reach a host over SSH. */
export interface SshTarget {
  /** Hostname or address; combined with `user` as `user@host`. */
  host: string;
  /** Remote login name. */
  user: string;
  /** Path of the private key passed to `ssh -i`. */
  keyPath: string;
}
|
||||
|
||||
/** Outcome of one remote command. */
export interface ExecResult {
  /** Exit status; 0 means success. */
  code: number;
  /** Everything the command wrote to standard output. */
  stdout: string;
  /** Everything the command wrote to standard error. */
  stderr: string;
}
|
||||
|
||||
/**
 * Injectable SSH executor: runs `command` on `target` and reports the result.
 * When `stdin` is supplied it is piped to the remote command.
 */
export type SshExec = (
  target: SshTarget,
  command: string,
  stdin?: string,
) => Promise<ExecResult>;
|
||||
|
||||
// ─── pure: schema validation ─────────────────────────────────────────────────
|
||||
|
||||
/** Result of validating a config YAML document. */
export interface ValidationResult {
  /** True only when the YAML parsed and satisfied the schema. */
  valid: boolean;
  /** Human-readable problems; empty when `valid` is true. */
  errors: string[];
  /** Parsed config value; present whenever the YAML was syntactically valid. */
  parsed?: unknown;
}
|
||||
|
||||
// Single-entry cache: the compiled validator and the exact schema object it
// was compiled from (compared by reference).
let cachedValidator: ValidateFunction | null = null;
let cachedSchemaRef: object | null = null;

/**
 * Return a compiled Ajv validator for `schema`, compiling only when the schema
 * object differs (by identity) from the one used for the cached validator.
 */
function getValidator(schema: object): ValidateFunction {
  const cacheHit = cachedValidator !== null && cachedSchemaRef === schema;
  if (cacheHit && cachedValidator) {
    return cachedValidator;
  }

  const engine = new Ajv({ allErrors: true, strict: false });
  addFormats(engine);

  const compiled = engine.compile(schema);
  cachedSchemaRef = schema;
  cachedValidator = compiled;
  return compiled;
}
|
||||
|
||||
/**
 * Validate a llama-swap config YAML string against the fork's
 * config-schema.json. Catches YAML syntax errors first, then schema errors.
 * Pure — no I/O; the schema object is passed in.
 *
 * @param yamlText Raw YAML document.
 * @param schema   JSON Schema object to validate against.
 * @returns `valid: true` with the parsed value, or `valid: false` with one
 *          message per problem. `parsed` is set whenever the YAML itself
 *          parsed, even if it then failed validation.
 */
export function validateLlamaConfig(yamlText: string, schema: object): ValidationResult {
  let parsed: unknown;
  try {
    parsed = loadYaml(yamlText);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { valid: false, errors: [`YAML parse error: ${reason}`] };
  }

  // A top-level sequence is also `typeof 'object'`; reject it here so the
  // "mapping" requirement holds regardless of what the schema constrains.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { valid: false, errors: ['config must be a YAML mapping'], parsed };
  }

  const validate = getValidator(schema);
  const ok = validate(parsed);
  if (ok) return { valid: true, errors: [], parsed };

  const errors = (validate.errors ?? []).map((e) => {
    const path = e.instancePath || '(root)';
    return `${path} ${e.message ?? 'invalid'}`;
  });
  return { valid: false, errors: errors.length ? errors : ['schema validation failed'], parsed };
}
|
||||
|
||||
// ─── pure: unified-ish diff ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build a compact preview diff of two texts.
 *
 * Lines shared at the start and at the end are skipped; what remains of the
 * old text is listed with a `- ` prefix, followed by what remains of the new
 * text with a `+ ` prefix, under one `@@ lines a..b -> a..c @@` header
 * (1-based). Returns the empty string when the texts are identical. This is a
 * preview aid, not a minimal-edit diff.
 */
export function computeDiff(oldText: string, newText: string): string {
  const before = oldText.split('\n');
  const after = newText.split('\n');

  // Length of the common prefix.
  let head = 0;
  for (; head < before.length && head < after.length; head++) {
    if (before[head] !== after[head]) break;
  }

  // Walk back over the common suffix without crossing the prefix.
  let lastBefore = before.length - 1;
  let lastAfter = after.length - 1;
  for (; lastBefore >= head && lastAfter >= head; lastBefore--, lastAfter--) {
    if (before[lastBefore] !== after[lastAfter]) break;
  }

  const nothingRemoved = lastBefore < head;
  const nothingAdded = lastAfter < head;
  if (nothingRemoved && nothingAdded) return ''; // identical

  const header = `@@ lines ${head + 1}..${lastBefore + 1} -> ${head + 1}..${lastAfter + 1} @@`;
  const removed = before.slice(head, lastBefore + 1).map((line) => `- ${line}`);
  const added = after.slice(head, lastAfter + 1).map((line) => `+ ${line}`);
  return [header, ...removed, ...added].join('\n');
}
|
||||
|
||||
// ─── pure: backup filename ───────────────────────────────────────────────────
|
||||
|
||||
/**
 * Timestamped backup path: `<configPath>.bak-YYYYMMDDTHHMMSSZ`.
 *
 * The stamp is the UTC ISO timestamp of `now` with dashes, colons and the
 * fractional seconds removed.
 */
export function backupFilename(configPath: string, now: Date): string {
  const iso = now.toISOString();
  const withoutSeparators = iso.split(/[-:]/).join('');
  const stamp = withoutSeparators.replace(/\.\d+Z$/, 'Z');
  return [configPath, stamp].join('.bak-');
}
|
||||
|
||||
// ─── RemoteOps seam (shell vs wrapper) ───────────────────────────────────────
|
||||
//
|
||||
// 'shell' mode issues raw shell commands (P9.1 behavior). 'wrapper' mode issues
|
||||
// fixed verbs so the key can be bound to an authorized_keys forced command that
|
||||
// hardcodes the paths. Both drive the same apply pipeline.
|
||||
|
||||
/** How remote operations are issued: raw shell commands or fixed wrapper verbs. */
export type SshMode = 'wrapper' | 'shell';

/** The four remote operations the apply pipeline is built from. */
export interface RemoteOps {
  /** Fetch the current config text. */
  read(): Promise<string>;
  /** Create a backup of the current config and return the backup path. */
  backup(now: Date): Promise<string>;
  /** Replace the config with `content`. */
  write(content: string): Promise<void>;
  /** Restart the service. */
  restart(restartCmd: string): Promise<void>;
}
|
||||
|
||||
/**
 * Throw an Error describing a failed remote step: the step label, the exit
 * code, and at most the first 300 characters of stderr.
 */
function fail(label: string, res: ExecResult): never {
  const { code, stderr } = res;
  const detail = stderr.slice(0, 300);
  throw new Error(`${label} failed (exit ${code}): ${detail}`);
}
|
||||
|
||||
/**
 * Raw-command ops (no wrapper on the host).
 *
 * read = `cat`, backup = `cp` to a timestamped sibling, write = `cat >` fed
 * from stdin, restart = the caller's command verbatim. Any non-zero exit
 * throws via `fail`.
 */
export function shellOps(target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
  const quotedConfig = shellQuote(configPath);

  // Throw on a non-zero exit; otherwise pass the result through.
  const ensureOk = (label: string, outcome: ExecResult): ExecResult => {
    if (outcome.code !== 0) fail(label, outcome);
    return outcome;
  };

  return {
    read: async () => {
      const outcome = await exec(target, `cat ${quotedConfig}`);
      return ensureOk('read', outcome).stdout;
    },
    backup: async (now) => {
      const destination = backupFilename(configPath, now);
      const outcome = await exec(target, `cp ${quotedConfig} ${shellQuote(destination)}`);
      ensureOk('backup', outcome);
      return destination;
    },
    write: async (content) => {
      const outcome = await exec(target, `cat > ${quotedConfig}`, content);
      ensureOk('write', outcome);
    },
    restart: async (restartCmd) => {
      const outcome = await exec(target, restartCmd);
      ensureOk('restart', outcome);
    },
  };
}
|
||||
|
||||
/**
 * Verb ops for a forced-command-locked key. Only the fixed verbs `read`,
 * `backup`, `write` and `restart` are sent; the wrapper on the host hardcodes
 * the paths. The backup verb reports the backup path on stdout.
 */
export function wrapperOps(target: SshTarget, exec: SshExec): RemoteOps {
  // Send one verb (optionally with stdin) and throw on a non-zero exit.
  const sendVerb = async (verb: string, payload?: string): Promise<ExecResult> => {
    const outcome =
      payload === undefined ? await exec(target, verb) : await exec(target, verb, payload);
    if (outcome.code !== 0) fail(verb, outcome);
    return outcome;
  };

  return {
    read: async () => (await sendVerb('read')).stdout,
    backup: async () => (await sendVerb('backup')).stdout.trim(),
    write: async (content) => {
      // Always forward the content argument, as a three-argument exec call.
      const outcome = await exec(target, 'write', content);
      if (outcome.code !== 0) fail('write', outcome);
    },
    restart: async () => {
      await sendVerb('restart');
    },
  };
}
|
||||
|
||||
/**
 * Pick the RemoteOps implementation for `mode`: verb-based ops for
 * `'wrapper'`, raw shell ops (which need `configPath`) for anything else.
 */
export function makeRemoteOps(mode: SshMode, target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
  if (mode === 'wrapper') {
    return wrapperOps(target, exec);
  }
  return shellOps(target, configPath, exec);
}
|
||||
|
||||
// ─── orchestration (injectable exec) ─────────────────────────────────────────
|
||||
|
||||
/**
 * Read the remote config file (mode-aware; defaults to shell for compat).
 *
 * @returns The config text as reported by the selected ops' `read`.
 */
export async function readRemoteConfig(
  target: SshTarget,
  configPath: string,
  exec: SshExec,
  mode: SshMode = 'shell',
): Promise<string> {
  const ops = makeRemoteOps(mode, target, configPath, exec);
  return ops.read();
}
|
||||
|
||||
/** Outcome of `applyRemoteConfig`. */
export interface ApplyResult {
  /** True only when every step, including the health wait, succeeded. */
  ok: boolean;
  /** The step that failed, or `'done'` on success. */
  step: 'done' | 'validate' | 'backup' | 'write' | 'restart' | 'health';
  /** Backup path, set once the backup step has succeeded. */
  backupPath?: string;
  /** Preview diff of current vs new config, set once the current config was read. */
  diff?: string;
  /** Failure description when `ok` is false. */
  error?: string;
}
|
||||
|
||||
/** Inputs for `applyRemoteConfig`. */
export interface ApplyOptions {
  /** Host to operate on. */
  target: SshTarget;
  /** Remote config file path (used in shell mode). */
  configPath: string;
  /** Command that restarts the service (used verbatim in shell mode). */
  restartCmd: string;
  /** Full text of the new config. */
  newConfig: string;
  /** JSON Schema the new config must satisfy. */
  schema: object;
  /** Provider base URL polled by the health wait. */
  baseUrl: string;
  /** SSH executor; injectable for tests. */
  exec: SshExec;
  /** 'shell' (default) or 'wrapper'. */
  mode?: SshMode;
  /** Fetch implementation for the health wait; defaults to global `fetch`. */
  fetcher?: typeof fetch;
  /** Clock value used for the backup stamp; defaults to the current time. */
  now?: Date;
  /** Number of health probes; defaults to 10. */
  healthAttempts?: number;
  /** Delay between health probes in milliseconds; defaults to 2000. */
  healthDelayMs?: number;
}
|
||||
|
||||
/**
 * Apply a new config to a remote host, stopping at the first failing step and
 * naming it in the result.
 *
 * Order: validate -> read current (for the diff) -> backup -> write ->
 * restart -> health wait. Because the backup is taken before the write, a
 * failed write, restart or health check leaves the timestamped backup in place
 * for manual recovery. `mode` only changes the wire commands (raw shell vs
 * forced-command verbs), not the sequence.
 *
 * A failure to read the current config is reported under step `'validate'`.
 */
export async function applyRemoteConfig(opts: ApplyOptions): Promise<ApplyResult> {
  const { target, configPath, restartCmd, newConfig, schema, baseUrl, exec } = opts;
  const mode = opts.mode === undefined ? 'shell' : opts.mode;
  const fetcher = opts.fetcher === undefined ? fetch : opts.fetcher;
  const now = opts.now === undefined ? new Date() : opts.now;
  const healthAttempts = opts.healthAttempts === undefined ? 10 : opts.healthAttempts;
  const healthDelayMs = opts.healthDelayMs === undefined ? 2000 : opts.healthDelayMs;

  const messageOf = (err: unknown): string => (err as Error).message;
  const ops = makeRemoteOps(mode, target, configPath, exec);

  // 1. Validate before touching the host.
  const validation = validateLlamaConfig(newConfig, schema);
  if (!validation.valid) {
    return { ok: false, step: 'validate', error: validation.errors.join('; ') };
  }

  // Read current for diff + so an unreadable host fails before any write.
  let current: string;
  try {
    current = await ops.read();
  } catch (err) {
    return { ok: false, step: 'validate', error: `read current failed: ${messageOf(err)}` };
  }
  const diff = computeDiff(current, newConfig);

  // 2. Timestamped backup BEFORE write.
  let backupPath: string;
  try {
    backupPath = await ops.backup(now);
  } catch (err) {
    return { ok: false, step: 'backup', diff, error: messageOf(err) };
  }

  // 3. Write new config.
  try {
    await ops.write(newConfig);
  } catch (err) {
    return { ok: false, step: 'write', backupPath, diff, error: messageOf(err) };
  }

  // 4. Restart the service.
  try {
    await ops.restart(restartCmd);
  } catch (err) {
    return { ok: false, step: 'restart', backupPath, diff, error: messageOf(err) };
  }

  // 5. Health-wait: poll the provider until it serves /v1/models.
  const cameUp = await healthWait(baseUrl, fetcher, healthAttempts, healthDelayMs);
  if (cameUp) {
    return { ok: true, step: 'done', backupPath, diff };
  }
  return {
    ok: false,
    step: 'health',
    backupPath,
    diff,
    error: 'health check did not pass after restart; backup retained',
  };
}
|
||||
|
||||
/**
 * Poll `<baseUrl>/v1/models` until a probe answers OK or the attempts run out.
 *
 * Each probe is aborted after 5 seconds; a thrown or non-OK probe counts as
 * "not up yet". The delay is applied between probes only, never after the
 * last one.
 *
 * @returns true as soon as one probe succeeds, false otherwise.
 */
export async function healthWait(
  baseUrl: string,
  fetcher: typeof fetch,
  attempts: number,
  delayMs: number,
): Promise<boolean> {
  const probe = async (): Promise<boolean> => {
    try {
      const modelsUrl = `${baseUrl.replace(/\/+$/, '')}/v1/models`;
      const res = await fetcher(modelsUrl, { signal: AbortSignal.timeout(5_000) });
      return Boolean(res.ok);
    } catch {
      // not up yet
      return false;
    }
  };

  for (let attempt = 0; attempt < attempts; attempt += 1) {
    if (await probe()) return true;

    const moreToCome = attempt < attempts - 1;
    if (moreToCome) {
      await new Promise<void>((wake) => setTimeout(wake, delayMs));
    }
  }
  return false;
}
|
||||
|
||||
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Minimal POSIX single-quote shell escape for the remote command string:
 * wraps `s` in single quotes and rewrites every embedded `'` as `'\''`.
 */
function shellQuote(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
||||
|
||||
// ─── real SSH executor (spawn) ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Default SSH executor. Uses the system `ssh` with an explicit identity file and
 * IdentitiesOnly so the agent's default key is never offered (the boocode Gitea
 * lesson). BatchMode avoids interactive prompts hanging the service.
 *
 * Never rejects: a spawn failure resolves with code 127 and the error message
 * appended to stderr; a close without an exit code resolves with code 1.
 *
 * @param target  Host, user and key to connect with.
 * @param command Remote command string passed to ssh as-is.
 * @param stdin   Optional text piped to the remote command's stdin.
 */
export const sshExec: SshExec = (target, command, stdin) => {
  return new Promise<ExecResult>((resolve) => {
    const args = [
      '-i', target.keyPath,
      '-o', 'IdentitiesOnly=yes',
      '-o', 'BatchMode=yes',
      '-o', 'StrictHostKeyChecking=accept-new',
      '-o', 'ConnectTimeout=10',
      `${target.user}@${target.host}`,
      command,
    ];
    const child = spawn('ssh', args, { stdio: ['pipe', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (d) => { stdout += d.toString(); });
    child.stderr.on('data', (d) => { stderr += d.toString(); });
    child.on('error', (err) => resolve({ code: 127, stdout, stderr: `${stderr}${(err as Error).message}` }));
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));

    // If ssh exits before consuming stdin (connection refused, auth failure),
    // the write fails with EPIPE. Without a listener that stream error would be
    // an uncaught exception; note it in stderr and let 'close' report the exit.
    child.stdin.on('error', (err) => {
      stderr += `stdin: ${(err as Error).message}\n`;
    });

    if (stdin !== undefined) {
      child.stdin.write(stdin);
    }
    child.stdin.end();
  });
};
|
||||
Reference in New Issue
Block a user