chore: snapshot main sync

This commit is contained in:
2026-06-17 20:08:31 +00:00
parent b18de2a331
commit 8bd32537cf
354 changed files with 10208 additions and 9230 deletions

View File

@@ -7,8 +7,10 @@ Wants=network-online.target
Type=simple
User=samkintop
Group=samkintop
WorkingDirectory=/home/samkintop/opt/boocode
ExecStart=/home/samkintop/.local/share/pnpm/global/5/.pnpm/node_modules/pnpm/bin/pnpm.cjs start -C apps/control start
WorkingDirectory=/home/samkintop/opt/boocode/apps/control
# Run the built JS directly (boocoder.service pattern); pnpm/global path is not stable.
Environment=PATH=/home/samkintop/.nvm/versions/node/v24.15.0/bin:/home/samkintop/.local/bin:/usr/local/bin:/usr/bin:/bin
ExecStart=/home/samkintop/.nvm/versions/node/v24.15.0/bin/node /home/samkintop/opt/boocode/apps/control/dist/index.js
Restart=on-failure
RestartSec=5
EnvironmentFile=/home/samkintop/opt/boocode/apps/control/.env.host

View File

@@ -12,7 +12,9 @@ $cfg = 'D:\llama-swap\config.yaml'
$models = 'D:\models'
$service = 'llama-swap' # nssm service name
$parts = ($env:SSH_ORIGINAL_COMMAND ?? '') -split ' ', 2
$cmd = $env:SSH_ORIGINAL_COMMAND
if ($null -eq $cmd) { $cmd = '' }
$parts = $cmd -split ' ', 2
$verb = $parts[0]
$arg = if ($parts.Count -gt 1) { $parts[1].Trim() } else { '' }

View File

@@ -0,0 +1,15 @@
import type { Sql } from './db.js';
import type { Config } from './config.js';
import type { FleetState } from './services/fleet-state.js';
import type { DeltaEmitter } from './services/delta-emitter.js';
import type { ActionQueue } from './services/action-queue.js';
import type { LogRelay } from './services/log-relay.js';
/**
 * Bundle of the shared service handles that `main()` builds once and hands to
 * the route registrars as a single `ctx` argument, instead of passing each
 * dependency positionally.
 */
export interface AppContext {
// Database handle (type `Sql` from ./db.js).
sql: Sql;
// Loaded service configuration (type `Config` from ./config.js).
config: Config;
// In-memory fleet state, keyed by provider/host.
fleet: FleetState;
// Publish/subscribe channel for deltas pushed to subscribers.
emitter: DeltaEmitter;
// Queue that action-submission routes enqueue into.
actionQueue: ActionQueue;
// Relay that receives log lines appended by the SSE pipeline.
logRelay: LogRelay;
}

View File

@@ -1,15 +1,11 @@
import Fastify from 'fastify';
import fastifyWebsocket from '@fastify/websocket';
import '@fastify/websocket';
import { loadConfig } from './config.js';
import { getSql, applySchema, pingDb, waitForTable } from './db.js';
import type { FleetState, HostState } from './services/fleet-state.js';
import { createFleetState, ensureHostState, stampLastSeen, incrementSeq } from './services/fleet-state.js';
import { createFleetState, ensureHostState } from "./services/fleet-state.js";
import { registerControlWebSocket } from './routes/ws.js';
import type { LlamaSweepSSEEvent, MetricsEntry } from './services/fleet-connector.js';
import { startFleetConnector } from './services/fleet-connector.js';
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents, trimCapture, parseCaptureJson } from './services/retention.js';
import { detectGap } from './services/reconcile.js';
import { jsonbObject } from './services/jsonb.js';
import { startFleetConnector } from "./services/fleet-connector.js";
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents } from './services/retention.js';
import { ActionQueue } from './services/action-queue.js';
import { LogRelay } from './services/log-relay.js';
import { registerActionRoutes } from './routes/actions.js';
@@ -22,407 +18,14 @@ import { registerReportRoutes, startReportScheduler } from './routes/reports.js'
import { registerGatewayRoutes } from './routes/gateway.js';
import { registerPolicyRoutes } from './routes/policies.js';
import { registerSshConfigRoutes } from './routes/ssh-config.js';
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from './services/llama-providers.js';
// ─── delta emitter (B3 fix) ─────────────────────────────────────────────────
/** Listener invoked with every published delta. */
export type DeltaCallback = (delta: unknown) => void;

/** Minimal in-process publish/subscribe channel for deltas. */
export type DeltaEmitter = {
  subscribe(cb: DeltaCallback): () => void;
  publish(delta: unknown): void;
};

/**
 * Build a fresh, empty emitter.
 *
 * `subscribe` registers a callback and returns a function that unregisters it.
 * `publish` hands the delta to every registered callback; an exception thrown
 * by one callback is discarded so the remaining callbacks still run.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const subscribers = new Set<DeltaCallback>();

  const subscribe = (cb: DeltaCallback): (() => void) => {
    subscribers.add(cb);
    return () => {
      subscribers.delete(cb);
    };
  };

  const publish = (delta: unknown): void => {
    subscribers.forEach((notify) => {
      try {
        notify(delta);
      } catch {
        /* ignore emitter errors */
      }
    });
  };

  return { subscribe, publish };
}
// ─── metrics entry field-name mapper ─────────────────────────────────────────
// Real /api/metrics shape has nested tokens and different field names:
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
// Map to the column names used in control_requests.
/**
 * Flattened form of one metrics entry, using the column names of
 * `control_requests`. Produced by `mapMetricsEntry` from the nested
 * /api/metrics shape.
 */
interface MappedMetricsEntry {
// Copied from the entry's `id`; written to the `swap_entry_id` column.
id: number;
// Copied from the entry's `timestamp`.
ts: string;
model: string;
req_path: string;
// Copied from the entry's `resp_status_code`.
status_code: number;
duration_ms: number;
// The three token counters come from the nested `tokens` object.
cache_tokens: number;
input_tokens: number;
output_tokens: number;
// From `tokens.prompt_per_second`.
prompt_tps: number;
// From `tokens.tokens_per_second`.
gen_tps: number;
has_capture: boolean;
/** P4: NULL for ring data — ActivityLogEntry does not carry request headers. */
source: string | null;
}
/**
 * Flatten one /api/metrics entry into the `control_requests` column naming:
 * `timestamp` becomes `ts`, `resp_status_code` becomes `status_code`, and the
 * nested `tokens` counters are lifted to top-level fields.
 *
 * @param entry Raw metrics entry as received from the provider.
 * @returns The flattened record; `source` is always `null`.
 */
function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;
  const flattened: MappedMetricsEntry = {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    // P4: NULL — ActivityLogEntry does not carry request headers.
    source: null,
  };
  return flattened;
}
// ─── SSE event handlers (B5 fix: await onEvent; B2 fix: incrementSeq) ───────
/**
 * Apply one llama-swap SSE event for `providerId`: update the in-memory host
 * state, persist what the event type requires, and publish the matching delta.
 *
 * Per event type:
 * - `modelStatus`: diffs the full-fleet model list against current state,
 *   inserts one `control_model_events` row per changed model and publishes a
 *   `control_fleet` delta. Nothing is persisted or published when no model changed.
 * - `logData`: appends to `logRelay` (when provided) and publishes `control_log`.
 * - `metrics`: runs `handleReconcile` (failures are logged, not rethrown), then
 *   persists each entry with its trimmed capture and publishes `control_activity`.
 * - `inflight`: records the host-level in-flight total only.
 *
 * @param fleet      Fleet state container the host entry lives in.
 * @param sql        Tagged-template SQL client.
 * @param config     Loaded config; `CAPTURE_SIZE_KB` bounds stored captures.
 * @param providerId Provider/host the event came from.
 * @param emitter    Delta emitter that receives the published deltas.
 * @param event      The SSE event to process.
 * @param logRelay   Optional log relay; `null` skips relay appends.
 * @returns Resolves once all persistence for the event has completed.
 * @throws Rejects if one of the direct SQL statements fails (reconcile errors
 *         inside the `metrics` case are caught and logged instead).
 */
export async function handleLlamaSweepEvent(
fleet: FleetState,
sql: ReturnType<typeof getSql>,
config: ReturnType<typeof loadConfig>,
providerId: string,
emitter: DeltaEmitter,
event: LlamaSweepSSEEvent,
logRelay: LogRelay | null = null,
): Promise<void> {
const state = ensureHostState(fleet, providerId);
stampLastSeen(state);
switch (event.type) {
case 'modelStatus': {
// Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
// Derive transitions by diffing against current state; persist only changes.
state.liveness = 'connected';
const changed: Array<{ model: string; state: string }> = [];
for (const m of event.data) {
const prev = state.models.get(m.id);
if (!prev || prev.state !== m.state) {
changed.push({ model: m.id, state: m.state });
}
// Always refresh the entry; TTL deadline and in-flight count carry over
// from the previous entry because this payload does not supply them here.
state.models.set(m.id, {
model: m.id,
state: m.state,
ts: new Date(),
ttlDeadline: prev?.ttlDeadline ?? null,
inflight: prev?.inflight ?? 0,
});
}
if (changed.length === 0) break;
const seq = incrementSeq(state);
for (const c of changed) {
await sql`
INSERT INTO control_model_events (provider_id, model, state, ts, detail)
VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
ON CONFLICT (provider_id, model, state, ts) DO NOTHING
`;
}
// Publish delta to WS subscribers (B3 fix).
emitter.publish({
type: 'control_fleet' as const,
seq,
hosts: [{
providerId: state.providerId,
liveness: state.liveness,
lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
seq: state.seq,
models: Array.from(state.models.values()).map((m) => ({
model: m.model,
state: m.state,
ts: m.ts.toISOString(),
ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
inflight: m.inflight,
})),
}],
});
break;
}
case 'logData': {
// Logs are relay-only; no persistence by default.
const source = event.data.source as 'proxy' | 'upstream' | 'model';
// Real payload field is 'data' (fork sendLogData), may contain multiple lines.
const text = event.data.data;
if (logRelay) {
logRelay.append(providerId, source, text);
}
const seq = incrementSeq(state);
emitter.publish({
type: 'control_log' as const,
seq,
providerId,
source,
line: text,
});
break;
}
case 'metrics': {
// Real payload: BARE array of ActivityLogEntry (fork sendMetrics).
const entries = event.data;
// Bump the host sequence once per batch; the activity deltas below read the
// current value back from state.seq.
incrementSeq(state);
// B5 fix: await the reconcile. A1: a reconcile failure is logged and must not
// stop the per-entry ingest that follows.
try {
await handleReconcile(fleet, sql, config, providerId, emitter, entries);
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
}
// Persist each entry with its capture and publish activity deltas.
for (const entry of entries) {
const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
// Map real field names: resp_status_code -> status_code, tokens.* nested, timestamp -> ts.
const mapped = mapMetricsEntry(entry);
// handleReconcile has normally already inserted this row WITHOUT a capture,
// so a plain DO NOTHING would discard the capture every time. Back-fill the
// capture on conflict, but only when the stored row has none and this entry
// carries one; otherwise the statement is still a no-op on conflict.
await sql`
INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
SET capture = EXCLUDED.capture
WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
`;
emitter.publish({
type: 'control_activity' as const,
seq: state.seq,
providerId,
entry: {
id: mapped.id,
ts: mapped.ts,
model: mapped.model,
reqPath: mapped.req_path,
statusCode: mapped.status_code,
durationMs: mapped.duration_ms,
},
});
}
break;
}
case 'inflight': {
// Real payload: {total} -- host-level total (fork sendInFlight); the fork
// does not publish per-model inflight over SSE.
state.inflightTotal = event.data.total;
break;
}
}
}
// ─── reconcile handler (B7 fix: called from metrics event) ───────────────────
/**
 * Ingest a batch of metrics entries for one provider, recording a
 * `gap_suspected` model event first when `detectGap` reports a gap between the
 * batch and what is already persisted.
 *
 * Side effects: marks the host `connected`, may insert one row into
 * `control_model_events`, and inserts each entry into `control_requests`
 * (without the `capture` column; duplicates are skipped by the ON CONFLICT clause).
 *
 * @param fleet      Fleet state container the host entry lives in.
 * @param sql        Tagged-template SQL client.
 * @param config     Accepted for signature parity; not read in this body.
 * @param providerId Provider/host the entries belong to.
 * @param emitter    Accepted for signature parity; nothing is published here.
 * @param metrics    Entries to ingest; a nullish value is treated as empty.
 * @returns Always `true` when it resolves.
 * @throws Rejects if any SQL statement fails.
 */
async function handleReconcile(
fleet: FleetState,
sql: ReturnType<typeof getSql>,
config: ReturnType<typeof loadConfig>,
providerId: string,
emitter: DeltaEmitter,
metrics: MetricsEntry[],
): Promise<boolean> {
const state = ensureHostState(fleet, providerId);
stampLastSeen(state);
state.liveness = 'connected';
// Detect gap: if oldest reconcile entry is newer than newest persisted entry
// for that provider, the ring wrapped past our tail.
const entries = metrics ?? [];
// NOTE(review): takes the LAST element as the oldest entry, i.e. assumes the
// batch is ordered newest-first — confirm against the provider's payload order.
const oldestReconcileTs = entries.length > 0
? entries[entries.length - 1]!.timestamp
: null;
if (oldestReconcileTs) {
const newestPersisted = await sql<{ ts: string }[]>`
SELECT ts FROM control_requests
WHERE provider_id = ${providerId}
ORDER BY ts DESC LIMIT 1
`;
// No persisted rows yet means there is nothing to compare against.
if (newestPersisted.length > 0) {
const newestRow = newestPersisted[0]!;
if (detectGap(oldestReconcileTs, newestRow.ts)) {
// The marker row uses model '*' and carries both timestamps in `detail`.
await sql`
INSERT INTO control_model_events (provider_id, model, state, ts, detail)
VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json({
oldestReconcile: oldestReconcileTs,
newestPersisted: newestRow.ts,
} as never)})
ON CONFLICT (provider_id, model, state, ts) DO NOTHING
`;
}
}
}
// Ingest reconcile entries (dedup via UNIQUE constraint).
// Runs after the gap check above, so the "newest persisted" lookup does not
// see the rows inserted by this same batch.
for (const entry of entries) {
const mapped = mapMetricsEntry(entry);
await sql`
INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${mapped.source})
ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
`;
}
return true;
}
// ─── perf poller (A7 fix: add timeout; A8 fix: log errors) ───────────────────
/**
 * Poll one provider's `/api/performance` endpoint, persist the returned
 * samples into `control_perf_samples`, and publish one `control_perf` delta
 * per sample timestamp.
 *
 * The request is limited to samples after the newest persisted `ts` for the
 * provider (sent as `?after=<ISO timestamp>`); with no persisted samples the
 * parameter is omitted.
 *
 * @param sql        Tagged-template SQL client.
 * @param config     Accepted for signature parity; not read in this body.
 * @param providerId Provider/host being polled.
 * @param baseUrl    Base URL the `/api/performance` path is appended to.
 * @param fleet      Fleet state container the host entry lives in.
 * @param emitter    Delta emitter that receives the `control_perf` deltas.
 * @returns Resolves when the poll finishes, including the cases where the
 *          response is non-OK, empty, or the fetch/insert failed (logged only).
 * @throws Rejects only if the watermark query fails; it runs outside the
 *         try/catch below.
 */
async function pollPerformance(
sql: ReturnType<typeof getSql>,
config: ReturnType<typeof loadConfig>,
providerId: string,
baseUrl: string,
fleet: FleetState,
emitter: DeltaEmitter,
): Promise<void> {
const state = ensureHostState(fleet, providerId);
// Recover watermark from MAX(ts) per provider.
const watermark = await sql<{ ts: string | null }[]>`
SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
`;
// porsager returns timestamptz as a Date object; interpolating it raw yields
// Date.toString() ("Thu Jun 12 2026 ...") which llama-swap rejects with 400.
const afterParam = watermark[0]?.ts
? `?after=${encodeURIComponent(new Date(watermark[0].ts).toISOString())}`
: '';
const url = `${baseUrl}/api/performance${afterParam}`;
try {
// A7 fix: bound the request with a 10 s timeout via AbortSignal.timeout.
const fetchSignal = AbortSignal.timeout(10_000);
const res = await fetch(url, { signal: fetchSignal });
// Non-OK responses are skipped without logging.
if (!res.ok) return;
// Real shape: { gpu_stats: GpuStat[], sys_stats: SysStat[] }
const data = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
if (!data) return;
// Pair gpu_stats and sys_stats by timestamp.
// Samples without a truthy `timestamp` are dropped; a later sample with the
// same timestamp replaces an earlier one in the map.
const gpuMap = new Map<string, unknown>();
for (const g of data.gpu_stats ?? []) {
const gpu = g as { timestamp?: string };
if (gpu.timestamp) {
gpuMap.set(gpu.timestamp, g);
}
}
const sysMap = new Map<string, unknown>();
for (const s of data.sys_stats ?? []) {
const sys = s as { timestamp?: string };
if (sys.timestamp) {
sysMap.set(sys.timestamp, s);
}
}
// Collect all unique timestamps.
const allTimestamps = new Set([...gpuMap.keys(), ...sysMap.keys()]);
if (allTimestamps.size === 0) return;
// Only a poll that returned at least one sample stamps the host as seen.
stampLastSeen(state);
for (const ts of allTimestamps) {
// Either side may be missing for a timestamp; the missing side is stored as null.
const gpu = gpuMap.get(ts) ?? null;
const sys = sysMap.get(ts) ?? null;
await sql`
INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
ON CONFLICT (provider_id, ts) DO NOTHING
`;
// The delta is published for every timestamp, whether or not the insert
// above hit the conflict clause.
const seq = incrementSeq(state);
emitter.publish({
type: 'control_perf' as const,
seq,
providerId,
ts,
gpu,
sys,
});
}
} catch (err) {
// A8 fix: log the error instead of swallowing silently.
const msg = (err as Error).message ?? String(err);
console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
}
}
// ─── fleet-state rebuild from DB (A1/F2 fix) ─────────────────────────────────
/**
 * Repopulate the in-memory fleet state from persisted rows.
 *
 * Reads `control_model_events` to restore per-host model entries (every host
 * found there is marked `down`), then touches the host entry for every provider
 * that has rows in `control_requests` or `control_perf_samples`.
 *
 * @param fleet Fleet state container to populate; host entries are created on demand.
 * @param sql   Tagged-template SQL client.
 * @returns Resolves once all three queries have been applied.
 * @throws Rejects if any of the queries fails.
 */
async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
// Query control_model_events for latest model state per provider.
// B3: ORDER BY ASC so iteration processes oldest first; Map.set() overwrites
// with the latest state for each model, so the newest event wins.
// The subquery groups by (provider_id, model, state), so one model can return
// several rows (one per state it has been in); the ASC ordering above is what
// makes the most recent of them the final value.
// NOTE(review): there is no filter on model/state, so the '*' / 'gap_suspected'
// marker rows written by handleReconcile are also loaded into state.models —
// confirm whether that is intended.
const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
SELECT provider_id, model, state, ts, detail
FROM control_model_events
WHERE ts IN (
SELECT MAX(ts) FROM control_model_events
GROUP BY provider_id, model, state
)
ORDER BY ts ASC
`;
for (const row of modelEvents) {
const state = ensureHostState(fleet, row.provider_id);
// Hosts restored from the DB start as 'down'.
state.liveness = 'down';
stampLastSeen(state);
// row.detail is jsonb (porsager returns it parsed); jsonbObject tolerates
// both a parsed object and a JSON string.
const detail: unknown = jsonbObject(row.detail);
// B4: ttlDeadline recalculation. For rebuild, the deadline is derived from the
// event timestamp plus `detail.ttl` seconds, so it reflects when the model was
// actually loaded, not when we rebuild. A missing or zero ttl yields null.
// NOTE(review): the earlier comment cited a live-handler computation of
// Date.now() + ttl * 1000 at index.ts:57 — confirm that reference still holds.
const ttl = (detail as { ttl?: number })?.ttl;
const eventTs = new Date(row.ts).getTime();
const ttlDeadline = ttl ? new Date(eventTs + ttl * 1000) : null;
state.models.set(row.model, {
model: row.model,
state: row.state,
ts: new Date(row.ts),
ttlDeadline,
// In-flight counts are not persisted; restart from zero.
inflight: 0,
});
}
// Query control_requests for last activity.
const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
SELECT provider_id, ts FROM control_requests
WHERE ts IN (
SELECT MAX(ts) FROM control_requests GROUP BY provider_id
)
ORDER BY ts DESC
`;
// Only the provider id is used; the row's ts is not passed to stampLastSeen.
for (const row of lastRequests) {
const state = ensureHostState(fleet, row.provider_id);
stampLastSeen(state);
}
// Query control_perf_samples for latest perf sample.
const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
SELECT provider_id, ts FROM control_perf_samples
WHERE ts IN (
SELECT MAX(ts) FROM control_perf_samples GROUP BY provider_id
)
ORDER BY ts DESC
`;
// Same as above: ensures the host entry exists and stamps it.
for (const row of lastPerf) {
const state = ensureHostState(fleet, row.provider_id);
stampLastSeen(state);
}
}
// ─── main ───────────────────────────────────────────────────────────────────
import { loadLlamaProviders } from "./services/llama-providers.js";
import { GATEWAY_KIND } from "@boocode/contracts/gateway";
import { createDeltaEmitter } from "./services/delta-emitter.js";
import type { AppContext } from './app-context.js';
export type { DeltaEmitter } from './services/delta-emitter.js';
import { handleLlamaSweepEvent } from './services/sse-pipeline.js';
import { pollPerformance } from './services/perf-poller.js';
import { rebuildFleetFromDB } from './services/fleet-rebuild.js';
async function main() {
const config = loadConfig();
@@ -456,18 +59,19 @@ async function main() {
// P2: Action queue + log relay
const actionQueue = new ActionQueue();
const logRelay = new LogRelay();
registerControlWebSocket(app, fleet, emitter, logRelay);
registerActionRoutes(app, actionQueue, fleet, emitter);
const ctx: AppContext = { sql, config, fleet, emitter, actionQueue, logRelay };
registerControlWebSocket(app, ctx);
registerActionRoutes(app, ctx);
registerCaptureRoutes(app, sql);
setBenchApp(app.log);
registerBenchRoutes(app, sql, fleet, emitter);
registerBenchRoutes(app, ctx);
registerPlaygroundRoutes(app);
registerEvalRoutes(app, sql, fleet, emitter);
registerEvalRoutes(app, ctx);
registerRoutingRoutes(app, sql, fleet);
registerReportRoutes(app, sql);
registerGatewayRoutes(app, sql, fleet, emitter);
registerGatewayRoutes(app, ctx);
registerPolicyRoutes(app, sql);
registerSshConfigRoutes(app, sql, config, fleet, emitter);
registerSshConfigRoutes(app, ctx);
// Health endpoint.
app.get('/api/health', async (_req: unknown, reply: import('fastify').FastifyReply) => {
@@ -488,11 +92,7 @@ async function main() {
const registry = loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
app.log.info({ count: registry.providers.length }, 'fleet: provider registry loaded');
// P7.2: the auto:* gateway is itself a registry entry (kind boocontrol-gateway)
// so BooChat adopts it as a provider. BooControl must NOT treat it as a fleet
// host — it has no llama-swap SSE/perf surface and its baseUrl points back at
// this service. Filter it out of every fleet operation.
const fleetProviders = registry.providers.filter((p) => p.kind !== 'boocontrol-gateway');
const fleetProviders = registry.providers.filter((p) => p.kind !== GATEWAY_KIND);
// JOIN registry providers with control_hosts for the enabled flag.
// Insert a control_hosts row ON CONFLICT DO NOTHING for any registry provider
@@ -545,7 +145,6 @@ async function main() {
sql,
log: app.log,
onEvent: (pid, event) => handleLlamaSweepEvent(fleet, sql, config, pid, emitter, event, logRelay),
onReconcile: (pid, metrics) => handleReconcile(fleet, sql, config, pid, emitter, metrics),
onReconnectGiveUp: async (pid) => {
const state = ensureHostState(fleet, pid);
state.liveness = 'down';
@@ -567,15 +166,16 @@ async function main() {
// Retention job: daily timer — iterate registry providers.
const retentionConfig = buildRetentionConfig(config);
const retentionTimer = setInterval(async () => {
// Per-provider work: rollup + raw-sample prune (both scoped to provider_id).
for (const provider of fleetProviders) {
const enabled = enabledMap.get(provider.id) ?? true;
if (!enabled) continue;
await runRollup(sql, provider.id, retentionConfig.rawHours);
// A2 fix: chunk pruneRawSamples (already chunked), also chunk pruneActivity and pruneModelEvents.
await pruneRawSamples(sql, provider.id, retentionConfig.rawHours);
await pruneActivity(sql, retentionConfig.rawHours);
await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
}
// Global prunes (no provider_id filter) run ONCE, not once per provider.
await pruneActivity(sql, retentionConfig.rawHours);
await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
}, 24 * 3600_000); // daily
// P6.2: Report digest scheduler (catch-up on boot, then hourly).

View File

@@ -1,8 +1,7 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { randomUUID } from 'node:crypto';
import type { ActionQueue } from '../services/action-queue.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { publishJob } from '../services/publish-job.js';
import type { AppContext } from '../app-context.js';
/**
* Register action submission routes.
@@ -12,10 +11,9 @@ import type { DeltaEmitter } from '../index.js';
*/
export function registerActionRoutes(
app: FastifyInstance,
actionQueue: ActionQueue,
fleet: FleetState,
emitter: DeltaEmitter,
ctx: AppContext,
): void {
const { actionQueue, fleet, emitter } = ctx;
app.post('/api/action/submit', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const type = body.type as string;
@@ -30,7 +28,6 @@ export function registerActionRoutes(
return reply.status(400).send({ error: 'providerId is required' });
}
// Check host liveness
const hostState = fleet.hosts.get(providerId);
if (!hostState || hostState.liveness === 'down') {
return reply.status(409).send({ error: 'host offline' });
@@ -63,13 +60,11 @@ export function registerActionRoutes(
return reply.status(409).send({ error: result.error });
}
// Publish action queued event
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq: hostState.seq,
jobType: 'action' as const,
jobType: 'action',
jobId: action.actionId,
status: 'queued' as const,
status: 'queued',
detail: {
actionType: action.type,
providerId: action.providerId,

View File

@@ -1,13 +1,13 @@
import { randomUUID } from 'node:crypto';
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import type { DeltaEmitter } from '../services/delta-emitter.js';
import { publishJob } from '../services/publish-job.js';
import { acquireHostAccess } from '../services/host-access.js';
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
import { runBenchSuite } from '../services/bench-engine.js';
import { runBenchSuite, type BenchSuite, type BenchRunProgress } from "../services/bench-engine.js";
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
import type { AppContext } from '../app-context.js';
/**
* Register bench routes.
@@ -22,11 +22,9 @@ import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
*/
export function registerBenchRoutes(
app: FastifyInstance,
sql: Sql,
fleet: FleetState,
emitter: DeltaEmitter,
ctx: AppContext,
): void {
// ─── suite CRUD ──────────────────────────────────────────────────────────
const { sql, fleet, emitter } = ctx;
app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
@@ -136,8 +134,6 @@ export function registerBenchRoutes(
});
});
// ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────
app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const suiteId = body.suiteId as string;
@@ -148,7 +144,6 @@ export function registerBenchRoutes(
return reply.status(400).send({ error: 'suiteId is required' });
}
// Load suite.
const suiteRows = await sql<{
id: string;
name: string;
@@ -200,7 +195,6 @@ export function registerBenchRoutes(
return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
}
// Get seq for the host.
const seq = hostState?.seq ?? 0;
// Run the bench suite asynchronously (non-blocking HTTP response).
@@ -219,8 +213,6 @@ export function registerBenchRoutes(
});
});
// ─── runs listing ────────────────────────────────────────────────────────
app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
const query = req.query as Record<string, string | undefined>;
const suiteId = query.suiteId;
@@ -353,8 +345,6 @@ export function registerBenchRoutes(
});
});
// ─── baselines ───────────────────────────────────────────────────────────
app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
const rows = await sql<{
provider_id: string;
@@ -471,12 +461,11 @@ async function runBenchAsync(
WHERE id = ${runId}
`;
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'bench' as const,
jobType: 'bench',
jobId: runId,
status: 'failed' as const,
status: 'failed',
detail: { error: msg },
});
}

View File

@@ -1,7 +1,7 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../services/delta-emitter.js';
import { publishJob } from '../services/publish-job.js';
import {
listEvalSuites,
getEvalSuite,
@@ -11,6 +11,8 @@ import {
seedEvalSuites,
} from '../services/eval-suites.js';
import { jsonbArray, jsonbObject } from '../services/jsonb.js';
import { acquireHostAccess } from '../services/host-access.js';
import type { AppContext } from '../app-context.js';
/**
* Register eval routes.
@@ -26,10 +28,9 @@ import { jsonbArray, jsonbObject } from '../services/jsonb.js';
*/
export function registerEvalRoutes(
app: FastifyInstance,
sql: Sql,
fleet: FleetState,
emitter: DeltaEmitter,
ctx: AppContext,
): void {
const { sql, fleet, emitter } = ctx;
// Seed suites from data/ YAML on startup (idempotent).
app.addHook('onReady', async () => {
await seedEvalSuites(sql).catch((err) => {
@@ -37,8 +38,6 @@ export function registerEvalRoutes(
});
});
// ─── suite CRUD ──────────────────────────────────────────────────────────
app.post('/api/eval/suite', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const id = (body.id as string) ?? null;
@@ -92,15 +91,11 @@ export function registerEvalRoutes(
});
});
// ─── seed from data/ ─────────────────────────────────────────────────────
app.post('/api/eval/seed', async (_req: FastifyRequest, reply: FastifyReply) => {
await seedEvalSuites(sql);
return reply.send({ ok: true });
});
// ─── run launcher ────────────────────────────────────────────────────────
app.post('/api/eval/run', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const suiteId = body.suiteId as string;
@@ -117,11 +112,15 @@ export function registerEvalRoutes(
return reply.status(404).send({ error: 'suite not found' });
}
const grant = await acquireHostAccess(providerId, 'eval');
if (!grant.ok) {
return reply.status(409).send({ error: 'host access denied', reason: grant.reason });
}
const tasks = jsonbArray(suite.tasks);
const judgeModel = suite.judge_model;
const seq = fleet.hosts.get(providerId)?.seq ?? 0;
// Start the eval run asynchronously.
void runEvalAsync(
{ suiteId, providerId, model, quant, tasks, judgeModel },
sql,
@@ -133,8 +132,6 @@ export function registerEvalRoutes(
return reply.status(202).send({ status: 'queued', suiteId, providerId, model });
});
// ─── runs listing ────────────────────────────────────────────────────────
app.get('/api/eval/runs', async (req: FastifyRequest, reply: FastifyReply) => {
const query = req.query as Record<string, string | undefined>;
const runs = await listEvalRuns(sql, query.suiteId, query.providerId);
@@ -203,8 +200,6 @@ export function registerEvalRoutes(
});
});
// ─── leaderboard ─────────────────────────────────────────────────────────
app.get('/api/eval/leaderboard', async (req: FastifyRequest, reply: FastifyReply) => {
const query = req.query as Record<string, string | undefined>;
const kind = query.kind as 'chat' | 'code' | undefined;
@@ -276,12 +271,11 @@ async function runEvalAsync(
VALUES (${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant}, 'running', ${judgeModel}, clock_timestamp(), ${tasks.length})
`;
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'eval' as const,
jobType: 'eval',
jobId: runId,
status: 'running' as const,
status: 'running',
detail: { suiteId, providerId, model, totalTasks: tasks.length },
});
@@ -336,12 +330,11 @@ async function runEvalAsync(
WHERE id = ${runId}
`;
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'eval' as const,
jobType: 'eval',
jobId: runId,
status: error ? 'failed' as const : 'completed' as const,
status: error ? 'failed' : 'completed',
detail: { avgScore, error },
});
} catch (err) {
@@ -354,12 +347,11 @@ async function runEvalAsync(
WHERE id = ${runId}
`.catch(() => {});
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'eval' as const,
jobType: 'eval',
jobId: runId,
status: 'failed' as const,
status: 'failed',
detail: { error: msg },
});
}

View File

@@ -1,13 +1,13 @@
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import {
VIRTUAL_MODELS,
resolveCandidates,
splitComposite,
} from '../services/gateway.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
import type { AppContext } from '../app-context.js';
/**
* P7.1: OpenAI-compatible auto:* gateway.
@@ -25,11 +25,9 @@ import { resolveProviderBaseUrl } from '../services/llama-providers.js';
*/
export function registerGatewayRoutes(
app: FastifyInstance,
sql: Sql,
fleet: FleetState,
_emitter: DeltaEmitter,
ctx: AppContext,
): void {
// ─── model catalog ───────────────────────────────────────────────────────
const { sql, fleet } = ctx;
app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
return reply.send({
@@ -43,10 +41,6 @@ export function registerGatewayRoutes(
});
});
// ─── props (for getModelContext) ─────────────────────────────────────────
// Resolve candidates and proxy the first healthy candidate's props so the
// caller can read default_generation_settings.n_ctx.
app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
const { model } = req.params as { model: string };
const { candidates } = await resolveCandidates(sql, fleet, model);
@@ -69,8 +63,6 @@ export function registerGatewayRoutes(
return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
});
// ─── chat completions (dispatch with failover) ───────────────────────────
app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const requestedModel = body?.model as string | undefined;
@@ -113,11 +105,20 @@ export function registerGatewayRoutes(
});
if (!res.ok) {
// HTTP error before body — eligible for failover to the next candidate.
recordFailure(compositeId);
continue;
}
// A null body on an OK response is a broken upstream; fail over to the
// next candidate (nothing has been committed to the client yet).
const reader = stream ? res.body?.getReader() : null;
if (stream && !reader) {
recordFailure(compositeId);
continue;
}
// Success: dispatch chosen. Log and stream/return through.
recordSuccess(compositeId);
await logDispatch(sql, {
virtualModel,
chosen: compositeId,
@@ -128,16 +129,11 @@ export function registerGatewayRoutes(
durationMs: Date.now() - startedAt,
});
if (stream) {
if (stream && reader) {
reply.header('Content-Type', 'text/event-stream');
reply.header('Cache-Control', 'no-cache');
reply.header('Connection', 'keep-alive');
reply.raw.writeHead(200);
const reader = res.body?.getReader();
if (!reader) {
reply.raw.end();
return;
}
const decoder = new TextDecoder();
try {
while (true) {
@@ -155,7 +151,7 @@ export function registerGatewayRoutes(
const json = await res.json();
return reply.send(json);
} catch {
// Connection error — failover to the next candidate.
recordFailure(compositeId);
continue;
}
}

View File

@@ -11,7 +11,6 @@ import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-pro
export function registerPlaygroundRoutes(
app: FastifyInstance,
): void {
// ─── model catalog ───────────────────────────────────────────────────────
app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
// Resolve provider URLs from the loaded registry.
@@ -49,8 +48,6 @@ export function registerPlaygroundRoutes(
return reply.send({ models });
});
// ─── streaming chat ──────────────────────────────────────────────────────
app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const providerId = body.providerId as string;
@@ -138,8 +135,6 @@ export function registerPlaygroundRoutes(
}
});
// ─── A/B compare ─────────────────────────────────────────────────────────
app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
const body = req.body as Record<string, unknown>;
const providerIdA = body.providerIdA as string;
@@ -224,7 +219,6 @@ export function registerPlaygroundRoutes(
}
};
// Run both streams concurrently.
await Promise.all([
streamModel('A', baseUrlA, modelA),
streamModel('B', baseUrlB, modelB),

View File

@@ -5,9 +5,8 @@ import { dirname, resolve } from 'node:path';
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import type { Config } from '../config.js';
import type { FleetState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import type { AppContext } from '../app-context.js';
import {
validateLlamaConfig,
computeDiff,
@@ -35,12 +34,10 @@ import { runModelPull, validateRepoId } from '../services/model-pull.js';
*/
export function registerSshConfigRoutes(
app: FastifyInstance,
sql: Sql,
config: Config,
fleet: FleetState,
emitter: DeltaEmitter,
ctx: AppContext,
exec: SshExec = sshExec,
): void {
const { sql, config, fleet, emitter } = ctx;
const schema = loadConfigSchema(config);
app.get('/api/hosts', async (_req: FastifyRequest, reply: FastifyReply) => {
@@ -181,7 +178,6 @@ export function registerSshConfigRoutes(
return reply.status(status).send(result);
});
// ─── model pull (non-blocking job) ─────────────────────────────────────────
app.post('/api/hosts/:id/pull', async (req: FastifyRequest, reply: FastifyReply) => {
const { id } = req.params as { id: string };
const body = (req.body as Record<string, unknown>) ?? {};
@@ -205,7 +201,7 @@ export function registerSshConfigRoutes(
const jobId = `pull_${Date.now()}_${randomUUID().slice(0, 8)}`;
const seq = fleet.hosts.get(id)?.seq ?? 0;
// Fire and forget; progress streams over control_job frames.
void runModelPull({ jobId, target, repo, mode, modelsDir }, exec, emitter, seq);
void runModelPull({ jobId, providerId: id, target, repo, mode, modelsDir }, exec, emitter, seq);
return reply.status(202).send({ status: 'queued', jobId, repo });
});

View File

@@ -1,8 +1,7 @@
import type { FastifyInstance } from 'fastify';
import WebSocket from 'ws';
import type { FleetState, HostState } from '../services/fleet-state.js';
import type { DeltaEmitter } from '../index.js';
import type { LogRelay } from '../services/log-relay.js';
import type { FleetState } from '../services/fleet-state.js';
import type { AppContext } from '../app-context.js';
/**
* WS endpoint: /api/ws/control
@@ -17,11 +16,10 @@ import type { LogRelay } from '../services/log-relay.js';
*/
export function registerControlWebSocket(
app: FastifyInstance,
fleet: FleetState,
emitter: DeltaEmitter,
logRelay: LogRelay | null = null,
ctx: AppContext,
): void {
app.get('/api/ws/control', { websocket: true }, (socket, req) => {
const { fleet, emitter, logRelay } = ctx;
app.get('/api/ws/control', { websocket: true }, (socket, _req) => {
const fleetState = fleet;
const snapshot = buildSnapshot(fleetState);
@@ -80,7 +78,7 @@ export function registerControlWebSocket(
*/
function buildSnapshot(fleet: FleetState): { hosts: Array<{
providerId: string;
liveness: 'connected' | 'reconnecting' | 'down';
liveness: 'connected' | 'down';
lastSeenAt: string | null;
seq: number;
models: Array<{

View File

@@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
import { parseLlamaTimings, computeAggregates, runSingleBenchRequest } from '../../index.js';
import { computeRegressionFlag } from '../bench-engine.js';
import { createFleetState, ensureHostState } from '../fleet-state.js';
import { createDeltaEmitter } from '../../index.js';
import { createDeltaEmitter } from '../delta-emitter.js';
import type { Sql } from '../../db.js';
import type { Config } from '../../config.js';
import type { BenchSuite } from '../bench-engine.js';

View File

@@ -4,8 +4,10 @@ import {
parseVirtualModel,
orderCandidates,
splitComposite,
fleetModelCandidates,
} from '../gateway.js';
import type { ModelScore } from '../routing-scores.js';
import { createFleetState, ensureHostState } from '../fleet-state.js';
function score(compositeId: string, partial: Partial<ModelScore> = {}): ModelScore {
return {
@@ -90,3 +92,29 @@ describe('orderCandidates', () => {
expect(ordered).toEqual(['a/never-seen', 'a/known']);
});
});
// Pins the ordering and filtering contract of fleetModelCandidates.
describe('fleetModelCandidates (cold-start fallback)', () => {
it('lists connected hosts models, ready first, skips down hosts', () => {
const fleet = createFleetState();
// Connected host with one ready and one stopped model: both must be listed.
const a = ensureHostState(fleet, 'sam-desktop');
a.liveness = 'connected';
a.models.set('m-ready', { model: 'm-ready', state: 'ready', ts: new Date(0), ttlDeadline: null, inflight: 0 });
a.models.set('m-stop', { model: 'm-stop', state: 'stopped', ts: new Date(0), ttlDeadline: null, inflight: 0 });
// Down host: its model must not be offered even though its state is 'ready'.
const b = ensureHostState(fleet, 'embedding');
b.liveness = 'down';
b.models.set('x', { model: 'x', state: 'ready', ts: new Date(0), ttlDeadline: null, inflight: 0 });
const c = fleetModelCandidates(fleet);
// Candidates are composite ids of the form '<providerId>/<model>'.
expect(c).toContain('sam-desktop/m-ready');
expect(c).toContain('sam-desktop/m-stop');
expect(c.indexOf('sam-desktop/m-ready')).toBeLessThan(c.indexOf('sam-desktop/m-stop')); // ready first
expect(c).not.toContain('embedding/x'); // down host excluded
});
it('returns [] for an all-down fleet', () => {
// A single host with liveness 'down' and no models yields no candidates.
const fleet = createFleetState();
const a = ensureHostState(fleet, 'h');
a.liveness = 'down';
expect(fleetModelCandidates(fleet)).toEqual([]);
});
});

View File

@@ -1,102 +1,48 @@
import { describe, it, expect } from 'vitest';
import type { HostState } from '../fleet-state.js';
type Liveness = 'connected' | 'reconnecting' | 'down';
// Production never runs a reconnect state machine: a host is 'connected' when
// the SSE handshake/poll succeeds and 'down' when it drops (index.ts sets only
// those two). The 'reconnecting' state lives on the WS *connection* pill
// (ControlConnection in apps/web), not on per-host liveness. This pins that
// two-state model.
type Liveness = HostState['liveness'];
function transitionLiveness(current: Liveness, event: 'connect' | 'disconnect' | 'reconnect_attempt' | 'reconnect_success'): Liveness {
switch (event) {
case 'connect':
return 'connected';
case 'disconnect':
return 'down';
case 'reconnect_attempt':
return 'reconnecting';
case 'reconnect_success':
return 'connected';
}
function transitionLiveness(_current: Liveness, event: 'connect' | 'disconnect'): Liveness {
return event === 'connect' ? 'connected' : 'down';
}
/**
 * Test fixture: build a host record with the given liveness and last-seen
 * time; every other field gets a fixed baseline value and a fresh, empty
 * model map.
 */
function makeHost(liveness: Liveness, lastSeenAt: Date | null): HostState {
  const host: HostState = {
    providerId: 'test',
    liveness,
    lastSeenAt,
    seq: 0,
    inflightTotal: 0,
    models: new Map(),
  };
  return host;
}
describe('liveness state machine', () => {
it('starts as down', () => {
const state: HostState = {
providerId: 'test',
liveness: 'down',
lastSeenAt: null,
seq: 0,
models: new Map(),
};
expect(state.liveness).toBe('down');
expect(makeHost('down', null).liveness).toBe('down');
});
it('connect -> connected', () => {
const state: HostState = {
providerId: 'test',
liveness: 'down',
lastSeenAt: null,
seq: 0,
models: new Map(),
};
const state = makeHost('down', null);
state.liveness = transitionLiveness(state.liveness, 'connect');
expect(state.liveness).toBe('connected');
});
it('connected -> down on disconnect', () => {
const state: HostState = {
providerId: 'test',
liveness: 'connected',
lastSeenAt: new Date(),
seq: 0,
models: new Map(),
};
const state = makeHost('connected', new Date());
state.liveness = transitionLiveness(state.liveness, 'disconnect');
expect(state.liveness).toBe('down');
});
it('down -> reconnecting on reconnect attempt', () => {
const state: HostState = {
providerId: 'test',
liveness: 'down',
lastSeenAt: null,
seq: 0,
models: new Map(),
};
state.liveness = transitionLiveness(state.liveness, 'reconnect_attempt');
expect(state.liveness).toBe('reconnecting');
});
it('reconnecting -> connected on reconnect success', () => {
const state: HostState = {
providerId: 'test',
liveness: 'reconnecting',
lastSeenAt: null,
seq: 0,
models: new Map(),
};
state.liveness = transitionLiveness(state.liveness, 'reconnect_success');
it('down -> connected on reconnect (no intermediate reconnecting state)', () => {
const state = makeHost('down', null);
state.liveness = transitionLiveness(state.liveness, 'connect');
expect(state.liveness).toBe('connected');
});
it('connected -> reconnecting on reconnect attempt', () => {
const state: HostState = {
providerId: 'test',
liveness: 'connected',
lastSeenAt: new Date(),
seq: 0,
models: new Map(),
};
state.liveness = transitionLiveness(state.liveness, 'reconnect_attempt');
expect(state.liveness).toBe('reconnecting');
});
it('reconnecting -> down on reconnect failure', () => {
const state: HostState = {
providerId: 'test',
liveness: 'reconnecting',
lastSeenAt: null,
seq: 0,
models: new Map(),
};
state.liveness = transitionLiveness(state.liveness, 'disconnect');
expect(state.liveness).toBe('down');
});
});

View File

@@ -1,7 +1,7 @@
import { describe, it, expect } from 'vitest';
import { validateRepoId, buildPullCommand, runModelPull } from '../model-pull.js';
import type { SshExec, ExecResult } from '../ssh-config.js';
import type { DeltaEmitter } from '../../index.js';
import type { DeltaEmitter } from '../delta-emitter.js';
describe('validateRepoId', () => {
it('accepts org/name', () => {
@@ -49,7 +49,7 @@ describe('runModelPull', () => {
it('rejects an invalid repo id before issuing any command', async () => {
const { emitter, frames } = emitterSpy();
const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });
const r = await runModelPull({ jobId: 'j1', target, repo: '../x', mode: 'wrapper' }, exec, emitter);
const r = await runModelPull({ jobId: 'j1', providerId: 'test-provider', target, repo: '../x', mode: 'wrapper' }, exec, emitter);
expect(r.ok).toBe(false);
expect(calls).toHaveLength(0);
expect(frames[frames.length - 1]).toMatchObject({ type: 'control_job', status: 'failed' });
@@ -58,7 +58,7 @@ describe('runModelPull', () => {
it('runs the wrapper pull verb and emits running then completed', async () => {
const { emitter, frames } = emitterSpy();
const { exec, calls } = execReturning({ code: 0, stdout: 'done', stderr: '' });
const r = await runModelPull({ jobId: 'j2', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
const r = await runModelPull({ jobId: 'j2', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
expect(r.ok).toBe(true);
expect(calls).toEqual(['pull Qwen/Q3']);
expect(frames.map((f) => f.status)).toEqual(['running', 'completed']);
@@ -68,7 +68,7 @@ describe('runModelPull', () => {
it('reports a non-zero exit as failed', async () => {
const { emitter, frames } = emitterSpy();
const { exec } = execReturning({ code: 1, stdout: '', stderr: 'no such repo' });
const r = await runModelPull({ jobId: 'j3', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
const r = await runModelPull({ jobId: 'j3', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
expect(r.ok).toBe(false);
expect(frames[frames.length - 1]).toMatchObject({ status: 'failed' });
});
@@ -76,7 +76,7 @@ describe('runModelPull', () => {
it('shell mode without a models dir fails fast', async () => {
const { emitter } = emitterSpy();
const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });
const r = await runModelPull({ jobId: 'j4', target, repo: 'Qwen/Q3', mode: 'shell' }, exec, emitter);
const r = await runModelPull({ jobId: 'j4', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'shell' }, exec, emitter);
expect(r.ok).toBe(false);
expect(calls).toHaveLength(0);
});

View File

@@ -2,8 +2,9 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
import { parseSseLine } from '../fleet-connector.js';
import type { LlamaSweepSSEEvent, MetricsEntry, ModelStatusEntry } from '../fleet-connector.js';
import { createFleetState, ensureHostState, incrementSeq } from '../fleet-state.js';
import { createDeltaEmitter, handleLlamaSweepEvent } from '../../index.js';
import type { DeltaEmitter } from '../../index.js';
import { createDeltaEmitter } from '../delta-emitter.js';
import { handleLlamaSweepEvent } from '../sse-pipeline.js';
import type { DeltaEmitter } from '../delta-emitter.js';
import type { Sql } from '../../db.js';
import type { Config } from '../../config.js';

View File

@@ -77,7 +77,6 @@ export class ActionQueue {
return { ok: false, error: `queue not initialized for ${action.providerId}` };
}
// Check bench in progress for unload actions
if (action.type === 'unload' && !action.confirmed) {
const inflight = deps.isInflightRequests();
if (inflight > 0) {
@@ -142,7 +141,6 @@ export class ActionQueue {
entry.error = 'host went down during queue wait';
state.queue.shift();
state.running = false;
// Process next
void this.processNext(providerId, deps);
return;
}

View File

@@ -9,7 +9,8 @@
*/
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import type { DeltaEmitter } from './delta-emitter.js';
import { publishJob } from './publish-job.js';
import { jsonbObject } from './jsonb.js';
// ─── types ──────────────────────────────────────────────────────────────────
@@ -281,13 +282,11 @@ export async function runBenchSuite(
VALUES (${runId}, ${suite.id}, 'bench', 'running', clock_timestamp(), ${totalSamples}, ${temperature}, ${topP})
`;
// Publish run started.
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'bench' as const,
jobType: 'bench',
jobId: runId,
status: 'running' as const,
status: 'running',
detail: {
suiteId: suite.id,
providerId: suite.providerId,
@@ -326,7 +325,7 @@ export async function runBenchSuite(
groups.get(key)!.push(item);
}
for (const [key, group] of groups) {
for (const [_key, group] of groups) {
const concurrency = group[0]!.concurrency;
const batchSize = Math.min(concurrency, group.length);
@@ -367,13 +366,11 @@ export async function runBenchSuite(
currentRepetition: current.repetition,
});
// Publish progress
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'bench' as const,
jobType: 'bench',
jobId: runId,
status: 'running' as const,
status: 'running',
detail: {
completedSamples: completed,
totalSamples,
@@ -423,13 +420,11 @@ export async function runBenchSuite(
WHERE id = ${runId}
`;
// Publish completion.
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'bench' as const,
jobType: 'bench',
jobId: runId,
status: 'completed' as const,
status: 'completed',
detail: { ...aggregate, regressionFlag },
});
}

View File

@@ -0,0 +1,39 @@
/** Failure bookkeeping kept per composite candidate id. */
interface BreakerEntry {
  /** Failures counted in the current streak. */
  failures: number;
  /** Epoch-ms timestamp of the most recent failure. */
  lastFailure: number;
  /** Epoch-ms instant the cooldown ends; 0 while the breaker is not tripped. */
  cooldownUntil: number;
}

// Module-private breaker state, keyed by composite id.
const breakers = new Map<string, BreakerEntry>();
// Failures in one streak needed to trip the breaker.
const THRESHOLD = 3;
// How long a tripped candidate is reported as tripped.
const COOLDOWN_MS = 30_000;
// A failure arriving more than this long after the previous one starts a new streak.
const WINDOW_MS = 60_000;

/**
 * Count a failed dispatch against a candidate.
 *
 * The first failure, or one arriving more than WINDOW_MS after the previous
 * failure, starts a fresh streak of 1. Otherwise the streak grows, and once it
 * reaches THRESHOLD the cooldown is (re)armed for COOLDOWN_MS from now.
 */
export function recordFailure(compositeId: string): void {
  const timestamp = Date.now();
  const existing = breakers.get(compositeId);

  if (existing === undefined || timestamp - existing.lastFailure > WINDOW_MS) {
    const fresh: BreakerEntry = { failures: 1, lastFailure: timestamp, cooldownUntil: 0 };
    breakers.set(compositeId, fresh);
    return;
  }

  existing.failures += 1;
  existing.lastFailure = timestamp;
  if (existing.failures >= THRESHOLD) {
    existing.cooldownUntil = timestamp + COOLDOWN_MS;
  }
}

/** A successful dispatch clears all failure history for the candidate. */
export function recordSuccess(compositeId: string): void {
  breakers.delete(compositeId);
}

/**
 * Report whether a candidate is currently in cooldown.
 *
 * An expired cooldown is cleaned up here: the entry is dropped, so the
 * candidate starts again from a clean slate.
 */
export function isTripped(compositeId: string): boolean {
  const cooldownUntil = breakers.get(compositeId)?.cooldownUntil ?? 0;
  if (cooldownUntil === 0) {
    // Unknown candidate, or failures recorded but threshold never reached.
    return false;
  }
  if (Date.now() <= cooldownUntil) {
    return true;
  }
  breakers.delete(compositeId);
  return false;
}

View File

@@ -0,0 +1,20 @@
/** Listener invoked with every published delta. */
export type DeltaCallback = (delta: unknown) => void;

/** Minimal in-process pub/sub channel for delta frames. */
export type DeltaEmitter = {
  /** Register a listener; the returned function removes it again. */
  subscribe(cb: DeltaCallback): () => void;
  /** Deliver a delta to every registered listener. */
  publish(delta: unknown): void;
};

/**
 * Create an emitter backed by a Set of listeners.
 *
 * Registering the same callback twice keeps a single registration. A listener
 * that throws is ignored so the remaining listeners still receive the delta.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const subscribers = new Set<DeltaCallback>();

  const subscribe = (cb: DeltaCallback): (() => void) => {
    subscribers.add(cb);
    return () => {
      subscribers.delete(cb);
    };
  };

  const publish = (delta: unknown): void => {
    subscribers.forEach((notify) => {
      try {
        notify(delta);
      } catch {
        /* ignore emitter errors */
      }
    });
  };

  return { subscribe, publish };
}

View File

@@ -8,8 +8,6 @@ import type { Sql } from '../db.js';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// ─── types ──────────────────────────────────────────────────────────────────
export interface CodeTask {
id: string;
prompt: string;
@@ -57,8 +55,6 @@ export interface EvalSuiteRow {
created_at: string;
}
// ─── YAML loader ────────────────────────────────────────────────────────────
const DATA_DIR = resolve(dirname(__filename), '../../data');
/**
@@ -151,8 +147,6 @@ function normalizeCriteria(rubric: Record<string, unknown>): RubricCriterion[] {
return result;
}
// ─── DB operations ──────────────────────────────────────────────────────────
/**
* Seed eval suites from data/ YAML files into the database.
* Uses INSERT ... ON CONFLICT DO NOTHING for idempotency.

View File

@@ -14,16 +14,12 @@
import type { FastifyBaseLogger } from 'fastify';
import type { Sql } from '../db.js';
// ─── jitter (pure) ──────────────────────────────────────────────────────────
/** Add random 0-50% jitter to a delay value. */
export function addJitter(delayMs: number): number {
const jitter = delayMs * Math.random() * 0.5;
return delayMs + jitter;
}
// ─── reconnect backoff ──────────────────────────────────────────────────────
export interface ReconnectPolicy {
baseMs: number;
maxMs: number;
@@ -50,14 +46,6 @@ export function reconnectDecision(
return { action: 'reconnect', delayMs: addJitter(capped) };
}
// ─── llama-swap SSE envelope types ──────────────────────────────────────────
// Real wire shape (apigroup.go):
// event:message
// data:{"type":"modelStatus|logData|metrics|inflight","data":"<ESCAPED JSON STRING>"}
// The SSE event name is ALWAYS 'message'. The discriminator is the outer JSON's
// .type field. The payload is DOUBLE-ENCODED: JSON.parse(data) gives {type, data:string},
// then JSON.parse(that.data) gives the actual payload.
// Per-type payload shapes, verified against the fork source
// (/opt/forks/llama-swap/internal/server/apigroup.go sendModels/sendLogData/
// sendMetrics/sendInFlight, apiModel struct at :20):
@@ -114,14 +102,11 @@ export interface InflightData {
total: number;
}
// ─── the loop ───────────────────────────────────────────────────────────────
export interface FleetConnectorDeps {
isUp: () => boolean;
sql: Sql;
log: FastifyBaseLogger;
onEvent: (providerId: string, event: LlamaSweepSSEEvent) => void | Promise<void>;
onReconcile: (providerId: string, metrics: MetricsEntry[]) => Promise<boolean>;
onReconnectGiveUp: (providerId: string) => Promise<void>;
sleep?: (ms: number) => Promise<void>;
policy?: ReconnectPolicy;

View File

@@ -0,0 +1,62 @@
import type { FleetState } from './fleet-state.js';
import { ensureHostState, stampLastSeen } from './fleet-state.js';
import type { getSql } from '../db.js';
import { jsonbObject } from './jsonb.js';
/**
 * Rebuild the in-memory fleet map from the persisted control tables.
 *
 * Every provider found in `control_model_events` is set to `down` and its
 * model map is seeded from that model's newest event. Providers that appear in
 * `control_requests` or `control_perf_samples` are then passed through
 * `ensureHostState` / `stampLastSeen` as well.
 *
 * @param fleet Fleet state to populate; mutated in place.
 * @param sql   Database client used to read the control tables.
 */
export async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
  // Latest event per (provider, model) via DISTINCT ON -- one row per model, the
  // truly newest, instead of one-per-(provider,model,state) which over-reads and
  // can tie on identical clock_timestamp() values (REV5).
  const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
    SELECT DISTINCT ON (provider_id, model) provider_id, model, state, ts, detail
    FROM control_model_events
    ORDER BY provider_id, model, ts DESC
  `;

  for (const row of modelEvents) {
    const state = ensureHostState(fleet, row.provider_id);
    state.liveness = 'down';
    stampLastSeen(state);

    // row.detail is jsonb (porsager returns it parsed); jsonbObject tolerates
    // both a parsed object and a JSON string.
    const detail: unknown = jsonbObject(row.detail);

    // B4: ttlDeadline recalculation. Use event timestamp so the deadline reflects
    // when the model was actually loaded, not when we rebuild.
    const ttl = (detail as { ttl?: number })?.ttl;
    const eventTs = new Date(row.ts).getTime();
    const ttlDeadline = ttl ? new Date(eventTs + ttl * 1000) : null;

    state.models.set(row.model, {
      model: row.model,
      state: row.state,
      ts: new Date(row.ts),
      ttlDeadline,
      inflight: 0,
    });
  }

  // One row per provider with its newest request timestamp. Grouping by
  // provider avoids the uncorrelated `ts IN (SELECT MAX(ts) ...)` form, which
  // also matches older rows of one provider whose timestamp happens to equal
  // another provider's maximum and returns duplicates on ties. The provider set
  // and the newest-first ordering are unchanged.
  const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, MAX(ts) AS ts
    FROM control_requests
    WHERE ts IS NOT NULL
    GROUP BY provider_id
    ORDER BY MAX(ts) DESC
  `;
  for (const row of lastRequests) {
    stampLastSeen(ensureHostState(fleet, row.provider_id));
  }

  // Same shape for performance samples: one row per provider, newest first.
  const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, MAX(ts) AS ts
    FROM control_perf_samples
    WHERE ts IS NOT NULL
    GROUP BY provider_id
    ORDER BY MAX(ts) DESC
  `;
  for (const row of lastPerf) {
    stampLastSeen(ensureHostState(fleet, row.provider_id));
  }
}

View File

@@ -10,7 +10,7 @@ export interface FleetState {
export interface HostState {
providerId: string;
liveness: 'connected' | 'reconnecting' | 'down';
liveness: 'connected' | 'down';
lastSeenAt: Date | null;
seq: number;
/** Host-level inflight total (the fork's SSE publishes only a total, not per-model). */
@@ -29,7 +29,7 @@ export interface ModelState {
export interface SnapshotData {
hosts: Array<{
providerId: string;
liveness: 'connected' | 'reconnecting' | 'down';
liveness: 'connected' | 'down';
lastSeenAt: string | null;
seq: number;
models: Array<{
@@ -57,8 +57,6 @@ export interface SnapshotData {
}>;
}
// ─── helpers for tests ──────────────────────────────────────────────────────
export function createFleetState(): FleetState {
return { hosts: new Map() };
}

View File

@@ -20,14 +20,12 @@ import type { Sql } from '../db.js';
import type { FleetState } from './fleet-state.js';
import { computeRoutingScores, type ModelScore } from './routing-scores.js';
import { jsonbStringArray } from './jsonb.js';
import { isTripped } from './circuit-breaker.js';
export { isGatewayVirtualModel } from '@boocode/contracts/gateway';
export const VIRTUAL_MODELS = ['auto', 'auto:code', 'auto:fast', 'auto:cheap'] as const;
export type VirtualModel = (typeof VIRTUAL_MODELS)[number];
export function isGatewayVirtualModel(id: string): boolean {
return id === 'auto' || id.startsWith('auto:');
}
/**
* Strip a composite/provider prefix the picker may prepend. The gateway
* registry provider id is 'auto', so BooChat may send 'auto/auto:code'.
@@ -70,7 +68,7 @@ export function orderCandidates(
if (policy.fallback && !ordered.includes(policy.fallback)) ordered.push(policy.fallback);
// Keep curated order; drop unhealthy. If a candidate isn't in the scores
// set at all (never seen), keep it — health is unknown, let dispatch try.
return ordered.filter((id) => !scores.some((s) => s.compositeId === id) || healthy.has(id));
return ordered.filter((id) => !scores.some((s) => s.compositeId === id) || (healthy.has(id) && !isTripped(id)));
}
// Derive from advisory scores by category metric.
@@ -89,7 +87,7 @@ export function orderCandidates(
};
return scores
.filter((s) => s.healthy && metric(s) != null)
.filter((s) => s.healthy && !isTripped(s.compositeId) && metric(s) != null)
.sort((a, b) => (metric(b) ?? -Infinity) - (metric(a) ?? -Infinity))
.map((s) => s.compositeId);
}
@@ -128,10 +126,37 @@ export async function resolveCandidates(
policyName = row.name;
}
const candidates = orderCandidates(virtualModel, policy, scores);
let candidates = orderCandidates(virtualModel, policy, scores);
// Cold-start fallback (G2): with no curated policy and no eval/traffic history,
// advisory scores are empty so orderCandidates returns []. Fall back to the
// live fleet model map so a healthy host still dispatches instead of 503ing.
if (candidates.length === 0) {
candidates = fleetModelCandidates(fleet);
}
return { virtualModel, candidates, policyName };
}
/**
 * Build the cold-start candidate list straight from live fleet state.
 *
 * Only hosts whose liveness is `connected` contribute. Each of their known
 * models becomes a composite id `<providerId>/<model>`. Models in state
 * `ready` are listed before all others; within each group the fleet's own
 * iteration order is kept. Reads nothing but the fleet snapshot passed in.
 */
export function fleetModelCandidates(fleet: FleetState): string[] {
  const loaded: string[] = [];
  const rest: string[] = [];

  fleet.hosts.forEach((host) => {
    if (host.liveness !== 'connected') return;
    host.models.forEach((entry) => {
      const compositeId = `${host.providerId}/${entry.model}`;
      const bucket = entry.state === 'ready' ? loaded : rest;
      bucket.push(compositeId);
    });
  });

  return loaded.concat(rest);
}
/** Split a composite id 'provider/model' into parts. */
export function splitComposite(compositeId: string): { providerId: string; model: string } | null {
const slash = compositeId.indexOf('/');

View File

@@ -11,8 +11,8 @@ export interface HostGrant {
}
export async function acquireHostAccess(
providerId: string,
purpose: string,
_providerId: string,
_purpose: string,
): Promise<HostGrant> {
// V1: no-op — always grant access.
return { ok: true };

View File

@@ -1,10 +1,9 @@
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import type { DeltaEmitter } from './delta-emitter.js';
import { publishJob } from './publish-job.js';
import { recordEvalResult, completeEvalRun } from './eval-suites.js';
import { resolveProviderBaseUrl } from './llama-providers.js';
// ─── types ──────────────────────────────────────────────────────────────────
export interface JudgeEvalParams {
runId: string;
providerId: string;
@@ -22,8 +21,6 @@ export interface JudgeResult {
error: string | null;
}
// ─── judge runner ───────────────────────────────────────────────────────────
/**
* Run a judge-based eval (chat quality, rubric scoring).
*
@@ -44,7 +41,7 @@ export async function runJudgeEval(
logger: import('fastify').FastifyBaseLogger,
onProgress: (progress: JudgeProgress) => void,
): Promise<JudgeResult> {
const { runId, providerId, model, tasks, judgeModel, quant } = params;
const { runId, providerId, model, tasks, judgeModel, quant: _quant } = params;
// Resolve the target model's base URL.
const baseUrl = resolveProviderBaseUrl(providerId);
@@ -122,12 +119,11 @@ export async function runJudgeEval(
completedTasks++;
onProgress({ completedTasks });
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'eval' as const,
jobType: 'eval',
jobId: runId,
status: 'running' as const,
status: 'running',
detail: {
completedTasks,
totalTasks: tasks.length,

View File

@@ -8,8 +8,10 @@
* wrapper mode; in shell mode it is the only argument and is regex-clean).
*/
import type { DeltaEmitter } from '../index.js';
import type { DeltaEmitter } from './delta-emitter.js';
import { publishJob } from './publish-job.js';
import type { SshExec, SshTarget, SshMode } from './ssh-config.js';
import { acquireHostAccess } from './host-access.js';
/**
* HF repo id: org/name. Each segment MUST start with an alphanumeric (HF's own
@@ -31,11 +33,15 @@ export function buildPullCommand(mode: SshMode, repo: string, modelsDir?: string
if (mode === 'wrapper') return `pull ${repo}`;
const dir = (modelsDir ?? '').replace(/\/+$/, '');
const local = `${dir}/${repo.replace(/\//g, '__')}`;
return `huggingface-cli download ${repo} --local-dir '${local}'`;
// POSIX single-quote escape the path: handles spaces AND an embedded quote in
// modelsDir (which comes from the request body). repo is already regex-clean.
const quoted = `'${local.replace(/'/g, `'\\''`)}'`;
return `huggingface-cli download ${repo} --local-dir ${quoted}`;
}
export interface PullParams {
jobId: string;
providerId: string;
target: SshTarget;
repo: string;
mode: SshMode;
@@ -57,49 +63,37 @@ export async function runModelPull(
emitter: DeltaEmitter,
seq: number = 0,
): Promise<PullResult> {
const { jobId, target, repo, mode, modelsDir } = params;
const { jobId, providerId, target, repo, mode, modelsDir } = params;
const grant = await acquireHostAccess(providerId, 'pull');
if (!grant.ok) {
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: `host access denied: ${grant.reason}` } });
return { ok: false, error: `host access denied: ${grant.reason}` };
}
if (!validateRepoId(repo)) {
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'failed' as const, detail: { kind: 'pull', repo, error: 'invalid repo id' },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: 'invalid repo id' } });
return { ok: false, error: 'invalid repo id' };
}
if (mode === 'shell' && !modelsDir) {
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'failed' as const, detail: { kind: 'pull', repo, error: 'shell mode requires a models directory' },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: 'shell mode requires a models directory' } });
return { ok: false, error: 'shell mode requires a models directory' };
}
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'running' as const, detail: { kind: 'pull', repo },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'running', detail: { kind: 'pull', repo } });
try {
const res = await exec(target, buildPullCommand(mode, repo, modelsDir));
if (res.code !== 0) {
const error = `pull failed (exit ${res.code}): ${res.stderr.slice(0, 500)}`;
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'failed' as const, detail: { kind: 'pull', repo, error },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error } });
return { ok: false, error };
}
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'completed' as const, detail: { kind: 'pull', repo, output: res.stdout.slice(-500) },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'completed', detail: { kind: 'pull', repo, output: res.stdout.slice(-500) } });
return { ok: true };
} catch (err) {
const error = (err as Error).message ?? String(err);
emitter.publish({
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
status: 'failed' as const, detail: { kind: 'pull', repo, error },
});
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error } });
return { ok: false, error };
}
}

View File

@@ -0,0 +1,82 @@
import type { FleetState } from './fleet-state.js';
import { ensureHostState, stampLastSeen, incrementSeq } from './fleet-state.js';
import type { DeltaEmitter } from './delta-emitter.js';
import type { getSql } from '../db.js';
import type { loadConfig } from '../config.js';
/**
 * Poll one host's `/api/performance` endpoint and persist the returned samples.
 *
 * The newest stored sample timestamp for `providerId` is read first and, when
 * present, sent as the `after` query parameter. GPU and system stats from the
 * response are paired by their `timestamp` field. Each distinct timestamp is
 * inserted into `control_perf_samples` (existing rows are left untouched) and
 * published as one `control_perf` frame.
 *
 * Errors raised by the fetch, JSON parsing, inserts or publishing are logged
 * with `console.warn` and swallowed. The watermark query runs before the
 * `try`, so a failure there rejects the returned promise.
 *
 * @param sql - Tagged-template SQL client.
 * @param config - Currently unused.
 * @param providerId - Host identifier used as the row key and on emitted frames.
 * @param baseUrl - Base URL of the host; `/api/performance` is appended.
 * @param fleet - Fleet state holding the per-host entry.
 * @param emitter - Receives one `control_perf` frame per sample timestamp.
 */
export async function pollPerformance(
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  baseUrl: string,
  fleet: FleetState,
  emitter: DeltaEmitter,
): Promise<void> {
  void config;
  const host = ensureHostState(fleet, providerId);

  const newest = await sql<{ ts: string | null }[]>`
SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
`;

  // porsager returns timestamptz as a Date object; interpolating it raw yields
  // Date.toString() which llama-swap rejects with 400.
  let query = '';
  if (newest[0]?.ts) {
    query = `?after=${encodeURIComponent(new Date(newest[0].ts).toISOString())}`;
  }
  const endpoint = `${baseUrl}/api/performance${query}`;

  // Index samples by their `timestamp`; entries without one are dropped and a
  // repeated timestamp keeps the last sample seen.
  const indexByTimestamp = (samples: unknown[] | undefined): Map<string, unknown> => {
    const byTs = new Map<string, unknown>();
    for (const sample of samples ?? []) {
      const stamp = (sample as { timestamp?: string }).timestamp;
      if (stamp) {
        byTs.set(stamp, sample);
      }
    }
    return byTs;
  };

  try {
    const res = await fetch(endpoint, { signal: AbortSignal.timeout(10_000) });
    if (!res.ok) return;

    const payload = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
    if (!payload) return;

    const gpuByTs = indexByTimestamp(payload.gpu_stats);
    const sysByTs = indexByTimestamp(payload.sys_stats);
    const stamps = new Set([...gpuByTs.keys(), ...sysByTs.keys()]);
    if (stamps.size === 0) return;

    // Only a poll that actually returned samples refreshes last-seen.
    stampLastSeen(host);

    for (const ts of stamps) {
      const gpu = gpuByTs.get(ts) ?? null;
      const sys = sysByTs.get(ts) ?? null;
      await sql`
INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
ON CONFLICT (provider_id, ts) DO NOTHING
`;
      const seq = incrementSeq(host);
      emitter.publish({ type: 'control_perf' as const, seq, providerId, ts, gpu, sys });
    }
  } catch (err) {
    const msg = (err as Error).message ?? String(err);
    console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
  }
}

View File

@@ -0,0 +1,18 @@
import type { WsFrame } from '@boocode/contracts/ws-frames';
import type { DeltaEmitter } from './delta-emitter.js';
/** The member of the `WsFrame` union whose `type` tag is `'control_job'`. */
type ControlJobFrame = Extract<WsFrame, { type: 'control_job' }>;
/** Type of the `jobType` field on a `control_job` frame. */
export type JobType = ControlJobFrame['jobType'];
/** Type of the `status` field on a `control_job` frame. */
export type JobStatus = ControlJobFrame['status'];
/**
 * Input to `publishJob`: the fields of a `control_job` frame other than its
 * `type` tag, which `publishJob` adds itself.
 */
export interface PublishJobParams {
/** Sequence number carried on the frame. */
seq: number;
/** Job category; constrained to the frame's `jobType` field type. */
jobType: JobType;
/** Identifier of the job the frame reports on. */
jobId: string;
/** Job status; constrained to the frame's `status` field type. */
status: JobStatus;
/** Optional free-form payload copied onto the frame as-is. */
detail?: Record<string, unknown>;
}
/**
 * Publish a `control_job` frame on `emitter`.
 *
 * The frame is `params` with the `type: 'control_job'` tag added; every
 * property of `params` is copied onto it unchanged.
 *
 * @param emitter - Emitter whose `publish` method receives the frame.
 * @param params - Frame fields (sequence number, job type/id, status, detail).
 */
export function publishJob(emitter: DeltaEmitter, params: PublishJobParams): void {
  const frame = { type: 'control_job' as const, ...params };
  emitter.publish(frame);
}

View File

@@ -141,8 +141,10 @@ export function trimCapture(captureJson: string | null, sizeKB: number): string
if (!captureJson) return null;
const sizeBytes = Buffer.byteLength(captureJson, 'utf8');
if (sizeBytes <= sizeKB * 1024) return captureJson;
// Trim the capture to fit within the cap.
return captureJson.slice(0, Math.floor(sizeKB * 1024));
// Trim by BYTES, not JS chars: a char-index slice can split a multi-byte
// codepoint and emit invalid UTF-8 (DB write error / corruption). Buffer
// subarray + toString('utf8') truncates at the last whole codepoint.
return Buffer.from(captureJson, 'utf8').subarray(0, Math.floor(sizeKB * 1024)).toString('utf8');
}
/**

View File

@@ -37,6 +37,8 @@ export interface ModelScore {
avgLatencyMs: number | null;
/** Recent request count in the live window. */
sampleCount: number;
/** Avg gen tok/s over the last 5 minutes from control_requests, or null. */
recentGenTps: number | null;
/** Whether the owning host is currently connected. */
healthy: boolean;
/** Category badges this model currently wins. */
@@ -143,6 +145,18 @@ export async function computeRoutingScores(
GROUP BY provider_id, model
`;
// 2.5. Recent throughput — avg gen_tps from control_requests over the last 5 minutes, for the EMA blend.
const recentCutoff = new Date(Date.now() - 5 * 60_000).toISOString();
const recentLatencyRows = await sql<{ provider_id: string; model: string; recent_tps: number | null }[]>`
SELECT provider_id,
model,
AVG(gen_tps) FILTER (WHERE gen_tps > 0) AS recent_tps
FROM control_requests
WHERE ts >= ${recentCutoff}
AND model IS NOT NULL
GROUP BY provider_id, model
`;
// 3. Merge signals keyed by compositeId.
const byKey = new Map<string, ModelScore>();
const keyOf = (providerId: string, model: string) => `${providerId}/${model}`;
@@ -160,6 +174,7 @@ export async function computeRoutingScores(
evalScore: null,
avgGenTps: null,
avgLatencyMs: null,
recentGenTps: null,
sampleCount: 0,
healthy: fleet.hosts.get(providerId)?.liveness === 'connected',
badges: [],
@@ -184,6 +199,19 @@ export async function computeRoutingScores(
s.sampleCount = row.sample_count;
}
for (const row of recentLatencyRows) {
const s = ensure(row.provider_id, row.model);
s.recentGenTps = row.recent_tps;
}
// 4. EMA blend: effective gen_tps = 0.7 * recent + 0.3 * history.
// Fall through to history-only when recent is null.
for (const s of byKey.values()) {
if (s.recentGenTps != null && s.avgGenTps != null) {
s.avgGenTps = 0.7 * s.recentGenTps + 0.3 * s.avgGenTps;
}
}
// Deterministic order before badge assignment so ties are stable.
const scores = Array.from(byKey.values()).sort((a, b) =>
a.compositeId < b.compositeId ? -1 : a.compositeId > b.compositeId ? 1 : 0,

View File

@@ -1,10 +1,10 @@
import { spawn, type ChildProcess } from 'node:child_process';
import { randomUUID } from 'node:crypto';
import type { Sql } from '../db.js';
import type { DeltaEmitter } from '../index.js';
import type { DeltaEmitter } from './delta-emitter.js';
import { publishJob } from './publish-job.js';
import { recordEvalResult } from './eval-suites.js';
// ─── types ──────────────────────────────────────────────────────────────────
import { acquireHostAccess } from './host-access.js';
export interface SandboxEvalParams {
runId: string;
@@ -28,8 +28,6 @@ export interface SandboxContainer {
timeoutHandle: NodeJS.Timeout | null;
}
// ─── hardening constants (LAW, not suggestions) ─────────────────────────────
const SANDBOX_IMAGE = process.env.SANDBOX_IMAGE ?? 'node:20-bookworm-slim';
const SANDBOX_MEMORY = process.env.SANDBOX_MEMORY ?? '512m';
const SANDBOX_CPU = process.env.SANDBOX_CPU ?? '0.5';
@@ -38,8 +36,6 @@ const SANDBOX_TIMEOUT_MS = Number(process.env.SANDBOX_TIMEOUT_MS ?? '30000');
const SANDBOX_CONCURRENCY = Number(process.env.SANDBOX_CONCURRENCY ?? '4');
const SANDBOX_LABEL = 'boocontrol-eval';
// ─── sandbox runner ─────────────────────────────────────────────────────────
/**
* Run a code sandbox eval: each task generates code via LLM, executes in
* an ephemeral Docker container with hardening flags, and scores pass@1.
@@ -70,6 +66,11 @@ export async function runCodeEval(
): Promise<SandboxResult> {
const { runId, tasks } = params;
const grant = await acquireHostAccess(params.providerId, 'sandbox');
if (!grant.ok) {
return { error: `host access denied: ${grant.reason}` };
}
// Orphan prune at engine start.
await pruneOrphanContainers();
@@ -99,7 +100,6 @@ export async function runCodeEval(
// Generate code from LLM.
const generatedCode = await generateCode(params.providerId, params.model, prompt, language);
// Execute in sandbox.
const execResult = await executeInSandbox(generatedCode, testCode, language);
const executionMs = Date.now() - startTime;
@@ -123,12 +123,11 @@ export async function runCodeEval(
null,
);
emitter.publish({
type: 'control_job' as const,
publishJob(emitter, {
seq,
jobType: 'eval' as const,
jobType: 'eval',
jobId: runId,
status: 'running' as const,
status: 'running',
detail: {
taskId,
taskIndex: globalIdx,
@@ -169,7 +168,6 @@ export async function runCodeEval(
}),
);
// Log batch results.
for (const result of results) {
if (result.status === 'rejected') {
console.error('sandbox: batch task rejected:', result.reason);
@@ -243,7 +241,6 @@ async function executeInSandbox(
return new Promise((resolve, reject) => {
const containerId = `eval_${randomUUID().slice(0, 12)}`;
// Build the combined script: generated code + test code.
const script = buildExecutionScript(generatedCode, testCode, language);
// SECURITY: Hardened Docker run command.
@@ -366,7 +363,6 @@ async function pruneOrphanContainers(): Promise<void> {
pruneCmd.on('close', async () => {
const containerIds = output.trim().split('\n').filter(Boolean);
if (containerIds.length > 0) {
console.log({ count: containerIds.length }, 'sandbox: pruning orphan containers');
const kill = spawn('docker', ['kill', ...containerIds]);
await new Promise((r) => {
kill.on('close', r);

View File

@@ -0,0 +1,210 @@
import type { FleetState } from './fleet-state.js';
import { ensureHostState, stampLastSeen, incrementSeq } from './fleet-state.js';
import type { LlamaSweepSSEEvent, MetricsEntry } from './fleet-connector.js';
import type { LogRelay } from './log-relay.js';
import type { DeltaEmitter } from './delta-emitter.js';
import type { getSql } from '../db.js';
import type { loadConfig } from '../config.js';
import { trimCapture, parseCaptureJson } from './retention.js';
import { detectGap } from './reconcile.js';
/**
 * Flat, snake_case view of a `MetricsEntry`, as produced by `mapMetricsEntry`.
 * These fields are the values bound into the `control_requests` inserts in
 * this file.
 */
export interface MappedMetricsEntry {
/** Upstream entry id (`entry.id`); written to the `swap_entry_id` column. */
id: number;
/** Upstream `entry.timestamp`, passed through unchanged. */
ts: string;
model: string;
req_path: string;
/** Upstream `resp_status_code`. */
status_code: number;
duration_ms: number;
/** Token counters copied from `entry.tokens`. */
cache_tokens: number;
input_tokens: number;
output_tokens: number;
/** Upstream `tokens.prompt_per_second`. */
prompt_tps: number;
/** Upstream `tokens.tokens_per_second`. */
gen_tps: number;
has_capture: boolean;
/** P4: NULL for ring data -- ActivityLogEntry does not carry request headers. */
source: string | null;
}
/**
 * Flatten a `MetricsEntry` into the row shape used for `control_requests`.
 *
 * Top-level fields are renamed (`timestamp` -> `ts`, `resp_status_code` ->
 * `status_code`) and the nested `tokens` counters are lifted to the top level.
 * `source` is always `null` here.
 *
 * @param entry - Metrics entry as received from the host.
 * @returns The flattened entry.
 */
export function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;
  const mapped: MappedMetricsEntry = {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    source: null,
  };
  return mapped;
}
/**
 * Apply one SSE event from a host to the in-memory fleet state, persist what
 * needs persisting, and publish the resulting frame(s) on `emitter`.
 *
 * Every event stamps the host's last-seen time. Per event type:
 * - `modelStatus`: the payload is the full model list. The host is marked
 *   connected, every listed model is refreshed in `state.models`, and only
 *   models whose state changed are written to `control_model_events`. One
 *   `control_fleet` frame is published when at least one model changed.
 * - `logData`: the line is appended to `logRelay` (when given) and published
 *   as a `control_log` frame.
 * - `metrics`: the batch is first passed to `handleReconcile` (a failure there
 *   is logged and does not abort the batch). Each entry is then written to
 *   `control_requests` with its size-capped capture and published as a
 *   `control_activity` frame. These frames reuse the host's current `seq`
 *   rather than incrementing it.
 * - `inflight`: updates `state.inflightTotal` only.
 *
 * @param fleet - Fleet state holding the per-host entry.
 * @param sql - Tagged-template SQL client.
 * @param config - Supplies `CAPTURE_SIZE_KB`, the capture size cap.
 * @param providerId - Host the event came from.
 * @param emitter - Receives the frames described above.
 * @param event - The decoded SSE event.
 * @param logRelay - Optional log buffer fed by `logData` events.
 * @returns Resolves once all writes for the event have been awaited. Rejects
 *   if a database write in the `modelStatus` or `metrics` path fails.
 */
export async function handleLlamaSweepEvent(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  event: LlamaSweepSSEEvent,
  logRelay: LogRelay | null = null,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);
  switch (event.type) {
    case 'modelStatus': {
      // Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
      // Derive transitions by diffing against current state; persist only changes.
      state.liveness = 'connected';
      const changed: Array<{ model: string; state: string }> = [];
      for (const m of event.data) {
        const prev = state.models.get(m.id);
        if (!prev || prev.state !== m.state) {
          changed.push({ model: m.id, state: m.state });
        }
        // Refresh the entry even when unchanged; the TTL deadline and in-flight
        // count carry over from the previous entry.
        state.models.set(m.id, {
          model: m.id,
          state: m.state,
          ts: new Date(),
          ttlDeadline: prev?.ttlDeadline ?? null,
          inflight: prev?.inflight ?? 0,
        });
      }
      if (changed.length === 0) break;
      const seq = incrementSeq(state);
      for (const c of changed) {
        await sql`
INSERT INTO control_model_events (provider_id, model, state, ts, detail)
VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
ON CONFLICT (provider_id, model, state, ts) DO NOTHING
`;
      }
      emitter.publish({
        type: 'control_fleet' as const,
        seq,
        hosts: [{
          providerId: state.providerId,
          liveness: state.liveness,
          lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
          seq: state.seq,
          models: Array.from(state.models.values()).map((m) => ({
            model: m.model,
            state: m.state,
            ts: m.ts.toISOString(),
            ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
            inflight: m.inflight,
          })),
        }],
      });
      break;
    }
    case 'logData': {
      const source = event.data.source as 'proxy' | 'upstream' | 'model';
      const text = event.data.data;
      if (logRelay) {
        logRelay.append(providerId, source, text);
      }
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_log' as const,
        seq,
        providerId,
        source,
        line: text,
        ts: new Date().toISOString(),
      });
      break;
    }
    case 'metrics': {
      const entries = event.data;
      // Best effort: a reconcile failure is logged and the batch is still processed.
      await handleReconcile(fleet, sql, config, providerId, emitter, event.data).catch((err) => {
        const msg = (err as Error).message ?? String(err);
        console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
      });
      for (const entry of entries) {
        const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
        const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
        const mapped = mapMetricsEntry(entry);
        // handleReconcile has normally just inserted this same row WITHOUT its
        // capture, so a plain DO NOTHING here would silently drop every capture.
        // Upsert instead: fill `capture` only when the stored row has none and
        // this entry carries one; all other columns of an existing row are kept.
        // NOTE(review): if a retention job nulls out old captures, a replayed
        // entry could repopulate one — confirm that is acceptable.
        await sql`
INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
SET capture = EXCLUDED.capture
WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
`;
        emitter.publish({
          type: 'control_activity' as const,
          seq: state.seq,
          providerId,
          entry: {
            id: mapped.id,
            ts: mapped.ts,
            model: mapped.model,
            reqPath: mapped.req_path,
            statusCode: mapped.status_code,
            durationMs: mapped.duration_ms,
          },
        });
      }
      break;
    }
    case 'inflight': {
      state.inflightTotal = event.data.total;
      break;
    }
  }
}
/**
 * Reconcile a batch of metrics entries against what is already persisted.
 *
 * Marks the host connected, then:
 * 1. Takes the LAST entry of the batch as the oldest reconciled timestamp and
 *    compares it, via `detectGap`, with the newest `control_requests` row for
 *    the host. When a gap is reported, a `gap_suspected` row for model `'*'`
 *    is written to `control_model_events`.
 * 2. Inserts every entry into `control_requests` without its capture; rows
 *    that already exist are left untouched.
 *
 * @param fleet - Fleet state holding the per-host entry.
 * @param sql - Tagged-template SQL client.
 * @param config - Currently unused.
 * @param providerId - Host the batch came from.
 * @param emitter - Currently unused.
 * @param metrics - The batch; a nullish value is treated as empty.
 * @returns Always `true`; database errors reject the promise.
 */
async function handleReconcile(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  metrics: MetricsEntry[],
): Promise<boolean> {
  const host = ensureHostState(fleet, providerId);
  stampLastSeen(host);
  host.liveness = 'connected';

  const batch = metrics ?? [];

  let oldestTs: string | null = null;
  if (batch.length > 0) {
    oldestTs = batch[batch.length - 1]!.timestamp;
  }

  if (oldestTs) {
    const persisted = await sql<{ ts: string }[]>`
SELECT ts FROM control_requests
WHERE provider_id = ${providerId}
ORDER BY ts DESC LIMIT 1
`;
    // Nothing persisted yet means there is nothing to compare against.
    if (persisted.length > 0 && detectGap(oldestTs, persisted[0]!.ts)) {
      const detail = {
        oldestReconcile: oldestTs,
        newestPersisted: persisted[0]!.ts,
      };
      await sql`
INSERT INTO control_model_events (provider_id, model, state, ts, detail)
VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json(detail as never)})
ON CONFLICT (provider_id, model, state, ts) DO NOTHING
`;
    }
  }

  // Map and insert one entry at a time so a failure part-way through keeps the
  // rows already written.
  for (const item of batch) {
    const row = mapMetricsEntry(item);
    await sql`
INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
VALUES (${providerId}, ${row.id}, ${row.ts}, ${row.model}, ${row.req_path}, ${row.status_code}, ${row.duration_ms}, ${row.cache_tokens}, ${row.input_tokens}, ${row.output_tokens}, ${row.prompt_tps}, ${row.gen_tps}, ${row.has_capture}, ${row.source})
ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
`;
  }

  void emitter;
  return true;
}

View File

@@ -23,8 +23,6 @@ const require = createRequire(import.meta.url);
const Ajv = require('ajv') as typeof import('ajv').default;
const addFormats = require('ajv-formats') as typeof import('ajv-formats').default;
// ─── host SSH target ─────────────────────────────────────────────────────────
export interface SshTarget {
host: string;
user: string;
@@ -40,8 +38,6 @@ export interface ExecResult {
/**
 * Injectable SSH executor. Takes the SSH `target` and the `command` string and
 * resolves with an `ExecResult`. `stdin`, when present, is piped to the remote
 * command.
 */
export type SshExec = (target: SshTarget, command: string, stdin?: string) => Promise<ExecResult>;
// ─── pure: schema validation ─────────────────────────────────────────────────
export interface ValidationResult {
valid: boolean;
errors: string[];
@@ -89,8 +85,6 @@ export function validateLlamaConfig(yamlText: string, schema: object): Validatio
return { valid: false, errors: errors.length ? errors : ['schema validation failed'], parsed };
}
// ─── pure: unified-ish diff ──────────────────────────────────────────────────
/**
* Produce a compact line diff between two texts. Trims a common prefix/suffix
* and marks the changed middle with -/+ lines. Sufficient for a preview; not a
@@ -120,20 +114,12 @@ export function computeDiff(oldText: string, newText: string): string {
return out.join('\n');
}
// ─── pure: backup filename ───────────────────────────────────────────────────
/**
 * Build the path of a timestamped backup for `configPath`.
 *
 * The suffix is `now` rendered as an ISO-8601 UTC string with the `-` and `:`
 * separators and the fractional seconds removed, giving
 * `<configPath>.bak-YYYYMMDDTHHMMSSZ`.
 *
 * @param configPath - Path of the file being backed up; used verbatim as prefix.
 * @param now - Instant to stamp the backup with.
 * @returns The backup path.
 * @throws RangeError if `now` is an invalid date (raised by `toISOString`).
 */
export function backupFilename(configPath: string, now: Date): string {
  const compactIso = now.toISOString().replace(/[-:]/g, '');
  const withoutMillis = compactIso.replace(/\.\d+Z$/, 'Z');
  return configPath + '.bak-' + withoutMillis;
}
// ─── RemoteOps seam (shell vs wrapper) ───────────────────────────────────────
//
// 'shell' mode issues raw shell commands (P9.1 behavior). 'wrapper' mode issues
// fixed verbs so the key can be bound to an authorized_keys forced command that
// hardcodes the paths. Both drive the same apply pipeline.
export type SshMode = 'shell' | 'wrapper';
export interface RemoteOps {
@@ -201,8 +187,6 @@ export function makeRemoteOps(mode: SshMode, target: SshTarget, configPath: stri
return mode === 'wrapper' ? wrapperOps(target, exec) : shellOps(target, configPath, exec);
}
// ─── orchestration (injectable exec) ─────────────────────────────────────────
/** Read the remote config file (mode-aware; defaults to shell for compat). */
export async function readRemoteConfig(
target: SshTarget,
@@ -328,8 +312,6 @@ function shellQuote(s: string): string {
return `'${s.replace(/'/g, `'\\''`)}'`;
}
// ─── real SSH executor (spawn) ───────────────────────────────────────────────
/**
* Default SSH executor. Uses the system `ssh` with an explicit identity file and
* IdentitiesOnly so the agent's default key is never offered (the boocode Gitea
@@ -353,6 +335,9 @@ export const sshExec: SshExec = (target, command, stdin) => {
child.stderr.on('data', (d) => { stderr += d.toString(); });
child.on('error', (err) => resolve({ code: 127, stdout, stderr: `${stderr}${(err as Error).message}` }));
child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
// Suppress EPIPE etc. if the remote exits before consuming stdin (e.g. auth
// failure under BatchMode) — an unhandled stream 'error' would crash the process.
child.stdin.on('error', () => {});
if (stdin !== undefined) {
child.stdin.write(stdin);
}