chore: snapshot main sync
This commit is contained in:
15
apps/control/src/app-context.ts
Normal file
15
apps/control/src/app-context.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import type { Sql } from './db.js';
|
||||
import type { Config } from './config.js';
|
||||
import type { FleetState } from './services/fleet-state.js';
|
||||
import type { DeltaEmitter } from './services/delta-emitter.js';
|
||||
import type { ActionQueue } from './services/action-queue.js';
|
||||
import type { LogRelay } from './services/log-relay.js';
|
||||
|
||||
/**
 * Bundle of the long-lived service handles that `main()` builds once and hands
 * to the route registrars (`registerXxxRoutes(app, ctx)`), replacing the
 * previous positional `(sql, fleet, emitter, ...)` argument lists.
 */
export interface AppContext {
  // ── infrastructure ──────────────────────────────────────────────────────
  /** Database handle, typed by `./db.js`. */
  sql: Sql;
  /** Loaded service configuration, typed by `./config.js`. */
  config: Config;

  // ── fleet runtime ───────────────────────────────────────────────────────
  /** In-memory fleet state shared by the handlers and routes. */
  fleet: FleetState;
  /** Pub/sub channel used to publish deltas to subscribers. */
  emitter: DeltaEmitter;

  // ── operator tooling ────────────────────────────────────────────────────
  /** Queue backing the action submission routes. */
  actionQueue: ActionQueue;
  /** Relay that receives log text appended by the SSE log handler. */
  logRelay: LogRelay;
}
|
||||
@@ -1,15 +1,11 @@
|
||||
import Fastify from 'fastify';
|
||||
import fastifyWebsocket from '@fastify/websocket';
|
||||
import '@fastify/websocket';
|
||||
import { loadConfig } from './config.js';
|
||||
import { getSql, applySchema, pingDb, waitForTable } from './db.js';
|
||||
import type { FleetState, HostState } from './services/fleet-state.js';
|
||||
import { createFleetState, ensureHostState, stampLastSeen, incrementSeq } from './services/fleet-state.js';
|
||||
import { createFleetState, ensureHostState } from "./services/fleet-state.js";
|
||||
import { registerControlWebSocket } from './routes/ws.js';
|
||||
import type { LlamaSweepSSEEvent, MetricsEntry } from './services/fleet-connector.js';
|
||||
import { startFleetConnector } from './services/fleet-connector.js';
|
||||
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents, trimCapture, parseCaptureJson } from './services/retention.js';
|
||||
import { detectGap } from './services/reconcile.js';
|
||||
import { jsonbObject } from './services/jsonb.js';
|
||||
import { startFleetConnector } from "./services/fleet-connector.js";
|
||||
import { buildRetentionConfig, runRollup, pruneRawSamples, pruneActivity, pruneModelEvents } from './services/retention.js';
|
||||
import { ActionQueue } from './services/action-queue.js';
|
||||
import { LogRelay } from './services/log-relay.js';
|
||||
import { registerActionRoutes } from './routes/actions.js';
|
||||
@@ -22,407 +18,14 @@ import { registerReportRoutes, startReportScheduler } from './routes/reports.js'
|
||||
import { registerGatewayRoutes } from './routes/gateway.js';
|
||||
import { registerPolicyRoutes } from './routes/policies.js';
|
||||
import { registerSshConfigRoutes } from './routes/ssh-config.js';
|
||||
import { loadLlamaProviders, getLlamaProviders, resolveProviderBaseUrl } from './services/llama-providers.js';
|
||||
|
||||
// ─── delta emitter (B3 fix) ─────────────────────────────────────────────────
|
||||
|
||||
/** Callback invoked once for every delta published on a {@link DeltaEmitter}. */
export type DeltaCallback = (delta: unknown) => void;

/** Minimal in-process publish/subscribe channel for state deltas. */
export type DeltaEmitter = {
  /** Registers `cb`; the returned function removes it again. */
  subscribe(cb: DeltaCallback): () => void;
  /** Delivers `delta` synchronously to every currently registered callback. */
  publish(delta: unknown): void;
};

/**
 * Creates a {@link DeltaEmitter} backed by a private listener set.
 *
 * Delivery rules:
 * - each `publish` works on a snapshot of the listeners taken when it starts,
 *   so a callback that subscribes a new listener while a delta is being
 *   delivered does not cause that listener to receive the in-flight delta
 *   (and cannot make the delivery loop run forever);
 * - a listener that is unsubscribed before its turn in the same publish is
 *   skipped;
 * - a throwing listener never prevents delivery to the remaining ones; the
 *   failure is logged rather than silently discarded.
 *
 * @returns A fresh emitter with no subscribers.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const listeners = new Set<DeltaCallback>();

  return {
    subscribe(cb: DeltaCallback): () => void {
      listeners.add(cb);
      return () => {
        listeners.delete(cb);
      };
    },

    publish(delta: unknown): void {
      // Snapshot first: iterating the live Set would also visit listeners that
      // are added by a callback during this very publish.
      for (const cb of Array.from(listeners)) {
        // Honour unsubscribes that happened earlier in this same publish.
        if (!listeners.has(cb)) continue;
        try {
          cb(delta);
        } catch (err: unknown) {
          // Best-effort delivery: keep going, but leave a trace of the failure.
          const msg = err instanceof Error ? err.message : String(err);
          console.warn({ err: msg }, 'delta emitter: subscriber failed');
        }
      }
    },
  };
}
|
||||
|
||||
// ─── metrics entry field-name mapper ─────────────────────────────────────────
|
||||
// Real /api/metrics shape has nested tokens and different field names:
|
||||
// {id, timestamp, model, req_path, resp_status_code, tokens:{...}, duration_ms, has_capture}
|
||||
// Map to the column names used in control_requests.
|
||||
|
||||
/**
 * A metrics entry renamed and flattened to the column names used by the
 * `control_requests` INSERT statements in this file.
 */
interface MappedMetricsEntry {
  // ── identity ────────────────────────────────────────────────────────────
  /** Upstream entry id (written to the `swap_entry_id` column). */
  id: number;
  /** Upstream `timestamp`, passed through unchanged. */
  ts: string;

  // ── request ─────────────────────────────────────────────────────────────
  model: string;
  req_path: string;
  /** Upstream `resp_status_code`. */
  status_code: number;
  duration_ms: number;

  // ── token accounting (flattened from the nested `tokens` object) ────────
  cache_tokens: number;
  input_tokens: number;
  output_tokens: number;
  /** Upstream `tokens.prompt_per_second`. */
  prompt_tps: number;
  /** Upstream `tokens.tokens_per_second`. */
  gen_tps: number;

  // ── capture / provenance ────────────────────────────────────────────────
  has_capture: boolean;
  /** P4: NULL for ring data — ActivityLogEntry does not carry request headers. */
  source: string | null;
}
|
||||
|
||||
/**
 * Converts one upstream metrics entry into the flat, column-named shape used
 * when writing to `control_requests`.
 *
 * Renames `timestamp` -> `ts` and `resp_status_code` -> `status_code`, and
 * lifts the nested `tokens.*` counters to top-level fields.
 *
 * @param entry - Entry as delivered by the metrics feed.
 * @returns The mapped record; `source` is always `null` here.
 */
function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;

  const mapped: MappedMetricsEntry = {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    // P4: NULL — ActivityLogEntry does not carry request headers.
    source: null,
  };

  return mapped;
}
|
||||
|
||||
// ─── SSE event handlers (B5 fix: await onEvent; B2 fix: incrementSeq) ───────
|
||||
|
||||
/**
 * Applies one SSE event from a provider to the in-memory fleet state, persists
 * what needs persisting, and publishes the matching delta(s).
 *
 * Per event type:
 * - `modelStatus`: diffs the full model list against the current state,
 *   records one `control_model_events` row per changed model and publishes a
 *   `control_fleet` delta (nothing is written or published when no model
 *   changed state).
 * - `logData`: relay only — appends to `logRelay` when one is given and
 *   publishes a `control_log` delta; nothing is persisted.
 * - `metrics`: runs the reconcile pass, then stores each entry (including its
 *   trimmed capture) in `control_requests` and publishes a `control_activity`
 *   delta per entry.
 * - `inflight`: stores the host-level in-flight total on the host state.
 *
 * @param fleet - Fleet state holding the per-provider host state.
 * @param sql - Database handle used for the INSERTs.
 * @param config - Loaded config; `CAPTURE_SIZE_KB` bounds stored captures.
 * @param providerId - Provider the event originated from.
 * @param emitter - Emitter that receives the resulting deltas.
 * @param event - The decoded SSE event.
 * @param logRelay - Optional log relay; when `null`, log text is only published.
 * @returns Resolves once all writes for the event have been awaited.
 */
export async function handleLlamaSweepEvent(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  event: LlamaSweepSSEEvent,
  logRelay: LogRelay | null = null,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);

  switch (event.type) {
    case 'modelStatus': {
      // Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
      // Derive transitions by diffing against current state; persist only changes.
      state.liveness = 'connected';
      const changed: Array<{ model: string; state: string }> = [];
      for (const m of event.data) {
        const prev = state.models.get(m.id);
        if (!prev || prev.state !== m.state) {
          changed.push({ model: m.id, state: m.state });
        }
        // Always refresh the entry; ttlDeadline and inflight carry over from
        // the previous entry because this payload does not supply them.
        state.models.set(m.id, {
          model: m.id,
          state: m.state,
          ts: new Date(),
          ttlDeadline: prev?.ttlDeadline ?? null,
          inflight: prev?.inflight ?? 0,
        });
      }
      if (changed.length === 0) break;

      const seq = incrementSeq(state);
      for (const c of changed) {
        await sql`
          INSERT INTO control_model_events (provider_id, model, state, ts, detail)
          VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
          ON CONFLICT (provider_id, model, state, ts) DO NOTHING
        `;
      }

      // Publish delta to WS subscribers (B3 fix).
      emitter.publish({
        type: 'control_fleet' as const,
        seq,
        hosts: [{
          providerId: state.providerId,
          liveness: state.liveness,
          lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
          seq: state.seq,
          models: Array.from(state.models.values()).map((m) => ({
            model: m.model,
            state: m.state,
            ts: m.ts.toISOString(),
            ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
            inflight: m.inflight,
          })),
        }],
      });
      break;
    }

    case 'logData': {
      // Logs are relay-only; no persistence by default.
      const source = event.data.source as 'proxy' | 'upstream' | 'model';
      // Real payload field is 'data' (fork sendLogData), may contain multiple lines.
      const text = event.data.data;
      if (logRelay) {
        logRelay.append(providerId, source, text);
      }
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_log' as const,
        seq,
        providerId,
        source,
        line: text,
      });
      break;
    }

    case 'metrics': {
      // Real payload: BARE array of ActivityLogEntry (fork sendMetrics).
      const entries = event.data;

      // Bump the host sequence once per metrics event. The deltas below read
      // state.seq at publish time, so the returned value is not needed here.
      incrementSeq(state);

      // B5 fix: await the reconcile pass (handleReconcile is async). A failure
      // is logged and must not stop the per-entry processing below.
      await handleReconcile(fleet, sql, config, providerId, emitter, event.data).catch((err: unknown) => {
        // A1: log the error instead of swallowing silently. Narrow instead of
        // casting so a thrown null/undefined cannot crash the handler itself.
        const msg = err instanceof Error ? err.message : String(err);
        console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
      });

      // Persist captures and publish activity deltas.
      for (const entry of entries) {
        const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
        const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
        // Map real field names: resp_status_code -> status_code, tokens.* nested, timestamp -> ts.
        const mapped = mapMetricsEntry(entry);

        // The reconcile pass above has normally already inserted this row
        // WITHOUT its capture, so a plain DO NOTHING here would always discard
        // the capture. Fill it in on conflict instead, but only when the stored
        // row has none and this entry actually carries one.
        await sql`
          INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
          VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
          ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
            SET capture = EXCLUDED.capture
            WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
        `;

        emitter.publish({
          type: 'control_activity' as const,
          seq: state.seq,
          providerId,
          entry: {
            id: mapped.id,
            ts: mapped.ts,
            model: mapped.model,
            reqPath: mapped.req_path,
            statusCode: mapped.status_code,
            durationMs: mapped.duration_ms,
          },
        });
      }
      break;
    }

    case 'inflight': {
      // Real payload: {total} -- host-level total (fork sendInFlight); the fork
      // does not publish per-model inflight over SSE.
      state.inflightTotal = event.data.total;
      break;
    }
  }
}
|
||||
|
||||
// ─── reconcile handler (B7 fix: called from metrics event) ───────────────────
|
||||
|
||||
/**
 * Reconciles a batch of metrics entries for one provider against what is
 * already stored in `control_requests`.
 *
 * Steps:
 * 1. mark the host as seen and `connected`;
 * 2. compare the last entry of the batch (treated as the oldest) with the
 *    newest persisted row and, when `detectGap` reports a gap, record a
 *    `'*'` / `gap_suspected` row in `control_model_events`;
 * 3. insert every entry, relying on the unique key to drop duplicates.
 *
 * `config` and `emitter` are accepted for signature compatibility; the body
 * does not read them.
 *
 * @param fleet - Fleet state holding the per-provider host state.
 * @param sql - Database handle.
 * @param config - Loaded config (unused here).
 * @param providerId - Provider the batch belongs to.
 * @param emitter - Delta emitter (unused here).
 * @param metrics - Batch of entries; a nullish value is treated as empty.
 * @returns Always `true` once the batch has been processed.
 */
async function handleReconcile(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  metrics: MetricsEntry[],
): Promise<boolean> {
  const host = ensureHostState(fleet, providerId);
  stampLastSeen(host);
  host.liveness = 'connected';

  const batch = metrics ?? [];

  // Detect gap: if oldest reconcile entry is newer than newest persisted entry
  // for that provider, the ring wrapped past our tail.
  let oldestReconcileTs: string | null = null;
  if (batch.length > 0) {
    oldestReconcileTs = batch[batch.length - 1]!.timestamp;
  }

  if (oldestReconcileTs) {
    const persistedRows = await sql<{ ts: string }[]>`
      SELECT ts FROM control_requests
      WHERE provider_id = ${providerId}
      ORDER BY ts DESC LIMIT 1
    `;

    const hasPersisted = persistedRows.length > 0;
    if (hasPersisted) {
      const latest = persistedRows[0]!;
      const gapSuspected = detectGap(oldestReconcileTs, latest.ts);
      if (gapSuspected) {
        await sql`
          INSERT INTO control_model_events (provider_id, model, state, ts, detail)
          VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json({
            oldestReconcile: oldestReconcileTs,
            newestPersisted: latest.ts,
          } as never)})
          ON CONFLICT (provider_id, model, state, ts) DO NOTHING
        `;
      }
    }
  }

  // Ingest reconcile entries (dedup via UNIQUE constraint).
  for (const item of batch) {
    const row = mapMetricsEntry(item);
    await sql`
      INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
      VALUES (${providerId}, ${row.id}, ${row.ts}, ${row.model}, ${row.req_path}, ${row.status_code}, ${row.duration_ms}, ${row.cache_tokens}, ${row.input_tokens}, ${row.output_tokens}, ${row.prompt_tps}, ${row.gen_tps}, ${row.has_capture}, ${row.source})
      ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
    `;
  }

  return true;
}
|
||||
|
||||
// ─── perf poller (A7 fix: add timeout; A8 fix: log errors) ───────────────────
|
||||
|
||||
/**
 * Polls a provider's `/api/performance` endpoint once, stores the returned
 * samples in `control_perf_samples` and publishes one `control_perf` delta
 * per sample timestamp.
 *
 * The request asks only for samples after the newest stored timestamp for the
 * provider. HTTP, network, parse and insert failures inside the poll are
 * logged and swallowed so that a failing host does not reject the poll; the
 * initial watermark query is outside that guard and its errors propagate.
 *
 * `config` is accepted for signature compatibility; the body does not read it.
 *
 * @param sql - Database handle.
 * @param config - Loaded config (unused here).
 * @param providerId - Provider being polled.
 * @param baseUrl - Base URL of the provider; `/api/performance` is appended.
 * @param fleet - Fleet state holding the per-provider host state.
 * @param emitter - Emitter that receives the `control_perf` deltas.
 */
async function pollPerformance(
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  baseUrl: string,
  fleet: FleetState,
  emitter: DeltaEmitter,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);

  // Recover watermark from MAX(ts) per provider.
  const watermark = await sql<{ ts: string | null }[]>`
    SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
  `;

  // porsager returns timestamptz as a Date object; interpolating it raw yields
  // Date.toString() ("Thu Jun 12 2026 ...") which llama-swap rejects with 400.
  const afterParam = watermark[0]?.ts
    ? `?after=${encodeURIComponent(new Date(watermark[0].ts).toISOString())}`
    : '';
  const url = `${baseUrl}/api/performance${afterParam}`;

  try {
    // A7 fix: bound the request with a 10 s timeout via AbortSignal.timeout.
    const fetchSignal = AbortSignal.timeout(10_000);
    const res = await fetch(url, { signal: fetchSignal });
    if (!res.ok) {
      // A8: a non-2xx answer is a failed poll too — log it instead of
      // returning silently.
      console.warn({ providerId, status: res.status }, 'fleet: perf poll returned non-OK status');
      return;
    }

    // Real shape: { gpu_stats: GpuStat[], sys_stats: SysStat[] }
    const data = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
    if (!data) return;

    // Pair gpu_stats and sys_stats by timestamp.
    const gpuMap = new Map<string, unknown>();
    for (const g of data.gpu_stats ?? []) {
      const gpu = g as { timestamp?: string };
      if (gpu.timestamp) {
        gpuMap.set(gpu.timestamp, g);
      }
    }

    const sysMap = new Map<string, unknown>();
    for (const s of data.sys_stats ?? []) {
      const sys = s as { timestamp?: string };
      if (sys.timestamp) {
        sysMap.set(sys.timestamp, s);
      }
    }

    // Collect all unique timestamps.
    const allTimestamps = new Set([...gpuMap.keys(), ...sysMap.keys()]);
    if (allTimestamps.size === 0) return;

    // Only count the host as seen when the poll actually returned samples.
    stampLastSeen(state);

    for (const ts of allTimestamps) {
      // Either side may be missing for a given timestamp; store it as null.
      const gpu = gpuMap.get(ts) ?? null;
      const sys = sysMap.get(ts) ?? null;

      await sql`
        INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
        VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
        ON CONFLICT (provider_id, ts) DO NOTHING
      `;

      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_perf' as const,
        seq,
        providerId,
        ts,
        gpu,
        sys,
      });
    }
  } catch (err: unknown) {
    // A8 fix: log the error instead of swallowing silently. Narrow instead of
    // casting so a thrown null/undefined cannot make this handler throw.
    const msg = err instanceof Error ? err.message : String(err);
    console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
  }
}
|
||||
|
||||
// ─── fleet-state rebuild from DB (A1/F2 fix) ─────────────────────────────────
|
||||
|
||||
/**
 * Rebuilds the in-memory fleet state from the database, so that hosts and
 * their last known model states exist before any live event has arrived.
 *
 * - Every provider found in `control_model_events` gets a host state marked
 *   `down` (no live connection has been confirmed yet) with its models set to
 *   their most recent recorded state.
 * - Providers found only in `control_requests` or `control_perf_samples` get a
 *   host state created and stamped as seen.
 *
 * NOTE(review): `stampLastSeen` is called without the stored timestamp, so it
 * presumably stamps the current time rather than the time of the last stored
 * row — confirm that this is intended for a rebuild.
 *
 * @param fleet - Fleet state to populate.
 * @param sql - Database handle.
 */
async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
  // Model name used for host-level marker rows (e.g. 'gap_suspected') that the
  // reconcile pass stores in control_model_events. These are not real models.
  const HOST_MARKER_MODEL = '*';

  // Query control_model_events for latest model state per provider.
  // B3: ORDER BY ASC so iteration processes oldest first; Map.set() overwrites
  // with the latest state for each model, so the newest event wins.
  const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
    SELECT provider_id, model, state, ts, detail
    FROM control_model_events
    WHERE ts IN (
      SELECT MAX(ts) FROM control_model_events
      GROUP BY provider_id, model, state
    )
    ORDER BY ts ASC
  `;

  for (const row of modelEvents) {
    const state = ensureHostState(fleet, row.provider_id);
    state.liveness = 'down';
    stampLastSeen(state);

    // Marker rows still prove the host exists (handled above), but must not be
    // loaded as a model: that would surface a phantom model named '*'.
    if (row.model === HOST_MARKER_MODEL) continue;

    // row.detail is jsonb (porsager returns it parsed); jsonbObject tolerates
    // both a parsed object and a JSON string.
    const detail: unknown = jsonbObject(row.detail);

    // B4: derive the TTL deadline from the event timestamp, not from the time
    // of the rebuild, so it reflects when the event was recorded. `ttl` is
    // multiplied by 1000 before being added to a millisecond timestamp, i.e.
    // it is treated as seconds. A missing or zero ttl yields no deadline.
    const ttl = (detail as { ttl?: number })?.ttl;
    const eventTs = new Date(row.ts).getTime();
    const ttlDeadline = ttl ? new Date(eventTs + ttl * 1000) : null;

    state.models.set(row.model, {
      model: row.model,
      state: row.state,
      ts: new Date(row.ts),
      ttlDeadline,
      // In-flight counts are not persisted; start from zero.
      inflight: 0,
    });
  }

  // Query control_requests for last activity.
  const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_requests
    WHERE ts IN (
      SELECT MAX(ts) FROM control_requests GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;

  for (const row of lastRequests) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }

  // Query control_perf_samples for latest perf sample.
  const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_perf_samples
    WHERE ts IN (
      SELECT MAX(ts) FROM control_perf_samples GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;

  for (const row of lastPerf) {
    const state = ensureHostState(fleet, row.provider_id);
    stampLastSeen(state);
  }
}
|
||||
|
||||
// ─── main ───────────────────────────────────────────────────────────────────
|
||||
import { loadLlamaProviders } from "./services/llama-providers.js";
|
||||
import { GATEWAY_KIND } from "@boocode/contracts/gateway";
|
||||
import { createDeltaEmitter } from "./services/delta-emitter.js";
|
||||
import type { AppContext } from './app-context.js';
|
||||
export type { DeltaEmitter } from './services/delta-emitter.js';
|
||||
import { handleLlamaSweepEvent } from './services/sse-pipeline.js';
|
||||
import { pollPerformance } from './services/perf-poller.js';
|
||||
import { rebuildFleetFromDB } from './services/fleet-rebuild.js';
|
||||
|
||||
async function main() {
|
||||
const config = loadConfig();
|
||||
@@ -456,18 +59,19 @@ async function main() {
|
||||
// P2: Action queue + log relay
|
||||
const actionQueue = new ActionQueue();
|
||||
const logRelay = new LogRelay();
|
||||
registerControlWebSocket(app, fleet, emitter, logRelay);
|
||||
registerActionRoutes(app, actionQueue, fleet, emitter);
|
||||
const ctx: AppContext = { sql, config, fleet, emitter, actionQueue, logRelay };
|
||||
registerControlWebSocket(app, ctx);
|
||||
registerActionRoutes(app, ctx);
|
||||
registerCaptureRoutes(app, sql);
|
||||
setBenchApp(app.log);
|
||||
registerBenchRoutes(app, sql, fleet, emitter);
|
||||
registerBenchRoutes(app, ctx);
|
||||
registerPlaygroundRoutes(app);
|
||||
registerEvalRoutes(app, sql, fleet, emitter);
|
||||
registerEvalRoutes(app, ctx);
|
||||
registerRoutingRoutes(app, sql, fleet);
|
||||
registerReportRoutes(app, sql);
|
||||
registerGatewayRoutes(app, sql, fleet, emitter);
|
||||
registerGatewayRoutes(app, ctx);
|
||||
registerPolicyRoutes(app, sql);
|
||||
registerSshConfigRoutes(app, sql, config, fleet, emitter);
|
||||
registerSshConfigRoutes(app, ctx);
|
||||
|
||||
// Health endpoint.
|
||||
app.get('/api/health', async (_req: unknown, reply: import('fastify').FastifyReply) => {
|
||||
@@ -488,11 +92,7 @@ async function main() {
|
||||
const registry = loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
|
||||
app.log.info({ count: registry.providers.length }, 'fleet: provider registry loaded');
|
||||
|
||||
// P7.2: the auto:* gateway is itself a registry entry (kind boocontrol-gateway)
|
||||
// so BooChat adopts it as a provider. BooControl must NOT treat it as a fleet
|
||||
// host — it has no llama-swap SSE/perf surface and its baseUrl points back at
|
||||
// this service. Filter it out of every fleet operation.
|
||||
const fleetProviders = registry.providers.filter((p) => p.kind !== 'boocontrol-gateway');
|
||||
const fleetProviders = registry.providers.filter((p) => p.kind !== GATEWAY_KIND);
|
||||
|
||||
// JOIN registry providers with control_hosts for the enabled flag.
|
||||
// Insert a control_hosts row ON CONFLICT DO NOTHING for any registry provider
|
||||
@@ -545,7 +145,6 @@ async function main() {
|
||||
sql,
|
||||
log: app.log,
|
||||
onEvent: (pid, event) => handleLlamaSweepEvent(fleet, sql, config, pid, emitter, event, logRelay),
|
||||
onReconcile: (pid, metrics) => handleReconcile(fleet, sql, config, pid, emitter, metrics),
|
||||
onReconnectGiveUp: async (pid) => {
|
||||
const state = ensureHostState(fleet, pid);
|
||||
state.liveness = 'down';
|
||||
@@ -567,15 +166,16 @@ async function main() {
|
||||
// Retention job: daily timer — iterate registry providers.
|
||||
const retentionConfig = buildRetentionConfig(config);
|
||||
const retentionTimer = setInterval(async () => {
|
||||
// Per-provider work: rollup + raw-sample prune (both scoped to provider_id).
|
||||
for (const provider of fleetProviders) {
|
||||
const enabled = enabledMap.get(provider.id) ?? true;
|
||||
if (!enabled) continue;
|
||||
await runRollup(sql, provider.id, retentionConfig.rawHours);
|
||||
// A2 fix: chunk pruneRawSamples (already chunked), also chunk pruneActivity and pruneModelEvents.
|
||||
await pruneRawSamples(sql, provider.id, retentionConfig.rawHours);
|
||||
await pruneActivity(sql, retentionConfig.rawHours);
|
||||
await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
|
||||
}
|
||||
// Global prunes (no provider_id filter) run ONCE, not once per provider.
|
||||
await pruneActivity(sql, retentionConfig.rawHours);
|
||||
await pruneModelEvents(sql, retentionConfig.rollupDays * 24);
|
||||
}, 24 * 3600_000); // daily
|
||||
|
||||
// P6.2: Report digest scheduler (catch-up on boot, then hourly).
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { ActionQueue } from '../services/action-queue.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { publishJob } from '../services/publish-job.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* Register action submission routes.
|
||||
@@ -12,10 +11,9 @@ import type { DeltaEmitter } from '../index.js';
|
||||
*/
|
||||
export function registerActionRoutes(
|
||||
app: FastifyInstance,
|
||||
actionQueue: ActionQueue,
|
||||
fleet: FleetState,
|
||||
emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
const { actionQueue, fleet, emitter } = ctx;
|
||||
app.post('/api/action/submit', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const type = body.type as string;
|
||||
@@ -30,7 +28,6 @@ export function registerActionRoutes(
|
||||
return reply.status(400).send({ error: 'providerId is required' });
|
||||
}
|
||||
|
||||
// Check host liveness
|
||||
const hostState = fleet.hosts.get(providerId);
|
||||
if (!hostState || hostState.liveness === 'down') {
|
||||
return reply.status(409).send({ error: 'host offline' });
|
||||
@@ -63,13 +60,11 @@ export function registerActionRoutes(
|
||||
return reply.status(409).send({ error: result.error });
|
||||
}
|
||||
|
||||
// Publish action queued event
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq: hostState.seq,
|
||||
jobType: 'action' as const,
|
||||
jobType: 'action',
|
||||
jobId: action.actionId,
|
||||
status: 'queued' as const,
|
||||
status: 'queued',
|
||||
detail: {
|
||||
actionType: action.type,
|
||||
providerId: action.providerId,
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { FastifyBaseLogger, FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { DeltaEmitter } from '../services/delta-emitter.js';
|
||||
import { publishJob } from '../services/publish-job.js';
|
||||
import { acquireHostAccess } from '../services/host-access.js';
|
||||
import type { BenchSuite, BenchRunProgress } from '../services/bench-engine.js';
|
||||
import { runBenchSuite } from '../services/bench-engine.js';
|
||||
import { runBenchSuite, type BenchSuite, type BenchRunProgress } from "../services/bench-engine.js";
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* Register bench routes.
|
||||
@@ -22,11 +22,9 @@ import { jsonbNumberArray, jsonbObject } from '../services/jsonb.js';
|
||||
*/
|
||||
export function registerBenchRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
fleet: FleetState,
|
||||
emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
// ─── suite CRUD ──────────────────────────────────────────────────────────
|
||||
const { sql, fleet, emitter } = ctx;
|
||||
|
||||
app.post('/api/bench/suite', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
@@ -136,8 +134,6 @@ export function registerBenchRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── run launcher (P3.3: safety gates + P3.4: acquireHostAccess) ─────────
|
||||
|
||||
app.post('/api/bench/run', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const suiteId = body.suiteId as string;
|
||||
@@ -148,7 +144,6 @@ export function registerBenchRoutes(
|
||||
return reply.status(400).send({ error: 'suiteId is required' });
|
||||
}
|
||||
|
||||
// Load suite.
|
||||
const suiteRows = await sql<{
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -200,7 +195,6 @@ export function registerBenchRoutes(
|
||||
return reply.status(400).send({ error: `no base URL configured for provider ${suite.providerId}` });
|
||||
}
|
||||
|
||||
// Get seq for the host.
|
||||
const seq = hostState?.seq ?? 0;
|
||||
|
||||
// Run the bench suite asynchronously (non-blocking HTTP response).
|
||||
@@ -219,8 +213,6 @@ export function registerBenchRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── runs listing ────────────────────────────────────────────────────────
|
||||
|
||||
app.get('/api/bench/runs', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const query = req.query as Record<string, string | undefined>;
|
||||
const suiteId = query.suiteId;
|
||||
@@ -353,8 +345,6 @@ export function registerBenchRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── baselines ───────────────────────────────────────────────────────────
|
||||
|
||||
app.get('/api/bench/baselines', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
const rows = await sql<{
|
||||
provider_id: string;
|
||||
@@ -471,12 +461,11 @@ async function runBenchAsync(
|
||||
WHERE id = ${runId}
|
||||
`;
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'bench' as const,
|
||||
jobType: 'bench',
|
||||
jobId: runId,
|
||||
status: 'failed' as const,
|
||||
status: 'failed',
|
||||
detail: { error: msg },
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../services/delta-emitter.js';
|
||||
import { publishJob } from '../services/publish-job.js';
|
||||
import {
|
||||
listEvalSuites,
|
||||
getEvalSuite,
|
||||
@@ -11,6 +11,8 @@ import {
|
||||
seedEvalSuites,
|
||||
} from '../services/eval-suites.js';
|
||||
import { jsonbArray, jsonbObject } from '../services/jsonb.js';
|
||||
import { acquireHostAccess } from '../services/host-access.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* Register eval routes.
|
||||
@@ -26,10 +28,9 @@ import { jsonbArray, jsonbObject } from '../services/jsonb.js';
|
||||
*/
|
||||
export function registerEvalRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
fleet: FleetState,
|
||||
emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
const { sql, fleet, emitter } = ctx;
|
||||
// Seed suites from data/ YAML on startup (idempotent).
|
||||
app.addHook('onReady', async () => {
|
||||
await seedEvalSuites(sql).catch((err) => {
|
||||
@@ -37,8 +38,6 @@ export function registerEvalRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── suite CRUD ──────────────────────────────────────────────────────────
|
||||
|
||||
app.post('/api/eval/suite', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const id = (body.id as string) ?? null;
|
||||
@@ -92,15 +91,11 @@ export function registerEvalRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── seed from data/ ─────────────────────────────────────────────────────
|
||||
|
||||
app.post('/api/eval/seed', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
await seedEvalSuites(sql);
|
||||
return reply.send({ ok: true });
|
||||
});
|
||||
|
||||
// ─── run launcher ────────────────────────────────────────────────────────
|
||||
|
||||
app.post('/api/eval/run', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const suiteId = body.suiteId as string;
|
||||
@@ -117,11 +112,15 @@ export function registerEvalRoutes(
|
||||
return reply.status(404).send({ error: 'suite not found' });
|
||||
}
|
||||
|
||||
const grant = await acquireHostAccess(providerId, 'eval');
|
||||
if (!grant.ok) {
|
||||
return reply.status(409).send({ error: 'host access denied', reason: grant.reason });
|
||||
}
|
||||
|
||||
const tasks = jsonbArray(suite.tasks);
|
||||
const judgeModel = suite.judge_model;
|
||||
const seq = fleet.hosts.get(providerId)?.seq ?? 0;
|
||||
|
||||
// Start the eval run asynchronously.
|
||||
void runEvalAsync(
|
||||
{ suiteId, providerId, model, quant, tasks, judgeModel },
|
||||
sql,
|
||||
@@ -133,8 +132,6 @@ export function registerEvalRoutes(
|
||||
return reply.status(202).send({ status: 'queued', suiteId, providerId, model });
|
||||
});
|
||||
|
||||
// ─── runs listing ────────────────────────────────────────────────────────
|
||||
|
||||
app.get('/api/eval/runs', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const query = req.query as Record<string, string | undefined>;
|
||||
const runs = await listEvalRuns(sql, query.suiteId, query.providerId);
|
||||
@@ -203,8 +200,6 @@ export function registerEvalRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── leaderboard ─────────────────────────────────────────────────────────
|
||||
|
||||
app.get('/api/eval/leaderboard', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const query = req.query as Record<string, string | undefined>;
|
||||
const kind = query.kind as 'chat' | 'code' | undefined;
|
||||
@@ -276,12 +271,11 @@ async function runEvalAsync(
|
||||
VALUES (${runId}, ${suiteId}, 'eval', ${providerId}, ${model}, ${quant}, 'running', ${judgeModel}, clock_timestamp(), ${tasks.length})
|
||||
`;
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'eval' as const,
|
||||
jobType: 'eval',
|
||||
jobId: runId,
|
||||
status: 'running' as const,
|
||||
status: 'running',
|
||||
detail: { suiteId, providerId, model, totalTasks: tasks.length },
|
||||
});
|
||||
|
||||
@@ -336,12 +330,11 @@ async function runEvalAsync(
|
||||
WHERE id = ${runId}
|
||||
`;
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'eval' as const,
|
||||
jobType: 'eval',
|
||||
jobId: runId,
|
||||
status: error ? 'failed' as const : 'completed' as const,
|
||||
status: error ? 'failed' : 'completed',
|
||||
detail: { avgScore, error },
|
||||
});
|
||||
} catch (err) {
|
||||
@@ -354,12 +347,11 @@ async function runEvalAsync(
|
||||
WHERE id = ${runId}
|
||||
`.catch(() => {});
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'eval' as const,
|
||||
jobType: 'eval',
|
||||
jobId: runId,
|
||||
status: 'failed' as const,
|
||||
status: 'failed',
|
||||
detail: { error: msg },
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import {
|
||||
VIRTUAL_MODELS,
|
||||
resolveCandidates,
|
||||
splitComposite,
|
||||
} from '../services/gateway.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* P7.1: OpenAI-compatible auto:* gateway.
|
||||
@@ -25,11 +25,9 @@ import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
*/
|
||||
export function registerGatewayRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
fleet: FleetState,
|
||||
_emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
// ─── model catalog ───────────────────────────────────────────────────────
|
||||
const { sql, fleet } = ctx;
|
||||
|
||||
app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
return reply.send({
|
||||
@@ -43,10 +41,6 @@ export function registerGatewayRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── props (for getModelContext) ─────────────────────────────────────────
|
||||
// Resolve candidates and proxy the first healthy candidate's props so the
|
||||
// caller can read default_generation_settings.n_ctx.
|
||||
|
||||
app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const { model } = req.params as { model: string };
|
||||
const { candidates } = await resolveCandidates(sql, fleet, model);
|
||||
@@ -69,8 +63,6 @@ export function registerGatewayRoutes(
|
||||
return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
|
||||
});
|
||||
|
||||
// ─── chat completions (dispatch with failover) ───────────────────────────
|
||||
|
||||
app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const requestedModel = body?.model as string | undefined;
|
||||
@@ -113,11 +105,20 @@ export function registerGatewayRoutes(
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
// HTTP error before body — eligible for failover to the next candidate.
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
|
||||
// A null body on an OK response is a broken upstream; fail over to the
|
||||
// next candidate (nothing has been committed to the client yet).
|
||||
const reader = stream ? res.body?.getReader() : null;
|
||||
if (stream && !reader) {
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Success: dispatch chosen. Log and stream/return through.
|
||||
recordSuccess(compositeId);
|
||||
await logDispatch(sql, {
|
||||
virtualModel,
|
||||
chosen: compositeId,
|
||||
@@ -128,16 +129,11 @@ export function registerGatewayRoutes(
|
||||
durationMs: Date.now() - startedAt,
|
||||
});
|
||||
|
||||
if (stream) {
|
||||
if (stream && reader) {
|
||||
reply.header('Content-Type', 'text/event-stream');
|
||||
reply.header('Cache-Control', 'no-cache');
|
||||
reply.header('Connection', 'keep-alive');
|
||||
reply.raw.writeHead(200);
|
||||
const reader = res.body?.getReader();
|
||||
if (!reader) {
|
||||
reply.raw.end();
|
||||
return;
|
||||
}
|
||||
const decoder = new TextDecoder();
|
||||
try {
|
||||
while (true) {
|
||||
@@ -155,7 +151,7 @@ export function registerGatewayRoutes(
|
||||
const json = await res.json();
|
||||
return reply.send(json);
|
||||
} catch {
|
||||
// Connection error — failover to the next candidate.
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@ import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-pro
|
||||
export function registerPlaygroundRoutes(
|
||||
app: FastifyInstance,
|
||||
): void {
|
||||
// ─── model catalog ───────────────────────────────────────────────────────
|
||||
|
||||
app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
// Resolve provider URLs from the loaded registry.
|
||||
@@ -49,8 +48,6 @@ export function registerPlaygroundRoutes(
|
||||
return reply.send({ models });
|
||||
});
|
||||
|
||||
// ─── streaming chat ──────────────────────────────────────────────────────
|
||||
|
||||
app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const providerId = body.providerId as string;
|
||||
@@ -138,8 +135,6 @@ export function registerPlaygroundRoutes(
|
||||
}
|
||||
});
|
||||
|
||||
// ─── A/B compare ─────────────────────────────────────────────────────────
|
||||
|
||||
app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const providerIdA = body.providerIdA as string;
|
||||
@@ -224,7 +219,6 @@ export function registerPlaygroundRoutes(
|
||||
}
|
||||
};
|
||||
|
||||
// Run both streams concurrently.
|
||||
await Promise.all([
|
||||
streamModel('A', baseUrlA, modelA),
|
||||
streamModel('B', baseUrlB, modelB),
|
||||
|
||||
@@ -5,9 +5,8 @@ import { dirname, resolve } from 'node:path';
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
import {
|
||||
validateLlamaConfig,
|
||||
computeDiff,
|
||||
@@ -35,12 +34,10 @@ import { runModelPull, validateRepoId } from '../services/model-pull.js';
|
||||
*/
|
||||
export function registerSshConfigRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
config: Config,
|
||||
fleet: FleetState,
|
||||
emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
exec: SshExec = sshExec,
|
||||
): void {
|
||||
const { sql, config, fleet, emitter } = ctx;
|
||||
const schema = loadConfigSchema(config);
|
||||
|
||||
app.get('/api/hosts', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
@@ -181,7 +178,6 @@ export function registerSshConfigRoutes(
|
||||
return reply.status(status).send(result);
|
||||
});
|
||||
|
||||
// ─── model pull (non-blocking job) ─────────────────────────────────────────
|
||||
app.post('/api/hosts/:id/pull', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const { id } = req.params as { id: string };
|
||||
const body = (req.body as Record<string, unknown>) ?? {};
|
||||
@@ -205,7 +201,7 @@ export function registerSshConfigRoutes(
|
||||
const jobId = `pull_${Date.now()}_${randomUUID().slice(0, 8)}`;
|
||||
const seq = fleet.hosts.get(id)?.seq ?? 0;
|
||||
// Fire and forget; progress streams over control_job frames.
|
||||
void runModelPull({ jobId, target, repo, mode, modelsDir }, exec, emitter, seq);
|
||||
void runModelPull({ jobId, providerId: id, target, repo, mode, modelsDir }, exec, emitter, seq);
|
||||
|
||||
return reply.status(202).send({ status: 'queued', jobId, repo });
|
||||
});
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import WebSocket from 'ws';
|
||||
import type { FleetState, HostState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { LogRelay } from '../services/log-relay.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* WS endpoint: /api/ws/control
|
||||
@@ -17,11 +16,10 @@ import type { LogRelay } from '../services/log-relay.js';
|
||||
*/
|
||||
export function registerControlWebSocket(
|
||||
app: FastifyInstance,
|
||||
fleet: FleetState,
|
||||
emitter: DeltaEmitter,
|
||||
logRelay: LogRelay | null = null,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
app.get('/api/ws/control', { websocket: true }, (socket, req) => {
|
||||
const { fleet, emitter, logRelay } = ctx;
|
||||
app.get('/api/ws/control', { websocket: true }, (socket, _req) => {
|
||||
const fleetState = fleet;
|
||||
const snapshot = buildSnapshot(fleetState);
|
||||
|
||||
@@ -80,7 +78,7 @@ export function registerControlWebSocket(
|
||||
*/
|
||||
function buildSnapshot(fleet: FleetState): { hosts: Array<{
|
||||
providerId: string;
|
||||
liveness: 'connected' | 'reconnecting' | 'down';
|
||||
liveness: 'connected' | 'down';
|
||||
lastSeenAt: string | null;
|
||||
seq: number;
|
||||
models: Array<{
|
||||
|
||||
@@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { parseLlamaTimings, computeAggregates, runSingleBenchRequest } from '../../index.js';
|
||||
import { computeRegressionFlag } from '../bench-engine.js';
|
||||
import { createFleetState, ensureHostState } from '../fleet-state.js';
|
||||
import { createDeltaEmitter } from '../../index.js';
|
||||
import { createDeltaEmitter } from '../delta-emitter.js';
|
||||
import type { Sql } from '../../db.js';
|
||||
import type { Config } from '../../config.js';
|
||||
import type { BenchSuite } from '../bench-engine.js';
|
||||
|
||||
@@ -4,8 +4,10 @@ import {
|
||||
parseVirtualModel,
|
||||
orderCandidates,
|
||||
splitComposite,
|
||||
fleetModelCandidates,
|
||||
} from '../gateway.js';
|
||||
import type { ModelScore } from '../routing-scores.js';
|
||||
import { createFleetState, ensureHostState } from '../fleet-state.js';
|
||||
|
||||
function score(compositeId: string, partial: Partial<ModelScore> = {}): ModelScore {
|
||||
return {
|
||||
@@ -90,3 +92,29 @@ describe('orderCandidates', () => {
|
||||
expect(ordered).toEqual(['a/never-seen', 'a/known']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('fleetModelCandidates (cold-start fallback)', () => {
|
||||
it('lists connected hosts models, ready first, skips down hosts', () => {
|
||||
const fleet = createFleetState();
|
||||
const a = ensureHostState(fleet, 'sam-desktop');
|
||||
a.liveness = 'connected';
|
||||
a.models.set('m-ready', { model: 'm-ready', state: 'ready', ts: new Date(0), ttlDeadline: null, inflight: 0 });
|
||||
a.models.set('m-stop', { model: 'm-stop', state: 'stopped', ts: new Date(0), ttlDeadline: null, inflight: 0 });
|
||||
const b = ensureHostState(fleet, 'embedding');
|
||||
b.liveness = 'down';
|
||||
b.models.set('x', { model: 'x', state: 'ready', ts: new Date(0), ttlDeadline: null, inflight: 0 });
|
||||
|
||||
const c = fleetModelCandidates(fleet);
|
||||
expect(c).toContain('sam-desktop/m-ready');
|
||||
expect(c).toContain('sam-desktop/m-stop');
|
||||
expect(c.indexOf('sam-desktop/m-ready')).toBeLessThan(c.indexOf('sam-desktop/m-stop')); // ready first
|
||||
expect(c).not.toContain('embedding/x'); // down host excluded
|
||||
});
|
||||
|
||||
it('returns [] for an all-down fleet', () => {
|
||||
const fleet = createFleetState();
|
||||
const a = ensureHostState(fleet, 'h');
|
||||
a.liveness = 'down';
|
||||
expect(fleetModelCandidates(fleet)).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,102 +1,48 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import type { HostState } from '../fleet-state.js';
|
||||
|
||||
type Liveness = 'connected' | 'reconnecting' | 'down';
|
||||
// Production never runs a reconnect state machine: a host is 'connected' when
|
||||
// the SSE handshake/poll succeeds and 'down' when it drops (index.ts sets only
|
||||
// those two). The 'reconnecting' state lives on the WS *connection* pill
|
||||
// (ControlConnection in apps/web), not on per-host liveness. This pins that
|
||||
// two-state model.
|
||||
type Liveness = HostState['liveness'];
|
||||
|
||||
function transitionLiveness(current: Liveness, event: 'connect' | 'disconnect' | 'reconnect_attempt' | 'reconnect_success'): Liveness {
|
||||
switch (event) {
|
||||
case 'connect':
|
||||
return 'connected';
|
||||
case 'disconnect':
|
||||
return 'down';
|
||||
case 'reconnect_attempt':
|
||||
return 'reconnecting';
|
||||
case 'reconnect_success':
|
||||
return 'connected';
|
||||
}
|
||||
function transitionLiveness(_current: Liveness, event: 'connect' | 'disconnect'): Liveness {
|
||||
return event === 'connect' ? 'connected' : 'down';
|
||||
}
|
||||
|
||||
function makeHost(liveness: Liveness, lastSeenAt: Date | null): HostState {
|
||||
return {
|
||||
providerId: 'test',
|
||||
liveness,
|
||||
lastSeenAt,
|
||||
seq: 0,
|
||||
inflightTotal: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
}
|
||||
|
||||
describe('liveness state machine', () => {
|
||||
it('starts as down', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'down',
|
||||
lastSeenAt: null,
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
expect(state.liveness).toBe('down');
|
||||
expect(makeHost('down', null).liveness).toBe('down');
|
||||
});
|
||||
|
||||
it('connect -> connected', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'down',
|
||||
lastSeenAt: null,
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
const state = makeHost('down', null);
|
||||
state.liveness = transitionLiveness(state.liveness, 'connect');
|
||||
expect(state.liveness).toBe('connected');
|
||||
});
|
||||
|
||||
it('connected -> down on disconnect', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'connected',
|
||||
lastSeenAt: new Date(),
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
const state = makeHost('connected', new Date());
|
||||
state.liveness = transitionLiveness(state.liveness, 'disconnect');
|
||||
expect(state.liveness).toBe('down');
|
||||
});
|
||||
|
||||
it('down -> reconnecting on reconnect attempt', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'down',
|
||||
lastSeenAt: null,
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
state.liveness = transitionLiveness(state.liveness, 'reconnect_attempt');
|
||||
expect(state.liveness).toBe('reconnecting');
|
||||
});
|
||||
|
||||
it('reconnecting -> connected on reconnect success', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'reconnecting',
|
||||
lastSeenAt: null,
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
state.liveness = transitionLiveness(state.liveness, 'reconnect_success');
|
||||
it('down -> connected on reconnect (no intermediate reconnecting state)', () => {
|
||||
const state = makeHost('down', null);
|
||||
state.liveness = transitionLiveness(state.liveness, 'connect');
|
||||
expect(state.liveness).toBe('connected');
|
||||
});
|
||||
|
||||
it('connected -> reconnecting on reconnect attempt', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'connected',
|
||||
lastSeenAt: new Date(),
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
state.liveness = transitionLiveness(state.liveness, 'reconnect_attempt');
|
||||
expect(state.liveness).toBe('reconnecting');
|
||||
});
|
||||
|
||||
it('reconnecting -> down on reconnect failure', () => {
|
||||
const state: HostState = {
|
||||
providerId: 'test',
|
||||
liveness: 'reconnecting',
|
||||
lastSeenAt: null,
|
||||
seq: 0,
|
||||
models: new Map(),
|
||||
};
|
||||
state.liveness = transitionLiveness(state.liveness, 'disconnect');
|
||||
expect(state.liveness).toBe('down');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { validateRepoId, buildPullCommand, runModelPull } from '../model-pull.js';
|
||||
import type { SshExec, ExecResult } from '../ssh-config.js';
|
||||
import type { DeltaEmitter } from '../../index.js';
|
||||
import type { DeltaEmitter } from '../delta-emitter.js';
|
||||
|
||||
describe('validateRepoId', () => {
|
||||
it('accepts org/name', () => {
|
||||
@@ -49,7 +49,7 @@ describe('runModelPull', () => {
|
||||
it('rejects an invalid repo id before issuing any command', async () => {
|
||||
const { emitter, frames } = emitterSpy();
|
||||
const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });
|
||||
const r = await runModelPull({ jobId: 'j1', target, repo: '../x', mode: 'wrapper' }, exec, emitter);
|
||||
const r = await runModelPull({ jobId: 'j1', providerId: 'test-provider', target, repo: '../x', mode: 'wrapper' }, exec, emitter);
|
||||
expect(r.ok).toBe(false);
|
||||
expect(calls).toHaveLength(0);
|
||||
expect(frames[frames.length - 1]).toMatchObject({ type: 'control_job', status: 'failed' });
|
||||
@@ -58,7 +58,7 @@ describe('runModelPull', () => {
|
||||
it('runs the wrapper pull verb and emits running then completed', async () => {
|
||||
const { emitter, frames } = emitterSpy();
|
||||
const { exec, calls } = execReturning({ code: 0, stdout: 'done', stderr: '' });
|
||||
const r = await runModelPull({ jobId: 'j2', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
|
||||
const r = await runModelPull({ jobId: 'j2', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
|
||||
expect(r.ok).toBe(true);
|
||||
expect(calls).toEqual(['pull Qwen/Q3']);
|
||||
expect(frames.map((f) => f.status)).toEqual(['running', 'completed']);
|
||||
@@ -68,7 +68,7 @@ describe('runModelPull', () => {
|
||||
it('reports a non-zero exit as failed', async () => {
|
||||
const { emitter, frames } = emitterSpy();
|
||||
const { exec } = execReturning({ code: 1, stdout: '', stderr: 'no such repo' });
|
||||
const r = await runModelPull({ jobId: 'j3', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
|
||||
const r = await runModelPull({ jobId: 'j3', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'wrapper' }, exec, emitter);
|
||||
expect(r.ok).toBe(false);
|
||||
expect(frames[frames.length - 1]).toMatchObject({ status: 'failed' });
|
||||
});
|
||||
@@ -76,7 +76,7 @@ describe('runModelPull', () => {
|
||||
it('shell mode without a models dir fails fast', async () => {
|
||||
const { emitter } = emitterSpy();
|
||||
const { exec, calls } = execReturning({ code: 0, stdout: '', stderr: '' });
|
||||
const r = await runModelPull({ jobId: 'j4', target, repo: 'Qwen/Q3', mode: 'shell' }, exec, emitter);
|
||||
const r = await runModelPull({ jobId: 'j4', providerId: 'test-provider', target, repo: 'Qwen/Q3', mode: 'shell' }, exec, emitter);
|
||||
expect(r.ok).toBe(false);
|
||||
expect(calls).toHaveLength(0);
|
||||
});
|
||||
|
||||
@@ -2,8 +2,9 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { parseSseLine } from '../fleet-connector.js';
|
||||
import type { LlamaSweepSSEEvent, MetricsEntry, ModelStatusEntry } from '../fleet-connector.js';
|
||||
import { createFleetState, ensureHostState, incrementSeq } from '../fleet-state.js';
|
||||
import { createDeltaEmitter, handleLlamaSweepEvent } from '../../index.js';
|
||||
import type { DeltaEmitter } from '../../index.js';
|
||||
import { createDeltaEmitter } from '../delta-emitter.js';
|
||||
import { handleLlamaSweepEvent } from '../sse-pipeline.js';
|
||||
import type { DeltaEmitter } from '../delta-emitter.js';
|
||||
import type { Sql } from '../../db.js';
|
||||
import type { Config } from '../../config.js';
|
||||
|
||||
|
||||
@@ -77,7 +77,6 @@ export class ActionQueue {
|
||||
return { ok: false, error: `queue not initialized for ${action.providerId}` };
|
||||
}
|
||||
|
||||
// Check bench in progress for unload actions
|
||||
if (action.type === 'unload' && !action.confirmed) {
|
||||
const inflight = deps.isInflightRequests();
|
||||
if (inflight > 0) {
|
||||
@@ -142,7 +141,6 @@ export class ActionQueue {
|
||||
entry.error = 'host went down during queue wait';
|
||||
state.queue.shift();
|
||||
state.running = false;
|
||||
// Process next
|
||||
void this.processNext(providerId, deps);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
*/
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import { publishJob } from './publish-job.js';
|
||||
import { jsonbObject } from './jsonb.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
@@ -281,13 +282,11 @@ export async function runBenchSuite(
|
||||
VALUES (${runId}, ${suite.id}, 'bench', 'running', clock_timestamp(), ${totalSamples}, ${temperature}, ${topP})
|
||||
`;
|
||||
|
||||
// Publish run started.
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'bench' as const,
|
||||
jobType: 'bench',
|
||||
jobId: runId,
|
||||
status: 'running' as const,
|
||||
status: 'running',
|
||||
detail: {
|
||||
suiteId: suite.id,
|
||||
providerId: suite.providerId,
|
||||
@@ -326,7 +325,7 @@ export async function runBenchSuite(
|
||||
groups.get(key)!.push(item);
|
||||
}
|
||||
|
||||
for (const [key, group] of groups) {
|
||||
for (const [_key, group] of groups) {
|
||||
const concurrency = group[0]!.concurrency;
|
||||
const batchSize = Math.min(concurrency, group.length);
|
||||
|
||||
@@ -367,13 +366,11 @@ export async function runBenchSuite(
|
||||
currentRepetition: current.repetition,
|
||||
});
|
||||
|
||||
// Publish progress
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'bench' as const,
|
||||
jobType: 'bench',
|
||||
jobId: runId,
|
||||
status: 'running' as const,
|
||||
status: 'running',
|
||||
detail: {
|
||||
completedSamples: completed,
|
||||
totalSamples,
|
||||
@@ -423,13 +420,11 @@ export async function runBenchSuite(
|
||||
WHERE id = ${runId}
|
||||
`;
|
||||
|
||||
// Publish completion.
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'bench' as const,
|
||||
jobType: 'bench',
|
||||
jobId: runId,
|
||||
status: 'completed' as const,
|
||||
status: 'completed',
|
||||
detail: { ...aggregate, regressionFlag },
|
||||
});
|
||||
}
|
||||
|
||||
39
apps/control/src/services/circuit-breaker.ts
Normal file
39
apps/control/src/services/circuit-breaker.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
 * Per-candidate failure bookkeeping for the in-memory circuit breaker.
 * `cooldownUntil` is an epoch-millisecond deadline; 0 means the breaker has
 * not been opened for this candidate.
 */
interface BreakerEntry {
  failures: number;
  lastFailure: number;
  cooldownUntil: number;
}

// Process-local state, keyed by composite id.
const breakers = new Map<string, BreakerEntry>();

/** Failure count at which a candidate is put into cooldown. */
const THRESHOLD = 3;
/** How long a tripped candidate stays excluded, in milliseconds. */
const COOLDOWN_MS = 30_000;
/** Maximum gap between two failures for them to count as one streak, in milliseconds. */
const WINDOW_MS = 60_000;

/**
 * Register a failed attempt against `compositeId`.
 *
 * A failure that arrives no more than WINDOW_MS after the previous one extends
 * the current streak; once the streak reaches THRESHOLD the cooldown deadline
 * is (re)set to COOLDOWN_MS from now. Any other failure starts a new streak
 * of one with no cooldown.
 */
export function recordFailure(compositeId: string): void {
  const now = Date.now();
  const previous = breakers.get(compositeId);

  if (previous !== undefined && now - previous.lastFailure <= WINDOW_MS) {
    previous.failures += 1;
    previous.lastFailure = now;
    if (previous.failures >= THRESHOLD) {
      previous.cooldownUntil = now + COOLDOWN_MS;
    }
    return;
  }

  // First failure ever, or the previous streak is too old to count.
  breakers.set(compositeId, { failures: 1, lastFailure: now, cooldownUntil: 0 });
}

/** Forget every recorded failure for `compositeId`. */
export function recordSuccess(compositeId: string): void {
  breakers.delete(compositeId);
}

/**
 * Report whether `compositeId` is currently in cooldown.
 *
 * @returns `true` while the cooldown deadline has not passed. Once it has
 *   passed the entry is dropped (so the failure streak restarts) and `false`
 *   is returned. Unknown or never-tripped ids return `false`.
 */
export function isTripped(compositeId: string): boolean {
  const cooldownUntil = breakers.get(compositeId)?.cooldownUntil ?? 0;
  if (cooldownUntil === 0) return false;
  if (Date.now() <= cooldownUntil) return true;

  // Cooldown elapsed: clear the entry so the candidate gets a clean slate.
  breakers.delete(compositeId);
  return false;
}
|
||||
20
apps/control/src/services/delta-emitter.ts
Normal file
20
apps/control/src/services/delta-emitter.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
/** Receives every delta published on an emitter. */
export type DeltaCallback = (delta: unknown) => void;

/** Minimal in-process publish/subscribe channel for deltas. */
export type DeltaEmitter = {
  /** Register `cb`; the returned function removes it again. */
  subscribe(cb: DeltaCallback): () => void;
  /** Deliver `delta` to every currently registered callback. */
  publish(delta: unknown): void;
};

/**
 * Build a new, empty emitter.
 *
 * Subscribers are held in a Set, so registering the same callback twice keeps
 * a single registration. An exception thrown by one subscriber is swallowed
 * so the remaining subscribers still receive the delta.
 */
export function createDeltaEmitter(): DeltaEmitter {
  const subscribers = new Set<DeltaCallback>();

  const subscribe = (cb: DeltaCallback): (() => void) => {
    subscribers.add(cb);
    return () => {
      subscribers.delete(cb);
    };
  };

  const publish = (delta: unknown): void => {
    subscribers.forEach((notify) => {
      try {
        notify(delta);
      } catch {
        /* ignore emitter errors */
      }
    });
  };

  return { subscribe, publish };
}
|
||||
@@ -8,8 +8,6 @@ import type { Sql } from '../db.js';
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface CodeTask {
|
||||
id: string;
|
||||
prompt: string;
|
||||
@@ -57,8 +55,6 @@ export interface EvalSuiteRow {
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
// ─── YAML loader ────────────────────────────────────────────────────────────
|
||||
|
||||
const DATA_DIR = resolve(dirname(__filename), '../../data');
|
||||
|
||||
/**
|
||||
@@ -151,8 +147,6 @@ function normalizeCriteria(rubric: Record<string, unknown>): RubricCriterion[] {
|
||||
return result;
|
||||
}
|
||||
|
||||
// ─── DB operations ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Seed eval suites from data/ YAML files into the database.
|
||||
* Uses INSERT ... ON CONFLICT DO NOTHING for idempotency.
|
||||
|
||||
@@ -14,16 +14,12 @@
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
// ─── jitter (pure) ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Stretch a delay by a random extra amount.
 *
 * @param delayMs Base delay in milliseconds.
 * @returns `delayMs` plus a random share of it between 0 (inclusive) and
 *   50% (exclusive), drawn with `Math.random()`.
 */
export function addJitter(delayMs: number): number {
  const extra = 0.5 * (delayMs * Math.random());
  return extra + delayMs;
}
|
||||
|
||||
// ─── reconnect backoff ──────────────────────────────────────────────────────
|
||||
|
||||
export interface ReconnectPolicy {
|
||||
baseMs: number;
|
||||
maxMs: number;
|
||||
@@ -50,14 +46,6 @@ export function reconnectDecision(
|
||||
return { action: 'reconnect', delayMs: addJitter(capped) };
|
||||
}
|
||||
|
||||
// ─── llama-swap SSE envelope types ──────────────────────────────────────────
|
||||
// Real wire shape (apigroup.go):
|
||||
// event:message
|
||||
// data:{"type":"modelStatus|logData|metrics|inflight","data":"<ESCAPED JSON STRING>"}
|
||||
// The SSE event name is ALWAYS 'message'. The discriminator is the outer JSON's
|
||||
// .type field. The payload is DOUBLE-ENCODED: JSON.parse(data) gives {type, data:string},
|
||||
// then JSON.parse(that.data) gives the actual payload.
|
||||
|
||||
// Per-type payload shapes, verified against the fork source
|
||||
// (/opt/forks/llama-swap/internal/server/apigroup.go sendModels/sendLogData/
|
||||
// sendMetrics/sendInFlight, apiModel struct at :20):
|
||||
@@ -114,14 +102,11 @@ export interface InflightData {
|
||||
total: number;
|
||||
}
|
||||
|
||||
// ─── the loop ───────────────────────────────────────────────────────────────
|
||||
|
||||
export interface FleetConnectorDeps {
|
||||
isUp: () => boolean;
|
||||
sql: Sql;
|
||||
log: FastifyBaseLogger;
|
||||
onEvent: (providerId: string, event: LlamaSweepSSEEvent) => void | Promise<void>;
|
||||
onReconcile: (providerId: string, metrics: MetricsEntry[]) => Promise<boolean>;
|
||||
onReconnectGiveUp: (providerId: string) => Promise<void>;
|
||||
sleep?: (ms: number) => Promise<void>;
|
||||
policy?: ReconnectPolicy;
|
||||
|
||||
62
apps/control/src/services/fleet-rebuild.ts
Normal file
62
apps/control/src/services/fleet-rebuild.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
import { ensureHostState, stampLastSeen } from './fleet-state.js';
|
||||
import type { getSql } from '../db.js';
|
||||
import { jsonbObject } from './jsonb.js';
|
||||
|
||||
/**
 * Re-populate the in-memory fleet state from persisted rows.
 *
 * Three reads run in sequence, each applied before the next starts:
 *  1. the newest `control_model_events` row per (provider, model) — the host
 *     is marked 'down', stamped as seen, and the model entry is (re)written;
 *  2. providers found in `control_requests` — host ensured and stamped;
 *  3. providers found in `control_perf_samples` — host ensured and stamped.
 *
 * @param fleet Fleet state to mutate in place.
 * @param sql   Tagged-template SQL client returned by `getSql`.
 */
export async function rebuildFleetFromDB(fleet: FleetState, sql: ReturnType<typeof getSql>): Promise<void> {
  // Ensure a host entry exists for the provider and stamp it as seen.
  // NOTE(review): the `ts` column fetched by the last-request / last-perf
  // queries is not passed on; stampLastSeen is called with the host only —
  // confirm whether lastSeenAt is meant to reflect the stored timestamp.
  const markSeen = (providerId: string): void => {
    stampLastSeen(ensureHostState(fleet, providerId));
  };

  // DISTINCT ON keeps exactly one row per (provider, model): the one with the
  // greatest ts, per the ORDER BY.
  const modelEvents = await sql<{ provider_id: string; model: string; state: string; ts: string; detail: string }[]>`
    SELECT DISTINCT ON (provider_id, model) provider_id, model, state, ts, detail
    FROM control_model_events
    ORDER BY provider_id, model, ts DESC
  `;

  for (const { provider_id, model, state: modelState, ts, detail } of modelEvents) {
    const host = ensureHostState(fleet, provider_id);
    host.liveness = 'down';
    stampLastSeen(host);

    // jsonbObject normalises the jsonb column value before the ttl lookup.
    const parsedDetail: unknown = jsonbObject(detail);
    const ttl = (parsedDetail as { ttl?: number })?.ttl;

    // The deadline is anchored on the event timestamp rather than on the
    // rebuild time; ttl is scaled by 1000 (seconds -> ms, presumably).
    // A missing or zero ttl yields no deadline.
    const loadedAtMs = new Date(ts).getTime();
    const ttlDeadline = ttl ? new Date(loadedAtMs + ttl * 1000) : null;

    host.models.set(model, {
      model,
      state: modelState,
      ts: new Date(ts),
      ttlDeadline,
      inflight: 0,
    });
  }

  const lastRequests = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_requests
    WHERE ts IN (
    SELECT MAX(ts) FROM control_requests GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;
  for (const { provider_id } of lastRequests) {
    markSeen(provider_id);
  }

  const lastPerf = await sql<{ provider_id: string; ts: string }[]>`
    SELECT provider_id, ts FROM control_perf_samples
    WHERE ts IN (
    SELECT MAX(ts) FROM control_perf_samples GROUP BY provider_id
    )
    ORDER BY ts DESC
  `;
  for (const { provider_id } of lastPerf) {
    markSeen(provider_id);
  }
}
|
||||
@@ -10,7 +10,7 @@ export interface FleetState {
|
||||
|
||||
export interface HostState {
|
||||
providerId: string;
|
||||
liveness: 'connected' | 'reconnecting' | 'down';
|
||||
liveness: 'connected' | 'down';
|
||||
lastSeenAt: Date | null;
|
||||
seq: number;
|
||||
/** Host-level inflight total (the fork's SSE publishes only a total, not per-model). */
|
||||
@@ -29,7 +29,7 @@ export interface ModelState {
|
||||
export interface SnapshotData {
|
||||
hosts: Array<{
|
||||
providerId: string;
|
||||
liveness: 'connected' | 'reconnecting' | 'down';
|
||||
liveness: 'connected' | 'down';
|
||||
lastSeenAt: string | null;
|
||||
seq: number;
|
||||
models: Array<{
|
||||
@@ -57,8 +57,6 @@ export interface SnapshotData {
|
||||
}>;
|
||||
}
|
||||
|
||||
// ─── helpers for tests ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Create an empty fleet state.
 *
 * @returns A `FleetState` whose `hosts` map is a new, empty `Map`. Each call
 *   returns a distinct map instance.
 */
export function createFleetState(): FleetState {
  return { hosts: new Map() };
}
|
||||
|
||||
@@ -20,14 +20,12 @@ import type { Sql } from '../db.js';
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
import { computeRoutingScores, type ModelScore } from './routing-scores.js';
|
||||
import { jsonbStringArray } from './jsonb.js';
|
||||
import { isTripped } from './circuit-breaker.js';
|
||||
export { isGatewayVirtualModel } from '@boocode/contracts/gateway';
|
||||
|
||||
/** Known virtual model ids, as a readonly tuple of string literals. */
export const VIRTUAL_MODELS = ['auto', 'auto:code', 'auto:fast', 'auto:cheap'] as const;
/** Union of the literal ids listed in `VIRTUAL_MODELS`. */
export type VirtualModel = (typeof VIRTUAL_MODELS)[number];
|
||||
|
||||
/**
 * Test whether a model id names a gateway virtual model.
 *
 * @param id - Model id to test.
 * @returns `true` when `id` is exactly `'auto'` or begins with `'auto:'`.
 */
export function isGatewayVirtualModel(id: string): boolean {
  return id === 'auto' || id.startsWith('auto:');
}
|
||||
|
||||
/**
|
||||
* Strip a composite/provider prefix the picker may prepend. The gateway
|
||||
* registry provider id is 'auto', so BooChat may send 'auto/auto:code'.
|
||||
@@ -70,7 +68,7 @@ export function orderCandidates(
|
||||
if (policy.fallback && !ordered.includes(policy.fallback)) ordered.push(policy.fallback);
|
||||
// Keep curated order; drop unhealthy. If a candidate isn't in the scores
|
||||
// set at all (never seen), keep it — health is unknown, let dispatch try.
|
||||
return ordered.filter((id) => !scores.some((s) => s.compositeId === id) || healthy.has(id));
|
||||
return ordered.filter((id) => !scores.some((s) => s.compositeId === id) || (healthy.has(id) && !isTripped(id)));
|
||||
}
|
||||
|
||||
// Derive from advisory scores by category metric.
|
||||
@@ -89,7 +87,7 @@ export function orderCandidates(
|
||||
};
|
||||
|
||||
return scores
|
||||
.filter((s) => s.healthy && metric(s) != null)
|
||||
.filter((s) => s.healthy && !isTripped(s.compositeId) && metric(s) != null)
|
||||
.sort((a, b) => (metric(b) ?? -Infinity) - (metric(a) ?? -Infinity))
|
||||
.map((s) => s.compositeId);
|
||||
}
|
||||
@@ -128,10 +126,37 @@ export async function resolveCandidates(
|
||||
policyName = row.name;
|
||||
}
|
||||
|
||||
const candidates = orderCandidates(virtualModel, policy, scores);
|
||||
let candidates = orderCandidates(virtualModel, policy, scores);
|
||||
|
||||
// Cold-start fallback (G2): with no curated policy and no eval/traffic history,
|
||||
// advisory scores are empty so orderCandidates returns []. Fall back to the
|
||||
// live fleet model map so a healthy host still dispatches instead of 503ing.
|
||||
if (candidates.length === 0) {
|
||||
candidates = fleetModelCandidates(fleet);
|
||||
}
|
||||
|
||||
return { virtualModel, candidates, policyName };
|
||||
}
|
||||
|
||||
/**
 * Candidate composite ids from live fleet state: connected hosts' known models,
 * `ready` models first (already loaded => loadable + likely the chat model).
 * Pure over the fleet snapshot. Used only as the cold-start fallback.
 *
 * @param fleet - Fleet snapshot to read; it is not mutated.
 * @returns Composite ids of the form `providerId/model`. Hosts whose liveness
 *   is not `'connected'` contribute nothing. All `ready` models come first,
 *   then every other model; within each group the order follows map iteration.
 */
export function fleetModelCandidates(fleet: FleetState): string[] {
  const ready: string[] = [];
  const other: string[] = [];
  for (const host of fleet.hosts.values()) {
    if (host.liveness !== 'connected') continue;
    for (const m of host.models.values()) {
      const id = `${host.providerId}/${m.model}`;
      if (m.state === 'ready') ready.push(id);
      else other.push(id);
    }
  }
  return [...ready, ...other];
}
|
||||
|
||||
/** Split a composite id 'provider/model' into parts. */
|
||||
export function splitComposite(compositeId: string): { providerId: string; model: string } | null {
|
||||
const slash = compositeId.indexOf('/');
|
||||
|
||||
@@ -11,8 +11,8 @@ export interface HostGrant {
|
||||
}
|
||||
|
||||
export async function acquireHostAccess(
|
||||
providerId: string,
|
||||
purpose: string,
|
||||
_providerId: string,
|
||||
_purpose: string,
|
||||
): Promise<HostGrant> {
|
||||
// V1: no-op — always grant access.
|
||||
return { ok: true };
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import { publishJob } from './publish-job.js';
|
||||
import { recordEvalResult, completeEvalRun } from './eval-suites.js';
|
||||
import { resolveProviderBaseUrl } from './llama-providers.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface JudgeEvalParams {
|
||||
runId: string;
|
||||
providerId: string;
|
||||
@@ -22,8 +21,6 @@ export interface JudgeResult {
|
||||
error: string | null;
|
||||
}
|
||||
|
||||
// ─── judge runner ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Run a judge-based eval (chat quality, rubric scoring).
|
||||
*
|
||||
@@ -44,7 +41,7 @@ export async function runJudgeEval(
|
||||
logger: import('fastify').FastifyBaseLogger,
|
||||
onProgress: (progress: JudgeProgress) => void,
|
||||
): Promise<JudgeResult> {
|
||||
const { runId, providerId, model, tasks, judgeModel, quant } = params;
|
||||
const { runId, providerId, model, tasks, judgeModel, quant: _quant } = params;
|
||||
|
||||
// Resolve the target model's base URL.
|
||||
const baseUrl = resolveProviderBaseUrl(providerId);
|
||||
@@ -122,12 +119,11 @@ export async function runJudgeEval(
|
||||
completedTasks++;
|
||||
onProgress({ completedTasks });
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'eval' as const,
|
||||
jobType: 'eval',
|
||||
jobId: runId,
|
||||
status: 'running' as const,
|
||||
status: 'running',
|
||||
detail: {
|
||||
completedTasks,
|
||||
totalTasks: tasks.length,
|
||||
|
||||
@@ -8,8 +8,10 @@
|
||||
* wrapper mode; in shell mode it is the only argument and is regex-clean).
|
||||
*/
|
||||
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import { publishJob } from './publish-job.js';
|
||||
import type { SshExec, SshTarget, SshMode } from './ssh-config.js';
|
||||
import { acquireHostAccess } from './host-access.js';
|
||||
|
||||
/**
|
||||
* HF repo id: org/name. Each segment MUST start with an alphanumeric (HF's own
|
||||
@@ -31,11 +33,15 @@ export function buildPullCommand(mode: SshMode, repo: string, modelsDir?: string
|
||||
if (mode === 'wrapper') return `pull ${repo}`;
|
||||
const dir = (modelsDir ?? '').replace(/\/+$/, '');
|
||||
const local = `${dir}/${repo.replace(/\//g, '__')}`;
|
||||
return `huggingface-cli download ${repo} --local-dir '${local}'`;
|
||||
// POSIX single-quote escape the path: handles spaces AND an embedded quote in
|
||||
// modelsDir (which comes from the request body). repo is already regex-clean.
|
||||
const quoted = `'${local.replace(/'/g, `'\\''`)}'`;
|
||||
return `huggingface-cli download ${repo} --local-dir ${quoted}`;
|
||||
}
|
||||
|
||||
export interface PullParams {
|
||||
jobId: string;
|
||||
providerId: string;
|
||||
target: SshTarget;
|
||||
repo: string;
|
||||
mode: SshMode;
|
||||
@@ -57,49 +63,37 @@ export async function runModelPull(
|
||||
emitter: DeltaEmitter,
|
||||
seq: number = 0,
|
||||
): Promise<PullResult> {
|
||||
const { jobId, target, repo, mode, modelsDir } = params;
|
||||
const { jobId, providerId, target, repo, mode, modelsDir } = params;
|
||||
|
||||
const grant = await acquireHostAccess(providerId, 'pull');
|
||||
if (!grant.ok) {
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: `host access denied: ${grant.reason}` } });
|
||||
return { ok: false, error: `host access denied: ${grant.reason}` };
|
||||
}
|
||||
|
||||
if (!validateRepoId(repo)) {
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'failed' as const, detail: { kind: 'pull', repo, error: 'invalid repo id' },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: 'invalid repo id' } });
|
||||
return { ok: false, error: 'invalid repo id' };
|
||||
}
|
||||
if (mode === 'shell' && !modelsDir) {
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'failed' as const, detail: { kind: 'pull', repo, error: 'shell mode requires a models directory' },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error: 'shell mode requires a models directory' } });
|
||||
return { ok: false, error: 'shell mode requires a models directory' };
|
||||
}
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'running' as const, detail: { kind: 'pull', repo },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'running', detail: { kind: 'pull', repo } });
|
||||
|
||||
try {
|
||||
const res = await exec(target, buildPullCommand(mode, repo, modelsDir));
|
||||
if (res.code !== 0) {
|
||||
const error = `pull failed (exit ${res.code}): ${res.stderr.slice(0, 500)}`;
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'failed' as const, detail: { kind: 'pull', repo, error },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error } });
|
||||
return { ok: false, error };
|
||||
}
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'completed' as const, detail: { kind: 'pull', repo, output: res.stdout.slice(-500) },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'completed', detail: { kind: 'pull', repo, output: res.stdout.slice(-500) } });
|
||||
return { ok: true };
|
||||
} catch (err) {
|
||||
const error = (err as Error).message ?? String(err);
|
||||
emitter.publish({
|
||||
type: 'control_job' as const, seq, jobType: 'action' as const, jobId,
|
||||
status: 'failed' as const, detail: { kind: 'pull', repo, error },
|
||||
});
|
||||
publishJob(emitter, { seq, jobType: 'action', jobId, status: 'failed', detail: { kind: 'pull', repo, error } });
|
||||
return { ok: false, error };
|
||||
}
|
||||
}
|
||||
|
||||
82
apps/control/src/services/perf-poller.ts
Normal file
82
apps/control/src/services/perf-poller.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
import { ensureHostState, stampLastSeen, incrementSeq } from './fleet-state.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import type { getSql } from '../db.js';
|
||||
import type { loadConfig } from '../config.js';
|
||||
|
||||
/**
 * Index raw sample objects by their `timestamp` field. Samples without a
 * truthy `timestamp` are dropped; a later sample with the same timestamp
 * replaces an earlier one.
 */
function indexSamplesByTimestamp(samples: unknown[] | undefined): Map<string, unknown> {
  const byTimestamp = new Map<string, unknown>();
  for (const sample of samples ?? []) {
    const { timestamp } = sample as { timestamp?: string };
    if (timestamp) byTimestamp.set(timestamp, sample);
  }
  return byTimestamp;
}

/**
 * Poll one host's `/api/performance` endpoint, persist the returned samples and
 * publish a `control_perf` delta for each sample timestamp.
 *
 * The newest persisted `control_perf_samples.ts` for the provider is sent as the
 * `after` query parameter (omitted when no sample exists yet). GPU and system
 * samples are joined on their `timestamp`; a timestamp present on only one side
 * is stored with `null` for the other.
 *
 * Errors from the fetch, JSON decoding, inserts or publishing are caught and
 * logged as a warning. The watermark query runs before the `try`, so a failure
 * there rejects the returned promise.
 *
 * @param sql - Tagged-template SQL client.
 * @param config - Unused; kept for signature compatibility.
 * @param providerId - Host/provider whose samples are polled.
 * @param baseUrl - Base URL of the host's API (no trailing `/api/performance`).
 * @param fleet - Fleet state; the provider's host entry is created if missing.
 * @param emitter - Receives one `control_perf` frame per sample timestamp.
 */
export async function pollPerformance(
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  baseUrl: string,
  fleet: FleetState,
  emitter: DeltaEmitter,
): Promise<void> {
  void config;
  const state = ensureHostState(fleet, providerId);

  const watermark = await sql<{ ts: string | null }[]>`
    SELECT MAX(ts) AS ts FROM control_perf_samples WHERE provider_id = ${providerId}
  `;

  // porsager returns timestamptz as a Date object; interpolating it raw yields
  // Date.toString() which llama-swap rejects with 400.
  const lastTs = watermark[0]?.ts;
  const afterParam = lastTs
    ? `?after=${encodeURIComponent(new Date(lastTs).toISOString())}`
    : '';
  const url = `${baseUrl}/api/performance${afterParam}`;

  try {
    // Abort the request if the host does not answer within 10 seconds.
    const fetchSignal = AbortSignal.timeout(10_000);
    const res = await fetch(url, { signal: fetchSignal });
    if (!res.ok) return;

    const data = await res.json() as { gpu_stats?: unknown[]; sys_stats?: unknown[] } | null;
    if (!data) return;

    const gpuMap = indexSamplesByTimestamp(data.gpu_stats);
    const sysMap = indexSamplesByTimestamp(data.sys_stats);

    const allTimestamps = new Set([...gpuMap.keys(), ...sysMap.keys()]);
    if (allTimestamps.size === 0) return;

    // Only count the host as seen once it has returned at least one sample.
    stampLastSeen(state);

    for (const ts of allTimestamps) {
      const gpu = gpuMap.get(ts) ?? null;
      const sys = sysMap.get(ts) ?? null;

      await sql`
        INSERT INTO control_perf_samples (provider_id, ts, gpu, sys)
        VALUES (${providerId}, ${ts}, ${sql.json(gpu as never)}, ${sql.json(sys as never)})
        ON CONFLICT (provider_id, ts) DO NOTHING
      `;

      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_perf' as const,
        seq,
        providerId,
        ts,
        gpu,
        sys,
      });
    }
  } catch (err) {
    // `err` may be any thrown value (including null/undefined); reading
    // `.message` off it unguarded would throw from inside the handler.
    const msg = err instanceof Error ? err.message : String(err);
    console.warn({ providerId, err: msg }, 'fleet: perf poll failed');
  }
}
|
||||
18
apps/control/src/services/publish-job.ts
Normal file
18
apps/control/src/services/publish-job.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import type { WsFrame } from '@boocode/contracts/ws-frames';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
|
||||
/** The member of the `WsFrame` union whose `type` is `'control_job'`. */
type ControlJobFrame = Extract<WsFrame, { type: 'control_job' }>;
/** Type of the `jobType` field on a `control_job` frame. */
export type JobType = ControlJobFrame['jobType'];
/** Type of the `status` field on a `control_job` frame. */
export type JobStatus = ControlJobFrame['status'];

/** Fields a caller supplies to `publishJob`; the frame's `type` is added for them. */
export interface PublishJobParams {
  seq: number;
  jobType: JobType;
  jobId: string;
  status: JobStatus;
  /** Optional free-form payload attached to the frame. */
  detail?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Publish a `control_job` frame on the emitter.
 *
 * @param emitter - Emitter whose `publish` receives the frame.
 * @param params - Frame fields; they are spread after `type: 'control_job'`.
 */
export function publishJob(emitter: DeltaEmitter, params: PublishJobParams): void {
  emitter.publish({ type: 'control_job' as const, ...params });
}
|
||||
@@ -141,8 +141,10 @@ export function trimCapture(captureJson: string | null, sizeKB: number): string
|
||||
if (!captureJson) return null;
|
||||
const sizeBytes = Buffer.byteLength(captureJson, 'utf8');
|
||||
if (sizeBytes <= sizeKB * 1024) return captureJson;
|
||||
// Trim the capture to fit within the cap.
|
||||
return captureJson.slice(0, Math.floor(sizeKB * 1024));
|
||||
// Trim by BYTES, not JS chars: a char-index slice can split a multi-byte
|
||||
// codepoint and emit invalid UTF-8 (DB write error / corruption). Buffer
|
||||
// subarray + toString('utf8') always decodes to a valid string; a codepoint split at the cut becomes U+FFFD rather than being dropped.
|
||||
return Buffer.from(captureJson, 'utf8').subarray(0, Math.floor(sizeKB * 1024)).toString('utf8');
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -37,6 +37,8 @@ export interface ModelScore {
|
||||
avgLatencyMs: number | null;
|
||||
/** Recent request count in the live window. */
|
||||
sampleCount: number;
|
||||
/** Avg gen tok/s over the last 5 minutes from route_dispatch_log, or null. */
|
||||
recentGenTps: number | null;
|
||||
/** Whether the owning host is currently connected. */
|
||||
healthy: boolean;
|
||||
/** Category badges this model currently wins. */
|
||||
@@ -143,6 +145,18 @@ export async function computeRoutingScores(
|
||||
GROUP BY provider_id, model
|
||||
`;
|
||||
|
||||
// 2.5. Recent latency — control_requests last 5 minutes for EMA blend.
|
||||
const recentCutoff = new Date(Date.now() - 5 * 60_000).toISOString();
|
||||
const recentLatencyRows = await sql<{ provider_id: string; model: string; recent_tps: number | null }[]>`
|
||||
SELECT provider_id,
|
||||
model,
|
||||
AVG(gen_tps) FILTER (WHERE gen_tps > 0) AS recent_tps
|
||||
FROM control_requests
|
||||
WHERE ts >= ${recentCutoff}
|
||||
AND model IS NOT NULL
|
||||
GROUP BY provider_id, model
|
||||
`;
|
||||
|
||||
// 3. Merge signals keyed by compositeId.
|
||||
const byKey = new Map<string, ModelScore>();
|
||||
const keyOf = (providerId: string, model: string) => `${providerId}/${model}`;
|
||||
@@ -160,6 +174,7 @@ export async function computeRoutingScores(
|
||||
evalScore: null,
|
||||
avgGenTps: null,
|
||||
avgLatencyMs: null,
|
||||
recentGenTps: null,
|
||||
sampleCount: 0,
|
||||
healthy: fleet.hosts.get(providerId)?.liveness === 'connected',
|
||||
badges: [],
|
||||
@@ -184,6 +199,19 @@ export async function computeRoutingScores(
|
||||
s.sampleCount = row.sample_count;
|
||||
}
|
||||
|
||||
for (const row of recentLatencyRows) {
|
||||
const s = ensure(row.provider_id, row.model);
|
||||
s.recentGenTps = row.recent_tps;
|
||||
}
|
||||
|
||||
// 4. EMA blend: effective gen_tps = 0.7 * recent + 0.3 * history.
|
||||
// Fall through to history-only when recent is null.
|
||||
for (const s of byKey.values()) {
|
||||
if (s.recentGenTps != null && s.avgGenTps != null) {
|
||||
s.avgGenTps = 0.7 * s.recentGenTps + 0.3 * s.avgGenTps;
|
||||
}
|
||||
}
|
||||
|
||||
// Deterministic order before badge assignment so ties are stable.
|
||||
const scores = Array.from(byKey.values()).sort((a, b) =>
|
||||
a.compositeId < b.compositeId ? -1 : a.compositeId > b.compositeId ? 1 : 0,
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { spawn, type ChildProcess } from 'node:child_process';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import { publishJob } from './publish-job.js';
|
||||
import { recordEvalResult } from './eval-suites.js';
|
||||
|
||||
// ─── types ──────────────────────────────────────────────────────────────────
|
||||
import { acquireHostAccess } from './host-access.js';
|
||||
|
||||
export interface SandboxEvalParams {
|
||||
runId: string;
|
||||
@@ -28,8 +28,6 @@ export interface SandboxContainer {
|
||||
timeoutHandle: NodeJS.Timeout | null;
|
||||
}
|
||||
|
||||
// ─── hardening constants (LAW, not suggestions) ─────────────────────────────
|
||||
|
||||
const SANDBOX_IMAGE = process.env.SANDBOX_IMAGE ?? 'node:20-bookworm-slim';
|
||||
const SANDBOX_MEMORY = process.env.SANDBOX_MEMORY ?? '512m';
|
||||
const SANDBOX_CPU = process.env.SANDBOX_CPU ?? '0.5';
|
||||
@@ -38,8 +36,6 @@ const SANDBOX_TIMEOUT_MS = Number(process.env.SANDBOX_TIMEOUT_MS ?? '30000');
|
||||
const SANDBOX_CONCURRENCY = Number(process.env.SANDBOX_CONCURRENCY ?? '4');
|
||||
const SANDBOX_LABEL = 'boocontrol-eval';
|
||||
|
||||
// ─── sandbox runner ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Run a code sandbox eval: each task generates code via LLM, executes in
|
||||
* an ephemeral Docker container with hardening flags, and scores pass@1.
|
||||
@@ -70,6 +66,11 @@ export async function runCodeEval(
|
||||
): Promise<SandboxResult> {
|
||||
const { runId, tasks } = params;
|
||||
|
||||
const grant = await acquireHostAccess(params.providerId, 'sandbox');
|
||||
if (!grant.ok) {
|
||||
return { error: `host access denied: ${grant.reason}` };
|
||||
}
|
||||
|
||||
// Orphan prune at engine start.
|
||||
await pruneOrphanContainers();
|
||||
|
||||
@@ -99,7 +100,6 @@ export async function runCodeEval(
|
||||
// Generate code from LLM.
|
||||
const generatedCode = await generateCode(params.providerId, params.model, prompt, language);
|
||||
|
||||
// Execute in sandbox.
|
||||
const execResult = await executeInSandbox(generatedCode, testCode, language);
|
||||
|
||||
const executionMs = Date.now() - startTime;
|
||||
@@ -123,12 +123,11 @@ export async function runCodeEval(
|
||||
null,
|
||||
);
|
||||
|
||||
emitter.publish({
|
||||
type: 'control_job' as const,
|
||||
publishJob(emitter, {
|
||||
seq,
|
||||
jobType: 'eval' as const,
|
||||
jobType: 'eval',
|
||||
jobId: runId,
|
||||
status: 'running' as const,
|
||||
status: 'running',
|
||||
detail: {
|
||||
taskId,
|
||||
taskIndex: globalIdx,
|
||||
@@ -169,7 +168,6 @@ export async function runCodeEval(
|
||||
}),
|
||||
);
|
||||
|
||||
// Log batch results.
|
||||
for (const result of results) {
|
||||
if (result.status === 'rejected') {
|
||||
console.error('sandbox: batch task rejected:', result.reason);
|
||||
@@ -243,7 +241,6 @@ async function executeInSandbox(
|
||||
return new Promise((resolve, reject) => {
|
||||
const containerId = `eval_${randomUUID().slice(0, 12)}`;
|
||||
|
||||
// Build the combined script: generated code + test code.
|
||||
const script = buildExecutionScript(generatedCode, testCode, language);
|
||||
|
||||
// SECURITY: Hardened Docker run command.
|
||||
@@ -366,7 +363,6 @@ async function pruneOrphanContainers(): Promise<void> {
|
||||
pruneCmd.on('close', async () => {
|
||||
const containerIds = output.trim().split('\n').filter(Boolean);
|
||||
if (containerIds.length > 0) {
|
||||
console.log({ count: containerIds.length }, 'sandbox: pruning orphan containers');
|
||||
const kill = spawn('docker', ['kill', ...containerIds]);
|
||||
await new Promise((r) => {
|
||||
kill.on('close', r);
|
||||
|
||||
210
apps/control/src/services/sse-pipeline.ts
Normal file
210
apps/control/src/services/sse-pipeline.ts
Normal file
@@ -0,0 +1,210 @@
|
||||
import type { FleetState } from './fleet-state.js';
|
||||
import { ensureHostState, stampLastSeen, incrementSeq } from './fleet-state.js';
|
||||
import type { LlamaSweepSSEEvent, MetricsEntry } from './fleet-connector.js';
|
||||
import type { LogRelay } from './log-relay.js';
|
||||
import type { DeltaEmitter } from './delta-emitter.js';
|
||||
import type { getSql } from '../db.js';
|
||||
import type { loadConfig } from '../config.js';
|
||||
import { trimCapture, parseCaptureJson } from './retention.js';
|
||||
import { detectGap } from './reconcile.js';
|
||||
|
||||
/**
 * Flat, snake_case view of a `MetricsEntry`, as produced by `mapMetricsEntry`.
 * The field names match the `control_requests` columns the SSE pipeline inserts.
 */
export interface MappedMetricsEntry {
  id: number;
  ts: string;
  model: string;
  req_path: string;
  status_code: number;
  duration_ms: number;
  cache_tokens: number;
  input_tokens: number;
  output_tokens: number;
  prompt_tps: number;
  gen_tps: number;
  has_capture: boolean;
  /** P4: NULL for ring data -- ActivityLogEntry does not carry request headers. */
  source: string | null;
}
|
||||
|
||||
/**
 * Flatten a `MetricsEntry` into the snake_case shape used for persistence.
 *
 * Renames `timestamp` to `ts` and `resp_status_code` to `status_code`, lifts the
 * nested `tokens.*` counters to top-level fields (`prompt_per_second` becomes
 * `prompt_tps`, `tokens_per_second` becomes `gen_tps`), and always sets
 * `source` to `null`.
 *
 * @param entry - Metrics entry to convert; it is not mutated.
 * @returns A new `MappedMetricsEntry`.
 */
export function mapMetricsEntry(entry: MetricsEntry): MappedMetricsEntry {
  const { tokens } = entry;
  return {
    id: entry.id,
    ts: entry.timestamp,
    model: entry.model,
    req_path: entry.req_path,
    status_code: entry.resp_status_code,
    duration_ms: entry.duration_ms,
    cache_tokens: tokens.cache_tokens,
    input_tokens: tokens.input_tokens,
    output_tokens: tokens.output_tokens,
    prompt_tps: tokens.prompt_per_second,
    gen_tps: tokens.tokens_per_second,
    has_capture: entry.has_capture,
    source: null,
  };
}
|
||||
|
||||
/**
 * Apply one SSE event from a host to the in-memory fleet state, persist what
 * needs persisting and publish the matching delta frames.
 *
 * Every event stamps the host's last-seen time. Per event type:
 * - `modelStatus`: marks the host connected, replaces each listed model's entry,
 *   and, only if at least one model changed state, inserts a
 *   `control_model_events` row per change and publishes one `control_fleet` frame.
 * - `logData`: forwards the line to `logRelay` (when provided) and publishes a
 *   `control_log` frame.
 * - `metrics`: runs `handleReconcile` (failures are logged, not rethrown), then
 *   upserts each entry into `control_requests` and publishes a
 *   `control_activity` frame per entry.
 * - `inflight`: stores the host-level inflight total.
 *
 * @param fleet - Fleet state; the provider's host entry is created if missing.
 * @param sql - Tagged-template SQL client.
 * @param config - Supplies `CAPTURE_SIZE_KB`, the cap applied to captures.
 * @param providerId - Host/provider the event came from.
 * @param emitter - Receives the delta frames.
 * @param event - The decoded SSE event.
 * @param logRelay - Optional log buffer that `logData` lines are appended to.
 */
export async function handleLlamaSweepEvent(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  event: LlamaSweepSSEEvent,
  logRelay: LogRelay | null = null,
): Promise<void> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);

  switch (event.type) {
    case 'modelStatus': {
      // Real payload: FULL-FLEET array of {id, state, ...} (fork apiModel).
      // Derive transitions by diffing against current state; persist only changes.
      state.liveness = 'connected';
      const changed: Array<{ model: string; state: string }> = [];
      for (const m of event.data) {
        const prev = state.models.get(m.id);
        if (!prev || prev.state !== m.state) {
          changed.push({ model: m.id, state: m.state });
        }
        // Refresh the entry even when the state is unchanged, carrying over the
        // TTL deadline and inflight count the event does not provide.
        state.models.set(m.id, {
          model: m.id,
          state: m.state,
          ts: new Date(),
          ttlDeadline: prev?.ttlDeadline ?? null,
          inflight: prev?.inflight ?? 0,
        });
      }
      if (changed.length === 0) break;
      const seq = incrementSeq(state);
      for (const c of changed) {
        await sql`
          INSERT INTO control_model_events (provider_id, model, state, ts, detail)
          VALUES (${providerId}, ${c.model}, ${c.state}, clock_timestamp(), ${sql.json({} as never)})
          ON CONFLICT (provider_id, model, state, ts) DO NOTHING
        `;
      }
      emitter.publish({
        type: 'control_fleet' as const,
        seq,
        hosts: [{
          providerId: state.providerId,
          liveness: state.liveness,
          lastSeenAt: state.lastSeenAt?.toISOString() ?? null,
          seq: state.seq,
          models: Array.from(state.models.values()).map((m) => ({
            model: m.model,
            state: m.state,
            ts: m.ts.toISOString(),
            ttlDeadline: m.ttlDeadline?.toISOString() ?? null,
            inflight: m.inflight,
          })),
        }],
      });
      break;
    }
    case 'logData': {
      const source = event.data.source as 'proxy' | 'upstream' | 'model';
      const text = event.data.data;
      if (logRelay) {
        logRelay.append(providerId, source, text);
      }
      const seq = incrementSeq(state);
      emitter.publish({
        type: 'control_log' as const,
        seq,
        providerId,
        source,
        line: text,
        ts: new Date().toISOString(),
      });
      break;
    }
    case 'metrics': {
      const entries = event.data;
      await handleReconcile(fleet, sql, config, providerId, emitter, event.data).catch((err) => {
        // `err` may be any thrown value; guard before reading `.message`.
        const msg = err instanceof Error ? err.message : String(err);
        console.warn({ providerId, err: msg }, 'fleet: reconcile failed');
      });
      for (const entry of entries) {
        const captureTrimmed = entry.capture ? trimCapture(entry.capture, config.CAPTURE_SIZE_KB) : null;
        const captureObj = captureTrimmed ? parseCaptureJson(captureTrimmed) : null;
        const mapped = mapMetricsEntry(entry);
        // handleReconcile has normally just inserted this same key WITHOUT a
        // capture, so a plain DO NOTHING would discard every capture. Backfill
        // the capture when the stored row has none; never overwrite an
        // existing capture or replace one with NULL.
        await sql`
          INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, capture, source)
          VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${captureObj ? sql.json(captureObj as never) : sql`NULL::jsonb`}, ${mapped.source})
          ON CONFLICT (provider_id, swap_entry_id, ts) DO UPDATE
            SET capture = EXCLUDED.capture
            WHERE control_requests.capture IS NULL AND EXCLUDED.capture IS NOT NULL
        `;
        // Activity frames reuse the host's current seq; it is not incremented here.
        emitter.publish({
          type: 'control_activity' as const,
          seq: state.seq,
          providerId,
          entry: {
            id: mapped.id,
            ts: mapped.ts,
            model: mapped.model,
            reqPath: mapped.req_path,
            statusCode: mapped.status_code,
            durationMs: mapped.duration_ms,
          },
        });
      }
      break;
    }
    case 'inflight': {
      state.inflightTotal = event.data.total;
      break;
    }
  }
}
|
||||
|
||||
/**
 * Reconcile a batch of metrics entries against what is already persisted.
 *
 * Marks the host connected and stamps its last-seen time. If the batch is
 * non-empty and the provider already has rows in `control_requests`, the last
 * entry's timestamp and the newest persisted `ts` are passed to `detectGap`; a
 * positive result records a `gap_suspected` row (model `'*'`) in
 * `control_model_events`. Every entry is then inserted into `control_requests`
 * without its capture, skipping keys that already exist.
 *
 * NOTE(review): the gap check treats the LAST element of `metrics` as the oldest
 * entry — confirm the upstream ordering.
 *
 * @param fleet - Fleet state; the provider's host entry is created if missing.
 * @param sql - Tagged-template SQL client.
 * @param config - Not read by this function.
 * @param providerId - Host/provider the metrics belong to.
 * @param emitter - Not used; nothing is published from here.
 * @param metrics - Entries to reconcile; a nullish value is treated as empty.
 * @returns Always `true` when no query throws.
 */
async function handleReconcile(
  fleet: FleetState,
  sql: ReturnType<typeof getSql>,
  config: ReturnType<typeof loadConfig>,
  providerId: string,
  emitter: DeltaEmitter,
  metrics: MetricsEntry[],
): Promise<boolean> {
  const state = ensureHostState(fleet, providerId);
  stampLastSeen(state);
  state.liveness = 'connected';

  const entries = metrics ?? [];
  const lastEntry = entries[entries.length - 1];
  const oldestReconcileTs = lastEntry ? lastEntry.timestamp : null;

  if (oldestReconcileTs) {
    const newestPersisted = await sql<{ ts: string }[]>`
      SELECT ts FROM control_requests
      WHERE provider_id = ${providerId}
      ORDER BY ts DESC LIMIT 1
    `;

    // No persisted row means there is nothing to compare against.
    const newestRow = newestPersisted[0];
    if (newestRow && detectGap(oldestReconcileTs, newestRow.ts)) {
      await sql`
        INSERT INTO control_model_events (provider_id, model, state, ts, detail)
        VALUES (${providerId}, '*', 'gap_suspected', clock_timestamp(), ${sql.json({
          oldestReconcile: oldestReconcileTs,
          newestPersisted: newestRow.ts,
        } as never)})
        ON CONFLICT (provider_id, model, state, ts) DO NOTHING
      `;
    }
  }

  for (const entry of entries) {
    const mapped = mapMetricsEntry(entry);
    await sql`
      INSERT INTO control_requests (provider_id, swap_entry_id, ts, model, req_path, status_code, duration_ms, cache_tokens, input_tokens, output_tokens, prompt_tps, gen_tps, has_capture, source)
      VALUES (${providerId}, ${mapped.id}, ${mapped.ts}, ${mapped.model}, ${mapped.req_path}, ${mapped.status_code}, ${mapped.duration_ms}, ${mapped.cache_tokens}, ${mapped.input_tokens}, ${mapped.output_tokens}, ${mapped.prompt_tps}, ${mapped.gen_tps}, ${mapped.has_capture}, ${mapped.source})
      ON CONFLICT (provider_id, swap_entry_id, ts) DO NOTHING
    `;
  }

  void emitter;
  return true;
}
|
||||
@@ -23,8 +23,6 @@ const require = createRequire(import.meta.url);
|
||||
const Ajv = require('ajv') as typeof import('ajv').default;
|
||||
const addFormats = require('ajv-formats') as typeof import('ajv-formats').default;
|
||||
|
||||
// ─── host SSH target ─────────────────────────────────────────────────────────
|
||||
|
||||
export interface SshTarget {
|
||||
host: string;
|
||||
user: string;
|
||||
@@ -40,8 +38,6 @@ export interface ExecResult {
|
||||
/** Injectable SSH executor. `stdin`, when present, is piped to the remote command. */
|
||||
export type SshExec = (target: SshTarget, command: string, stdin?: string) => Promise<ExecResult>;
|
||||
|
||||
// ─── pure: schema validation ─────────────────────────────────────────────────
|
||||
|
||||
export interface ValidationResult {
|
||||
valid: boolean;
|
||||
errors: string[];
|
||||
@@ -89,8 +85,6 @@ export function validateLlamaConfig(yamlText: string, schema: object): Validatio
|
||||
return { valid: false, errors: errors.length ? errors : ['schema validation failed'], parsed };
|
||||
}
|
||||
|
||||
// ─── pure: unified-ish diff ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Produce a compact line diff between two texts. Trims a common prefix/suffix
|
||||
* and marks the changed middle with -/+ lines. Sufficient for a preview; not a
|
||||
@@ -120,20 +114,12 @@ export function computeDiff(oldText: string, newText: string): string {
|
||||
return out.join('\n');
|
||||
}
|
||||
|
||||
// ─── pure: backup filename ───────────────────────────────────────────────────
|
||||
|
||||
/**
 * Timestamped backup path: `<configPath>.bak-YYYYMMDDTHHMMSSZ`.
 *
 * @param configPath - Path of the config file being backed up.
 * @param now - Instant to stamp the backup with; formatted in UTC.
 * @returns `configPath` with a `.bak-<stamp>` suffix.
 * @throws RangeError if `now` is an invalid `Date` (from `toISOString`).
 */
export function backupFilename(configPath: string, now: Date): string {
  // Strip the '-' and ':' separators from the ISO string, then the fractional
  // seconds, leaving e.g. 20240102T030405Z.
  const stamp = now.toISOString().replace(/[-:]/g, '').replace(/\.\d+Z$/, 'Z');
  return `${configPath}.bak-${stamp}`;
}
|
||||
|
||||
// ─── RemoteOps seam (shell vs wrapper) ───────────────────────────────────────
|
||||
//
|
||||
// 'shell' mode issues raw shell commands (P9.1 behavior). 'wrapper' mode issues
|
||||
// fixed verbs so the key can be bound to an authorized_keys forced command that
|
||||
// hardcodes the paths. Both drive the same apply pipeline.
|
||||
|
||||
/**
 * How remote operations are issued: `'shell'` sends raw shell commands,
 * `'wrapper'` sends fixed verbs.
 */
export type SshMode = 'shell' | 'wrapper';
|
||||
|
||||
export interface RemoteOps {
|
||||
@@ -201,8 +187,6 @@ export function makeRemoteOps(mode: SshMode, target: SshTarget, configPath: stri
|
||||
return mode === 'wrapper' ? wrapperOps(target, exec) : shellOps(target, configPath, exec);
|
||||
}
|
||||
|
||||
// ─── orchestration (injectable exec) ─────────────────────────────────────────
|
||||
|
||||
/** Read the remote config file (mode-aware; defaults to shell for compat). */
|
||||
export async function readRemoteConfig(
|
||||
target: SshTarget,
|
||||
@@ -328,8 +312,6 @@ function shellQuote(s: string): string {
|
||||
return `'${s.replace(/'/g, `'\\''`)}'`;
|
||||
}
|
||||
|
||||
// ─── real SSH executor (spawn) ───────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Default SSH executor. Uses the system `ssh` with an explicit identity file and
|
||||
* IdentitiesOnly so the agent's default key is never offered (the boocode Gitea
|
||||
@@ -353,6 +335,9 @@ export const sshExec: SshExec = (target, command, stdin) => {
|
||||
child.stderr.on('data', (d) => { stderr += d.toString(); });
|
||||
child.on('error', (err) => resolve({ code: 127, stdout, stderr: `${stderr}${(err as Error).message}` }));
|
||||
child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
|
||||
// Suppress EPIPE etc. if the remote exits before consuming stdin (e.g. auth
|
||||
// failure under BatchMode) — an unhandled stream 'error' would crash the process.
|
||||
child.stdin.on('error', () => {});
|
||||
if (stdin !== undefined) {
|
||||
child.stdin.write(stdin);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user