chore: snapshot working tree - pty_exited notifications + in-flight inference WIP

feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
2026-06-14 12:48:47 +00:00
parent 0ed506f1da
commit b18de2a331
204 changed files with 25344 additions and 867 deletions
--- a/apps/control/src/routes/gateway.ts
+++ b/apps/control/src/routes/gateway.ts
@@ -0,0 +1,205 @@
+import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
+import type { Sql } from '../db.js';
+import type { FleetState } from '../services/fleet-state.js';
+import type { DeltaEmitter } from '../index.js';
+import {
+  VIRTUAL_MODELS,
+  resolveCandidates,
+  splitComposite,
+} from '../services/gateway.js';
+import { resolveProviderBaseUrl } from '../services/llama-providers.js';
+
+/**
+ * P7.1: OpenAI-compatible auto:* gateway.
+ *
+ * BooChat reaches this server directly (registry baseUrl), NOT through the
+ * /api/control proxy, so streaming works end to end. Endpoints mirror the
+ * llama-swap wire surface BooChat's provider adapter expects:
+ *
+ *   GET  /v1/models                — advertise the virtual models
+ *   POST /v1/chat/completions      — resolve a policy, dispatch with failover
+ *   GET  /upstream/:model/props    — props for getModelContext (best candidate)
+ *
+ * Every dispatch forwards X-Boo-Source to the chosen target so attribution
+ * survives the extra hop, and is recorded in route_dispatch_log.
+ *
+ * Failover rules for chat completions: a candidate is skipped when the
+ * connection fails or it answers with a non-2xx status. Once a streaming
+ * response has started, the candidate is final — a mid-stream failure ends
+ * the client response instead of re-dispatching, because headers and partial
+ * data have already been written.
+ *
+ * @param app      Fastify instance the routes are registered on.
+ * @param sql      Database handle used for candidate resolution and dispatch logging.
+ * @param fleet    Fleet state handed to candidate resolution.
+ * @param _emitter Currently unused by these routes.
+ */
+export function registerGatewayRoutes(
+  app: FastifyInstance,
+  sql: Sql,
+  fleet: FleetState,
+  _emitter: DeltaEmitter,
+): void {
+  // ─── model catalog ───────────────────────────────────────────────────────
+
+  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
+    return reply.send({
+      object: 'list',
+      data: VIRTUAL_MODELS.map((id) => ({
+        id,
+        object: 'model',
+        created: 0,
+        owned_by: 'boocontrol-gateway',
+      })),
+    });
+  });
+
+  // ─── props (for getModelContext) ─────────────────────────────────────────
+  // Resolve candidates and proxy the first healthy candidate's props so the
+  // caller can read default_generation_settings.n_ctx.
+
+  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
+    const { model } = req.params as { model: string };
+    const { candidates } = await resolveCandidates(sql, fleet, model);
+
+    for (const compositeId of candidates) {
+      const split = splitComposite(compositeId);
+      if (!split) continue;
+      const baseUrl = resolveProviderBaseUrl(split.providerId);
+      if (!baseUrl) continue;
+      try {
+        const url = `${stripTrailingSlashes(baseUrl)}/upstream/${encodeURIComponent(split.model)}/props`;
+        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
+        if (!res.ok) {
+          // Release the connection before trying the next candidate.
+          await discardBody(res);
+          continue;
+        }
+        const body = await res.json();
+        return reply.send(body);
+      } catch {
+        continue;
+      }
+    }
+    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
+  });
+
+  // ─── chat completions (dispatch with failover) ───────────────────────────
+
+  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
+    const body = req.body as Record<string, unknown> | null | undefined;
+    // Validate at runtime: the body is untrusted, so a cast alone is not enough.
+    const modelField: unknown = body?.model;
+    const requestedModel = typeof modelField === 'string' ? modelField : undefined;
+    if (!body || !requestedModel) {
+      return reply.status(400).send({ error: { message: 'model is required' } });
+    }
+
+    const source = (req.headers['x-boo-source'] as string | undefined) ?? null;
+    const stream = body.stream === true;
+    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);
+
+    if (candidates.length === 0) {
+      await logDispatch(sql, { virtualModel, chosen: null, tried: [], status: 'no_candidates', source, error: 'no healthy candidates', durationMs: 0 });
+      return reply.status(503).send({
+        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
+      });
+    }
+
+    const tried: string[] = [];
+    const startedAt = Date.now();
+
+    // Identical for every candidate, so build once outside the loop.
+    const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
+    if (source) upstreamHeaders['X-Boo-Source'] = source;
+
+    for (const compositeId of candidates) {
+      const split = splitComposite(compositeId);
+      if (!split) continue;
+      const baseUrl = resolveProviderBaseUrl(split.providerId);
+      if (!baseUrl) continue;
+      tried.push(compositeId);
+
+      // Phase 1 — connect. Nothing has been sent to the client yet, so any
+      // failure here is safe to fail over.
+      let res: Awaited<ReturnType<typeof fetch>>;
+      try {
+        res = await fetch(`${stripTrailingSlashes(baseUrl)}/v1/chat/completions`, {
+          method: 'POST',
+          headers: upstreamHeaders,
+          body: JSON.stringify({ ...body, model: split.model }),
+          signal: AbortSignal.timeout(300_000),
+        });
+      } catch {
+        // Connection error — failover to the next candidate.
+        continue;
+      }
+
+      if (!res.ok) {
+        // HTTP error before body — eligible for failover to the next candidate.
+        // Cancel the unread body so the underlying connection is released.
+        await discardBody(res);
+        continue;
+      }
+
+      // Success: dispatch chosen. Log and stream/return through.
+      await logDispatch(sql, {
+        virtualModel,
+        chosen: compositeId,
+        tried,
+        status: 'dispatched',
+        source,
+        error: null,
+        durationMs: Date.now() - startedAt,
+      });
+
+      if (stream) {
+        // Phase 2 — streaming. This candidate is final: pipeEventStream never
+        // throws, so a mid-stream failure cannot fall through to another
+        // candidate after bytes have reached the client.
+        await pipeEventStream(res, reply);
+        return;
+      }
+
+      // Non-streaming: pass JSON through. Nothing has been written yet, so an
+      // unreadable body may still fail over to the next candidate.
+      try {
+        const json = await res.json();
+        return reply.send(json);
+      } catch {
+        continue;
+      }
+    }
+
+    // All candidates exhausted.
+    await logDispatch(sql, {
+      virtualModel,
+      chosen: null,
+      tried,
+      status: 'failed',
+      source,
+      error: 'all candidates failed',
+      durationMs: Date.now() - startedAt,
+    });
+    return reply.status(502).send({
+      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
+    });
+  });
+}
+
+/** Removes any trailing `/` characters so path segments can be appended safely. */
+function stripTrailingSlashes(url: string): string {
+  return url.replace(/\/+$/, '');
+}
+
+/** Cancels an unread response body; errors are ignored because the response is being abandoned. */
+async function discardBody(res: Response): Promise<void> {
+  await res.body?.cancel().catch(() => undefined);
+}
+
+/**
+ * Forwards an upstream SSE body to the client byte-for-byte and always ends
+ * the client response. Never throws: by the time this runs the status line is
+ * on the wire, so callers must not treat a failure as grounds for failover.
+ */
+async function pipeEventStream(upstream: Response, reply: FastifyReply): Promise<void> {
+  // Headers go straight to writeHead: values set with reply.header() are only
+  // flushed by reply.send(), which this raw-stream path never calls.
+  reply.raw.writeHead(200, {
+    'Content-Type': 'text/event-stream',
+    'Cache-Control': 'no-cache',
+    Connection: 'keep-alive',
+  });
+
+  const reader = upstream.body?.getReader();
+  if (!reader) {
+    reply.raw.end();
+    return;
+  }
+
+  // If the client goes away, cancel the upstream read so the pending read()
+  // resolves and the upstream connection is released.
+  const stopUpstream = (): void => {
+    void reader.cancel().catch(() => undefined);
+  };
+  reply.raw.once('close', stopUpstream);
+
+  try {
+    while (true) {
+      const chunk = await reader.read();
+      if (chunk.done) break;
+      // Forward raw bytes; decoding to text and re-encoding is unnecessary.
+      reply.raw.write(chunk.value);
+    }
+  } catch {
+    // Upstream failed mid-stream. The response is already partially sent, so
+    // the only option is to stop reading and close it.
+    stopUpstream();
+  } finally {
+    reply.raw.off('close', stopUpstream);
+    reply.raw.end();
+  }
+}
+
+/**
+ * Records one gateway dispatch outcome as a row in route_dispatch_log.
+ *
+ * The chosen composite id (if any) is split into provider id and model for
+ * their own columns; the list of tried candidates is stored as JSON. A failed
+ * insert is swallowed on purpose so logging can never break a dispatch.
+ */
+async function logDispatch(
+  sql: Sql,
+  entry: {
+    virtualModel: string;
+    chosen: string | null;
+    tried: string[];
+    status: string;
+    source: string | null;
+    error: string | null;
+    durationMs: number;
+  },
+): Promise<void> {
+  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;
+  const target = chosen ? splitComposite(chosen) : null;
+  const chosenProviderId = target?.providerId ?? null;
+  const chosenModel = target?.model ?? null;
+
+  const insert = sql`
+    INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
+    VALUES (
+      ${virtualModel},
+      ${chosenProviderId},
+      ${chosenModel},
+      ${sql.json(tried as never)},
+      ${status},
+      ${source},
+      ${error},
+      ${durationMs}
+    )
+  `;
+  await insert.catch(() => { /* logging must never break dispatch */ });
+}