import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import {
  VIRTUAL_MODELS,
  resolveCandidates,
  splitComposite,
} from '../services/gateway.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
import type { AppContext } from '../app-context.js';

/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 *   GET  /v1/models              — advertise the virtual models
 *   POST /v1/chat/completions    — resolve a policy, dispatch with failover
 *   GET  /upstream/:model/props  — props for getModelContext (best candidate)
 *
 * Every dispatch forwards X-Boo-Source to the chosen target so attribution
 * survives the extra hop, and is recorded in route_dispatch_log.
 *
 * @param app Fastify instance the three routes are registered on.
 * @param ctx Application context; only `sql` and `fleet` are read here.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  ctx: AppContext,
): void {
  const { sql, fleet } = ctx;

  // Advertise every virtual model id in the OpenAI "list" envelope.
  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  // Return the props document of the first candidate that answers with 2xx.
  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;

      try {
        const url = `${stripTrailingSlashes(baseUrl)}/upstream/${encodeURIComponent(split.model)}/props`;
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          discardBody(res);
          continue;
        }
        const props: unknown = await res.json();
        return reply.send(props);
      } catch {
        // Unreachable, timed out, or returned a non-JSON body: try the next candidate.
        continue;
      }
    }

    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  // Resolve the virtual model to concrete candidates and dispatch with failover.
  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as Record<string, unknown> | null | undefined;
    const requestedModel = typeof body?.model === 'string' ? body.model : undefined;
    if (!body || !requestedModel) {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    const rawSource = req.headers['x-boo-source'];
    const source = (Array.isArray(rawSource) ? rawSource[0] : rawSource) ?? null;
    const stream = body.stream === true;

    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);
    if (candidates.length === 0) {
      await logDispatch(sql, {
        virtualModel,
        chosen: null,
        tried: [],
        status: 'no_candidates',
        source,
        error: 'no healthy candidates',
        durationMs: 0,
      });
      return reply.status(503).send({
        error: {
          message: `routing gateway: no healthy candidate for ${virtualModel}`,
          type: 'gateway_error',
        },
      });
    }

    const tried: string[] = [];
    const startedAt = Date.now();

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;

      tried.push(compositeId);

      const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
      if (source) upstreamHeaders['X-Boo-Source'] = source;
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      let reader: ReadableStreamDefaultReader<Uint8Array> | undefined;
      let json: unknown;

      // Only work that happens BEFORE anything is committed to the client lives
      // inside this try, so a failure here can safely fail over to the next
      // candidate.
      try {
        const res = await fetch(`${stripTrailingSlashes(baseUrl)}/v1/chat/completions`, {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });
        if (!res.ok) {
          discardBody(res);
          recordFailure(compositeId);
          continue;
        }

        if (stream) {
          // A null body on an OK response is a broken upstream; fail over to the
          // next candidate (nothing has been committed to the client yet).
          reader = res.body?.getReader();
          if (!reader) {
            recordFailure(compositeId);
            continue;
          }
        } else {
          // Parse before declaring success so an unreadable body fails over
          // instead of leaving a stale 'dispatched' log row behind.
          json = await res.json();
        }
      } catch {
        recordFailure(compositeId);
        continue;
      }

      // Success: dispatch chosen. Log and stream/return through.
      recordSuccess(compositeId);
      await logDispatch(sql, {
        virtualModel,
        chosen: compositeId,
        tried,
        status: 'dispatched',
        source,
        error: null,
        durationMs: Date.now() - startedAt,
      });

      if (reader) {
        // From here on the response is committed: a mid-stream upstream error
        // must NOT fail over (headers and partial data are already on the wire).
        // It only counts against this candidate.
        const completed = await pipeEventStream(reply, reader);
        if (!completed) recordFailure(compositeId);
        return;
      }

      // Non-streaming: pass JSON through.
      return reply.send(json);
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: {
        message: `routing gateway: all candidates failed for ${virtualModel}`,
        type: 'gateway_error',
      },
    });
  });
}

/** Removes every trailing `/` so a path can be appended with exactly one slash. */
function stripTrailingSlashes(baseUrl: string): string {
  return baseUrl.replace(/\/+$/, '');
}

/** Cancels an unread response body so its resources are released promptly. */
function discardBody(res: Response): void {
  void res.body?.cancel().catch(() => undefined);
}

/**
 * Copies an upstream byte stream to the client as a `text/event-stream` response.
 *
 * Never throws: once the status line has been written there is no way to signal
 * an error other than ending the response, so read failures are reported through
 * the return value instead.
 *
 * @param reply  Fastify reply; written through `reply.raw` after being hijacked.
 * @param reader Reader over the upstream response body.
 * @returns `true` if the upstream stream ended normally or the client went away,
 *          `false` if reading from upstream failed mid-stream.
 */
async function pipeEventStream(
  reply: FastifyReply,
  reader: ReadableStreamDefaultReader<Uint8Array>,
): Promise<boolean> {
  reply.header('Content-Type', 'text/event-stream');
  reply.header('Cache-Control', 'no-cache');
  reply.header('Connection', 'keep-alive');

  // Writing through reply.raw bypasses Fastify's own header handling, so the
  // headers accumulated on the reply have to be handed to writeHead explicitly.
  reply.hijack();
  reply.raw.writeHead(200, reply.getHeaders());

  try {
    while (true) {
      // Stop pulling from upstream once the client connection is gone.
      if (reply.raw.destroyed) break;
      const { done, value } = await reader.read();
      if (done) break;
      // Forward the raw bytes untouched; no decode/re-encode round trip.
      reply.raw.write(value);
    }
    return true;
  } catch {
    return false;
  } finally {
    reply.raw.end();
    // No-op when the stream already finished; otherwise releases the upstream.
    void reader.cancel().catch(() => undefined);
  }
}

/**
 * Inserts one row into `route_dispatch_log` describing a dispatch attempt.
 *
 * Best-effort: any failure while building or running the insert is swallowed so
 * that logging can never break dispatch.
 *
 * @param sql   Database client used for the insert.
 * @param entry Dispatch outcome. `chosen` is split into provider id and model
 *              via `splitComposite`; both columns are NULL when `chosen` is null
 *              or cannot be split.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  try {
    const split = entry.chosen ? splitComposite(entry.chosen) : null;
    await sql`
      INSERT INTO route_dispatch_log
        (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
      VALUES (
        ${entry.virtualModel},
        ${split?.providerId ?? null},
        ${split?.model ?? null},
        ${sql.json(entry.tried as never)},
        ${entry.status},
        ${entry.source},
        ${entry.error},
        ${entry.durationMs}
      )
    `;
  } catch {
    /* logging must never break dispatch */
  }
}