Files
boocode/apps/control/src/routes/gateway.ts

202 lines
6.6 KiB
TypeScript

import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import {
VIRTUAL_MODELS,
resolveCandidates,
splitComposite,
} from '../services/gateway.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
import type { AppContext } from '../app-context.js';
/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 *   GET  /v1/models              — advertise the virtual models
 *   POST /v1/chat/completions    — resolve a policy, dispatch with failover
 *   GET  /upstream/:model/props  — props for getModelContext (best candidate)
 *
 * Every chat dispatch forwards X-Boo-Source to the chosen target so
 * attribution survives the extra hop, and is recorded in route_dispatch_log.
 *
 * Failover only happens while nothing has been written to the client. Once a
 * candidate's response has been accepted (JSON parsed, or stream reader
 * obtained) the request is committed to that candidate.
 *
 * @param app - Fastify instance the three routes are registered on.
 * @param ctx - Application context; only `sql` and `fleet` are read from it.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  ctx: AppContext,
): void {
  const { sql, fleet } = ctx;

  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);

    // Return the props of the first candidate that answers successfully.
    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      try {
        const url = joinUpstreamUrl(baseUrl, `/upstream/${encodeURIComponent(split.model)}/props`);
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          await discardUpstreamBody(res);
          continue;
        }
        return reply.send(await res.json());
      } catch (err) {
        req.log.warn({ err, candidate: compositeId }, 'gateway: props lookup failed, trying next candidate');
        continue;
      }
    }
    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as Record<string, unknown> | null | undefined;
    // Only a non-empty string is a usable model id; anything else is rejected.
    const requestedModel = typeof body?.model === 'string' ? body.model : '';
    if (!body || !requestedModel) {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    // A repeated header arrives as an array; attribute to the first value.
    const rawSource = req.headers['x-boo-source'];
    const source = (Array.isArray(rawSource) ? rawSource[0] : rawSource) ?? null;
    const stream = body.stream === true;

    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);
    if (candidates.length === 0) {
      await logDispatch(sql, { virtualModel, chosen: null, tried: [], status: 'no_candidates', source, error: 'no healthy candidates', durationMs: 0 });
      return reply.status(503).send({
        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
      });
    }

    const tried: string[] = [];
    const startedAt = Date.now();

    // Marks a candidate as the chosen target: closes the breaker and logs the dispatch.
    const markDispatched = async (compositeId: string): Promise<void> => {
      recordSuccess(compositeId);
      await logDispatch(sql, {
        virtualModel,
        chosen: compositeId,
        tried,
        status: 'dispatched',
        source,
        error: null,
        durationMs: Date.now() - startedAt,
      });
    };

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      tried.push(compositeId);

      const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
      if (source) upstreamHeaders['X-Boo-Source'] = source;
      // The upstream expects its own model name, not the virtual/composite id.
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      let res: Response;
      try {
        res = await fetch(joinUpstreamUrl(baseUrl, '/v1/chat/completions'), {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });
      } catch (err) {
        req.log.warn({ err, candidate: compositeId }, 'gateway: upstream request failed, trying next candidate');
        recordFailure(compositeId);
        continue;
      }

      if (!res.ok) {
        await discardUpstreamBody(res);
        recordFailure(compositeId);
        continue;
      }

      if (stream) {
        // A null body on an OK response is a broken upstream; fail over to the
        // next candidate (nothing has been committed to the client yet).
        const reader = res.body?.getReader();
        if (!reader) {
          recordFailure(compositeId);
          continue;
        }
        await markDispatched(compositeId);

        // From here on the response is written on the raw socket. Headers set
        // via reply.header() are only flushed by reply.send(), so they must be
        // passed to writeHead() directly, and Fastify is told to stand aside.
        reply.hijack();
        reply.raw.writeHead(200, {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
          Connection: 'keep-alive',
        });
        // Stop pulling from the upstream once the client connection is gone.
        reply.raw.on('close', () => {
          void reader.cancel().catch(() => undefined);
        });
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            // Relay the bytes untouched; no decode/re-encode round trip.
            reply.raw.write(value);
          }
        } catch (err) {
          // The response is already committed to the client, so failing over
          // to another candidate is no longer possible: record and stop.
          req.log.warn({ err, candidate: compositeId }, 'gateway: upstream stream aborted mid-response');
          recordFailure(compositeId);
        } finally {
          reply.raw.end();
        }
        return reply;
      }

      // Non-streaming: parse first so a malformed body can still fail over
      // before the dispatch is recorded as successful.
      let json: unknown;
      try {
        json = await res.json();
      } catch (err) {
        req.log.warn({ err, candidate: compositeId }, 'gateway: upstream returned unreadable JSON, trying next candidate');
        recordFailure(compositeId);
        continue;
      }
      await markDispatched(compositeId);
      return reply.send(json);
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
    });
  });
}

/** Joins a provider base URL and an absolute path, dropping trailing slashes from the base. */
function joinUpstreamUrl(baseUrl: string, path: string): string {
  return `${baseUrl.replace(/\/+$/, '')}${path}`;
}

/** Best-effort cancel of an upstream body that will not be read, so it is not left pending. */
async function discardUpstreamBody(res: Response): Promise<void> {
  try {
    await res.body?.cancel();
  } catch {
    // The response is being abandoned anyway; a failed cancel changes nothing.
  }
}
/**
 * Inserts one row into route_dispatch_log describing a routing attempt.
 *
 * When `entry.chosen` is set it is split with splitComposite into the
 * chosen_provider_id / chosen_model columns; otherwise both are stored as
 * NULL. A rejected insert is swallowed so logging can never fail a dispatch.
 *
 * @param sql - Tagged-template SQL client used to run the insert.
 * @param entry - Outcome of the dispatch: virtual model, chosen composite id
 *   (or null), candidates tried, status, source, error text and duration in ms.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;

  // Resolve the two "chosen" columns up front; both stay null without a target.
  let chosenProviderId: string | null = null;
  let chosenModel: string | null = null;
  if (chosen) {
    const parts = splitComposite(chosen);
    chosenProviderId = parts?.providerId ?? null;
    chosenModel = parts?.model ?? null;
  }

  const pendingInsert = sql`
INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
VALUES (
${virtualModel},
${chosenProviderId},
${chosenModel},
${sql.json(tried as never)},
${status},
${source},
${error},
${durationMs}
)
`;

  // Logging is best-effort: a rejected insert is deliberately ignored.
  await pendingInsert.catch(() => { /* logging must never break dispatch */ });
}