chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
205
apps/control/src/routes/gateway.ts
Normal file
205
apps/control/src/routes/gateway.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import {
|
||||
VIRTUAL_MODELS,
|
||||
resolveCandidates,
|
||||
splitComposite,
|
||||
} from '../services/gateway.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
|
||||
/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 *   GET  /v1/models              — advertise the virtual models
 *   POST /v1/chat/completions    — resolve a policy, dispatch with failover
 *   GET  /upstream/:model/props  — props for getModelContext (best candidate)
 *
 * Every dispatch forwards X-Boo-Source to the chosen target so attribution
 * survives the extra hop, and is recorded in route_dispatch_log.
 *
 * Failover rules for chat completions:
 *   - a connection error or a non-2xx status moves on to the next candidate;
 *   - a non-streaming body that cannot be read as JSON also moves on, because
 *     nothing has been sent to the client yet;
 *   - once a streaming response has started, the candidate is final. A broken
 *     upstream stream just ends the client response, since the status line and
 *     headers are already on the wire and cannot be retried.
 *
 * @param app      Fastify instance the routes are registered on.
 * @param sql      Database handle used for candidate resolution and dispatch logging.
 * @param fleet    Fleet state passed through to candidate resolution.
 * @param _emitter Currently unused; kept so the registration signature stays stable.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  sql: Sql,
  fleet: FleetState,
  _emitter: DeltaEmitter,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  // ─── props (for getModelContext) ─────────────────────────────────────────
  // Resolve candidates and proxy the first healthy candidate's props so the
  // caller can read default_generation_settings.n_ctx.

  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;

      try {
        const url = `${gatewayTrimTrailingSlashes(baseUrl)}/upstream/${encodeURIComponent(split.model)}/props`;
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          // Release the connection before trying the next candidate.
          await gatewayDiscardBody(res);
          continue;
        }
        const props: unknown = await res.json();
        return reply.send(props);
      } catch {
        // Unreachable / timed out / unparsable — try the next candidate.
        continue;
      }
    }

    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  // ─── chat completions (dispatch with failover) ───────────────────────────

  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = req.body as Record<string, unknown> | null | undefined;
    const requestedModel = body?.model;
    // Validate instead of casting: a missing body or a non-string model must
    // never reach candidate resolution.
    if (!body || typeof requestedModel !== 'string' || requestedModel.length === 0) {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    const rawSource = req.headers['x-boo-source'];
    const source = (Array.isArray(rawSource) ? rawSource[0] : rawSource) ?? null;
    const stream = body.stream === true;
    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);

    if (candidates.length === 0) {
      await logDispatch(sql, {
        virtualModel,
        chosen: null,
        tried: [],
        status: 'no_candidates',
        source,
        error: 'no healthy candidates',
        durationMs: 0,
      });
      return reply.status(503).send({
        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
      });
    }

    const tried: string[] = [];
    const startedAt = Date.now();

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      tried.push(compositeId);

      const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
      if (source) upstreamHeaders['X-Boo-Source'] = source;

      // Rewrite the virtual model id to the concrete model the target serves.
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      // Phase 1 — connect. Only failures in this phase (and an unreadable
      // non-streaming body below) are eligible for failover.
      let res: Response;
      try {
        res = await fetch(`${gatewayTrimTrailingSlashes(baseUrl)}/v1/chat/completions`, {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });
      } catch (err: unknown) {
        req.log.warn({ err, candidate: compositeId }, 'routing gateway: upstream request failed, failing over');
        continue;
      }

      if (!res.ok) {
        // HTTP error before body — eligible for failover to the next candidate.
        req.log.warn(
          { status: res.status, candidate: compositeId },
          'routing gateway: upstream returned an error status, failing over',
        );
        await gatewayDiscardBody(res);
        continue;
      }

      // Success: dispatch chosen. Log and stream/return through.
      await logDispatch(sql, {
        virtualModel,
        chosen: compositeId,
        tried,
        status: 'dispatched',
        source,
        error: null,
        durationMs: Date.now() - startedAt,
      });

      if (stream) {
        // Phase 2 — relay. From here on the candidate is final: the helper
        // never throws, so a mid-stream failure cannot re-enter the loop.
        await gatewayRelayEventStream(req, reply, res);
        return;
      }

      // Non-streaming: pass JSON through. Nothing has been written to the
      // client yet, so an unreadable body may still fail over.
      let completion: unknown;
      try {
        completion = await res.json();
      } catch (err: unknown) {
        req.log.warn({ err, candidate: compositeId }, 'routing gateway: upstream body unreadable, failing over');
        continue;
      }
      return reply.send(completion);
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
    });
  });
}

/** Remove trailing slashes so a path can be appended to a provider base URL. */
function gatewayTrimTrailingSlashes(baseUrl: string): string {
  return baseUrl.replace(/\/+$/, '');
}

/** Cancel an unread upstream body so its connection is released; never throws. */
async function gatewayDiscardBody(res: Response): Promise<void> {
  try {
    await res.body?.cancel();
  } catch {
    // Best effort only — the response is being abandoned anyway.
  }
}

/**
 * Relay an upstream SSE body to the client byte-for-byte; never throws.
 *
 * The SSE headers are passed to `writeHead` directly: values set through
 * `reply.header()` are only flushed by Fastify's own send path and would be
 * lost when writing to the raw response. `reply.hijack()` tells Fastify the
 * response is handled manually.
 */
async function gatewayRelayEventStream(
  req: FastifyRequest,
  reply: FastifyReply,
  upstream: Response,
): Promise<void> {
  reply.hijack();
  reply.raw.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  });

  const reader = upstream.body?.getReader();
  if (!reader) {
    reply.raw.end();
    return;
  }

  // If the client goes away, stop pulling from upstream; the pending read()
  // then resolves with done === true and the loop exits.
  const stopUpstream = (): void => {
    void reader.cancel().catch(() => {
      /* upstream already finished or errored */
    });
  };
  reply.raw.once('close', stopUpstream);

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // Forward the raw bytes: no decode/re-encode round trip, so multi-byte
      // sequences split across chunks are preserved exactly.
      reply.raw.write(value);
    }
  } catch (err: unknown) {
    // Headers are already sent, so the only option is to end the response.
    req.log.warn({ err }, 'routing gateway: upstream stream ended abnormally');
  } finally {
    reply.raw.off('close', stopUpstream);
    reply.raw.end();
  }
}
|
||||
|
||||
/**
 * Record one gateway dispatch attempt in route_dispatch_log.
 *
 * The chosen composite id (when present) is split into provider id and model
 * for their own columns; both are stored as NULL when nothing was chosen or
 * the id does not split. A failed insert is swallowed on purpose so that
 * logging can never break the dispatch that triggered it.
 *
 * @param sql   Database handle used to run the insert.
 * @param entry Outcome of the dispatch: virtual model, chosen composite id,
 *              candidates tried, status, source attribution, error text and
 *              elapsed milliseconds.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;

  const target = chosen ? splitComposite(chosen) : null;
  const chosenProviderId = target?.providerId ?? null;
  const chosenModel = target?.model ?? null;

  const insert = sql`
    INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
    VALUES (
      ${virtualModel},
      ${chosenProviderId},
      ${chosenModel},
      ${sql.json(tried as never)},
      ${status},
      ${source},
      ${error},
      ${durationMs}
    )
  `;

  await insert.catch(() => {
    /* logging must never break dispatch */
  });
}
|
||||
Reference in New Issue
Block a user