// boocode/apps/control/src/routes/gateway.ts

import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import type { Sql } from '../db.js';
import {
  VIRTUAL_MODELS,
  resolveCandidates,
  splitComposite,
} from '../services/gateway.js';
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
import type { AppContext } from '../app-context.js';

/**
 * P7.1: OpenAI-compatible auto:* gateway.
 *
 * BooChat reaches this server directly (registry baseUrl), NOT through the
 * /api/control proxy, so streaming works end to end. Endpoints mirror the
 * llama-swap wire surface BooChat's provider adapter expects:
 *
 *   GET  /v1/models                — advertise the virtual models
 *   POST /v1/chat/completions      — resolve a policy, dispatch with failover
 *   GET  /upstream/:model/props    — props for getModelContext (best candidate)
 *
 * Every dispatch forwards X-Boo-Source to the chosen target so attribution
 * survives the extra hop, and is recorded in route_dispatch_log.
 *
 * Failover contract for /v1/chat/completions: candidates are tried in order
 * and a candidate is abandoned only while nothing has been written to the
 * client. Once a streaming response has started, an upstream error ends the
 * stream; it never triggers a retry against another candidate.
 *
 * @param app - Fastify instance the routes are registered on.
 * @param ctx - Application context; `sql` and `fleet` are read from it.
 */
export function registerGatewayRoutes(
  app: FastifyInstance,
  ctx: AppContext,
): void {
  const { sql, fleet } = ctx;

  app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    return reply.send({
      object: 'list',
      data: VIRTUAL_MODELS.map((id) => ({
        id,
        object: 'model',
        created: 0,
        owned_by: 'boocontrol-gateway',
      })),
    });
  });

  app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
    const { model } = req.params as { model: string };
    const { candidates } = await resolveCandidates(sql, fleet, model);

    // Return the props of the first candidate that answers successfully.
    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      try {
        const url = `${trimTrailingSlashes(baseUrl)}/upstream/${encodeURIComponent(split.model)}/props`;
        const res = await fetch(url, { signal: AbortSignal.timeout(5_000) });
        if (!res.ok) {
          // Release the upstream connection before moving on.
          await discardBody(res);
          continue;
        }
        const body: unknown = await res.json();
        return reply.send(body);
      } catch {
        // Network error, timeout or invalid JSON: try the next candidate.
        continue;
      }
    }
    return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
  });

  app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
    // The body is untrusted client input: narrow it at runtime instead of casting.
    const body = isRecord(req.body) ? req.body : undefined;
    const requestedModel = typeof body?.model === 'string' ? body.model : undefined;
    if (!body || !requestedModel) {
      return reply.status(400).send({ error: { message: 'model is required' } });
    }

    const rawSource = req.headers['x-boo-source'];
    const source = typeof rawSource === 'string' ? rawSource : null;
    const stream = body.stream === true;
    const { virtualModel, candidates } = await resolveCandidates(sql, fleet, requestedModel);

    if (candidates.length === 0) {
      await logDispatch(sql, { virtualModel, chosen: null, tried: [], status: 'no_candidates', source, error: 'no healthy candidates', durationMs: 0 });
      return reply.status(503).send({
        error: { message: `routing gateway: no healthy candidate for ${virtualModel}`, type: 'gateway_error' },
      });
    }

    const tried: string[] = [];
    const startedAt = Date.now();

    // Identical for every candidate, so built once outside the loop.
    const upstreamHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
    if (source) upstreamHeaders['X-Boo-Source'] = source;

    for (const compositeId of candidates) {
      const split = splitComposite(compositeId);
      if (!split) continue;
      const baseUrl = resolveProviderBaseUrl(split.providerId);
      if (!baseUrl) continue;
      tried.push(compositeId);

      // Swap the virtual model name for the concrete upstream model.
      const upstreamBody = JSON.stringify({ ...body, model: split.model });

      try {
        const res = await fetch(`${trimTrailingSlashes(baseUrl)}/v1/chat/completions`, {
          method: 'POST',
          headers: upstreamHeaders,
          body: upstreamBody,
          signal: AbortSignal.timeout(300_000),
        });

        if (!res.ok) {
          recordFailure(compositeId);
          await discardBody(res);
          continue;
        }

        if (stream) {
          // A null body on an OK response is a broken upstream; fail over to the
          // next candidate (nothing has been committed to the client yet).
          const reader = res.body?.getReader();
          if (!reader) {
            recordFailure(compositeId);
            continue;
          }

          // Success: dispatch chosen. Log, then stream through.
          recordSuccess(compositeId);
          await logDispatch(sql, {
            virtualModel,
            chosen: compositeId,
            tried,
            status: 'dispatched',
            source,
            error: null,
            durationMs: Date.now() - startedAt,
          });

          // relayStream never throws, so the catch below cannot fire after the
          // response has been committed and trigger a bogus failover.
          const upstreamOk = await relayStream(reader, reply);
          if (!upstreamOk) recordFailure(compositeId);
          return reply;
        }

        // Non-streaming: read the whole body BEFORE declaring success, so an
        // unreadable body fails over without leaving a "dispatched" log row.
        const json: unknown = await res.json();
        recordSuccess(compositeId);
        await logDispatch(sql, {
          virtualModel,
          chosen: compositeId,
          tried,
          status: 'dispatched',
          source,
          error: null,
          durationMs: Date.now() - startedAt,
        });
        return reply.send(json);
      } catch {
        // Connection error, timeout or invalid body before anything was sent.
        recordFailure(compositeId);
        continue;
      }
    }

    // All candidates exhausted.
    await logDispatch(sql, {
      virtualModel,
      chosen: null,
      tried,
      status: 'failed',
      source,
      error: 'all candidates failed',
      durationMs: Date.now() - startedAt,
    });
    return reply.status(502).send({
      error: { message: `routing gateway: all candidates failed for ${virtualModel}`, type: 'gateway_error' },
    });
  });
}

/** Narrow an unknown request payload to a plain (non-array) JSON object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Strip trailing slashes so a path can be appended to a provider base URL. */
function trimTrailingSlashes(url: string): string {
  return url.replace(/\/+$/, '');
}

/** Cancel an unread fetch response body so its connection is released. */
async function discardBody(res: Response): Promise<void> {
  try {
    await res.body?.cancel();
  } catch {
    // Best effort: the body may already be errored or locked.
  }
}

/**
 * Copy an upstream SSE byte stream to the client through the raw Node response.
 *
 * The reply is hijacked so Fastify does not try to send anything itself, and
 * the SSE headers are passed to `writeHead` directly because headers set via
 * `reply.header()` are not flushed by raw writes. Bytes are forwarded as-is.
 *
 * @returns `false` when reading from the upstream failed mid-stream, `true`
 *          otherwise (including when the client went away first). Never throws.
 */
async function relayStream(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  reply: FastifyReply,
): Promise<boolean> {
  const raw = reply.raw;
  let upstreamOk = true;
  try {
    reply.hijack();
    raw.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
    });
    // Stop pulling from the upstream as soon as the client connection is gone.
    while (!raw.destroyed) {
      const { done, value } = await reader.read();
      if (done) break;
      raw.write(value);
    }
  } catch {
    // The response is already committed; all we can do is end it.
    upstreamOk = false;
  } finally {
    // Tell the upstream to stop producing; harmless if it already finished.
    await reader.cancel().catch(() => undefined);
    if (!raw.writableEnded) raw.end();
  }
  return upstreamOk;
}

/**
 * Write one routing decision to the route_dispatch_log table.
 *
 * The chosen composite id (when present) is split into provider id and model
 * for their own columns; the list of tried candidates is stored as JSON.
 * A rejected insert is swallowed on purpose so that logging can never fail
 * the request being dispatched.
 *
 * @param sql - Tagged-template SQL client used for the insert.
 * @param entry - The dispatch outcome to record.
 */
async function logDispatch(
  sql: Sql,
  entry: {
    virtualModel: string;
    chosen: string | null;
    tried: string[];
    status: string;
    source: string | null;
    error: string | null;
    durationMs: number;
  },
): Promise<void> {
  const { virtualModel, chosen, tried, status, source, error, durationMs } = entry;

  // No target was chosen for "no_candidates" / "failed" rows.
  const target = chosen ? splitComposite(chosen) : null;
  const chosenProviderId = target?.providerId ?? null;
  const chosenModel = target?.model ?? null;

  const insert = sql`
    INSERT INTO route_dispatch_log (virtual_model, chosen_provider_id, chosen_model, candidates_tried, status, source, error, duration_ms)
    VALUES (
      ${virtualModel},
      ${chosenProviderId},
      ${chosenModel},
      ${sql.json(tried as never)},
      ${status},
      ${source},
      ${error},
      ${durationMs}
    )
  `;

  // Logging must never break dispatch: ignore a failed insert.
  await insert.catch(() => {});
}