// boocode/apps/control/src/routes/playground.ts

import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';

/** Hard ceiling for one upstream chat completion request, in milliseconds. */
const UPSTREAM_CHAT_TIMEOUT_MS = 120_000;

/** Timeout for a single provider's `/v1/models` listing, in milliseconds. */
const MODEL_LIST_TIMEOUT_MS = 5_000;

/** Terminator line the upstream stream sends when generation is finished. */
const UPSTREAM_DONE_LINE = 'data: [DONE]';

/** One chat message, forwarded verbatim to the upstream provider. */
interface PlaygroundChatMessage {
  role: string;
  content: string;
}

/** Everything needed to issue one streaming chat completion upstream. */
interface UpstreamChatRequest {
  model: string;
  messages: PlaygroundChatMessage[];
  temperature: number;
  topP: number;
  maxTokens: number;
}

/** Callbacks through which `relayUpstreamChat` reports progress. */
interface UpstreamRelaySink {
  /** Called for every non-empty upstream line except the terminator (already trimmed). */
  onLine(line: string): void;
  /** Called exactly once when the upstream stream completed successfully. */
  onDone(): void;
  /** Called when the upstream request failed; the relay stops afterwards. */
  onError(message: string): void;
}

/** One provider's entry in the model catalog response. */
interface ProviderModels {
  providerId: string;
  models: string[];
}

/** Type guard for request fields that must be non-empty strings. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0;
}

/**
 * Reads the request body as a plain record.
 * A missing body becomes `{}` so that field validation answers 400 instead of
 * the handler throwing on property access.
 */
function readBody(req: FastifyRequest): Record<string, unknown> {
  return (req.body ?? {}) as Record<string, unknown>;
}

/** Extracts the sampling parameters shared by both chat routes, applying defaults. */
function readSamplingParams(
  body: Record<string, unknown>,
): Pick<UpstreamChatRequest, 'temperature' | 'topP' | 'maxTokens'> {
  return {
    temperature: (body.temperature as number | undefined) ?? 0.7,
    topP: (body.topP as number | undefined) ?? 0.9,
    maxTokens: (body.maxTokens as number | undefined) ?? 1024,
  };
}

/**
 * Takes over the raw response and starts an SSE stream.
 *
 * Headers staged with `reply.header()` are only flushed by `reply.send()`, so
 * they must be handed to `writeHead` explicitly. Headers that hooks/plugins
 * already staged on the reply are merged in so they are not lost.
 */
function openSseStream(reply: FastifyReply): void {
  // Tell Fastify this handler owns the socket from here on.
  reply.hijack();
  reply.raw.writeHead(200, {
    ...reply.getHeaders(),
    'content-type': 'text/event-stream',
    'cache-control': 'no-cache',
    connection: 'keep-alive',
  });
}

/** Writes one SSE `data:` event carrying `payload` to the raw response. */
function writeSseData(reply: FastifyReply, payload: string): void {
  reply.raw.write(`data: ${payload}\n\n`);
}

/**
 * Issues a streaming chat completion against `baseUrl` and reports every
 * upstream line through `sink`.
 *
 * Never rejects for upstream failures: HTTP errors, a missing body, timeouts
 * and network errors are all reported via `sink.onError`. On success
 * `sink.onDone` fires exactly once — either when the upstream terminator is
 * seen or when the stream ends without one.
 *
 * NOTE(review): the upstream request is not cancelled when the downstream
 * client disconnects; it runs until completion or the timeout.
 */
async function relayUpstreamChat(
  baseUrl: string,
  request: UpstreamChatRequest,
  sink: UpstreamRelaySink,
): Promise<void> {
  try {
    const res = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: request.model,
        messages: request.messages,
        temperature: request.temperature,
        top_p: request.topP,
        max_tokens: request.maxTokens,
        stream: true,
      }),
      signal: AbortSignal.timeout(UPSTREAM_CHAT_TIMEOUT_MS),
    });

    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      sink.onError(`Request failed: ${res.status} ${errBody.slice(0, 200)}`);
      return;
    }

    const reader = res.body?.getReader();
    if (!reader) {
      sink.onError('No response body');
      return;
    }

    const decoder = new TextDecoder();
    let pending = '';
    let doneSent = false;

    const signalDone = (): void => {
      // Upstream terminator and end-of-stream must not both produce a "done".
      if (doneSent) return;
      doneSent = true;
      sink.onDone();
    };

    const handleLine = (line: string): void => {
      const trimmed = line.trim();
      if (!trimmed) return;
      if (trimmed === UPSTREAM_DONE_LINE) {
        signalDone();
        return;
      }
      sink.onLine(trimmed);
    };

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      pending += decoder.decode(value, { stream: true });
      const lines = pending.split('\n');
      // The last piece may be an incomplete line; keep it for the next chunk.
      pending = lines.pop() ?? '';
      lines.forEach(handleLine);
    }

    // Flush the decoder and forward a final line that had no trailing newline.
    pending += decoder.decode();
    handleLine(pending);

    signalDone();
  } catch (err) {
    sink.onError(err instanceof Error ? err.message : String(err));
  }
}

/**
 * Playground routes: model select, param controls, streaming chat.
 *
 * GET  /api/playground/models       — list available models from providers
 * POST /api/playground/chat         — streaming chat against a model
 * POST /api/playground/chat-ab      — side-by-side A/B compare
 *
 * Both chat routes answer 400 with a JSON error for invalid input and
 * otherwise respond with an SSE stream; failures after the stream has started
 * are reported as `data: {"error": ...}` events.
 *
 * @param app Fastify instance the routes are registered on.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    const registry = getLlamaProviders();

    // Best-effort: a provider that is down, slow or returns garbage is simply
    // left out of the catalog rather than failing the whole request.
    const listings = await Promise.all(
      registry.providers.map(async (provider): Promise<ProviderModels | null> => {
        try {
          const res = await fetch(`${provider.baseUrl}/v1/models`, {
            signal: AbortSignal.timeout(MODEL_LIST_TIMEOUT_MS),
          });
          if (!res.ok) return null;
          const data = await res.json() as { data?: Array<{ id: string }> };
          return {
            providerId: provider.id,
            models: data?.data?.map((m) => m.id) ?? [],
          };
        } catch {
          return null;
        }
      }),
    );

    const models = listings.filter((entry): entry is ProviderModels => entry !== null);
    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────

  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = readBody(req);
    const { providerId, model, messages } = body;

    if (
      !isNonEmptyString(providerId) ||
      !isNonEmptyString(model) ||
      !Array.isArray(messages) ||
      messages.length === 0
    ) {
      return reply.status(400).send({ error: 'providerId, model, and messages are required' });
    }

    const baseUrl = resolveProviderBaseUrl(providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${providerId}` });
    }

    // Stream the response back to the client via SSE.
    openSseStream(reply);

    try {
      await relayUpstreamChat(
        baseUrl,
        { model, messages: messages as PlaygroundChatMessage[], ...readSamplingParams(body) },
        {
          // Pass upstream SSE lines through as-is; only add the 'data: '
          // prefix when upstream did not already supply it.
          onLine: (line) => {
            reply.raw.write(`${line.startsWith('data: ') ? line : `data: ${line}`}\n\n`);
          },
          onDone: () => writeSseData(reply, '[DONE]'),
          onError: (message) => writeSseData(reply, JSON.stringify({ error: message })),
        },
      );
    } finally {
      // Single place where the response is closed, whatever happened above.
      reply.raw.end();
    }
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────

  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = readBody(req);
    const { providerIdA, modelA, providerIdB, modelB, messages } = body;

    if (
      !isNonEmptyString(providerIdA) ||
      !isNonEmptyString(modelA) ||
      !isNonEmptyString(providerIdB) ||
      !isNonEmptyString(modelB) ||
      !Array.isArray(messages) ||
      messages.length === 0
    ) {
      return reply.status(400).send({ error: 'Both models and messages are required' });
    }

    const baseUrlA = resolveProviderBaseUrl(providerIdA);
    const baseUrlB = resolveProviderBaseUrl(providerIdB);

    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    // Stream both responses via SSE with lane identifiers.
    openSseStream(reply);

    const sampling = readSamplingParams(body);
    const chatMessages = messages as PlaygroundChatMessage[];

    const streamLane = (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> =>
      relayUpstreamChat(
        baseUrl,
        { model, messages: chatMessages, ...sampling },
        {
          // Strip the upstream 'data: ' prefix before re-wrapping with lane info.
          onLine: (line) => {
            const raw = line.startsWith('data: ') ? line.slice(6) : line;
            writeSseData(reply, JSON.stringify({ lane, raw }));
          },
          onDone: () => writeSseData(reply, JSON.stringify({ lane, done: true })),
          onError: (message) => writeSseData(reply, JSON.stringify({ lane, error: message })),
        },
      );

    try {
      // Run both streams concurrently; each write is one complete SSE event,
      // so interleaving between lanes is safe.
      await Promise.all([
        streamLane('A', baseUrlA, modelA),
        streamLane('B', baseUrlB, modelB),
      ]);
    } finally {
      reply.raw.end();
    }
  });
}