import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';

/** A single chat turn, forwarded verbatim to the upstream provider. */
type ChatMessage = { role: string; content: string };

/** Sampling controls accepted by the chat routes. */
interface SamplingParams {
  temperature: number;
  topP: number;
  maxTokens: number;
}

/** Timeout for the per-provider `/v1/models` probe. */
const MODEL_LIST_TIMEOUT_MS = 5_000;

/** Timeout for a whole upstream chat completion stream. */
const CHAT_TIMEOUT_MS = 120_000;

/** Sentinel line an upstream sends when its stream is complete. */
const UPSTREAM_DONE_LINE = 'data: [DONE]';

/**
 * Headers for an SSE response. They are handed straight to
 * `reply.raw.writeHead` because headers staged with `reply.header()` are only
 * flushed by `reply.send()`, which these routes bypass by writing to the raw
 * response.
 */
const SSE_HEADERS = {
  'Content-Type': 'text/event-stream',
  'Cache-Control': 'no-cache',
  Connection: 'keep-alive',
};

/** Returns a readable message for any thrown value, including non-Error ones. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function asNonEmptyString(value: unknown): string | undefined {
  return typeof value === 'string' && value.length > 0 ? value : undefined;
}

/** Returns `value` when it is a non-empty array, otherwise `undefined`. */
function asMessages(value: unknown): ChatMessage[] | undefined {
  return Array.isArray(value) && value.length > 0 ? (value as ChatMessage[]) : undefined;
}

/** Reads the sampling controls from a request body, applying the route defaults. */
function readSamplingParams(body: Record<string, unknown>): SamplingParams {
  return {
    temperature: (body.temperature as number | null | undefined) ?? 0.7,
    topP: (body.topP as number | null | undefined) ?? 0.9,
    maxTokens: (body.maxTokens as number | null | undefined) ?? 1024,
  };
}

/** Asks one provider for its model ids; resolves to `null` on any failure. */
async function fetchProviderModels(
  provider: { id: string; baseUrl: string },
): Promise<{ providerId: string; models: string[] } | null> {
  try {
    const res = await fetch(`${provider.baseUrl}/v1/models`, {
      signal: AbortSignal.timeout(MODEL_LIST_TIMEOUT_MS),
    });
    if (!res.ok) return null;
    const data = (await res.json()) as { data?: Array<{ id: string }> };
    return {
      providerId: provider.id,
      models: data?.data?.map((m) => m.id) ?? [],
    };
  } catch {
    // Best effort: an unreachable or misbehaving provider is simply left out.
    return null;
  }
}

/** Starts a streaming chat completion request against a provider. */
function openChatStream(
  baseUrl: string,
  model: string,
  messages: ChatMessage[],
  params: SamplingParams,
): Promise<Response> {
  return fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      temperature: params.temperature,
      top_p: params.topP,
      max_tokens: params.maxTokens,
      stream: true,
    }),
    signal: AbortSignal.timeout(CHAT_TIMEOUT_MS),
  });
}

/**
 * Reads an upstream response body and calls `onLine` with every non-blank,
 * trimmed line, including a final line that has no trailing newline.
 *
 * @returns `false` when the response exposes no readable body, else `true`.
 */
async function relayUpstreamLines(
  res: Response,
  onLine: (line: string) => void,
): Promise<boolean> {
  const reader = res.body?.getReader();
  if (!reader) return false;

  const decoder = new TextDecoder();
  let buffer = '';
  const emit = (raw: string): void => {
    const trimmed = raw.trim();
    if (trimmed) onLine(trimmed);
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last piece may be an incomplete line; keep it for the next chunk.
      buffer = lines.pop() ?? '';
      for (const line of lines) emit(line);
    }
    // Flush bytes still held by the decoder plus any unterminated last line.
    emit(buffer + decoder.decode());
  } catch (err) {
    // Release the upstream connection instead of waiting for the timeout.
    await reader.cancel().catch(() => undefined);
    throw err;
  }
  return true;
}

/**
 * Playground routes: model select, param controls, streaming chat.
 *
 * GET  /api/playground/models   — list available models from providers
 * POST /api/playground/chat     — streaming chat against a model
 * POST /api/playground/chat-ab  — side-by-side A/B compare
 *
 * Both chat routes answer with Server-Sent Events written directly to the raw
 * response. Failures that happen after the stream has started are reported
 * as `data:` events carrying an `error` field.
 *
 * @param app Fastify instance the routes are registered on.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────
  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    // Resolve provider URLs from the loaded registry.
    const registry = getLlamaProviders();
    const providers = registry.providers.map((p) => ({
      id: p.id,
      baseUrl: p.baseUrl,
    }));

    // fetchProviderModels never rejects, so one bad provider cannot fail the list.
    const results = await Promise.all(providers.map((p) => fetchProviderModels(p)));

    const models: Array<{ providerId: string; models: string[] }> = [];
    for (const entry of results) {
      if (entry) models.push(entry);
    }
    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────
  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    // A request without a body must yield the 400 below, not a TypeError.
    const body = (req.body ?? {}) as Record<string, unknown>;
    const providerId = asNonEmptyString(body.providerId);
    const model = asNonEmptyString(body.model);
    const messages = asMessages(body.messages);
    const params = readSamplingParams(body);

    if (!providerId || !model || !messages) {
      return reply.status(400).send({ error: 'providerId, model, and messages are required' });
    }

    const baseUrl = resolveProviderBaseUrl(providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${providerId}` });
    }

    // Stream the response back to the client via SSE.
    reply.raw.writeHead(200, SSE_HEADERS);

    try {
      const res = await openChatStream(baseUrl, model, messages, params);
      if (!res.ok) {
        const errBody = await res.text().catch(() => '');
        reply.raw.write(`data: ${JSON.stringify({ error: `Request failed: ${res.status} ${errBody.slice(0, 200)}` })}\n\n`);
        return;
      }

      const hadBody = await relayUpstreamLines(res, (line) => {
        // The terminator is written once below, so the upstream one is dropped.
        if (line === UPSTREAM_DONE_LINE) return;
        // N3: pass through the raw SSE line from upstream as-is.
        // If it already has 'data: ' prefix, don't double-prefix.
        const payload = line.startsWith('data: ') ? line : `data: ${line}`;
        reply.raw.write(`${payload}\n\n`);
      });
      if (!hadBody) {
        reply.raw.write('data: {"error": "No response body"}\n\n');
        return;
      }

      reply.raw.write('data: [DONE]\n\n');
    } catch (err) {
      reply.raw.write(`data: ${JSON.stringify({ error: describeError(err) })}\n\n`);
    } finally {
      reply.raw.end();
    }
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────
  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    // A request without a body must yield the 400 below, not a TypeError.
    const body = (req.body ?? {}) as Record<string, unknown>;
    const providerIdA = asNonEmptyString(body.providerIdA);
    const modelA = asNonEmptyString(body.modelA);
    const providerIdB = asNonEmptyString(body.providerIdB);
    const modelB = asNonEmptyString(body.modelB);
    const messages = asMessages(body.messages);
    const params = readSamplingParams(body);

    if (!providerIdA || !modelA || !providerIdB || !modelB || !messages) {
      return reply.status(400).send({ error: 'Both models and messages are required' });
    }

    const baseUrlA = resolveProviderBaseUrl(providerIdA);
    const baseUrlB = resolveProviderBaseUrl(providerIdB);
    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    // Stream both responses via SSE with lane identifiers.
    reply.raw.writeHead(200, SSE_HEADERS);

    // Streams one model into the shared response; every event is tagged with
    // its lane, and each lane ends with exactly one `done` or `error` event.
    const streamModel = async (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> => {
      const emit = (fields: Record<string, unknown>): void => {
        reply.raw.write(`data: ${JSON.stringify({ lane, ...fields })}\n\n`);
      };

      try {
        const res = await openChatStream(baseUrl, model, messages, params);
        if (!res.ok) {
          const errBody = await res.text().catch(() => '');
          emit({ error: `Request failed: ${res.status} ${errBody.slice(0, 200)}` });
          return;
        }

        const hadBody = await relayUpstreamLines(res, (line) => {
          // Completion is reported once below, so the upstream marker is dropped.
          if (line === UPSTREAM_DONE_LINE) return;
          // N3: strip 'data: ' prefix from upstream before re-wrapping with lane info.
          emit({ raw: line.startsWith('data: ') ? line.slice(6) : line });
        });
        if (!hadBody) {
          emit({ error: 'No response body' });
          return;
        }

        emit({ done: true });
      } catch (err) {
        emit({ error: describeError(err) });
      }
    };

    // Run both streams concurrently; always close the response afterwards.
    try {
      await Promise.all([
        streamModel('A', baseUrlA, modelA),
        streamModel('B', baseUrlB, modelB),
      ]);
    } finally {
      reply.raw.end();
    }
  });
}