Files
boocode/apps/control/src/routes/playground.ts
indifferentketchup b18de2a331 chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean).

wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes.

openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
2026-06-14 12:48:47 +00:00

235 lines
8.3 KiB
TypeScript

import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';
/** Sampling values used when a request omits a parameter or sends a non-numeric / non-finite one. */
const PLAYGROUND_SAMPLING_DEFAULTS = { temperature: 0.7, topP: 0.9, maxTokens: 1024 } as const;

/** Time budget for one provider's `/v1/models` call before that provider is skipped. */
const PLAYGROUND_MODELS_TIMEOUT_MS = 5_000;

/** Time budget for one upstream chat completion, including the streamed body. */
const PLAYGROUND_CHAT_TIMEOUT_MS = 120_000;

/** Maximum number of characters of an upstream error body echoed back to the client. */
const PLAYGROUND_ERROR_SNIPPET_CHARS = 200;

/** Sampling parameters forwarded to the upstream `/v1/chat/completions` call. */
interface PlaygroundSampling {
  temperature: number;
  topP: number;
  maxTokens: number;
}

/** Result of validating a request body: either the extracted fields or a client-facing error. */
type PlaygroundParse<T> = ({ ok: true } & T) | { ok: false; error: string };

/** Narrows an unknown value to a non-null object so its properties can be read safely. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function nonEmptyString(value: unknown): string | undefined {
  return typeof value === 'string' && value.length > 0 ? value : undefined;
}

/** Returns `value` when it is a finite number (0 included), otherwise `fallback`. */
function finiteNumberOr(value: unknown, fallback: number): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
}

/** Reads temperature / topP / maxTokens from a request body, applying the playground defaults. */
function readSampling(body: Record<string, unknown>): PlaygroundSampling {
  return {
    temperature: finiteNumberOr(body.temperature, PLAYGROUND_SAMPLING_DEFAULTS.temperature),
    topP: finiteNumberOr(body.topP, PLAYGROUND_SAMPLING_DEFAULTS.topP),
    maxTokens: finiteNumberOr(body.maxTokens, PLAYGROUND_SAMPLING_DEFAULTS.maxTokens),
  };
}

/** Returns the `messages` field when it is a non-empty array; entries are forwarded unmodified. */
function readMessages(body: Record<string, unknown>): unknown[] | undefined {
  const { messages } = body;
  return Array.isArray(messages) && messages.length > 0 ? messages : undefined;
}

/** Validates the body of `POST /api/playground/chat`. A missing or non-object body is a validation error. */
function parseChatRequest(
  body: unknown,
): PlaygroundParse<{ providerId: string; model: string; messages: unknown[]; sampling: PlaygroundSampling }> {
  const fields = isPlainRecord(body) ? body : {};
  const providerId = nonEmptyString(fields.providerId);
  const model = nonEmptyString(fields.model);
  const messages = readMessages(fields);
  if (!providerId || !model || !messages) {
    return { ok: false, error: 'providerId, model, and messages are required' };
  }
  return { ok: true, providerId, model, messages, sampling: readSampling(fields) };
}

/** Validates the body of `POST /api/playground/chat-ab`. A missing or non-object body is a validation error. */
function parseChatAbRequest(
  body: unknown,
): PlaygroundParse<{
  providerIdA: string;
  modelA: string;
  providerIdB: string;
  modelB: string;
  messages: unknown[];
  sampling: PlaygroundSampling;
}> {
  const fields = isPlainRecord(body) ? body : {};
  const providerIdA = nonEmptyString(fields.providerIdA);
  const modelA = nonEmptyString(fields.modelA);
  const providerIdB = nonEmptyString(fields.providerIdB);
  const modelB = nonEmptyString(fields.modelB);
  const messages = readMessages(fields);
  if (!providerIdA || !modelA || !providerIdB || !modelB || !messages) {
    return { ok: false, error: 'Both models and messages are required' };
  }
  return { ok: true, providerIdA, modelA, providerIdB, modelB, messages, sampling: readSampling(fields) };
}

/**
 * Splits buffered upstream text into complete lines.
 *
 * @returns `lines` — trimmed, non-empty complete lines; `rest` — the trailing
 *          partial line (no newline seen yet) to carry into the next chunk.
 */
function splitSseBuffer(buffer: string): { lines: string[]; rest: string } {
  const parts = buffer.split('\n');
  const rest = parts.pop() ?? '';
  const lines = parts.map((part) => part.trim()).filter((part) => part.length > 0);
  return { lines, rest };
}

/**
 * Strips the SSE `data:` field name (and the single optional space after it)
 * from a trimmed line. Lines without that prefix are returned unchanged so
 * they are still forwarded as payload.
 */
function extractSsePayload(line: string): string {
  if (!line.startsWith('data:')) return line;
  const afterField = line.slice('data:'.length);
  return afterField.startsWith(' ') ? afterField.slice(1) : afterField;
}

/** Extracts a printable message from a caught value. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Builds the client-facing message for a non-2xx upstream response (status plus a truncated body). */
async function describeUpstreamFailure(res: Response): Promise<string> {
  const errBody = await res.text().catch(() => '');
  return `Request failed: ${res.status} ${errBody.slice(0, PLAYGROUND_ERROR_SNIPPET_CHARS)}`.trim();
}

/**
 * Yields each non-empty SSE payload of an upstream streaming response, including
 * a final line that was not newline-terminated.
 *
 * @throws Error('No response body') when the response carries no body stream.
 */
async function* readSsePayloads(res: Response): AsyncGenerator<string, void, undefined> {
  const reader = res.body?.getReader();
  if (!reader) throw new Error('No response body');
  const decoder = new TextDecoder();
  let pending = '';
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      const { lines, rest } = splitSseBuffer(pending + decoder.decode(value, { stream: true }));
      pending = rest;
      for (const line of lines) {
        const payload = extractSsePayload(line);
        if (payload) yield payload;
      }
    }
    // Flush the decoder and whatever was left after the last newline.
    const tail = extractSsePayload((pending + decoder.decode()).trim());
    if (tail) yield tail;
  } finally {
    // Releases the upstream connection when the consumer stops early or a read fails.
    await reader.cancel().catch(() => undefined);
  }
}

/**
 * Fetches the model ids advertised at an OpenAI-style `/v1/models` URL.
 *
 * @returns the ids, or `null` when the provider is unreachable, answers with a
 *          non-2xx status, or returns a body that cannot be parsed (best-effort:
 *          one bad provider must not fail the whole catalog).
 */
async function fetchProviderModelIds(modelsUrl: string): Promise<string[] | null> {
  try {
    const res = await fetch(modelsUrl, { signal: AbortSignal.timeout(PLAYGROUND_MODELS_TIMEOUT_MS) });
    if (!res.ok) {
      // Discard the unread body so the connection is not left pending.
      await res.body?.cancel().catch(() => undefined);
      return null;
    }
    const parsed: unknown = await res.json();
    const entries: unknown[] = isPlainRecord(parsed) && Array.isArray(parsed.data) ? parsed.data : [];
    return entries
      .map((entry) => (isPlainRecord(entry) ? entry.id : undefined))
      .filter((id): id is string => typeof id === 'string');
  } catch {
    return null;
  }
}

/** Issues the streaming chat-completion request against a provider base URL. */
function requestChatCompletion(
  baseUrl: string,
  model: string,
  messages: unknown[],
  sampling: PlaygroundSampling,
  signal: AbortSignal,
): Promise<Response> {
  return fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      temperature: sampling.temperature,
      top_p: sampling.topP,
      max_tokens: sampling.maxTokens,
      stream: true,
    }),
    signal,
  });
}

/**
 * Takes over the raw response and starts an SSE stream.
 *
 * Headers stored through `reply.header()` are only written by `reply.send()`,
 * so they are copied onto the raw response here before the SSE headers are
 * sent with `writeHead`.
 */
function beginSse(reply: FastifyReply): void {
  reply.hijack();
  for (const [name, value] of Object.entries(reply.getHeaders())) {
    if (value !== undefined) reply.raw.setHeader(name, value);
  }
  reply.raw.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  });
}

/** Writes one `data:` event, silently skipping the write once the response is ended or destroyed. */
function writeSseData(reply: FastifyReply, payload: string): void {
  if (reply.raw.destroyed || reply.raw.writableEnded) return;
  reply.raw.write(`data: ${payload}\n\n`);
}

/** Ends the raw response unless it has already been ended. */
function endSse(reply: FastifyReply): void {
  if (!reply.raw.writableEnded) reply.raw.end();
}

/**
 * Creates an abort signal for upstream calls that fires on timeout or when the
 * client connection closes, so abandoned requests stop consuming the provider.
 * `dispose` must be called once the upstream work is finished.
 */
function createUpstreamAbort(
  reply: FastifyReply,
  timeoutMs: number,
): { signal: AbortSignal; dispose: () => void } {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort(new Error(`Upstream request timed out after ${timeoutMs}ms`));
  }, timeoutMs);
  const onClientClose = (): void => controller.abort();
  reply.raw.once('close', onClientClose);
  return {
    signal: controller.signal,
    dispose: () => {
      clearTimeout(timer);
      reply.raw.off('close', onClientClose);
    },
  };
}

/**
 * Streams one A/B lane: every upstream payload is re-wrapped as
 * `{ lane, raw }`, followed by exactly one `{ lane, done: true }`; failures are
 * reported as `{ lane, error }`. Never rejects, so one lane cannot abort the other.
 */
async function relayLane(
  reply: FastifyReply,
  lane: 'A' | 'B',
  baseUrl: string,
  model: string,
  messages: unknown[],
  sampling: PlaygroundSampling,
  signal: AbortSignal,
): Promise<void> {
  try {
    const res = await requestChatCompletion(baseUrl, model, messages, sampling, signal);
    if (!res.ok) {
      writeSseData(reply, JSON.stringify({ lane, error: await describeUpstreamFailure(res) }));
      return;
    }
    let doneSent = false;
    for await (const payload of readSsePayloads(res)) {
      if (payload === '[DONE]') {
        if (!doneSent) writeSseData(reply, JSON.stringify({ lane, done: true }));
        doneSent = true;
        continue;
      }
      writeSseData(reply, JSON.stringify({ lane, raw: payload }));
    }
    // Upstreams that close without an explicit [DONE] still get a terminator.
    if (!doneSent) writeSseData(reply, JSON.stringify({ lane, done: true }));
  } catch (err) {
    writeSseData(reply, JSON.stringify({ lane, error: describeError(err) }));
  }
}

/**
 * Playground routes: model select, param controls, streaming chat.
 *
 * GET  /api/playground/models  — list the models each registered provider reports
 * POST /api/playground/chat    — stream one chat completion back as SSE
 * POST /api/playground/chat-ab — stream two completions side by side, tagged by lane
 *
 * Both POST routes answer 400 with `{ error }` on invalid input or an unknown
 * provider; once streaming has started, failures are delivered as SSE events
 * carrying an `error` field.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────
  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    const registry = getLlamaProviders();
    const catalogs = await Promise.all(
      registry.providers.map(async (p) => {
        const ids = await fetchProviderModelIds(`${p.baseUrl}/v1/models`);
        return ids === null ? null : { providerId: p.id, models: ids };
      }),
    );
    const models: Array<{ providerId: string; models: string[] }> = [];
    for (const catalog of catalogs) {
      if (catalog) models.push(catalog);
    }
    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────
  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    const parsed = parseChatRequest(req.body);
    if (!parsed.ok) {
      return reply.status(400).send({ error: parsed.error });
    }
    const baseUrl = resolveProviderBaseUrl(parsed.providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${parsed.providerId}` });
    }

    beginSse(reply);
    const upstream = createUpstreamAbort(reply, PLAYGROUND_CHAT_TIMEOUT_MS);
    try {
      const res = await requestChatCompletion(
        baseUrl,
        parsed.model,
        parsed.messages,
        parsed.sampling,
        upstream.signal,
      );
      if (!res.ok) {
        writeSseData(reply, JSON.stringify({ error: await describeUpstreamFailure(res) }));
        return reply;
      }
      let doneSent = false;
      for await (const payload of readSsePayloads(res)) {
        if (payload === '[DONE]') {
          // Forward the terminator once, even if upstream repeats it.
          if (!doneSent) writeSseData(reply, '[DONE]');
          doneSent = true;
          continue;
        }
        writeSseData(reply, payload);
      }
      // Upstreams that close without an explicit [DONE] still get a terminator.
      if (!doneSent) writeSseData(reply, '[DONE]');
    } catch (err) {
      writeSseData(reply, JSON.stringify({ error: describeError(err) }));
    } finally {
      upstream.dispose();
      endSse(reply);
    }
    return reply;
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────
  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    const parsed = parseChatAbRequest(req.body);
    if (!parsed.ok) {
      return reply.status(400).send({ error: parsed.error });
    }
    const baseUrlA = resolveProviderBaseUrl(parsed.providerIdA);
    const baseUrlB = resolveProviderBaseUrl(parsed.providerIdB);
    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    beginSse(reply);
    const upstream = createUpstreamAbort(reply, PLAYGROUND_CHAT_TIMEOUT_MS);
    try {
      // Both lanes run concurrently and interleave their events on the one stream.
      await Promise.all([
        relayLane(reply, 'A', baseUrlA, parsed.modelA, parsed.messages, parsed.sampling, upstream.signal),
        relayLane(reply, 'B', baseUrlB, parsed.modelB, parsed.messages, parsed.sampling, upstream.signal),
      ]);
    } finally {
      upstream.dispose();
      endSse(reply);
    }
    return reply;
  });
}