feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
235 lines
8.3 KiB
TypeScript
235 lines
8.3 KiB
TypeScript
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
|
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';
|
|
|
|
/** Sampling defaults used when a request omits a parameter or sends a non-numeric value. */
const PLAYGROUND_SAMPLING_DEFAULTS = { temperature: 0.7, topP: 0.9, maxTokens: 1024 };

/** Upper bound for one upstream chat completion, in milliseconds. */
const PLAYGROUND_CHAT_TIMEOUT_MS = 120_000;

/** Upper bound for one provider's `/v1/models` listing, in milliseconds. */
const PLAYGROUND_MODELS_TIMEOUT_MS = 5_000;

/** Chat message as forwarded upstream. Only array-ness is checked here, not each element. */
type PlaygroundMessage = { role: string; content: string };

/** Everything needed to issue one streaming `/v1/chat/completions` call. */
interface PlaygroundChatRequest {
  model: string;
  messages: PlaygroundMessage[];
  temperature: number;
  topP: number;
  maxTokens: number;
}

/** Handle on an SSE response that has been taken over from Fastify. */
interface PlaygroundSseChannel {
  /** Aborts when the client disconnects or the chat timeout elapses. */
  signal: AbortSignal;
  /** Writes one `data: …` event; a no-op once the response is ended or destroyed. */
  send(data: string): void;
  /** Detaches listeners and ends the response if it is still open. */
  close(): void;
}

/** Returns the request body as a plain record, or `{}` when it is missing or not an object. */
function playgroundBody(req: FastifyRequest): Record<string, unknown> {
  const body: unknown = req.body;
  return typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function playgroundString(value: unknown): string | undefined {
  return typeof value === 'string' && value.length > 0 ? value : undefined;
}

/** Returns `value` when it is a non-empty array, otherwise `undefined`. */
function playgroundMessages(value: unknown): PlaygroundMessage[] | undefined {
  return Array.isArray(value) && value.length > 0 ? (value as PlaygroundMessage[]) : undefined;
}

/** Returns `value` when it is a finite number, otherwise `fallback`. */
function playgroundNumber(value: unknown, fallback: number): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
}

/** Reads temperature / topP / maxTokens from a request body, applying the defaults. */
function playgroundSampling(
  body: Record<string, unknown>,
): Pick<PlaygroundChatRequest, 'temperature' | 'topP' | 'maxTokens'> {
  return {
    temperature: playgroundNumber(body.temperature, PLAYGROUND_SAMPLING_DEFAULTS.temperature),
    topP: playgroundNumber(body.topP, PLAYGROUND_SAMPLING_DEFAULTS.topP),
    maxTokens: playgroundNumber(body.maxTokens, PLAYGROUND_SAMPLING_DEFAULTS.maxTokens),
  };
}

/** Extracts a message from anything that can be thrown, including non-Error values. */
function playgroundErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Strips a leading `data:` field name (and the single optional space after it) from an SSE line. */
function playgroundSsePayload(line: string): string {
  if (!line.startsWith('data:')) return line;
  const rest = line.slice(5);
  return rest.startsWith(' ') ? rest.slice(1) : rest;
}

/**
 * Takes over the response and starts an SSE stream on it.
 *
 * Headers set through `reply.header()` are copied onto the raw response before
 * `writeHead`, because writing through `reply.raw` skips Fastify's own header flush.
 */
function openPlaygroundSse(reply: FastifyReply): PlaygroundSseChannel {
  reply.header('Content-Type', 'text/event-stream');
  reply.header('Cache-Control', 'no-cache');
  reply.header('Connection', 'keep-alive');

  // Tell Fastify the response is handled manually from here on.
  reply.hijack();

  const raw = reply.raw;
  for (const [name, value] of Object.entries(reply.getHeaders())) {
    if (value !== undefined) raw.setHeader(name, value);
  }
  raw.writeHead(200);

  // One controller merges the two abort sources: timeout and client disconnect.
  const controller = new AbortController();
  const timeout = AbortSignal.timeout(PLAYGROUND_CHAT_TIMEOUT_MS);
  const onTimeout = (): void => controller.abort(timeout.reason);
  const onClientClose = (): void => controller.abort();
  timeout.addEventListener('abort', onTimeout, { once: true });
  raw.once('close', onClientClose);

  return {
    signal: controller.signal,
    send(data: string): void {
      if (raw.writableEnded || raw.destroyed) return;
      raw.write(`data: ${data}\n\n`);
    },
    close(): void {
      timeout.removeEventListener('abort', onTimeout);
      raw.off('close', onClientClose);
      if (!raw.writableEnded) raw.end();
    },
  };
}

/**
 * Issues a streaming chat completion against `baseUrl` and calls `onPayload`
 * once per non-empty upstream line, with any `data:` prefix removed.
 *
 * @throws Error when the upstream answers with a non-2xx status or has no body;
 *   fetch/read failures (including aborts) propagate unchanged.
 */
async function relayPlaygroundCompletion(
  baseUrl: string,
  request: PlaygroundChatRequest,
  signal: AbortSignal,
  onPayload: (payload: string) => void,
): Promise<void> {
  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: request.model,
      messages: request.messages,
      temperature: request.temperature,
      top_p: request.topP,
      max_tokens: request.maxTokens,
      stream: true,
    }),
    signal,
  });

  if (!res.ok) {
    const detail = (await res.text().catch(() => '')).slice(0, 200);
    throw new Error(`Request failed: ${res.status} ${detail}`.trimEnd());
  }

  const reader = res.body?.getReader();
  if (!reader) throw new Error('No response body');

  const decoder = new TextDecoder();
  const forward = (line: string): void => {
    const trimmed = line.trim();
    if (trimmed) onPayload(playgroundSsePayload(trimmed));
  };

  let pending = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    pending += decoder.decode(value, { stream: true });
    const lines = pending.split('\n');
    // The last element is an incomplete line (or ''); keep it for the next chunk.
    pending = lines.pop() ?? '';
    lines.forEach(forward);
  }

  // Flush the decoder and deliver a final line that had no trailing newline.
  forward(pending + decoder.decode());
}

/** Fetches the model ids exposed by one provider; rejects on any failure. */
async function fetchPlaygroundModels(
  provider: { id: string; baseUrl: string },
): Promise<{ providerId: string; models: string[] }> {
  const res = await fetch(`${provider.baseUrl}/v1/models`, {
    signal: AbortSignal.timeout(PLAYGROUND_MODELS_TIMEOUT_MS),
  });
  if (!res.ok) {
    // Release the unread body before giving up on this provider.
    await res.body?.cancel().catch(() => undefined);
    throw new Error(`model listing failed: ${res.status}`);
  }
  const data = (await res.json()) as { data?: Array<{ id: string }> } | null;
  return {
    providerId: provider.id,
    models: data?.data?.map((m) => m.id) ?? [],
  };
}

/**
 * Playground routes: model select, param controls, streaming chat.
 *
 *   GET  /api/playground/models   — list available models from providers
 *   POST /api/playground/chat     — streaming chat against a model
 *   POST /api/playground/chat-ab  — side-by-side A/B compare
 *
 * Both chat routes answer with `text/event-stream`. Once streaming has begun,
 * failures are reported as an `error` event on the stream rather than as an
 * HTTP status, because the 200 header has already been written.
 *
 * @param app Fastify instance the routes are registered on.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    // Resolve provider URLs from the loaded registry.
    const registry = getLlamaProviders();

    // Providers that fail, time out, or return a non-2xx status are left out of the result.
    const settled = await Promise.allSettled(
      registry.providers.map((p) => fetchPlaygroundModels({ id: p.id, baseUrl: p.baseUrl })),
    );
    const models = settled.flatMap((r) => (r.status === 'fulfilled' ? [r.value] : []));

    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────

  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = playgroundBody(req);
    const providerId = playgroundString(body.providerId);
    const model = playgroundString(body.model);
    const messages = playgroundMessages(body.messages);

    if (!providerId || !model || !messages) {
      return reply.status(400).send({ error: 'providerId, model, and messages are required' });
    }

    const baseUrl = resolveProviderBaseUrl(providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${providerId}` });
    }

    // Stream the response back to the client via SSE.
    const channel = openPlaygroundSse(reply);
    let doneSent = false;
    const sendDone = (): void => {
      // Emit the terminator once, whether or not the upstream sent its own.
      if (doneSent) return;
      doneSent = true;
      channel.send('[DONE]');
    };

    try {
      await relayPlaygroundCompletion(
        baseUrl,
        { model, messages, ...playgroundSampling(body) },
        channel.signal,
        (payload) => {
          if (payload === '[DONE]') {
            sendDone();
            return;
          }
          // N3: pass the upstream payload through under a single 'data: ' prefix.
          channel.send(payload);
        },
      );
      sendDone();
    } catch (err) {
      channel.send(JSON.stringify({ error: playgroundErrorMessage(err) }));
    } finally {
      channel.close();
    }
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────

  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = playgroundBody(req);
    const providerIdA = playgroundString(body.providerIdA);
    const modelA = playgroundString(body.modelA);
    const providerIdB = playgroundString(body.providerIdB);
    const modelB = playgroundString(body.modelB);
    const messages = playgroundMessages(body.messages);

    if (!providerIdA || !modelA || !providerIdB || !modelB || !messages) {
      return reply.status(400).send({ error: 'Both models and messages are required' });
    }

    const baseUrlA = resolveProviderBaseUrl(providerIdA);
    const baseUrlB = resolveProviderBaseUrl(providerIdB);

    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    const sampling = playgroundSampling(body);

    // Stream both responses via SSE with lane identifiers.
    const channel = openPlaygroundSse(reply);

    /**
     * Streams one lane. Every event is a JSON object tagged with `lane` and
     * carrying one of `raw` (upstream payload), `done: true`, or `error`.
     * Never rejects, so one failing lane does not cut the other short.
     */
    const streamModel = async (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> => {
      const emit = (event: Record<string, unknown>): void =>
        channel.send(JSON.stringify({ lane, ...event }));

      let doneSent = false;
      const emitDone = (): void => {
        if (doneSent) return;
        doneSent = true;
        emit({ done: true });
      };

      try {
        await relayPlaygroundCompletion(
          baseUrl,
          { model, messages, ...sampling },
          channel.signal,
          (payload) => {
            if (payload === '[DONE]') {
              emitDone();
              return;
            }
            // N3: the 'data: ' prefix is already stripped; re-wrap with lane info.
            emit({ raw: payload });
          },
        );
        emitDone();
      } catch (err) {
        emit({ error: playgroundErrorMessage(err) });
      }
    };

    try {
      // Run both streams concurrently.
      await Promise.all([
        streamModel('A', baseUrlA, modelA),
        streamModel('B', baseUrlB, modelB),
      ]);
    } finally {
      channel.close();
    }
  });
}