chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
235
apps/control/src/routes/playground.ts
Normal file
235
apps/control/src/routes/playground.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { getLlamaProviders, resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
|
||||
/** One chat message, forwarded verbatim to the upstream chat-completions API. */
interface PlaygroundChatMessage {
  role: string;
  content: string;
}

/** Sampling controls shared by the single-model and A/B chat routes. */
interface PlaygroundSamplingParams {
  temperature: number;
  topP: number;
  maxTokens: number;
}

/** Non-null body type of a `fetch` response, derived so it matches the ambient `Response`. */
type PlaygroundUpstreamBody = NonNullable<Response['body']>;

/** Time budget for one provider's `/v1/models` lookup. */
const PLAYGROUND_MODEL_LIST_TIMEOUT_MS = 5_000;

/** Time budget for one upstream chat-completions request (connect + full stream). */
const PLAYGROUND_CHAT_TIMEOUT_MS = 120_000;

/** SSE field prefix used by the upstream stream and by the events written here. */
const PLAYGROUND_DATA_PREFIX = 'data: ';

/** Upstream line that marks the end of a chat-completions stream. */
const PLAYGROUND_DONE_LINE = 'data: [DONE]';

/** Returns `value` when it is a non-null object, otherwise an empty record. */
function playgroundBody(value: unknown): Record<string, unknown> {
  return typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : {};
}

/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function playgroundString(value: unknown): string | undefined {
  return typeof value === 'string' && value.length > 0 ? value : undefined;
}

/** Returns `value` when it is a number, otherwise `fallback`. */
function playgroundNumber(value: unknown, fallback: number): number {
  return typeof value === 'number' ? value : fallback;
}

/** Returns `value` as a message list when it is a non-empty array, otherwise `undefined`. */
function playgroundMessages(value: unknown): PlaygroundChatMessage[] | undefined {
  return Array.isArray(value) && value.length > 0 ? (value as PlaygroundChatMessage[]) : undefined;
}

/** Reads the sampling controls from a request body, applying the route defaults. */
function playgroundSampling(body: Record<string, unknown>): PlaygroundSamplingParams {
  return {
    temperature: playgroundNumber(body.temperature, 0.7),
    topP: playgroundNumber(body.topP, 0.9),
    maxTokens: playgroundNumber(body.maxTokens, 1024),
  };
}

/** Extracts a printable message from anything that was thrown. */
function playgroundErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Takes over the response from Fastify and starts an SSE stream on the raw
 * Node response.
 *
 * The headers are passed to `writeHead` directly: headers staged with
 * `reply.header()` are only flushed by `reply.send()`, so they would never
 * reach a response that is written through `reply.raw`.
 */
function playgroundOpenSse(reply: FastifyReply): void {
  reply.hijack();
  reply.raw.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  });
}

/** Writes one SSE `data:` event followed by the blank line that terminates it. */
function playgroundWriteEvent(reply: FastifyReply, data: string): void {
  reply.raw.write(`${PLAYGROUND_DATA_PREFIX}${data}\n\n`);
}

/** Removes a leading `data: ` from an upstream line; other lines are returned unchanged. */
function playgroundStripDataPrefix(line: string): string {
  return line.startsWith(PLAYGROUND_DATA_PREFIX) ? line.slice(PLAYGROUND_DATA_PREFIX.length) : line;
}

/** Sends a streaming chat-completions request to the provider at `baseUrl`. */
function playgroundRequestChat(
  baseUrl: string,
  model: string,
  messages: PlaygroundChatMessage[],
  sampling: PlaygroundSamplingParams,
): Promise<Response> {
  return fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages,
      temperature: sampling.temperature,
      top_p: sampling.topP,
      max_tokens: sampling.maxTokens,
      stream: true,
    }),
    signal: AbortSignal.timeout(PLAYGROUND_CHAT_TIMEOUT_MS),
  });
}

/** Builds the error text for a non-2xx upstream response (status plus a body excerpt). */
async function playgroundDescribeFailure(res: Response): Promise<string> {
  const errBody = await res.text().catch(() => '');
  return `Request failed: ${res.status} ${errBody.slice(0, 200)}`;
}

/**
 * Yields every non-empty, trimmed line of an upstream response body.
 *
 * Chunks are decoded incrementally and split on `\n`; a partial line is held
 * back until its terminator arrives. When the stream ends, whatever is still
 * buffered is yielded as a final line so an unterminated last line is not lost.
 */
async function* playgroundReadLines(body: PlaygroundUpstreamBody): AsyncGenerator<string> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let pending = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    pending += decoder.decode(value, { stream: true });
    const lines = pending.split('\n');
    pending = lines.pop() ?? '';

    for (const line of lines) {
      const trimmed = line.trim();
      if (trimmed) yield trimmed;
    }
  }

  // Flush bytes still held by the decoder plus any unterminated last line.
  const tail = (pending + decoder.decode()).trim();
  if (tail) yield tail;
}

/**
 * Registers the playground routes: model select, param controls, streaming chat.
 *
 *   GET  /api/playground/models   — list available models from providers
 *   POST /api/playground/chat     — streaming chat against a model
 *   POST /api/playground/chat-ab  — side-by-side A/B compare
 *
 * Both chat routes answer with Server-Sent Events written straight to the raw
 * Node response. Once streaming has started, failures are reported in-band as
 * `data:` events carrying an `error` field, because the HTTP status has
 * already been sent.
 *
 * @param app Fastify instance the routes are attached to.
 */
export function registerPlaygroundRoutes(
  app: FastifyInstance,
): void {
  // ─── model catalog ───────────────────────────────────────────────────────

  app.get('/api/playground/models', async (_req: FastifyRequest, reply: FastifyReply) => {
    // Resolve provider URLs from the loaded registry.
    const registry = getLlamaProviders();

    // Each lookup resolves to null on any failure (network error, timeout,
    // non-2xx status, malformed JSON), so one unreachable provider never
    // fails the whole listing.
    const lookups = await Promise.all(
      registry.providers.map(async (provider) => {
        try {
          const res = await fetch(`${provider.baseUrl}/v1/models`, {
            signal: AbortSignal.timeout(PLAYGROUND_MODEL_LIST_TIMEOUT_MS),
          });
          if (!res.ok) return null;
          const data = await res.json() as { data?: Array<{ id: string }> };
          return {
            providerId: provider.id,
            models: data?.data?.map((m) => m.id) ?? [],
          };
        } catch {
          return null;
        }
      }),
    );

    const models: Array<{ providerId: string; models: string[] }> = [];
    for (const entry of lookups) {
      if (entry) models.push(entry);
    }

    return reply.send({ models });
  });

  // ─── streaming chat ──────────────────────────────────────────────────────

  app.post('/api/playground/chat', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = playgroundBody(req.body);
    const providerId = playgroundString(body.providerId);
    const model = playgroundString(body.model);
    const messages = playgroundMessages(body.messages);
    const sampling = playgroundSampling(body);

    if (!providerId || !model || !messages) {
      return reply.status(400).send({ error: 'providerId, model, and messages are required' });
    }

    const baseUrl = resolveProviderBaseUrl(providerId);
    if (!baseUrl) {
      return reply.status(400).send({ error: `unknown provider: ${providerId}` });
    }

    // Stream the response back to the client via SSE.
    playgroundOpenSse(reply);

    try {
      const res = await playgroundRequestChat(baseUrl, model, messages, sampling);

      if (!res.ok) {
        playgroundWriteEvent(reply, JSON.stringify({ error: await playgroundDescribeFailure(res) }));
        return;
      }

      if (!res.body) {
        playgroundWriteEvent(reply, '{"error": "No response body"}');
        return;
      }

      for await (const line of playgroundReadLines(res.body)) {
        // The terminal marker is written exactly once, after the stream ends.
        if (line === PLAYGROUND_DONE_LINE) continue;
        // Pass the upstream payload through; lines that already carry the
        // 'data: ' prefix are not double-prefixed.
        playgroundWriteEvent(reply, playgroundStripDataPrefix(line));
      }

      playgroundWriteEvent(reply, '[DONE]');
    } catch (err) {
      playgroundWriteEvent(reply, JSON.stringify({ error: playgroundErrorMessage(err) }));
    } finally {
      // The reply is hijacked, so this handler is solely responsible for
      // closing the response on every path.
      reply.raw.end();
    }
  });

  // ─── A/B compare ─────────────────────────────────────────────────────────

  app.post('/api/playground/chat-ab', async (req: FastifyRequest, reply: FastifyReply) => {
    const body = playgroundBody(req.body);
    const providerIdA = playgroundString(body.providerIdA);
    const modelA = playgroundString(body.modelA);
    const providerIdB = playgroundString(body.providerIdB);
    const modelB = playgroundString(body.modelB);
    const messages = playgroundMessages(body.messages);
    const sampling = playgroundSampling(body);

    if (!providerIdA || !modelA || !providerIdB || !modelB || !messages) {
      return reply.status(400).send({ error: 'Both models and messages are required' });
    }

    const baseUrlA = resolveProviderBaseUrl(providerIdA);
    const baseUrlB = resolveProviderBaseUrl(providerIdB);

    if (!baseUrlA || !baseUrlB) {
      return reply.status(400).send({ error: 'One or both providers unknown' });
    }

    // Stream both responses via SSE with lane identifiers.
    playgroundOpenSse(reply);

    // Streams one model into the shared response. Every event is a JSON object
    // tagged with `lane`; a lane ends with exactly one `done` or `error` event.
    const streamModel = async (lane: 'A' | 'B', baseUrl: string, model: string): Promise<void> => {
      const emit = (event: Record<string, unknown>): void => {
        playgroundWriteEvent(reply, JSON.stringify({ lane, ...event }));
      };

      try {
        const res = await playgroundRequestChat(baseUrl, model, messages, sampling);

        if (!res.ok) {
          emit({ error: await playgroundDescribeFailure(res) });
          return;
        }

        if (!res.body) {
          emit({ error: 'No response body' });
          return;
        }

        for await (const line of playgroundReadLines(res.body)) {
          // The lane's `done` event is emitted once, after its stream ends.
          if (line === PLAYGROUND_DONE_LINE) continue;
          // Strip 'data: ' from upstream before re-wrapping with lane info.
          emit({ raw: playgroundStripDataPrefix(line) });
        }

        emit({ done: true });
      } catch (err) {
        emit({ error: playgroundErrorMessage(err) });
      }
    };

    try {
      // Run both streams concurrently; their events interleave on one response.
      await Promise.all([
        streamModel('A', baseUrlA, modelA),
        streamModel('B', baseUrlB, modelB),
      ]);
    } finally {
      reply.raw.end();
    }
  });
}
|
||||
Reference in New Issue
Block a user