Adds Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Server-side route and provider config wiring to pass overrides through the inference pipeline.
407 lines
16 KiB
TypeScript
407 lines
16 KiB
TypeScript
import Fastify from 'fastify';
|
|
import fastifyWebsocket from '@fastify/websocket';
|
|
import { loadConfig } from './config.js';
|
|
import { getSql, applySchema, pingDb, closeDb } from './db.js';
|
|
import { startMcpServer } from './services/mcp-server.js';
|
|
// v2.0.0 Phase 2B: workspace dependency on @boocode/server — reuse the
|
|
// inference loop, broker, and tool registry without duplication.
|
|
import { createInferenceRunner } from '@boocode/server/inference';
|
|
import { createBroker } from '@boocode/server/broker';
|
|
import { appendMcpTools, ALL_TOOLS } from '@boocode/server/tools';
|
|
import type { Config as ServerConfig } from '@boocode/server/config';
|
|
import type { WsFrame } from '@boocode/contracts/ws-frames';
|
|
// v2.0.0 Phase 2C: write tools + adapter for BooChat ToolDef compatibility.
|
|
import { WRITE_TOOLS } from './services/tools/index.js';
|
|
import { adaptWriteTool } from './services/tools/adapter.js';
|
|
import { runWithInferenceContext } from './services/tools/inference_context.js';
|
|
// Routes
|
|
import { registerMessageRoutes } from './routes/messages.js';
|
|
import { registerSkillRoutes } from './routes/skills.js';
|
|
import { registerPendingRoutes } from './routes/pending.js';
|
|
import { registerCheckpointRoutes } from './routes/checkpoints.js';
|
|
import { registerAgentSessionRoutes } from './routes/agent-sessions.js';
|
|
import { registerTaskRoutes } from './routes/tasks.js';
|
|
import { registerInboxRoutes } from './routes/inbox.js';
|
|
import { registerStatsRoutes } from './routes/stats.js';
|
|
import { registerRunsRoutes } from './routes/runs.js';
|
|
import { registerArenaRoutes } from './routes/arena.js';
|
|
import { registerProviderRoutes } from './routes/providers.js';
|
|
import { registerWorktreeSafetyRoutes } from './routes/worktree-safety.js';
|
|
import { registerLifecycleRoutes } from './routes/lifecycle.js';
|
|
import { registerAnalyticsRoutes } from './routes/analytics.js';
|
|
import { registerWebSocket } from './routes/ws.js';
|
|
// Phase 4: dispatcher + agent probe
|
|
import { createDispatcher } from './services/dispatcher.js';
|
|
// Orchestrator (Phase 2): DB-backed flow-runner; advances on the dispatcher's
|
|
// onTaskTerminal hook.
|
|
import { createFlowRunner } from './services/flow-runner.js';
|
|
// Arena: DB-backed battle-runner; also advances on the onTaskTerminal hook.
|
|
import { createBattleRunner, type DispatchContestantFn } from './services/arena-runner.js';
|
|
import { createAnalyzer } from './services/arena-analyzer.js';
|
|
import { agentPool } from './services/agent-pool.js';
|
|
import { createOrphanWorktreeReaper } from './services/orphan-worktree-reaper.js';
|
|
import { probeAgents } from './services/agent-probe.js';
|
|
import { getProviderSnapshot, persistProbedModels, fetchLlamaSwapModels } from './services/provider-snapshot.js';
|
|
import { setPermissionHooks } from './services/permission-waiter.js';
|
|
import { publishAgentStatus } from './services/agent-status-publish.js';
|
|
import { homedir } from 'node:os';
|
|
|
|
/**
 * Process entry point for BooCoder.
 *
 * When `--mcp` is present in `process.argv`, the function applies the database
 * schema, starts the MCP server and returns without creating an HTTP server.
 *
 * Otherwise it builds the Fastify app and wires, in order: the lenient JSON
 * body parser, the database schema, the broker, the permission hooks, the tool
 * registry, the inference runner (wrapped so each run carries its own
 * inference context), the WebSocket plugin, the health endpoint, the agent
 * probe, the background provider-snapshot warm-up, the flow-runner, the arena
 * analyzer and battle-runner, the dispatcher, the agent-pool / orphan-worktree
 * sweeps, the HTTP and WebSocket routes, the signal handlers, and finally the
 * listener.
 *
 * @returns A promise that resolves once the MCP server call returns (MCP mode)
 *          or once the HTTP listener is bound. It rejects if any awaited
 *          startup step fails; the caller decides how to report that.
 */
async function main(): Promise<void> {
  const config = loadConfig();

  // MCP mode: stdio transport, no HTTP server
  if (process.argv.includes('--mcp')) {
    const sql = getSql(config);
    await applySchema(sql);
    await startMcpServer(sql);
    return;
  }

  const app = Fastify({
    logger: { level: config.LOG_LEVEL },
  });

  // Allow empty JSON bodies (same pattern as apps/server).
  app.removeContentTypeParser(['application/json']);
  app.addContentTypeParser('application/json', { parseAs: 'string' }, (_req, body, done) => {
    const str = (body as string) ?? '';
    if (str.trim().length === 0) {
      done(null, {});
      return;
    }
    try {
      done(null, JSON.parse(str));
    } catch (err) {
      // Fastify takes the reply status from `error.statusCode`. Without the tag
      // a malformed request body would surface as a 500 instead of a 400.
      const parseError = err instanceof Error ? err : new Error(String(err));
      done(Object.assign(parseError, { statusCode: 400 }), undefined);
    }
  });

  const sql = getSql(config);
  await applySchema(sql);
  app.log.info('database schema applied');

  // Broker: in-memory pub/sub for session + user channel streaming.
  const broker = createBroker(app.log);

  // agent-status-normalize (#10): the permission hooks carry only taskId +
  // sessionId, but the tasks row holds the (chat_id, agent) pair the status frame
  // is keyed on. Resolve it best-effort so a blocked/working status accompanies
  // every permission_requested/permission_resolved. Returns null when the task
  // lacks a chat_id or agent (sessionless creators) — we simply skip the status.
  const resolveChatAgent = async (
    taskId: string,
  ): Promise<{ chatId: string; agent: string } | null> => {
    const [row] = await sql<{ chat_id: string | null; agent: string | null }[]>`
      SELECT chat_id, agent FROM tasks WHERE id = ${taskId}
    `;
    if (!row?.chat_id || !row.agent) return null;
    return { chatId: row.chat_id, agent: row.agent };
  };

  setPermissionHooks({
    // A permission prompt was raised: mark the running task blocked, tell the
    // session, then publish the matching agent status when it can be resolved.
    onPrompt: async (prompt) => {
      await sql`
        UPDATE tasks SET state = 'blocked' WHERE id = ${prompt.taskId} AND state = 'running'
      `;
      broker.publishFrame(prompt.sessionId, {
        type: 'permission_requested',
        task_id: prompt.taskId,
        session_id: prompt.sessionId,
        kind: prompt.kind,
        tool_title: prompt.toolTitle,
        ...(prompt.input ? { input: prompt.input } : {}),
        options: prompt.options.map((o) => ({ option_id: o.optionId, label: o.label })),
      } as WsFrame);
      // #10: agent is blocked on a human decision.
      // Lookup failures are deliberately swallowed: the status frame is best-effort.
      const ca = await resolveChatAgent(prompt.taskId).catch(() => null);
      if (ca) {
        publishAgentStatus(
          broker.publishFrame,
          prompt.sessionId,
          ca.chatId,
          ca.agent,
          'blocked',
          'permission_request',
        );
      }
    },
    // The prompt was answered: flip the blocked task back to running and
    // publish the mirror-image frames.
    onResolved: async (taskId, sessionId) => {
      await sql`
        UPDATE tasks SET state = 'running' WHERE id = ${taskId} AND state = 'blocked'
      `;
      broker.publishFrame(sessionId, {
        type: 'permission_resolved',
        task_id: taskId,
        session_id: sessionId,
      } as WsFrame);
      // #10: human responded — agent resumes work.
      const ca = await resolveChatAgent(taskId).catch(() => null);
      if (ca) {
        publishAgentStatus(
          broker.publishFrame,
          sessionId,
          ca.chatId,
          ca.agent,
          'working',
          'permission_resolved',
        );
      }
    },
  });

  // --- Tool registry extension ---
  // Append BooCoder write tools (adapted to BooChat's ToolDef interface) to
  // the shared ALL_TOOLS registry. appendMcpTools re-sorts and rebuilds
  // TOOLS_BY_NAME so tool-phase.ts dispatch sees the full set.
  const adaptedWriteTools = WRITE_TOOLS.map((t) => adaptWriteTool(t));
  appendMcpTools(adaptedWriteTools);
  app.log.info(`tool registry: ${ALL_TOOLS.length} tools loaded (${WRITE_TOOLS.length} write tools)`);

  // Inference runner: same engine as BooChat, uses ALL_TOOLS (which includes
  // the appended write tools) for tool dispatch.
  const inference = createInferenceRunner(
    {
      sql,
      config: config as unknown as ServerConfig,
      log: app.log,
      publish: (sessionId, frame) => {
        broker.publishFrame(sessionId, frame as unknown as WsFrame);
      },
      broker,
    },
    (user, frame) => {
      broker.publishUserFrame(user, frame as unknown as WsFrame);
    }
  );

  // Wrap the inference runner to bind the write-tool context around each run.
  // enqueue() starts its async loop synchronously, so wrapping the call in
  // runWithInferenceContext propagates the per-run context (sql, sessionId, the
  // Plan/Ask/Bypass gate) through every awaited tool execution — and concurrent
  // runs (a user message racing a dispatcher-polled native task) each get their
  // own, instead of clobbering a shared global.
  const inferenceApi = {
    enqueue: (
      sessionId: string,
      chatId: string,
      assistantId: string,
      user: string,
      permissionMode?: 'plan' | 'ask' | 'bypass',
    ) => {
      runWithInferenceContext({ sql, sessionId, taskId: null, permissionMode }, () => {
        inference.enqueue(sessionId, chatId, assistantId, user);
      });
    },
    cancel: async (sessionId: string, chatId: string) => {
      // No context to clear — AsyncLocalStorage scopes it to each run's own chain.
      return inference.cancel(sessionId, chatId);
    },
    hasActive: (chatId: string) => inference.hasActive(chatId),
  };

  // Register WebSocket support
  await app.register(fastifyWebsocket);

  // Health endpoint: 200 when the database answers the ping, 503 otherwise.
  app.get('/api/health', async (_req, reply) => {
    const dbOk = await pingDb(sql);
    const status = dbOk ? 200 : 503;
    return reply.status(status).send({
      ok: dbOk,
      db: dbOk,
      tools: ALL_TOOLS.length,
    });
  });

  // Phase 4: probe available agents on startup
  await probeAgents(sql, app.log);

  // Warm provider snapshot in background (ACP cold probes + model merges)
  void getProviderSnapshot(sql, config, homedir(), true)
    .then((entries) => persistProbedModels(sql, entries, app.log))
    .catch((err) => {
      app.log.warn(
        { err: err instanceof Error ? err.message : String(err) },
        'provider-snapshot: warm failed',
      );
    });

  // Orchestrator (Phase 2): the flow-runner reacts to the dispatcher's
  // onTaskTerminal hook to advance flow_runs. Created before the dispatcher so its
  // terminal callback can be wired in.
  const flowRunner = createFlowRunner({ sql, broker, log: app.log, config });

  // Arena SEAM (a): build the local-model set from the live llama-swap model list.
  // Both bare IDs ('qwen3.6-35b') and prefixed IDs ('llama-swap/qwen3.6-35b') are
  // included so opencode-style prefixed contestants and native-style bare contestants
  // both classify correctly as local.
  // A failed fetch still falls back to an empty list, but is logged rather than
  // swallowed so an empty local-model set can be traced to its cause.
  const localModelsList = await fetchLlamaSwapModels(config).catch((err: unknown) => {
    app.log.warn(
      { err: err instanceof Error ? err.message : String(err) },
      'arena: llama-swap model list fetch failed; local-model set is empty',
    );
    return [];
  });
  const localModels = new Set([
    ...localModelsList.map((m) => m.id),
    ...localModelsList.map((m) => `llama-swap/${m.id}`),
  ]);

  // Arena dispatch function — Phase 4 SEAM (b).
  // Coding: insert a tasks row with agent=identity (null for native/boocode);
  // the dispatcher creates a worktree and runs the external agent (or native).
  // Q&A: pre-create a session with agent_id stamped to the persona slug so native
  // inference loads the persona's system_prompt + tools from AGENTS.md;
  // task.session_id is pre-set so runNativeInference reuses the session.
  const dispatchContestant: DispatchContestantFn = async ({
    projectId,
    prompt,
    identity,
    model,
    battleType,
  }) => {
    if (battleType === 'qa') {
      const sessionName = `Arena Q&A [${identity}]: ${prompt.slice(0, 30)}`;
      // NOTE(review): the session and task inserts are two separate statements;
      // a failure of the second leaves the session row behind — confirm that is acceptable.
      const [session] = await sql<{ id: string }[]>`
        INSERT INTO sessions (project_id, name, model, agent_id, status)
        VALUES (${projectId}, ${sessionName}, ${model}, ${identity}, 'open')
        RETURNING id
      `;
      const [task] = await sql<{ id: string }[]>`
        INSERT INTO tasks (project_id, input, model, session_id)
        VALUES (${projectId}, ${prompt}, ${model}, ${session!.id})
        RETURNING id
      `;
      return { taskId: task!.id, sessionId: session!.id };
    }
    // Coding: boocode = native inference (no external agent); any other identity
    // is an external agent name (claude, opencode, qwen, goose) that maps to
    // available_agents and gets its own per-task worktree via runExternalAgent.
    // Session is created lazily by the dispatcher, so sessionId is unknown here.
    const agentName = identity === 'boocode' ? null : identity;
    const [task] = await sql<{ id: string }[]>`
      INSERT INTO tasks (project_id, input, agent, model)
      VALUES (${projectId}, ${prompt}, ${agentName}, ${model})
      RETURNING id
    `;
    return { taskId: task!.id, sessionId: null };
  };

  // Arena analyzer: two-stage digest→judge (v1). Pluggable seam — a v2 Han
  // Orchestrator flow can replace this without schema changes.
  const analyzer = createAnalyzer({
    sql,
    broker,
    log: app.log,
    config,
    localModels,
  });

  // Arena battle-runner: notified on the same onTaskTerminal hook as the flow-runner.
  const battleRunner = createBattleRunner({
    sql,
    broker,
    log: app.log,
    dispatch: dispatchContestant,
    onBattleComplete: (battleId) => {
      void analyzer.analyze(battleId);
    },
    onCrossExamStart: ({ battleId, crossExamId, identity, model }) => {
      void analyzer.crossExamine(battleId, crossExamId, { identity, model });
    },
    localModels,
  });

  // Compose onTaskTerminal: both flow-runner and battle-runner are notified.
  // Each ignores tasks it doesn't own (flow-runner checks flow_steps.task_id;
  // battle-runner checks contestants.task_id).
  const onTaskTerminal = (taskId: string, state: string): void => {
    flowRunner.handleTaskTerminal(taskId, state);
    battleRunner.handleTaskTerminal(taskId, state);
  };

  // Phase 4: dispatcher — polls tasks table and runs inference. The composed
  // onTaskTerminal hook notifies both the flow-runner and the battle-runner when
  // any task settles.
  const dispatcher = createDispatcher({
    sql,
    inference: inferenceApi,
    broker,
    log: app.log,
    config,
    onTaskTerminal,
  });
  dispatcher.start();

  // Re-advance in-flight flow_runs and battles after a coder restart. Both run
  // AFTER dispatcher.start() so re-dispatched 'pending' tasks are picked up.
  void flowRunner.initResume().catch((err) => {
    app.log.error(
      { err: err instanceof Error ? err.message : String(err) },
      'flow-runner: initResume failed',
    );
  });
  void battleRunner.initResume().catch((err) => {
    app.log.error(
      { err: err instanceof Error ? err.message : String(err) },
      'arena: initResume failed',
    );
  });

  // v2.6 Phase 3: configure + start the agent-pool lifecycle sweep (idle-TTL +
  // LRU-cap eviction of warm backends, plus each backend's proactive health probe)
  // and the orphan-worktree reaper. Both run on the same periodic timer.
  agentPool.configure({
    idleTtlMs: config.AGENT_POOL_IDLE_TTL_MS,
    maxLive: config.AGENT_POOL_MAX_LIVE,
    sweepIntervalMs: config.LIFECYCLE_SWEEP_INTERVAL_MS,
    log: app.log,
  });
  agentPool.startReaper(app.log);
  const orphanReaper = createOrphanWorktreeReaper({
    sql,
    log: app.log,
    intervalMs: config.LIFECYCLE_SWEEP_INTERVAL_MS,
    graceMs: config.ORPHAN_WORKTREE_GRACE_MS,
  });
  orphanReaper.start();

  app.addHook('onClose', async () => {
    // stop() first so in-flight dispatcher turns settle, then stop the reapers and
    // drain the pool (kills opencode server + warm ACP children).
    await dispatcher.stop();
    orphanReaper.stop();
    await agentPool.dispose();
  });

  // Register routes
  registerMessageRoutes(app, sql, broker, inferenceApi);
  registerSkillRoutes(app, sql, broker, inferenceApi);
  registerPendingRoutes(app, sql);
  registerCheckpointRoutes(app, sql);
  registerAgentSessionRoutes(app, sql);
  registerTaskRoutes(app, sql, inferenceApi, dispatcher.cancelExternalTask);
  registerInboxRoutes(app, sql);
  registerStatsRoutes(app, sql);
  registerRunsRoutes(app, sql, flowRunner, dispatcher.cancelExternalTask);
  registerArenaRoutes(app, sql, battleRunner, dispatcher.cancelExternalTask, config);
  registerProviderRoutes(app, sql, config);
  registerWorktreeSafetyRoutes(app, sql);
  registerLifecycleRoutes(app, sql);
  registerAnalyticsRoutes(app, sql);
  registerWebSocket(app, sql, broker);

  // Graceful shutdown. The guard makes a repeated signal a no-op while the first
  // shutdown is still in progress, and a failed close is logged and reported
  // through a non-zero exit code instead of surfacing as an unhandled rejection.
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) return;
    shuttingDown = true;
    app.log.info('shutting down');
    try {
      await app.close();
      await closeDb();
    } catch (err) {
      app.log.error(
        { err: err instanceof Error ? err.message : String(err) },
        'shutdown failed',
      );
      process.exit(1);
    }
    process.exit(0);
  };
  process.on('SIGTERM', () => void shutdown());
  process.on('SIGINT', () => void shutdown());

  await app.listen({ port: config.PORT, host: config.HOST });
  app.log.info(`BooCoder listening on ${config.HOST}:${config.PORT}`);
}
|
|
|
|
// Last-resort handler for the entry point: any rejection escaping main() is
// printed to stderr and the process exits with status 1.
const exitOnFatal = (reason: unknown): never => {
  console.error('fatal:', reason);
  process.exit(1);
};

main().catch(exitOnFatal);
|