feat(server): inference state-graph + supervisor, memory tools, MCP client, schema, routes
- Add state-graph.ts: typed state machine for inference lifecycle
- Add supervisor.ts: agent supervisor pattern for multi-agent coordination
- Add export-formatter.ts: structured export formatting
- Add manage_memory.ts: memory CRUD tool for agent persistence
- Add get_wiki_article.ts: codecontext wiki article retrieval
- Extend memory/index.ts: 3-tier memory (context/daily/core)
- Extend MCP client: mcp-config.ts env-var substitution
- Update schema.sql: agent_sessions, tasks, pending_changes extensions
- Update API types: MessageMetadata, ErrorReason, AgentSessionConfig
- Update routes: chats, messages, sessions — column renames and agent_session_id
- Update inference: error handler, payload builder, stream phase, turn orchestrator
This commit is contained in:
@@ -1,18 +1,33 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { z } from 'zod';
|
||||
import crypto from 'node:crypto';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
import type { Broker } from '../services/broker.js';
|
||||
import type { Chat, Message } from '../types/api.js';
|
||||
import { getModelContext } from '../services/model-context.js';
|
||||
import { notifyCoderClose } from '../services/coder-notify.js';
|
||||
import { MESSAGE_COLUMNS } from '../services/message-columns.js';
|
||||
import { formatJson, formatMarkdown } from '../services/export-formatter.js';
|
||||
/**
 * Callbacks the compare route uses to talk to the inference layer.
 *
 * The route module only depends on this shape; the concrete implementation is
 * supplied by whoever calls `registerChatRoutes`.
 */
export interface CompareHandlers {
  /**
   * Invoked by the compare route once per assistant placeholder message,
   * after the placeholder rows have been inserted.
   *
   * @param sessionId          Session that owns the chat.
   * @param chatId             Chat the compare request was posted to.
   * @param assistantMessageId Id of the pre-inserted assistant message.
   * @param modelOverride      Model requested for this particular response.
   * @param compareGroupId     Id shared by every response of one compare request.
   */
  enqueueCompare: (
    sessionId: string,
    chatId: string,
    assistantMessageId: string,
    modelOverride: string,
    compareGroupId: string,
  ) => void;

  /**
   * Resolves to a boolean.
   * NOTE(review): presumably `true` when a running inference was actually
   * cancelled — not called in the visible part of this file; confirm against
   * the implementation.
   */
  cancelInference: (sessionId: string, chatId: string) => Promise<boolean>;

  /**
   * Synchronous check keyed by chat id. The compare route answers 409 when
   * this returns `true`.
   */
  hasActiveInference: (chatId: string) => boolean;
}
|
||||
|
||||
/**
 * Request body schema for creating a chat.
 * `name` may be omitted; when present it must be 1–200 characters long.
 */
const CreateBody = z.object({
  name: z.string().min(1).max(200).optional(),
});
|
||||
|
||||
/**
 * Request body schema for patching a chat.
 *
 * Both fields are optional so a caller can change the name, the model, or
 * both. A field that is absent from the body is left unchanged by the PATCH
 * handler.
 */
const PatchBody = z.object({
  // 1–200 characters when provided.
  name: z.string().min(1).max(200).optional(),
  // Any non-empty model identifier when provided.
  model: z.string().min(1).optional(),
});
|
||||
|
||||
const ForkBody = z.object({
|
||||
@@ -26,10 +41,17 @@ const DiscardStaleBody = z.object({
|
||||
|
||||
// Age threshold, in seconds.
// NOTE(review): presumably the minimum age something must reach before it is
// treated as stale — the code that reads this constant is outside this view; confirm.
const STALE_MIN_AGE_SECONDS = 60;
|
||||
|
||||
/**
 * Request body schema for the compare endpoint.
 *
 * - `message`: the user message sent to every model, 1–64 000 characters.
 * - `models`:  between two and three non-empty model identifiers; one
 *              assistant response is created per entry.
 */
const CompareBody = z.object({
  message: z.string().min(1).max(64_000),
  models: z.array(z.string().min(1)).min(2).max(3),
});
|
||||
|
||||
export function registerChatRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
broker: Broker
|
||||
broker: Broker,
|
||||
config?: Config,
|
||||
compareHandlers?: CompareHandlers,
|
||||
): void {
|
||||
app.get<{ Params: { id: string }; Querystring: { status?: string } }>(
|
||||
'/api/sessions/:id/chats',
|
||||
@@ -122,12 +144,15 @@ export function registerChatRoutes(
|
||||
reply.code(400);
|
||||
return { error: 'invalid body', details: parsed.error.flatten() };
|
||||
}
|
||||
// Partial update: a field that is absent from the validated body keeps its
// stored value. Each optional field is bound as NULL when missing and
// COALESCE falls back to the current column, so the statement text is static
// and needs no fragment-joining helper. `updated_at` is always refreshed.
const { name, model } = parsed.data;
const rows = await sql<Chat[]>`
  UPDATE chats
  SET name = COALESCE(${name ?? null}, name),
      model = COALESCE(${model ?? null}, model),
      updated_at = clock_timestamp()
  WHERE id = ${req.params.id}
  RETURNING id, session_id, name, model, status, created_at, updated_at
`;
|
||||
if (rows.length === 0) {
|
||||
reply.code(404);
|
||||
@@ -448,4 +473,128 @@ export function registerChatRoutes(
|
||||
return rows;
|
||||
}
|
||||
);
|
||||
|
||||
// GET /api/chats/:id/export?format=json|markdown
//
// Exports one chat together with all of its messages.
//   - `format` defaults to 'json'; any value other than 'json' or 'markdown'
//     is rejected with 400.
//   - 404 when no chat row has the given id.
//   - Messages are read from `messages_with_parts`, oldest first, with `id`
//     as the tie-breaker so the order is stable.
// The response body is whatever `formatMarkdown` / `formatJson` return.
app.get<{ Params: { id: string }; Querystring: { format?: string } }>(
  '/api/chats/:id/export',
  async (req, reply) => {
    const format = req.query.format ?? 'json';
    if (format !== 'json' && format !== 'markdown') {
      reply.code(400);
      return { error: 'format must be json or markdown' };
    }

    // Destructure once and guard, instead of asserting chat[0]! at every use.
    const [chat] = await sql<Chat[]>`SELECT * FROM chats WHERE id = ${req.params.id}`;
    if (chat === undefined) {
      reply.code(404);
      return { error: 'chat not found' };
    }

    const messages = await sql<Message[]>`
      SELECT ${sql.unsafe(MESSAGE_COLUMNS)}
      FROM messages_with_parts
      WHERE chat_id = ${req.params.id}
      ORDER BY created_at ASC, id ASC
    `;

    if (format === 'markdown') {
      reply.header('Content-Type', 'text/markdown');
      return formatMarkdown(chat, messages, chat.model);
    }

    reply.header('Content-Type', 'application/json');
    return formatJson(chat, messages, chat.model);
  },
);
|
||||
|
||||
// v2.8-compare: send the same message to N models and stream back parallel
// responses. Creates N assistant messages (one per model) and launches N
// parallel inference runs with model overrides. Each publishes frames
// scoped to the shared compare_group_id so the frontend can group them.
//
// The route is registered only when both `config` and `compareHandlers` were
// passed in. Responses:
//   400 – body fails CompareBody validation
//   409 – the chat already has an active inference
//   404 – no chat with this id in status 'open'
//   202 – rows inserted and runs enqueued; body carries compare_group_id,
//         user_message_id and one { model, assistant_message_id } per model
if (config && compareHandlers) {
  app.post<{ Params: { id: string } }>(
    '/api/chats/:id/compare',
    async (req, reply) => {
      const parsed = CompareBody.safeParse(req.body);
      if (!parsed.success) {
        reply.code(400);
        return { error: 'invalid body', details: parsed.error.flatten() };
      }

      const { message, models } = parsed.data;

      // Check for active inference first.
      if (compareHandlers.hasActiveInference(req.params.id)) {
        reply.code(409);
        return { error: 'chat is currently streaming; stop it first' };
      }

      const chatRows = await sql<Chat[]>`
        SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
      `;
      const chat = chatRows[0];
      if (chat === undefined) {
        reply.code(404);
        return { error: 'chat not found' };
      }
      const sessionId = chat.session_id;
      const compareGroupId = crypto.randomUUID();

      // Insert user message + N assistant messages in a single transaction.
      const result = await sql.begin(async (tx) => {
        const [userMsg] = await tx<{ id: string }[]>`
          INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
          VALUES (${sessionId}, ${chat.id}, 'user', ${message}, 'complete', clock_timestamp(), NULL)
          RETURNING id
        `;
        // Throwing inside the callback aborts the transaction.
        if (userMsg === undefined) {
          throw new Error('user message insert returned no row');
        }

        const responses: Array<{ model: string; assistant_message_id: string }> = [];
        for (const model of models) {
          // Placeholder row: empty content in 'streaming' status, tagged with
          // the compare group and the model it is meant for.
          const [asst] = await tx<{ id: string }[]>`
            INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
            VALUES (
              ${sessionId}, ${chat.id}, 'assistant', '', 'streaming', clock_timestamp(),
              ${tx.json({ compare_group_id: compareGroupId, model } as never)}
            )
            RETURNING id
          `;
          if (asst === undefined) {
            throw new Error('assistant message insert returned no row');
          }
          responses.push({ model, assistant_message_id: asst.id });
        }

        await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`;
        await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`;

        return { user_message_id: userMsg.id, responses };
      });

      // Publish user message frames.
      broker.publishFrame(sessionId, {
        type: 'message_started',
        message_id: result.user_message_id,
        chat_id: chat.id,
        role: 'user',
      });
      broker.publishFrame(sessionId, {
        type: 'delta',
        message_id: result.user_message_id,
        chat_id: chat.id,
        content: message,
      });
      broker.publishFrame(sessionId, {
        type: 'message_complete',
        message_id: result.user_message_id,
        chat_id: chat.id,
      });

      // Enqueue N parallel inference runs with model overrides.
      for (const resp of result.responses) {
        compareHandlers.enqueueCompare(
          sessionId, chat.id, resp.assistant_message_id, resp.model, compareGroupId,
        );
      }

      reply.code(202);
      return { compare_group_id: compareGroupId, ...result };
    },
  );
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user