feat(server): inference state-graph + supervisor, memory tools, MCP client, schema, routes

- Add state-graph.ts: typed state machine for inference lifecycle
- Add supervisor.ts: agent supervisor pattern for multi-agent coordination
- Add export-formatter.ts: structured export formatting
- Add manage_memory.ts: memory CRUD tool for agent persistence
- Add get_wiki_article.ts: codecontext wiki article retrieval
- Extend memory/index.ts: 3-tier memory (context/daily/core)
- Extend MCP client: mcp-config.ts env-var substitution
- Update schema.sql: agent_sessions, tasks, pending_changes extensions
- Update API types: MessageMetadata, ErrorReason, AgentSessionConfig
- Update routes: chats, messages, sessions — column renames and agent_session_id
- Update inference: error handler, payload builder, stream phase, turn orchestrator
This commit is contained in:
2026-06-08 03:48:47 +00:00
parent 4a6623112c
commit d360051329
19 changed files with 1546 additions and 230 deletions

View File

@@ -74,6 +74,7 @@ export async function handleAbortOrError(
type: 'message_complete',
message_id: assistantMessageId,
chat_id: chatId,
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
} else {
@@ -90,6 +91,7 @@ export async function handleAbortOrError(
chat_id: chatId,
error: errMsg,
reason: 'llm_provider_error',
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
}
@@ -125,6 +127,7 @@ export async function finalizeStreamedRow(
cacheTokens?: number | null;
reasoningTokens?: number | null;
beforeComplete?: () => Promise<void>;
compareGroupId?: string;
},
): Promise<void> {
// v1.11.3: see executeToolPhase for the rationale.
@@ -158,6 +161,7 @@ export async function finalizeStreamedRow(
started_at: opts.startedAt,
finished_at: updated?.finished_at ?? null,
model: opts.model,
...(opts.compareGroupId ? { compare_group_id: opts.compareGroupId } : {}),
});
}
@@ -182,6 +186,7 @@ export async function finalizeEmpty(
type: 'message_complete',
message_id: assistantMessageId,
chat_id: chatId,
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
}
@@ -281,6 +286,7 @@ export async function finalizeCompletion(
started_at: startedAt,
finished_at: updated?.finished_at ?? null,
model: session.model,
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
ctx.log.info(
{

View File

@@ -8,6 +8,7 @@ export {
createInferenceRunner,
MAX_STEPS,
runInference,
runInferenceWithModel,
} from './turn.js';
// P5: the shared pipeline types moved from turn.ts to types.ts (breaking the
// hub-and-leaf near-cycle). Re-exported here so the public surface is unchanged.
@@ -21,3 +22,4 @@ export type {
export type { ToolPhaseResult } from './tool-phase.js';
export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js';
export { buildMessagesPayload } from './payload.js';
export { runGraph, type GraphNodeType, type GraphState, type GraphResult } from './state-graph.js';

View File

@@ -194,6 +194,14 @@ export async function buildMessagesPayload(
out.push(msg);
continue;
}
// TODO(vDeepSeek): when m has image attachments, use a content array
// with text + image parts (see multi-modal.ts:imageAttachmentsToParts).
// The AI SDK ModelMessage content shape supports:
// content: [
// { type: 'text', text: '...' },
// { type: 'image', image: 'data:image/png;base64,...' }
// ]
// The @ai-sdk/deepseek provider handles the image parts natively.
out.push({ role: 'user', content: m.content });
}
return out;
@@ -206,7 +214,7 @@ export async function loadContext(
): Promise<{ session: Session; project: Project; history: Message[] } | null> {
const sessionRows = await sql<Session[]>`
SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at,
agent_id, web_search_enabled, allowed_read_paths
agent_id, web_search_enabled, allowed_read_paths, state_graph_enabled
FROM sessions WHERE id = ${sessionId}
`;
if (sessionRows.length === 0) return null;

View File

@@ -0,0 +1,531 @@
// P5: Optional declarative state graph engine for the inference turn loop.
//
// Replaces the procedural `while (stepNumber < effectiveCap)` in turn.ts
// with a node-based execution model. Default OFF via
// session.state_graph_enabled — zero behavior change when disabled.
//
// Nodes wrap EXISTING infrastructure (no new I/O patterns):
// PLAN → top-of-loop gate, compaction, loadContext, buildMessagesPayload,
// executeStreamPhase
// CALL_TOOL → executeToolPhase
// OBSERVE → process tool results, update loop locals
// REFLECT → decidePostToolAction, sentinel insertion, mistake tracker
// SYNTHESIZE → terminal (graph loop exits)
import type { Agent, Project, Session, ToolCall } from '../../types/api.js';
import { resolveProjectRoot } from '../path_guard.js';
import { rewriteSearchQuery } from '../task-search-rewrite.js';
import * as compaction from '../compaction.js';
import { decideStep, decidePostToolAction } from './step-decision.js';
import {
recordStep,
MISTAKE_RECOVERY_NOTE,
type MistakeState,
} from './mistake-tracker.js';
import {
buildMessagesPayload,
loadContext,
} from './payload.js';
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
import {
finalizeCompletion,
finalizeEmpty,
handleAbortOrError,
} from './error-handler.js';
import {
executeStreamPhase,
} from './stream-phase.js';
import { executeToolPhase, type ToolPhaseResult } from './tool-phase.js';
import type {
InferenceContext,
StreamPhaseState,
StreamResult,
TurnArgs,
} from './types.js';
import {
runCapHitSummary,
runDoomLoopSummary,
insertMistakeRecoverySentinel,
} from './sentinel-summaries.js';
import { execFile } from 'node:child_process';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
const BUILD_TIMEOUT_MS = 60_000;
const BUILD_OUTPUT_CAP = 8_000;
async function detectAndRunBuild(
ctx: InferenceContext,
projectRoot: string,
sessionId: string,
chatId: string,
model: string,
existingNote: string | undefined,
): Promise<string | undefined> {
if (!model.startsWith('deepseek-')) return undefined;
const pkgPath = join(projectRoot, 'package.json');
if (!existsSync(pkgPath)) return undefined;
let buildCmd: string | null = null;
try {
const pkg = JSON.parse(readFileSync(pkgPath, 'utf8')) as { scripts?: Record<string, string> };
if (pkg.scripts?.build) buildCmd = 'build';
else if (pkg.scripts?.compile) buildCmd = 'compile';
else if (pkg.scripts?.typecheck) buildCmd = 'typecheck';
} catch {
return undefined;
}
if (!buildCmd) return undefined;
const hasPnpm = existsSync(join(projectRoot, 'pnpm-lock.yaml'));
const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
try {
const out = await new Promise<string>((resolve, reject) => {
execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
(err, stdout, stderr) => {
if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
resolve('');
return;
}
const merged = (stdout + '\n' + stderr).trim();
resolve(merged.slice(0, BUILD_OUTPUT_CAP));
},
);
});
if (!out) return undefined;
ctx.log.info({ sessionId, chatId, buildCmd, outputLen: out.length }, 'auto-fix: build failed');
const combined = existingNote
? existingNote + '\n\n--- Build error ---\n' + out.slice(0, BUILD_OUTPUT_CAP - existingNote.length)
: '--- Build error ---\n' + out.slice(0, BUILD_OUTPUT_CAP);
return combined;
} catch {
return undefined;
}
}
// -- Types ----------------------------------------------------------------
export type GraphNodeType = 'PLAN' | 'CALL_TOOL' | 'OBSERVE' | 'REFLECT' | 'SYNTHESIZE';
export interface GraphState {
stepNumber: number;
toolsUsed: number;
recentToolCalls: ToolCall[];
assistantMessageId: string;
mistakeTracker: MistakeState;
pendingRecoveryNote?: string;
effectiveCap: number;
budget: number;
projectRoot: string;
iterSession?: Session;
iterProject?: Project;
streamResult?: StreamResult;
startedAt?: string | null;
toolPhaseResult?: ToolPhaseResult;
shouldStop: boolean;
}
interface GraphNode {
type: GraphNodeType;
edges: Array<{ to: GraphNodeType; condition: (state: GraphState) => boolean }>;
execute: (
ctx: InferenceContext,
args: TurnArgs,
state: GraphState,
agent: Agent | null,
) => Promise<void>;
}
export interface GraphResult {
stepNumber: number;
assistantMessageId: string;
toolsUsed: number;
recentToolCalls: ToolCall[];
mistakeTracker: MistakeState;
}
// -- Default graph --------------------------------------------------------
export function createDefaultGraph(): GraphNode[] {
return [
{
type: 'PLAN',
edges: [
{ to: 'CALL_TOOL', condition: (s) => !!s.streamResult && s.streamResult.toolCalls.length > 0 },
{ to: 'SYNTHESIZE', condition: () => true },
],
execute: planNode,
},
{
type: 'CALL_TOOL',
edges: [
{ to: 'OBSERVE', condition: () => true },
],
execute: callToolNode,
},
{
type: 'OBSERVE',
edges: [
{ to: 'REFLECT', condition: (s) => s.toolPhaseResult?.action === 'continue' },
{ to: 'SYNTHESIZE', condition: () => true },
],
execute: observeNode,
},
{
type: 'REFLECT',
edges: [
{ to: 'PLAN', condition: (s) => s.stepNumber < s.effectiveCap },
{ to: 'SYNTHESIZE', condition: () => true },
],
execute: reflectNode,
},
{
type: 'SYNTHESIZE',
edges: [],
execute: async () => {},
},
];
}
// -- Graph runner ---------------------------------------------------------
export async function runGraph(
ctx: InferenceContext,
args: TurnArgs,
extra: { effectiveCap: number; budget: number; agent: Agent | null; projectRoot: string },
): Promise<GraphResult> {
const { effectiveCap, budget, agent } = extra;
const state: GraphState = {
stepNumber: 0,
toolsUsed: args.toolsUsed,
recentToolCalls: args.recentToolCalls,
assistantMessageId: args.assistantMessageId,
mistakeTracker: args.mistakeTracker,
pendingRecoveryNote: args.pendingRecoveryNote,
effectiveCap,
budget,
projectRoot: extra.projectRoot,
shouldStop: false,
};
const graph = createDefaultGraph();
let currentNode: GraphNodeType = 'PLAN';
while (currentNode !== 'SYNTHESIZE' && !state.shouldStop) {
const node = graph.find((n) => n.type === currentNode)!;
await node.execute(ctx, args, state, agent);
if (state.shouldStop) break;
const nextEdge = node.edges.find((e) => e.condition(state));
if (!nextEdge) break;
currentNode = nextEdge.to;
}
return {
stepNumber: state.stepNumber,
assistantMessageId: state.assistantMessageId,
toolsUsed: state.toolsUsed,
recentToolCalls: state.recentToolCalls,
mistakeTracker: state.mistakeTracker,
};
}
// -- PLAN node ------------------------------------------------------------
// Top-of-loop gate → compaction → loadContext → DCP → buildPayload → stream
async function planNode(
ctx: InferenceContext,
args: TurnArgs,
state: GraphState,
agent: Agent | null,
): Promise<void> {
const { sessionId, chatId, signal } = args;
// 1. Top-of-loop gate: doom-loop, then budget (pure decisions)
const decision = decideStep({
recentToolCalls: state.recentToolCalls,
toolsUsed: state.toolsUsed,
budget: state.budget,
});
if (decision.kind === 'doom') {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const dlSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
const iterArgs: TurnArgs = {
sessionId, chatId, assistantMessageId: state.assistantMessageId,
toolsUsed: state.toolsUsed, recentToolCalls: state.recentToolCalls,
mistakeTracker: state.mistakeTracker, signal,
};
await runDoomLoopSummary(ctx, iterArgs, dlSession, loaded.project, loaded.history, agent, decision.loop);
}
state.shouldStop = true;
return;
}
if (decision.kind === 'budget') {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const bhSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
const iterArgs: TurnArgs = {
sessionId, chatId, assistantMessageId: state.assistantMessageId,
toolsUsed: state.toolsUsed, recentToolCalls: state.recentToolCalls,
mistakeTracker: state.mistakeTracker, signal,
};
await runCapHitSummary(ctx, iterArgs, bhSession, loaded.project, loaded.history, agent, state.budget);
}
state.shouldStop = true;
return;
}
// decision.kind === 'stream' → proceed.
// 2. Compaction check
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
SELECT needs_compaction FROM chats WHERE id = ${chatId}
`;
if (chatFlag[0]?.needs_compaction) {
try {
await compaction.process({
sql: ctx.sql, config: ctx.config, log: ctx.log,
broker: ctx.broker, chatId, hooks: ctx.hooks,
});
} catch (err) {
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
}
}
// 3. Load context (must re-load each iteration — new messages)
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (!loaded) {
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
state.shouldStop = true;
return;
}
let { session: iterSession, project: iterProject, history } = loaded;
if (args.modelOverride) {
iterSession = { ...iterSession, model: args.modelOverride };
}
state.iterSession = iterSession;
state.iterProject = iterProject;
const projectRoot = await resolveProjectRoot(iterProject.path);
state.projectRoot = projectRoot;
// 4. DCP transform
try {
const dcpMsgs = toDcpMessages(history);
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
if (stats.removedCount > 0) {
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
history = fromDcpMessages(pruned) as typeof history;
}
} catch (err) {
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
}
// 5. Log step boundary
ctx.log.info(
{ sessionId, chatId, step: state.stepNumber, assistantMessageId: state.assistantMessageId },
'step_start',
);
// 6. Build messages + stream phase
const messages = await buildMessagesPayload(iterSession, iterProject, history, agent, ctx.log);
const webToolsEnabled =
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
if (state.stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
if (lastUserMsg?.content) {
const hint = await rewriteSearchQuery(lastUserMsg.content);
if (hint && messages[0]?.role === 'system' && messages[0].content) {
messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
}
}
}
if (state.pendingRecoveryNote) {
messages.push({ role: 'system', content: state.pendingRecoveryNote });
state.pendingRecoveryNote = undefined;
}
// 7. Stream phase
const iterArgs: TurnArgs = {
sessionId, chatId, assistantMessageId: state.assistantMessageId,
toolsUsed: state.toolsUsed, recentToolCalls: state.recentToolCalls,
mistakeTracker: state.mistakeTracker, signal,
};
const streamState: StreamPhaseState = { accumulated: '', startedAt: null };
try {
const result = await executeStreamPhase(ctx, iterArgs, iterSession, messages, streamState, agent, webToolsEnabled);
state.streamResult = result;
state.startedAt = streamState.startedAt;
// Non-tool finish: Stop hook + finalize here (edge from PLAN → SYNTHESIZE
// will break the graph loop after this node returns).
if (result.toolCalls.length === 0) {
if (ctx.hooks) {
ctx.hooks.run('Stop', {
event: 'Stop',
session_id: sessionId,
chat_id: chatId,
last_assistant_text: result.content.slice(0, 500),
turn: state.stepNumber,
}).catch(() => {});
}
await finalizeCompletion(ctx, iterArgs, result, streamState.startedAt, iterSession);
}
} catch (err) {
await handleAbortOrError(ctx, iterArgs, streamState.accumulated, err);
state.shouldStop = true;
}
}
// -- CALL_TOOL node -------------------------------------------------------
// Executes the tool phase and stores the result for OBSERVE.
async function callToolNode(
ctx: InferenceContext,
args: TurnArgs,
state: GraphState,
agent: Agent | null,
): Promise<void> {
const { sessionId, chatId } = args;
const result = state.streamResult;
if (!result) {
ctx.log.warn({ sessionId }, 'state-graph: CALL_TOOL without stream result');
state.shouldStop = true;
return;
}
const session = state.iterSession;
if (!session) {
ctx.log.warn({ sessionId }, 'state-graph: CALL_TOOL without iterSession');
state.shouldStop = true;
return;
}
try {
state.toolPhaseResult = await executeToolPhase(
ctx, args, result, state.startedAt ?? null,
session, state.projectRoot, agent, state.stepNumber,
);
} catch (err) {
ctx.log.error({ err, sessionId, chatId, step: state.stepNumber }, 'tool phase threw unexpectedly');
state.shouldStop = true;
}
}
// -- OBSERVE node ---------------------------------------------------------
// Processes tool results: updates loop locals, mistake tracking, build errors.
async function observeNode(
ctx: InferenceContext,
args: TurnArgs,
state: GraphState,
_agent: Agent | null,
): Promise<void> {
const { sessionId, chatId } = args;
const tpr = state.toolPhaseResult;
if (!tpr) {
state.shouldStop = true;
return;
}
// Update loop locals (mirrors the existing while-loop post-tool logic)
state.toolsUsed += tpr.toolCallCount;
state.recentToolCalls = [...state.recentToolCalls, ...tpr.toolCalls];
state.stepNumber++;
// Fold tool outcomes into the mistake tracker
for (const o of tpr.outcomes) {
recordStep(state.mistakeTracker, o);
}
// Auto-fix: after write tools, attempt build and inject errors.
const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
const hasWriteTools = tpr.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
if (hasWriteTools && state.iterSession) {
detectAndRunBuild(ctx, state.projectRoot, sessionId, chatId, state.iterSession.model, state.pendingRecoveryNote)
.then((buildError) => {
if (buildError) state.pendingRecoveryNote = buildError;
})
.catch(() => {});
}
}
// -- REFLECT node ---------------------------------------------------------
// Post-tool decision: decidePostToolAction, nudge/escalate/continue handling.
async function reflectNode(
ctx: InferenceContext,
args: TurnArgs,
state: GraphState,
_agent: Agent | null,
): Promise<void> {
const { sessionId, chatId, signal } = args;
const tpr = state.toolPhaseResult;
if (!tpr) {
state.shouldStop = true;
return;
}
const post = decidePostToolAction(tpr.action, state.mistakeTracker);
if (post === 'stop') {
state.shouldStop = true;
return;
}
if (post === 'nudge') {
state.pendingRecoveryNote = MISTAKE_RECOVERY_NOTE;
const failureKinds = [...state.mistakeTracker.run];
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: false,
canContinue: true,
});
state.mistakeTracker.nudges += 1;
state.mistakeTracker.run = [];
ctx.log.info(
{ sessionId, chatId, step: state.stepNumber, nudges: state.mistakeTracker.nudges, failureKinds },
'mistake_recovery nudge',
);
// Continue to next PLAN node — edges check step < cap.
if (state.assistantMessageId !== tpr.nextAssistantId && tpr.nextAssistantId) {
state.assistantMessageId = tpr.nextAssistantId;
}
return;
}
if (post === 'escalate') {
const failureKinds = [...state.mistakeTracker.run];
if (tpr.nextAssistantId) {
state.assistantMessageId = tpr.nextAssistantId;
}
const escalateArgs: TurnArgs = {
sessionId, chatId, assistantMessageId: state.assistantMessageId,
toolsUsed: state.toolsUsed, recentToolCalls: state.recentToolCalls,
mistakeTracker: state.mistakeTracker, signal,
};
await finalizeEmpty(ctx, escalateArgs);
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: true,
canContinue: true,
});
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
ctx.log.info(
{ sessionId, chatId, step: state.stepNumber, failureKinds },
'mistake_recovery escalate — stopping turn',
);
state.shouldStop = true;
return;
}
// 'continue' — advance to next assistant message.
if (tpr.nextAssistantId) {
state.assistantMessageId = tpr.nextAssistantId;
}
}

View File

@@ -56,6 +56,7 @@ export async function executeStreamPhase(
message_id: assistantMessageId,
chat_id: chatId,
role: 'assistant',
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
const flusher = createContentFlusher(ctx.sql, assistantMessageId, () => state.accumulated);
@@ -119,6 +120,7 @@ export async function executeStreamPhase(
message_id: assistantMessageId,
chat_id: chatId,
content: delta,
...(args.compareGroupId ? { compare_group_id: args.compareGroupId } : {}),
});
ctx.log.debug({ sessionId, delta }, 'inference delta');
flusher.scheduleFlush();

View File

@@ -0,0 +1,75 @@
// Supervisor agent: routes user requests to the best agent via a cheap LLM
// classification call. Activated when session.agent_id === 'supervisor'.
import type { Agent } from '../../types/api.js';
import { taskModelCompletion } from '../task-model.js';
export interface SupervisorRoute {
agent_id: string;
confidence: number;
reasoning: string;
}
const SUPERVISOR_SYSTEM_PROMPT = `You are a router. Given the user's request and the available agents, choose the best agent to handle the request.
Rules:
- Match the request to the agent whose description and toolset best fits the task.
- For code review / bug finding requests → code-reviewer
- For debugging / diagnosing failures → debugger
- For refactoring / simplifying code → refactorer
- For architecture / design / planning → architect or planner
- For security audits → security-auditor
- For building prompts for other agents → prompt-builder
- For exploring / understanding unfamiliar code → recon
- For implementing / writing code changes → builder
- Respond with ONLY the agent id (e.g. "builder") or "none" if no agent fits.
- Do not include any other text, punctuation, or explanation.`;
const MAX_ROUTING_TOKENS = 30;
/**
* Given the user's latest message and available agents, classifies which agent
* should handle this turn. Returns null to fall through to default (no agent).
*/
export async function resolveSupervisorTurn(
latestUserMessage: string,
agents: Agent[],
fallbackModel?: string,
): Promise<SupervisorRoute | null> {
// Build agent listing — skip the supervisor itself to avoid self-routing.
const agentList = agents
.filter((a) => a.id !== 'supervisor')
.map((a) => `- ${a.id}: ${a.description} (${a.tools.length} tools)`)
.join('\n');
if (!agentList) {
return null;
}
const userPrompt = `Available agents:\n${agentList}\n\nUser request: ${latestUserMessage.slice(0, 2000)}`;
const response = await taskModelCompletion({
system: SUPERVISOR_SYSTEM_PROMPT,
user: userPrompt,
maxTokens: MAX_ROUTING_TOKENS,
temperature: 0.1,
fallbackModel,
});
const agentId = response.trim().toLowerCase();
if (!agentId || agentId === 'none') {
return null;
}
// Map back to a real agent to validate the id.
const matched = agents.find((a) => a.id === agentId);
if (!matched) {
return null;
}
return {
agent_id: matched.id,
confidence: 1,
reasoning: `supervisor routed to "${matched.name}" based on request classification`,
};
}

View File

@@ -8,7 +8,7 @@ import type {
import { resolveProjectRoot } from '../path_guard.js';
import { maybeAutoNameChat } from '../auto_name.js';
import { rewriteSearchQuery } from '../task-search-rewrite.js';
import { getAgentById } from '../agents.js';
import { getAgentById, getAgentsForProject } from '../agents.js';
import * as compaction from '../compaction.js';
import { resolveTurnConfig } from './turn-config.js';
import { decideStep, decidePostToolAction } from './step-decision.js';
@@ -49,6 +49,8 @@ import {
runStepCapSummary,
insertMistakeRecoverySentinel,
} from './sentinel-summaries.js';
import { resolveSupervisorTurn } from './supervisor.js';
import { runGraph } from './state-graph.js';
// vWhale: auto-fix — detect build command from package.json, run it, return
// error text for injection into next iteration. Best-effort, never throws.
@@ -149,10 +151,39 @@ export async function runAssistantTurn(
ctx.log.warn({ sessionId }, 'inference: session or project missing');
return;
}
const { session, project } = initialLoaded;
const agent = session.agent_id
let { session, project, history: initialHistory } = initialLoaded;
if (args.modelOverride) {
session = { ...session, model: args.modelOverride };
}
let agent = session.agent_id
? await getAgentById(project.path, session.agent_id)
: null;
// vSupervisor: if the session is set to supervisor mode, resolve the real
// agent via a cheap classification call. Falls through to default (no agent)
// if routing returns null.
if (agent?.id === 'supervisor') {
const { agents: availableAgents } = await getAgentsForProject(project.path);
const latestUser = [...initialHistory].reverse().find((m) => m.role === 'user');
const userMessage = latestUser?.content ?? '';
if (userMessage) {
const route = await resolveSupervisorTurn(userMessage, availableAgents, session.model ?? undefined);
if (route) {
ctx.log.info(
{ sessionId, chatId, resolvedAgent: route.agent_id, reasoning: route.reasoning },
'supervisor: routed turn',
);
agent = await getAgentById(project.path, route.agent_id);
} else {
ctx.log.info({ sessionId, chatId }, 'supervisor: no agent matched, falling through to default');
agent = null;
}
} else {
ctx.log.info({ sessionId, chatId }, 'supervisor: no user message found, falling through to default');
agent = null;
}
}
// P5: pure per-turn config (budget + cap math + text-only flag).
const { effectiveCap, budget, isTextOnly } = resolveTurnConfig(agent);
@@ -162,7 +193,8 @@ export async function runAssistantTurn(
if (isTextOnly) {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
await runTextOnlyTurn(ctx, args, loaded.session, loaded.project, loaded.history, agent);
const txtSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
await runTextOnlyTurn(ctx, args, txtSession, loaded.project, loaded.history, agent);
}
return;
}
@@ -178,228 +210,244 @@ export async function runAssistantTurn(
const mistakeTracker = args.mistakeTracker;
let pendingRecoveryNote: string | undefined = args.pendingRecoveryNote;
while (stepNumber < effectiveCap) {
// ---- top-of-loop gate: doom-loop, then budget (pure decision) ----
const decision = decideStep({ recentToolCalls, toolsUsed, budget });
if (decision.kind === 'doom') {
// Need fresh history for the summary.
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await runDoomLoopSummary(ctx, iterArgs, loaded.session, loaded.project, loaded.history, agent, decision.loop);
if (session.state_graph_enabled) {
// ---- optional state graph path ----
const gProjectRoot = await resolveProjectRoot(project.path);
const graphResult = await runGraph(ctx, args, { effectiveCap, budget, agent, projectRoot: gProjectRoot });
stepNumber = graphResult.stepNumber;
toolsUsed = graphResult.toolsUsed;
recentToolCalls = graphResult.recentToolCalls;
assistantMessageId = graphResult.assistantMessageId;
// mistakeTracker is the same object reference (mutated in place by the graph).
} else {
while (stepNumber < effectiveCap) {
// ---- top-of-loop gate: doom-loop, then budget (pure decision) ----
const decision = decideStep({ recentToolCalls, toolsUsed, budget });
if (decision.kind === 'doom') {
// Need fresh history for the summary.
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const dlSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await runDoomLoopSummary(ctx, iterArgs, dlSession, loaded.project, loaded.history, agent, decision.loop);
}
break;
}
break;
}
if (decision.kind === 'budget') {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await runCapHitSummary(ctx, iterArgs, loaded.session, loaded.project, loaded.history, agent, budget);
if (decision.kind === 'budget') {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const bhSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await runCapHitSummary(ctx, iterArgs, bhSession, loaded.project, loaded.history, agent, budget);
}
break;
}
break;
}
// decision.kind === 'stream' → proceed with compaction + stream + tools.
// decision.kind === 'stream' → proceed with compaction + stream + tools.
// ---- compaction check ----
// v1.11: if the prior turn flagged this chat for compaction, run it
// before loadContext so we read post-compaction history. Swallow
// failures and proceed with un-compacted history.
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
SELECT needs_compaction FROM chats WHERE id = ${chatId}
`;
if (chatFlag[0]?.needs_compaction) {
try {
await compaction.process({
sql: ctx.sql,
config: ctx.config,
log: ctx.log,
broker: ctx.broker,
chatId,
hooks: ctx.hooks,
});
} catch (err) {
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
}
}
// ---- load context (must re-load each iteration — new messages since last step) ----
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (!loaded) {
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
break;
}
let { session: iterSession, project: iterProject, history } = loaded;
const projectRoot = await resolveProjectRoot(iterProject.path);
try {
const dcpMsgs = toDcpMessages(history);
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
if (stats.removedCount > 0) {
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
history = fromDcpMessages(pruned) as typeof history;
}
} catch (err) {
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
}
// v1.14.0: log step boundary for instrumentation. step_start parts are in
// the schema CHECK but not emitted here — writing to the assistant message
// before the stream phase creates a sequence-0 collision with
// partsFromAssistantMessage. A WS frame or structured log is sufficient
// since the frontend doesn't render step boundaries in v1.14.
ctx.log.info({ sessionId, chatId, step: stepNumber, assistantMessageId }, 'step_start');
// ---- build messages + stream phase ----
const messages = await buildMessagesPayload(iterSession, iterProject, history, agent, ctx.log);
const webToolsEnabled =
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
if (lastUserMsg?.content) {
const hint = await rewriteSearchQuery(lastUserMsg.content);
if (hint && messages[0]?.role === 'system' && messages[0].content) {
messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
// ---- compaction check ----
// v1.11: if the prior turn flagged this chat for compaction, run it
// before loadContext so we read post-compaction history. Swallow
// failures and proceed with un-compacted history.
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
SELECT needs_compaction FROM chats WHERE id = ${chatId}
`;
if (chatFlag[0]?.needs_compaction) {
try {
await compaction.process({
sql: ctx.sql,
config: ctx.config,
log: ctx.log,
broker: ctx.broker,
chatId,
hooks: ctx.hooks,
});
} catch (err) {
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
}
}
}
// v#12 MistakeTracker: if the prior iteration's nudge fired, append the
// transient recovery note to THIS payload (consumed exactly once, then
// cleared). Never persisted — same lifecycle as the cap-hit/doom-loop
// summary notes, which live only inside the in-memory messages array.
if (pendingRecoveryNote) {
messages.push({ role: 'system', content: pendingRecoveryNote });
pendingRecoveryNote = undefined;
}
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
const state: StreamPhaseState = { accumulated: '', startedAt: null };
let result: StreamResult;
try {
result = await executeStreamPhase(ctx, iterArgs, iterSession, messages, state, agent, webToolsEnabled);
} catch (err) {
await handleAbortOrError(ctx, iterArgs, state.accumulated, err);
break;
}
// ---- non-tool finish → finalize and exit ----
if (result.toolCalls.length === 0) {
// vWhale: Stop hook (best-effort, non-blocking).
if (ctx.hooks) {
ctx.hooks.run('Stop', {
event: 'Stop',
session_id: sessionId,
chat_id: chatId,
last_assistant_text: result.content.slice(0, 500),
turn: stepNumber,
}).catch(() => {});
// ---- load context (must re-load each iteration — new messages since last step) ----
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (!loaded) {
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
break;
}
await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
break;
}
let { session: iterSession, project: iterProject, history } = loaded;
if (args.modelOverride) {
iterSession = { ...iterSession, model: args.modelOverride };
}
const projectRoot = await resolveProjectRoot(iterProject.path);
// ---- steps: 0 edge case ----
// effectiveCap check above guarantees we're inside the loop, but this
// guard handles the theoretical case where the model emits tool calls
// on step 0 when effectiveCap would have been 0 (impossible since the
// while condition prevents entry, but kept for safety). If effectiveCap
// is 1 and we're on step 0, tool calls ARE executed — steps counts
// iterations, not post-first-stream.
try {
const dcpMsgs = toDcpMessages(history);
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
if (stats.removedCount > 0) {
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
history = fromDcpMessages(pruned) as typeof history;
}
} catch (err) {
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
}
// ---- tool phase ----
let toolPhaseResult: ToolPhaseResult;
try {
toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
} catch (err) {
// Tool phase errors are unexpected (individual tool failures are
// caught inside executeToolPhase). Log and break.
ctx.log.error({ err, sessionId, chatId, step: stepNumber }, 'tool phase threw unexpectedly');
break;
}
// v1.14.0: log step boundary for instrumentation. step_start parts are in
// the schema CHECK but not emitted here — writing to the assistant message
// before the stream phase creates a sequence-0 collision with
// partsFromAssistantMessage. A WS frame or structured log is sufficient
// since the frontend doesn't render step boundaries in v1.14.
ctx.log.info({ sessionId, chatId, step: stepNumber, assistantMessageId }, 'step_start');
// ---- update loop locals ----
toolsUsed += toolPhaseResult.toolCallCount;
recentToolCalls = [...recentToolCalls, ...toolPhaseResult.toolCalls];
stepNumber++;
// ---- build messages + stream phase ----
const messages = await buildMessagesPayload(iterSession, iterProject, history, agent, ctx.log);
const webToolsEnabled =
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
// v#12 MistakeTracker: fold this iteration's tool outcomes into the
// tracker, in order. recordStep mutates `mistakeTracker` in place (it is
// the same object referenced by args). A 'success' clears the streak.
for (const o of toolPhaseResult.outcomes) {
recordStep(mistakeTracker, o);
}
if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
if (lastUserMsg?.content) {
const hint = await rewriteSearchQuery(lastUserMsg.content);
if (hint && messages[0]?.role === 'system' && messages[0].content) {
messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
}
}
}
// vWhale: auto-fix — after write tools, attempt build and inject errors.
const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
const hasWriteTools = toolPhaseResult.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
if (hasWriteTools) {
detectAndRunBuild(ctx, projectRoot, sessionId, chatId, iterSession.model, pendingRecoveryNote)
.then((buildError) => {
if (buildError) pendingRecoveryNote = buildError;
})
.catch(() => {});
}
// v#12 MistakeTracker: if the prior iteration's nudge fired, append the
// transient recovery note to THIS payload (consumed exactly once, then
// cleared). Never persisted — same lifecycle as the cap-hit/doom-loop
// summary notes, which live only inside the in-memory messages array.
if (pendingRecoveryNote) {
messages.push({ role: 'system', content: pendingRecoveryNote });
pendingRecoveryNote = undefined;
}
// v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
// returned a non-'continue' action ('paused' for user input, or
// 'synthesis_done') — neither a nudge nor an escalate would change the
// control flow, so the mistake check is skipped. On 'continue' the
// heterogeneous-failure pattern gates nudge/escalate/continue. Complements
// the doom-loop gate above, which only catches *identical* repeats.
const post = decidePostToolAction(toolPhaseResult.action, mistakeTracker);
if (post === 'stop') {
break;
}
if (post === 'nudge') {
// Soft intervention: inject model-facing recovery guidance into the NEXT
// step's payload, drop a UI sentinel, bump nudges, reset the streak, and
// continue. The note is consumed (and cleared) at the top of the next
// iteration's payload build.
pendingRecoveryNote = MISTAKE_RECOVERY_NOTE;
const failureKinds = [...mistakeTracker.run];
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: false,
canContinue: true,
});
mistakeTracker.nudges += 1;
mistakeTracker.run = [];
ctx.log.info(
{ sessionId, chatId, step: stepNumber, nudges: mistakeTracker.nudges, failureKinds },
'mistake_recovery nudge',
);
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
const state: StreamPhaseState = { accumulated: '', startedAt: null };
let result: StreamResult;
try {
result = await executeStreamPhase(ctx, iterArgs, iterSession, messages, state, agent, webToolsEnabled);
} catch (err) {
await handleAbortOrError(ctx, iterArgs, state.accumulated, err);
break;
}
// ---- non-tool finish → finalize and exit ----
if (result.toolCalls.length === 0) {
// vWhale: Stop hook (best-effort, non-blocking).
if (ctx.hooks) {
ctx.hooks.run('Stop', {
event: 'Stop',
session_id: sessionId,
chat_id: chatId,
last_assistant_text: result.content.slice(0, 500),
turn: stepNumber,
}).catch(() => {});
}
await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
break;
}
// ---- steps: 0 edge case ----
// effectiveCap check above guarantees we're inside the loop, but this
// guard handles the theoretical case where the model emits tool calls
// on step 0 when effectiveCap would have been 0 (impossible since the
// while condition prevents entry, but kept for safety). If effectiveCap
// is 1 and we're on step 0, tool calls ARE executed — steps counts
// iterations, not post-first-stream.
// ---- tool phase ----
let toolPhaseResult: ToolPhaseResult;
try {
toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
} catch (err) {
// Tool phase errors are unexpected (individual tool failures are
// caught inside executeToolPhase). Log and break.
ctx.log.error({ err, sessionId, chatId, step: stepNumber }, 'tool phase threw unexpectedly');
break;
}
// ---- update loop locals ----
toolsUsed += toolPhaseResult.toolCallCount;
recentToolCalls = [...recentToolCalls, ...toolPhaseResult.toolCalls];
stepNumber++;
// v#12 MistakeTracker: fold this iteration's tool outcomes into the
// tracker, in order. recordStep mutates `mistakeTracker` in place (it is
// the same object referenced by args). A 'success' clears the streak.
for (const o of toolPhaseResult.outcomes) {
recordStep(mistakeTracker, o);
}
// vWhale: auto-fix — after write tools, attempt build and inject errors.
const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
const hasWriteTools = toolPhaseResult.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
if (hasWriteTools) {
detectAndRunBuild(ctx, projectRoot, sessionId, chatId, iterSession.model, pendingRecoveryNote)
.then((buildError) => {
if (buildError) pendingRecoveryNote = buildError;
})
.catch(() => {});
}
// v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
// returned a non-'continue' action ('paused' for user input, or
// 'synthesis_done') — neither a nudge nor an escalate would change the
// control flow, so the mistake check is skipped. On 'continue' the
// heterogeneous-failure pattern gates nudge/escalate/continue. Complements
// the doom-loop gate above, which only catches *identical* repeats.
const post = decidePostToolAction(toolPhaseResult.action, mistakeTracker);
if (post === 'stop') {
break;
}
if (post === 'nudge') {
// Soft intervention: inject model-facing recovery guidance into the NEXT
// step's payload, drop a UI sentinel, bump nudges, reset the streak, and
// continue. The note is consumed (and cleared) at the top of the next
// iteration's payload build.
pendingRecoveryNote = MISTAKE_RECOVERY_NOTE;
const failureKinds = [...mistakeTracker.run];
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: false,
canContinue: true,
});
mistakeTracker.nudges += 1;
mistakeTracker.run = [];
ctx.log.info(
{ sessionId, chatId, step: stepNumber, nudges: mistakeTracker.nudges, failureKinds },
'mistake_recovery nudge',
);
assistantMessageId = toolPhaseResult.nextAssistantId!;
continue;
}
if (post === 'escalate') {
// The nudge didn't break the failure run — stop the turn (cap-hit-style)
// to avoid burning the whole step budget on heterogeneous failures. The
// next assistant row is still 'streaming'; finalize it as an empty
// complete row so the slot doesn't dangle, then drop the escalate
// sentinel.
const failureKinds = [...mistakeTracker.run];
assistantMessageId = toolPhaseResult.nextAssistantId!;
const escalateArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await finalizeEmpty(ctx, escalateArgs);
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: true,
canContinue: true,
});
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
ctx.log.info(
{ sessionId, chatId, step: stepNumber, failureKinds },
'mistake_recovery escalate — stopping turn',
);
break;
}
// 'continue' — advance to next assistant message.
assistantMessageId = toolPhaseResult.nextAssistantId!;
continue;
}
if (post === 'escalate') {
// The nudge didn't break the failure run — stop the turn (cap-hit-style)
// to avoid burning the whole step budget on heterogeneous failures. The
// next assistant row is still 'streaming'; finalize it as an empty
// complete row so the slot doesn't dangle, then drop the escalate
// sentinel.
const failureKinds = [...mistakeTracker.run];
assistantMessageId = toolPhaseResult.nextAssistantId!;
const escalateArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await finalizeEmpty(ctx, escalateArgs);
await insertMistakeRecoverySentinel(ctx, sessionId, chatId, {
failureKinds,
count: failureKinds.length,
escalated: true,
canContinue: true,
});
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
ctx.log.info(
{ sessionId, chatId, step: stepNumber, failureKinds },
'mistake_recovery escalate — stopping turn',
);
break;
}
// 'continue' — advance to next assistant message.
assistantMessageId = toolPhaseResult.nextAssistantId!;
}
// vWhale: Stop hook at post-loop exit (best-effort, non-blocking).
@@ -438,8 +486,9 @@ export async function runAssistantTurn(
if (stepNumber >= effectiveCap && effectiveCap < Infinity) {
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (loaded) {
const scSession = args.modelOverride ? { ...loaded.session, model: args.modelOverride } : loaded.session;
const capArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, mistakeTracker, signal };
await runStepCapSummary(ctx, capArgs, loaded.session, loaded.project, loaded.history, agent, stepNumber, effectiveCap);
await runStepCapSummary(ctx, capArgs, scSession, loaded.project, loaded.history, agent, stepNumber, effectiveCap);
}
}
}
@@ -510,6 +559,31 @@ export async function runInference(
});
}
// v2.8-compare: run inference with a model override and compare group id.
// Used by the compare endpoint to run the same message through N models in
// parallel. Each call publishes frames scoped to its compare_group_id.
export async function runInferenceWithModel(
ctx: InferenceContext,
sessionId: string,
chatId: string,
assistantMessageId: string,
modelOverride: string,
compareGroupId: string,
signal?: AbortSignal,
): Promise<void> {
return runAssistantTurn(ctx, {
sessionId,
chatId,
assistantMessageId,
toolsUsed: 0,
recentToolCalls: [],
mistakeTracker: freshMistakeState(),
modelOverride,
compareGroupId,
signal,
});
}
// v1.8.2: cap-hit summary flow. Called instead of erroring when the loop
// hits its budget. Reuses the in-flight assistant message slot to stream a
// short wrap-up reply with the synthetic note prepended and tools disabled,

View File

@@ -52,7 +52,9 @@ export interface InferenceFrame {
// arena frames
| 'battle_started'
| 'contestant_updated'
| 'battle_updated';
| 'battle_updated'
// inter-agent message
| 'agent_message';
message_id?: string;
message_ids?: string[];
chat_id?: string;
@@ -103,6 +105,11 @@ export interface InferenceFrame {
status?: string;
run_status?: 'running' | 'completed' | 'failed' | 'cancelled';
report?: string;
// v2.8-compare: groups messages belonging to the same compare operation.
compare_group_id?: string;
// inter-agent message
sender_step_id?: string;
channel?: string;
// arena frames
battle_id?: string;
battle_type?: 'coding' | 'qa';
@@ -177,5 +184,10 @@ export interface TurnArgs {
// Never persisted — mirrors how the cap-hit/doom-loop notes live only inside
// the summary call's messages array.
pendingRecoveryNote?: string;
// v2.8-compare: when set, overrides the session model for this single turn.
// Used by the compare endpoint to run the same message through N models.
modelOverride?: string;
// v2.8-compare: opaque group id that rides on every published frame.
compareGroupId?: string;
signal: AbortSignal | undefined;
}