v1.8.2: tool loop cap-hit summary + tool call UI compaction
Old hardcoded MAX_TOOL_LOOP_DEPTH=15 replaced by per-agent max_tool_calls (1-100, AGENTS.md frontmatter) with defaults: 30 for read-only-only agents, 10 for agents that include any non-read-only tool, 15 for raw chat. When the loop hits cap, fire one final summary call with tools disabled, stream the wrap-up into the in-flight assistant message, then insert a system sentinel with metadata.kind='cap_hit'. The sentinel renders an amber bubble with a Continue button (latest sentinel only) that POSTs to a new /api/chats/:id/continue route to extend. Hard ceiling: 3 cap-hits per chat (2 continues max) — third sentinel reports can_continue=false. Error frames carry a machine-readable reason code alongside human error text. Failed messages persist the reason via metadata.kind='error' so the bubble renders specifics on reload (WS error frame is one-shot). Tool call UI rewired: ToolCallLine renders inline (↳ name args spinner/check/✗, expand-on-tap for args+result); ToolCallGroup collapses 3+ consecutive same-tool runs into a compact card. MessageList owns a three-pass pre-render (flatten + fold tool results onto matching runs by id + group same-tool runs + number sentinels). MessageBubble drops tool rendering and adds the sentinel / error-reason branches. ToolCallCard deleted. Roadmap follow-up logged: add explicit max_tool_calls: 30 to the 6 agents in /data/AGENTS.md and /opt/boocode/AGENTS.md post-ship for discoverability (defaults handle behavior identically). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -231,7 +231,7 @@ export function registerChatRoutes(
|
||||
INSERT INTO messages (
|
||||
session_id, chat_id, role, content, kind, tool_calls, tool_results,
|
||||
status, tokens_used, ctx_used, ctx_max, started_at, finished_at,
|
||||
created_at
|
||||
created_at, metadata
|
||||
)
|
||||
SELECT
|
||||
${source.session_id}, ${chat!.id}, role, content, kind,
|
||||
@@ -239,7 +239,8 @@ export function registerChatRoutes(
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at,
|
||||
clock_timestamp() + (
|
||||
ROW_NUMBER() OVER (ORDER BY created_at ASC, id ASC) * INTERVAL '1 microsecond'
|
||||
)
|
||||
),
|
||||
metadata
|
||||
FROM messages
|
||||
WHERE chat_id = ${source.id}
|
||||
AND created_at <= ${target.created_at}::timestamptz
|
||||
@@ -268,7 +269,7 @@ export function registerChatRoutes(
|
||||
}
|
||||
const rows = await sql<Message[]>`
|
||||
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
|
||||
FROM messages
|
||||
WHERE chat_id = ${req.params.id}
|
||||
ORDER BY created_at ASC, id ASC
|
||||
|
||||
@@ -7,6 +7,13 @@ const SendBody = z.object({
|
||||
content: z.string().min(1).max(64_000),
|
||||
});
|
||||
|
||||
// v1.8.2: request body for POST /api/chats/:id/continue. The caller names the
// cap-hit sentinel message it wants to resume from; only the UUID shape is
// checked here — the route handler performs the remaining validation against
// the database before resuming.
const ContinueBody = z.object({ sentinel_message_id: z.string().uuid() });
|
||||
|
||||
interface MessageHandlers {
|
||||
enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
|
||||
enqueueCompact: (sessionId: string, chatId: string, compactMessageId: string, user: string) => void;
|
||||
@@ -36,7 +43,7 @@ export function registerMessageRoutes(
|
||||
}
|
||||
const rows = await sql<Message[]>`
|
||||
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
|
||||
FROM messages
|
||||
WHERE session_id = ${req.params.id}
|
||||
ORDER BY created_at ASC, id ASC
|
||||
@@ -253,6 +260,76 @@ export function registerMessageRoutes(
|
||||
}
|
||||
);
|
||||
|
||||
// v1.8.2: POST /api/chats/:id/continue — resume an inference run that stopped
// on a cap-hit sentinel.
//
// Responses:
//   400  body fails ContinueBody validation, or the message is not a cap-hit sentinel
//   404  chat not found / not open, or sentinel not found in this chat
//   409  chat is currently streaming, the sentinel is stale (a newer cap-hit
//        sentinel exists), or the sentinel reports can_continue !== true
//   202  { assistant_message_id } — a fresh 'streaming' assistant row was
//        inserted and inference was enqueued
app.post<{ Params: { id: string } }>(
  '/api/chats/:id/continue',
  async (req, reply) => {
    const parsed = ContinueBody.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'invalid body', details: parsed.error.flatten() };
    }

    const chatRows = await sql<Chat[]>`
      SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
    `;
    const chat = chatRows[0];
    if (!chat) {
      reply.code(404);
      return { error: 'chat not found' };
    }
    const sessionId = chat.session_id;

    // Cap-hit sentinels are only ever inserted after a turn completes, so
    // there must not be an active inference at this moment. If there is,
    // the client is racing the cap-hit summary that just emitted the
    // sentinel — bail rather than enqueue a parallel run.
    if (handlers.hasActiveInference(chat.id)) {
      reply.code(409);
      return { error: 'chat is currently streaming' };
    }

    // is_latest is true only when no newer cap_hit sentinel exists in this
    // chat, using the same (created_at, id) ordering the message list uses.
    const sentinelRows = await sql<
      { metadata: { kind?: unknown; can_continue?: unknown } | null; is_latest: boolean }[]
    >`
      SELECT m.metadata,
             NOT EXISTS (
               SELECT 1
               FROM messages newer
               WHERE newer.chat_id = m.chat_id
                 AND newer.role = 'system'
                 AND newer.metadata->>'kind' = 'cap_hit'
                 AND (newer.created_at, newer.id) > (m.created_at, m.id)
             ) AS is_latest
      FROM messages m
      WHERE m.id = ${parsed.data.sentinel_message_id}
        AND m.chat_id = ${chat.id}
        AND m.role = 'system'
    `;
    const sentinel = sentinelRows[0];
    if (!sentinel) {
      reply.code(404);
      return { error: 'sentinel not found' };
    }
    const meta = sentinel.metadata;
    if (!meta || meta.kind !== 'cap_hit') {
      reply.code(400);
      return { error: 'message is not a cap-hit sentinel' };
    }
    // Only the newest sentinel may be continued. Without this, an older
    // sentinel (whose stored can_continue is still true) could be replayed
    // from a stale tab or a direct API hit after the chat's newest sentinel
    // already reports can_continue=false, bypassing the hard ceiling.
    if (!sentinel.is_latest) {
      reply.code(409);
      return { error: 'sentinel is no longer the latest cap-hit for this chat' };
    }
    // Server-side hard ceiling check. UI already disables the button when
    // can_continue is false; defending against a stale tab or a direct
    // API hit is the only reason this lives on the server too.
    if (meta.can_continue !== true) {
      reply.code(409);
      return { error: 'hard limit reached for this chat' };
    }

    // Insert the assistant placeholder and bump both updated_at columns
    // atomically, so a failure leaves no orphaned 'streaming' row.
    const result = await sql.begin(async (tx) => {
      const [assistantMsg] = await tx<{ id: string }[]>`
        INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
        VALUES (${sessionId}, ${chat.id}, 'assistant', '', 'streaming', clock_timestamp())
        RETURNING id
      `;
      if (!assistantMsg) {
        // Throwing inside sql.begin rolls the transaction back.
        throw new Error('failed to insert assistant message for continue');
      }
      await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`;
      await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`;
      return { assistant_message_id: assistantMsg.id };
    });

    handlers.enqueueInference(sessionId, chat.id, result.assistant_message_id, 'default');

    reply.code(202);
    return result;
  }
);
|
||||
|
||||
app.post<{ Params: { id: string } }>(
|
||||
'/api/chats/:id/force_send',
|
||||
async (req, reply) => {
|
||||
|
||||
@@ -23,7 +23,7 @@ export function registerWebSocket(
|
||||
|
||||
const messages = await sql<Message[]>`
|
||||
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
|
||||
FROM messages
|
||||
WHERE session_id = ${sessionId}
|
||||
ORDER BY created_at ASC, id ASC
|
||||
|
||||
@@ -158,3 +158,10 @@ END $$;
|
||||
-- the DB; they live in builtins (services/agents.ts) and a per-project AGENTS.md.
|
||||
-- agent_id is the slugified agent name. NULL means "use BooCode defaults".
|
||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS agent_id TEXT;
|
||||
|
||||
-- v1.8.2: per-message metadata for sentinels (cap-hit) and structured error
|
||||
-- reasons. JSONB so future kinds can extend without further schema churn.
|
||||
-- Shape for cap_hit: { kind: 'cap_hit', used: number, limit: number,
|
||||
-- agent_name: string|null, can_continue: boolean }
|
||||
-- Shape for errors: { kind: 'error', error_reason: 'llm_provider_error'|..., error_text: string }
|
||||
ALTER TABLE messages ADD COLUMN IF NOT EXISTS metadata JSONB;
|
||||
|
||||
@@ -21,6 +21,7 @@ function makeSession(overrides: Partial<Session> = {}): Session {
|
||||
status: 'open',
|
||||
created_at: new Date(0).toISOString(),
|
||||
updated_at: new Date(0).toISOString(),
|
||||
agent_id: null,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
@@ -62,6 +63,7 @@ function makeMessage(
|
||||
started_at: null,
|
||||
finished_at: null,
|
||||
created_at: new Date(counter * 1000).toISOString(),
|
||||
metadata: null,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -29,6 +29,9 @@ interface ParsedFrontmatter {
|
||||
tools?: string[];
|
||||
description?: string;
|
||||
model?: string;
|
||||
// v1.8.2: optional per-agent tool-loop budget. Absent → inference resolves
|
||||
// from the agent's toolset at runtime.
|
||||
max_tool_calls?: number;
|
||||
}
|
||||
|
||||
function stripQuotes(s: string): string {
|
||||
@@ -89,6 +92,21 @@ function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: stri
|
||||
data.description = stripQuotes(valueRaw);
|
||||
} else if (key === 'model') {
|
||||
data.model = stripQuotes(valueRaw);
|
||||
} else if (key === 'max_tool_calls') {
|
||||
// v1.8.2: 1..100 inclusive integer. Out-of-range values are skipped
|
||||
// with a warning rather than throwing — agents shouldn't be unusable
|
||||
// because of a typo on a defaulted field. Non-numeric or non-integer
|
||||
// still hard-fails the block, matching `temperature` behavior.
|
||||
const n = Number(valueRaw);
|
||||
if (Number.isInteger(n) && n >= 1 && n <= 100) {
|
||||
data.max_tool_calls = n;
|
||||
} else if (Number.isInteger(n)) {
|
||||
console.warn(
|
||||
`agents: max_tool_calls ${n} out of range 1-100, ignoring (falling back to default)`,
|
||||
);
|
||||
} else {
|
||||
errors.push(`max_tool_calls must be an integer 1-100 (got "${valueRaw}")`);
|
||||
}
|
||||
}
|
||||
// Unknown keys silently ignored — forward-compat.
|
||||
}
|
||||
@@ -177,6 +195,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
|
||||
temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE,
|
||||
tools: filteredTools,
|
||||
model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
|
||||
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,23 @@
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
import type { Agent, Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
|
||||
import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas, type ToolJsonSchema } from './tools.js';
|
||||
import type {
|
||||
Agent,
|
||||
ErrorReason,
|
||||
Message,
|
||||
MessageMetadata,
|
||||
Project,
|
||||
Session,
|
||||
ToolCall,
|
||||
UserStreamFrame,
|
||||
} from '../types/api.js';
|
||||
import {
|
||||
ALL_TOOLS,
|
||||
READ_ONLY_TOOL_NAMES,
|
||||
TOOLS_BY_NAME,
|
||||
toolJsonSchemas,
|
||||
type ToolJsonSchema,
|
||||
} from './tools.js';
|
||||
import { PathScopeError, resolveProjectRoot } from './path_guard.js';
|
||||
import { maybeAutoNameChat } from './auto_name.js';
|
||||
import { getAgentById } from './agents.js';
|
||||
@@ -11,7 +26,39 @@ const BASE_SYSTEM_PROMPT = (projectPath: string) =>
|
||||
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
|
||||
|
||||
const DB_FLUSH_INTERVAL_MS = 500;
|
||||
const MAX_TOOL_LOOP_DEPTH = 15;
|
||||
|
||||
// v1.8.2: tool-call budget defaults. Resolved per-turn by resolveToolBudget.
|
||||
// - Agent with explicit max_tool_calls: that value.
|
||||
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
||||
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
||||
// - No agent (raw chat): BUDGET_NO_AGENT (15).
|
||||
const BUDGET_READ_ONLY = 30;
|
||||
const BUDGET_NON_READ_ONLY = 10;
|
||||
const BUDGET_NO_AGENT = 15;
|
||||
|
||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||
|
||||
/**
 * Resolve the tool-call budget for one inference run.
 *
 * Precedence: no agent → BUDGET_NO_AGENT; an explicit `max_tool_calls` on the
 * agent → that value; otherwise BUDGET_READ_ONLY when every tool the agent
 * lists is in READ_ONLY_SET, else BUDGET_NON_READ_ONLY.
 */
function resolveToolBudget(agent: Agent | null): number {
  if (!agent) return BUDGET_NO_AGENT;

  const explicitBudget = agent.max_tool_calls;
  if (explicitBudget != null) return explicitBudget;

  // An empty tool list has no non-read-only entries, so it resolves to the
  // read-only budget.
  const hasNonReadOnlyTool = agent.tools.some((toolName) => !READ_ONLY_SET.has(toolName));
  if (hasNonReadOnlyTool) return BUDGET_NON_READ_ONLY;
  return BUDGET_READ_ONLY;
}
|
||||
|
||||
/**
 * Build the synthetic system note appended to the cap-hit summary call.
 *
 * The wording is taken verbatim from the v1.8.2 spec and must not be
 * paraphrased: the instruction is intentionally short, declarative, and
 * identical across calls.
 *
 * @param limit the tool budget that was reached, interpolated into the note
 * @returns the note text
 */
function CAP_HIT_SUMMARY_NOTE(limit: number): string {
  return `You've reached the tool budget (${limit} calls). Produce the best answer you can with what you have. Do not call more tools.`;
}
|
||||
|
||||
/**
 * True when `m` is a cap-hit sentinel: a `system` message whose metadata is a
 * non-null object with `kind === 'cap_hit'`.
 */
function isCapHitSentinel(m: Message): boolean {
  if (m.role !== 'system') return false;

  // Treat metadata as untyped here: it comes straight from a JSONB column.
  const meta: unknown = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;

  return (meta as { kind?: unknown }).kind === 'cap_hit';
}
|
||||
|
||||
export interface InferenceFrame {
|
||||
type:
|
||||
@@ -29,12 +76,22 @@ export interface InferenceFrame {
|
||||
chat_id?: string;
|
||||
tool_message_id?: string;
|
||||
tool_call_id?: string;
|
||||
role?: 'assistant' | 'tool' | 'user';
|
||||
// v1.8.2: 'system' added so cap-hit sentinel messages can announce themselves
|
||||
// through the normal message_started → delta → message_complete sequence.
|
||||
role?: 'assistant' | 'tool' | 'user' | 'system';
|
||||
content?: string;
|
||||
tool_call?: ToolCall;
|
||||
output?: unknown;
|
||||
truncated?: boolean;
|
||||
error?: string;
|
||||
// v1.8.2: structured error reason. Set on `type: 'error'` so the UI can
|
||||
// surface a specific message; `error` stays the human-readable text.
|
||||
reason?: ErrorReason;
|
||||
// v1.8.2: piggybacks on `message_complete` so static or terminally-resolved
|
||||
// messages can carry their persisted metadata to the live stream without a
|
||||
// refetch (sentinels carry { kind: 'cap_hit', ... }; failed messages carry
|
||||
// { kind: 'error', ... }).
|
||||
metadata?: MessageMetadata | null;
|
||||
tokens_used?: number | null;
|
||||
ctx_used?: number | null;
|
||||
ctx_max?: number | null;
|
||||
@@ -135,6 +192,11 @@ export function buildMessagesPayload(
|
||||
out.push({ role: 'system', content: m.content });
|
||||
continue;
|
||||
}
|
||||
// v1.8.2: cap-hit sentinels are UI-only — never send them to the LLM. The
|
||||
// synthetic "you've reached the tool budget" note lives only inside the
|
||||
// summary call's messages array and is never persisted, so on Continue
|
||||
// the model resumes with a clean context.
|
||||
if (isCapHitSentinel(m)) continue;
|
||||
if (m.role === 'assistant' && m.status === 'streaming') continue;
|
||||
if (m.role === 'assistant' && m.status === 'cancelled') continue;
|
||||
if (m.role === 'tool') {
|
||||
@@ -193,7 +255,7 @@ async function loadContext(
|
||||
|
||||
const history = await sql<Message[]>`
|
||||
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
|
||||
FROM messages
|
||||
WHERE chat_id = ${chatId}
|
||||
ORDER BY created_at ASC, id ASC
|
||||
@@ -379,7 +441,10 @@ interface TurnArgs {
|
||||
sessionId: string;
|
||||
chatId: string;
|
||||
assistantMessageId: string;
|
||||
depth: number;
|
||||
// v1.8.2: cumulative tool calls executed this run. Compared against the
|
||||
// resolved budget at the top of each turn. Replaces the older `depth`
|
||||
// counter (which counted iterations, not invocations).
|
||||
toolsUsed: number;
|
||||
signal: AbortSignal | undefined;
|
||||
}
|
||||
|
||||
@@ -480,13 +545,32 @@ async function handleAbortOrError(
|
||||
const { sessionId, chatId, assistantMessageId } = args;
|
||||
const isAbort = err instanceof Error && err.name === 'AbortError';
|
||||
const finalStatus = isAbort ? 'cancelled' : 'failed';
|
||||
await ctx.sql`
|
||||
UPDATE messages
|
||||
SET status = ${finalStatus},
|
||||
content = ${accumulated},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${assistantMessageId}
|
||||
`;
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
// v1.8.2: persist a structured error metadata blob on genuine failures so
|
||||
// the bubble can render the reason on reload without re-deriving from the
|
||||
// (one-shot) WS error frame. User-initiated abort skips this — there's no
|
||||
// "reason" to surface for a stop the user already explicitly chose.
|
||||
const errorMetadata: MessageMetadata | null = isAbort
|
||||
? null
|
||||
: { kind: 'error', error_reason: 'llm_provider_error', error_text: errMsg };
|
||||
if (errorMetadata) {
|
||||
await ctx.sql`
|
||||
UPDATE messages
|
||||
SET status = ${finalStatus},
|
||||
content = ${accumulated},
|
||||
finished_at = clock_timestamp(),
|
||||
metadata = ${ctx.sql.json(errorMetadata as never)}
|
||||
WHERE id = ${assistantMessageId}
|
||||
`;
|
||||
} else {
|
||||
await ctx.sql`
|
||||
UPDATE messages
|
||||
SET status = ${finalStatus},
|
||||
content = ${accumulated},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${assistantMessageId}
|
||||
`;
|
||||
}
|
||||
const [failSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
|
||||
UPDATE sessions SET updated_at = clock_timestamp()
|
||||
WHERE id = ${sessionId}
|
||||
@@ -494,9 +578,10 @@ async function handleAbortOrError(
|
||||
`;
|
||||
ctx.publishUser({ type: 'session_updated', session_id: sessionId, project_id: failSessRow!.project_id, name: failSessRow!.name, updated_at: failSessRow!.updated_at });
|
||||
// v1.8 mobile-tabs: cancellation is a user-initiated stop, treat as idle;
|
||||
// genuine errors flip the dot red.
|
||||
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: isAbort ? 'idle' : 'error', at: new Date().toISOString() });
|
||||
// genuine errors flip the dot red. v1.8.2: error path also carries a
|
||||
// machine-readable `reason` so the UI can render specifics inline.
|
||||
if (isAbort) {
|
||||
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
|
||||
ctx.publish(sessionId, {
|
||||
type: 'message_complete',
|
||||
message_id: assistantMessageId,
|
||||
@@ -504,12 +589,19 @@ async function handleAbortOrError(
|
||||
});
|
||||
ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
|
||||
} else {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
ctx.publishUser({
|
||||
type: 'chat_status',
|
||||
chat_id: chatId,
|
||||
status: 'error',
|
||||
at: new Date().toISOString(),
|
||||
reason: 'llm_provider_error',
|
||||
});
|
||||
ctx.publish(sessionId, {
|
||||
type: 'error',
|
||||
message_id: assistantMessageId,
|
||||
chat_id: chatId,
|
||||
error: errMsg,
|
||||
reason: 'llm_provider_error',
|
||||
});
|
||||
ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
|
||||
}
|
||||
@@ -523,7 +615,7 @@ async function executeToolPhase(
|
||||
session: Session,
|
||||
projectRoot: string
|
||||
): Promise<void> {
|
||||
const { sessionId, chatId, assistantMessageId, depth, signal } = args;
|
||||
const { sessionId, chatId, assistantMessageId, toolsUsed, signal } = args;
|
||||
const { content, toolCalls, promptTokens, completionTokens, nCtx } = result;
|
||||
|
||||
const [updated] = await ctx.sql<
|
||||
@@ -607,7 +699,10 @@ async function executeToolPhase(
|
||||
sessionId,
|
||||
chatId,
|
||||
assistantMessageId: nextAssistant!.id,
|
||||
depth: depth + 1,
|
||||
// v1.8.2: charge this turn's actual tool invocations against the budget.
|
||||
// One assistant message can emit multiple tool_calls, so we add the run
|
||||
// count, not 1. The next turn's budget check sees the cumulative total.
|
||||
toolsUsed: toolsUsed + result.toolCalls.length,
|
||||
signal,
|
||||
});
|
||||
}
|
||||
@@ -671,25 +766,7 @@ async function runAssistantTurn(
|
||||
ctx: InferenceContext,
|
||||
args: TurnArgs,
|
||||
): Promise<void> {
|
||||
const { sessionId, chatId, assistantMessageId, depth } = args;
|
||||
|
||||
if (depth > MAX_TOOL_LOOP_DEPTH) {
|
||||
await ctx.sql`
|
||||
UPDATE messages
|
||||
SET status = 'failed',
|
||||
content = ${'tool loop depth exceeded'},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${assistantMessageId}
|
||||
`;
|
||||
ctx.publish(sessionId, {
|
||||
type: 'error',
|
||||
message_id: assistantMessageId,
|
||||
chat_id: chatId,
|
||||
error: 'tool loop depth exceeded',
|
||||
});
|
||||
ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'error', at: new Date().toISOString() });
|
||||
return;
|
||||
}
|
||||
const { sessionId, chatId } = args;
|
||||
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
if (!loaded) {
|
||||
@@ -704,6 +781,17 @@ async function runAssistantTurn(
|
||||
const agent = session.agent_id
|
||||
? await getAgentById(project.path, session.agent_id)
|
||||
: null;
|
||||
|
||||
// v1.8.2: cap-hit replaces the older "tool loop depth exceeded" failure.
|
||||
// When we've already burned the budget *before* this turn even runs, we
|
||||
// skip straight to the summary flow — the in-flight assistant message slot
|
||||
// gets reused for the wrap-up reply instead of being marked failed.
|
||||
const budget = resolveToolBudget(agent);
|
||||
if (args.toolsUsed >= budget) {
|
||||
await runCapHitSummary(ctx, args, session, project, history, agent, budget);
|
||||
return;
|
||||
}
|
||||
|
||||
const messages = buildMessagesPayload(session, project, history, agent);
|
||||
|
||||
const state: StreamPhaseState = { accumulated: '', startedAt: null };
|
||||
@@ -730,7 +818,264 @@ export async function runInference(
|
||||
assistantMessageId: string,
|
||||
signal?: AbortSignal
|
||||
): Promise<void> {
|
||||
return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, depth: 0, signal });
|
||||
// v1.8.2: every fresh inference (initial send, regenerate, force_send,
|
||||
// continue) starts with a clean budget. Tool-call accumulation across
|
||||
// Continue invocations is what the hard ceiling guards against, not the
|
||||
// per-call budget.
|
||||
return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, toolsUsed: 0, signal });
|
||||
}
|
||||
|
||||
/**
 * v1.8.2 cap-hit summary flow. Called instead of erroring when the tool loop
 * hits its budget.
 *
 * Reuses the in-flight assistant message slot to stream a short wrap-up reply:
 * the normal payload plus CAP_HIT_SUMMARY_NOTE, with `tools: null`. The
 * message is then finalized as complete / cancelled / failed, and a cap_hit
 * sentinel is inserted afterward regardless of the summary outcome so the UI
 * can show where the budget was hit.
 *
 * @param ctx     inference context (sql, publish, publishUser, log)
 * @param args    turn arguments; the assistant message id is the slot reused for the summary
 * @param session session whose model is used for the summary call
 * @param project project passed through to buildMessagesPayload
 * @param history chat history passed through to buildMessagesPayload
 * @param agent   active agent, or null for raw chat
 * @param budget  the resolved tool budget that was reached
 */
async function runCapHitSummary(
  ctx: InferenceContext,
  args: TurnArgs,
  session: Session,
  project: Project,
  history: Message[],
  agent: Agent | null,
  budget: number,
): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

  // The budget note is pushed onto this call's payload only; nothing here
  // persists it.
  const messages = buildMessagesPayload(session, project, history, agent);
  messages.push({ role: 'system', content: CAP_HIT_SUMMARY_NOTE(budget) });

  const startedRow = await ctx.sql<{ started_at: string }[]>`
    UPDATE messages
    SET started_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING started_at
  `;
  const startedAt = startedRow[0]?.started_at ?? null;

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  let accumulated = '';
  let pendingFlushTimer: NodeJS.Timeout | null = null;
  let flushPromise: Promise<unknown> = Promise.resolve();
  const flushNow = () => {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    const snapshot = accumulated;
    // Intermediate flushes are best-effort: the finalize UPDATE below writes
    // the content again. Swallow (and log) failures here so one bad flush
    // neither poisons the chain, nor becomes an unhandled rejection, nor
    // makes `await flushPromise` throw and skip finalization + the sentinel.
    flushPromise = flushPromise
      .then(() => ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`)
      .catch((flushErr: unknown) => {
        ctx.log.error(
          { err: flushErr, sessionId, chatId, assistantMessageId },
          'cap-hit summary content flush failed',
        );
      });
  };
  const scheduleFlush = () => {
    if (pendingFlushTimer) return;
    pendingFlushTimer = setTimeout(() => {
      pendingFlushTimer = null;
      flushNow();
    }, DB_FLUSH_INTERVAL_MS);
  };

  let summaryOk = false;
  let summarySoftCancelled = false;
  let summaryError: string | null = null;
  let result: StreamResult | null = null;
  try {
    result = await streamCompletion(
      ctx,
      session.model,
      messages,
      { tools: null, temperature: agent?.temperature },
      (delta) => {
        accumulated += delta;
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
          chat_id: chatId,
          content: delta,
        });
        scheduleFlush();
      },
      signal,
    );
    summaryOk = true;
  } catch (err) {
    if (err instanceof Error && err.name === 'AbortError') {
      summarySoftCancelled = true;
    } else {
      summaryError = err instanceof Error ? err.message : String(err);
    }
  } finally {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    // Let any in-flight flush settle so it cannot overwrite the final content.
    await flushPromise;
  }

  // Finalize the summary message based on the three outcomes. The sentinel
  // is inserted regardless — even on a partial / failed summary the chat
  // history shows where the budget was hit.
  if (summaryOk && result) {
    const [updated] = await ctx.sql<
      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
    >`
      UPDATE messages
      SET content = ${result.content},
          status = 'complete',
          tokens_used = ${result.completionTokens},
          ctx_used = ${result.promptTokens},
          ctx_max = ${result.nCtx},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
      RETURNING tokens_used, ctx_used, ctx_max, finished_at
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
      tokens_used: updated?.tokens_used ?? null,
      ctx_used: updated?.ctx_used ?? null,
      ctx_max: updated?.ctx_max ?? null,
      started_at: startedAt,
      finished_at: updated?.finished_at ?? null,
      model: session.model,
    });
  } else if (summarySoftCancelled) {
    // Abort: keep whatever text streamed so far.
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'cancelled',
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
  } else {
    // Genuine failure: persist the reason so it survives the one-shot error frame.
    const errMeta: MessageMetadata = {
      kind: 'error',
      error_reason: 'summary_after_cap_failed',
      error_text: summaryError ?? 'summary failed',
    };
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'failed',
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errMeta as never)}
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: summaryError ?? 'summary failed',
      reason: 'summary_after_cap_failed',
    });
  }

  // Bump the session's updated_at exactly once for this turn.
  const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  // Guarded rather than asserted: if the UPDATE matched no row we still want
  // to reach the sentinel and the terminal status frame below.
  if (sessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: sessRow.project_id,
      name: sessRow.name,
      updated_at: sessRow.updated_at,
    });
  }

  await insertCapHitSentinel(ctx, sessionId, chatId, agent, budget);

  // Status frame fires last so the dot color reflects the terminal state.
  // Success → idle, abort → idle (user-driven stop), error → error+reason.
  if (summaryOk || summarySoftCancelled) {
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: 'summary_after_cap_failed',
    });
  }

  ctx.log.info(
    { sessionId, chatId, assistantMessageId, budget, summaryOk, summaryCancelled: summarySoftCancelled },
    'inference cap-hit summary finished',
  );
}
|
||||
|
||||
/**
 * Insert the cap_hit system sentinel for a chat and announce it on the
 * session stream.
 *
 * Hard ceiling: counts the cap_hit sentinels already present in the chat.
 * While fewer than two exist, the new sentinel carries can_continue=true;
 * from the third sentinel on it carries can_continue=false.
 *
 * @param ctx       inference context (sql + publish)
 * @param sessionId session the sentinel row belongs to
 * @param chatId    chat the sentinel row belongs to
 * @param agent     active agent (its name is recorded in metadata), or null
 * @param budget    the tool budget that was reached
 */
async function insertCapHitSentinel(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  agent: Agent | null,
  budget: number,
): Promise<void> {
  const priorRows = await ctx.sql<{ count: number }[]>`
    SELECT COUNT(*)::int AS count
    FROM messages
    WHERE chat_id = ${chatId}
      AND role = 'system'
      AND metadata->>'kind' = 'cap_hit'
  `;
  const priorCount = priorRows[0]?.count ?? 0;
  const canContinue = priorCount < 2;

  // NOTE(review): `used` is recorded as the budget itself, not the actual
  // number of tool calls executed — confirm that is what the UI expects.
  const metadata: MessageMetadata = {
    kind: 'cap_hit',
    used: budget,
    limit: budget,
    agent_name: agent?.name ?? null,
    can_continue: canContinue,
  };

  // The persisted text must agree with can_continue: do not invite the user
  // to "Continue" on the sentinel that reports the hard limit.
  const content = canContinue
    ? `Reached tool budget (${budget}/${budget}). Continue to extend.`
    : `Reached tool budget (${budget}/${budget}). Continue limit reached for this chat.`;

  const [row] = await ctx.sql<{ id: string }[]>`
    INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
    VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)})
    RETURNING id
  `;
  if (!row) {
    throw new Error('cap-hit sentinel insert returned no row');
  }
  const sentinelId = row.id;

  // The sentinel content is static, but we still walk the standard frame
  // sequence (started → delta → complete) so the stream consumer appends it
  // via the same path it uses for streaming assistant messages. The delta
  // carries the full text in one chunk; the complete frame carries metadata.
  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: sentinelId,
    chat_id: chatId,
    role: 'system',
  });
  ctx.publish(sessionId, {
    type: 'delta',
    message_id: sentinelId,
    chat_id: chatId,
    content,
  });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: sentinelId,
    chat_id: chatId,
    metadata,
  });
}
|
||||
|
||||
const COMPACT_SYSTEM_PROMPT =
|
||||
|
||||
@@ -308,6 +308,19 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
|
||||
gitStatus as ToolDef<unknown>,
|
||||
];
|
||||
|
||||
/**
 * v1.8.2: forward-compatible read-only whitelist.
 *
 * An agent whose `tools` list is fully contained in this set gets the generous
 * default tool budget (30). Any tool outside it means the agent can mutate
 * state, so it gets the tighter default (10). Every tool shipped in v1.8.2 is
 * read-only, so the tighter branch only takes effect once write tools land.
 */
export const READ_ONLY_TOOL_NAMES = ['view_file', 'list_dir', 'grep', 'find_files', 'git_status'] as const;
|
||||
|
||||
export const TOOLS_BY_NAME: Record<string, ToolDef<unknown>> = Object.fromEntries(
|
||||
ALL_TOOLS.map((t) => [t.name, t])
|
||||
);
|
||||
|
||||
@@ -45,6 +45,10 @@ export interface Agent {
|
||||
tools: string[]; // whitelist of tool names; empty = no tools allowed
|
||||
model: string | null; // null means "session.model wins"
|
||||
source: AgentSource;
|
||||
// v1.8.2: per-agent tool-loop budget. null means resolve at runtime from the
|
||||
// agent's toolset (30 if all tools are read-only, 10 otherwise) or 15 for
|
||||
// raw chat with no agent.
|
||||
max_tool_calls: number | null;
|
||||
}
|
||||
|
||||
// One entry per malformed `## Name` block. Per-block errors don't fail the
|
||||
@@ -100,6 +104,31 @@ export interface ToolResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
 * v1.8.2: machine-readable reason codes for failed inferences. The human text
 * travels separately in `error`; the UI matches on `reason` and falls back to
 * `error` when the reason is absent or unrecognized.
 */
export type ErrorReason = 'llm_provider_error' | 'tool_execution_failed' | 'summary_after_cap_failed';

/** System sentinel emitted when the tool budget is exhausted. */
export interface CapHitMetadata {
  kind: 'cap_hit';
  used: number;
  limit: number;
  agent_name: string | null;
  can_continue: boolean;
}

/** Attached to a failed assistant message so the UI can show the reason. */
export interface ErrorMetadata {
  kind: 'error';
  error_reason: ErrorReason;
  error_text: string;
}

/** v1.8.2: shapes stored in messages.metadata, discriminated on `kind`. */
export type MessageMetadata = CapHitMetadata | ErrorMetadata;
|
||||
|
||||
export interface Message {
|
||||
id: string;
|
||||
session_id: string;
|
||||
@@ -117,6 +146,9 @@ export interface Message {
|
||||
started_at: string | null;
|
||||
finished_at: string | null;
|
||||
created_at: string;
|
||||
// v1.8.2: per-message metadata. See MessageMetadata for the discriminated
|
||||
// shapes currently in use.
|
||||
metadata: MessageMetadata | null;
|
||||
}
|
||||
|
||||
export interface ModelInfo {
|
||||
@@ -257,11 +289,14 @@ export interface ProjectUpdatedFrame {
|
||||
}
|
||||
// v1.8 mobile-tabs: server can't know about client-side panes, so status
|
||||
// is keyed by chat_id. Frontend dot derives pane status from pane.activeChatId.
|
||||
// v1.8.2: optional `reason` carries a machine-readable code when status is
|
||||
// 'error'. UI prefers reason; falls back to no detail when absent.
|
||||
export interface ChatStatusFrame {
  /** Frame discriminator. */
  type: 'chat_status';
  /** Status is keyed by chat, not by client-side pane. */
  chat_id: string;
  status: 'working' | 'idle' | 'error';
  /** Timestamp of the status change; senders in this file pass an ISO-8601 string. */
  at: string;
  /** v1.8.2: optional machine-readable code accompanying status 'error'. */
  reason?: ErrorReason;
}
|
||||
export type UserStreamFrame =
|
||||
| ProjectCreatedFrame
|
||||
|
||||
Reference in New Issue
Block a user