diff --git a/apps/server/src/routes/chats.ts b/apps/server/src/routes/chats.ts index 3cbfffa..b03dc54 100644 --- a/apps/server/src/routes/chats.ts +++ b/apps/server/src/routes/chats.ts @@ -231,7 +231,7 @@ export function registerChatRoutes( INSERT INTO messages ( session_id, chat_id, role, content, kind, tool_calls, tool_results, status, tokens_used, ctx_used, ctx_max, started_at, finished_at, - created_at + created_at, metadata ) SELECT ${source.session_id}, ${chat!.id}, role, content, kind, @@ -239,7 +239,8 @@ export function registerChatRoutes( tokens_used, ctx_used, ctx_max, started_at, finished_at, clock_timestamp() + ( ROW_NUMBER() OVER (ORDER BY created_at ASC, id ASC) * INTERVAL '1 microsecond' - ) + ), + metadata FROM messages WHERE chat_id = ${source.id} AND created_at <= ${target.created_at}::timestamptz @@ -268,7 +269,7 @@ export function registerChatRoutes( } const rows = await sql` SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, - tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at + tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata FROM messages WHERE chat_id = ${req.params.id} ORDER BY created_at ASC, id ASC diff --git a/apps/server/src/routes/messages.ts b/apps/server/src/routes/messages.ts index d00cd7f..a33e4e6 100644 --- a/apps/server/src/routes/messages.ts +++ b/apps/server/src/routes/messages.ts @@ -7,6 +7,13 @@ const SendBody = z.object({ content: z.string().min(1).max(64_000), }); +// v1.8.2: Continue extends an inference loop that hit the tool budget. Caller +// passes the sentinel message it's continuing from; server validates shape +// and the per-chat hard ceiling before resuming. 
+const ContinueBody = z.object({
+  sentinel_message_id: z.string().uuid(),
+});
+
 interface MessageHandlers {
   enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
   enqueueCompact: (sessionId: string, chatId: string, compactMessageId: string, user: string) => void;
@@ -36,7 +43,7 @@ export function registerMessageRoutes(
     }
     const rows = await sql`
       SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
-             tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
+             tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
       FROM messages
       WHERE session_id = ${req.params.id}
       ORDER BY created_at ASC, id ASC
@@ -253,6 +260,98 @@ export function registerMessageRoutes(
     }
   );
 
+  // POST /api/chats/:id/continue — resume an inference that stopped on a
+  // cap-hit sentinel. 202 returns the new assistant message id. 400: bad body
+  // or the message is not a cap-hit sentinel. 404: chat or sentinel not found.
+  // 409: chat is streaming, the sentinel is stale, or the ceiling is reached.
+  app.post<{ Params: { id: string } }>(
+    '/api/chats/:id/continue',
+    async (req, reply) => {
+      const parsed = ContinueBody.safeParse(req.body);
+      if (!parsed.success) {
+        reply.code(400);
+        return { error: 'invalid body', details: parsed.error.flatten() };
+      }
+
+      const chatRows = await sql`
+        SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
+      `;
+      if (chatRows.length === 0) {
+        reply.code(404);
+        return { error: 'chat not found' };
+      }
+      const chat = chatRows[0]!;
+      const sessionId = chat.session_id;
+
+      // Cap-hit sentinels are only ever inserted after a turn completes, so
+      // there must not be an active inference at this moment. If there is,
+      // the client is racing the cap-hit summary that just emitted the
+      // sentinel — bail rather than enqueue a parallel run.
+      if (handlers.hasActiveInference(chat.id)) {
+        reply.code(409);
+        return { error: 'chat is currently streaming' };
+      }
+
+      const sentinel = await sql<{ metadata: { kind?: unknown; can_continue?: unknown } | null }[]>`
+        SELECT metadata
+        FROM messages
+        WHERE id = ${parsed.data.sentinel_message_id}
+          AND chat_id = ${chat.id}
+          AND role = 'system'
+      `;
+      if (sentinel.length === 0) {
+        reply.code(404);
+        return { error: 'sentinel not found' };
+      }
+      const meta = sentinel[0]!.metadata;
+      if (!meta || meta.kind !== 'cap_hit') {
+        reply.code(400);
+        return { error: 'message is not a cap-hit sentinel' };
+      }
+      // can_continue is frozen into each sentinel when it is inserted, so an
+      // older sentinel keeps saying true after a newer one has hit the hard
+      // ceiling. Accept only the chat's most recent cap-hit sentinel; otherwise
+      // replaying the first sentinel's id would bypass the ceiling forever.
+      const [latestSentinel] = await sql<{ id: string }[]>`
+        SELECT id
+        FROM messages
+        WHERE chat_id = ${chat.id}
+          AND role = 'system'
+          AND metadata->>'kind' = 'cap_hit'
+        ORDER BY created_at DESC, id DESC
+        LIMIT 1
+      `;
+      // Postgres renders uuids in lowercase; the validated body may not be.
+      if (latestSentinel?.id !== parsed.data.sentinel_message_id.toLowerCase()) {
+        reply.code(409);
+        return { error: 'a newer cap-hit sentinel exists for this chat' };
+      }
+      // Server-side hard ceiling check. UI already disables the button when
+      // can_continue is false; defending against a stale tab or a direct
+      // API hit is the only reason this lives on the server too.
+      if (meta.can_continue !== true) {
+        reply.code(409);
+        return { error: 'hard limit reached for this chat' };
+      }
+
+      const result = await sql.begin(async (tx) => {
+        const [assistantMsg] = await tx<{ id: string }[]>`
+          INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
+          VALUES (${sessionId}, ${chat.id}, 'assistant', '', 'streaming', clock_timestamp())
+          RETURNING id
+        `;
+        await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`;
+        await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`;
+        return { assistant_message_id: assistantMsg!.id };
+      });
+
+      handlers.enqueueInference(sessionId, chat.id, result.assistant_message_id, 'default');
+
+      reply.code(202);
+      return result;
+    }
+  );
+
   app.post<{ Params: { id: string } }>(
     '/api/chats/:id/force_send',
     async (req, reply) => {
diff --git a/apps/server/src/routes/ws.ts b/apps/server/src/routes/ws.ts
index a998f54..88c854e 100644
--- a/apps/server/src/routes/ws.ts
+++ b/apps/server/src/routes/ws.ts
@@ -23,7 +23,7 @@ export function registerWebSocket(
     const messages = await sql`
       SELECT id, session_id, chat_id, role, content, kind, tool_calls,
tool_results, status, last_seq, - tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at + tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata FROM messages WHERE session_id = ${sessionId} ORDER BY created_at ASC, id ASC diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql index 1512fbd..18a7a15 100644 --- a/apps/server/src/schema.sql +++ b/apps/server/src/schema.sql @@ -158,3 +158,10 @@ END $$; -- the DB; they live in builtins (services/agents.ts) and a per-project AGENTS.md. -- agent_id is the slugified agent name. NULL means "use BooCode defaults". ALTER TABLE sessions ADD COLUMN IF NOT EXISTS agent_id TEXT; + +-- v1.8.2: per-message metadata for sentinels (cap-hit) and structured error +-- reasons. JSONB so future kinds can extend without further schema churn. +-- Shape for cap_hit: { kind: 'cap_hit', used: number, limit: number, +-- agent_name: string|null, can_continue: boolean } +-- Shape for errors: { error_reason: 'llm_provider_error'|..., error_text: string } +ALTER TABLE messages ADD COLUMN IF NOT EXISTS metadata JSONB; diff --git a/apps/server/src/services/__tests__/inference.test.ts b/apps/server/src/services/__tests__/inference.test.ts index 1012016..35f0049 100644 --- a/apps/server/src/services/__tests__/inference.test.ts +++ b/apps/server/src/services/__tests__/inference.test.ts @@ -21,6 +21,7 @@ function makeSession(overrides: Partial = {}): Session { status: 'open', created_at: new Date(0).toISOString(), updated_at: new Date(0).toISOString(), + agent_id: null, ...overrides, }; } @@ -62,6 +63,7 @@ function makeMessage( started_at: null, finished_at: null, created_at: new Date(counter * 1000).toISOString(), + metadata: null, ...overrides, }; } diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts index aba55a8..7f7b2f8 100644 --- a/apps/server/src/services/agents.ts +++ b/apps/server/src/services/agents.ts @@ -29,6 +29,9 @@ interface ParsedFrontmatter { tools?: 
string[];
   description?: string;
   model?: string;
+  // v1.8.2: optional per-agent tool-loop budget. Absent → inference resolves
+  // from the agent's toolset at runtime.
+  max_tool_calls?: number;
 }
 
 function stripQuotes(s: string): string {
@@ -89,6 +92,26 @@ function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: stri
       data.description = stripQuotes(valueRaw);
     } else if (key === 'model') {
       data.model = stripQuotes(valueRaw);
+    } else if (key === 'max_tool_calls') {
+      // v1.8.2: 1..100 inclusive integer. Out-of-range values are skipped
+      // with a warning rather than throwing — agents shouldn't be unusable
+      // because of a typo on a defaulted field. Non-numeric or non-integer
+      // still hard-fails the block, matching `temperature` behavior.
+      //
+      // Only a plain decimal integer literal counts as numeric. Bare Number()
+      // coerces '' and whitespace to 0 (which would merely warn) and accepts
+      // '0x10', '1e2' and '10.0', so the literal is validated before converting.
+      const literal = valueRaw.trim();
+      const n = /^[+-]?\d+$/.test(literal) ? Number(literal) : Number.NaN;
+      if (Number.isInteger(n) && n >= 1 && n <= 100) {
+        data.max_tool_calls = n;
+      } else if (Number.isInteger(n)) {
+        console.warn(
+          `agents: max_tool_calls ${n} out of range 1-100, ignoring (falling back to default)`,
+        );
+      } else {
+        errors.push(`max_tool_calls must be an integer 1-100 (got "${valueRaw}")`);
+      }
     }
     // Unknown keys silently ignored — forward-compat.
   }
@@ -177,6 +200,7 @@ function parseAgentSection(section: RawSection): Omit {
     temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE,
     tools: filteredTools,
     model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
+    max_tool_calls: typeof fm.max_tool_calls === 'number' ?
fm.max_tool_calls : null, }; } diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index b2eb1fb..ceb5615 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -1,8 +1,23 @@ import type { FastifyBaseLogger } from 'fastify'; import type { Sql } from '../db.js'; import type { Config } from '../config.js'; -import type { Agent, Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js'; -import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas, type ToolJsonSchema } from './tools.js'; +import type { + Agent, + ErrorReason, + Message, + MessageMetadata, + Project, + Session, + ToolCall, + UserStreamFrame, +} from '../types/api.js'; +import { + ALL_TOOLS, + READ_ONLY_TOOL_NAMES, + TOOLS_BY_NAME, + toolJsonSchemas, + type ToolJsonSchema, +} from './tools.js'; import { PathScopeError, resolveProjectRoot } from './path_guard.js'; import { maybeAutoNameChat } from './auto_name.js'; import { getAgentById } from './agents.js'; @@ -11,7 +26,39 @@ const BASE_SYSTEM_PROMPT = (projectPath: string) => `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`; const DB_FLUSH_INTERVAL_MS = 500; -const MAX_TOOL_LOOP_DEPTH = 15; + +// v1.8.2: tool-call budget defaults. Resolved per-turn by resolveToolBudget. +// - Agent with explicit max_tool_calls: that value. +// - Agent with read-only-only tools: BUDGET_READ_ONLY (30). +// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10). +// - No agent (raw chat): BUDGET_NO_AGENT (15). 
+const BUDGET_READ_ONLY = 30;
+const BUDGET_NON_READ_ONLY = 10;
+const BUDGET_NO_AGENT = 15;
+
+const READ_ONLY_SET: ReadonlySet<string> = new Set<string>(READ_ONLY_TOOL_NAMES);
+
+// Budget for one run: no agent → 15, explicit max_tool_calls wins, else by toolset.
+function resolveToolBudget(agent: Agent | null): number {
+  if (!agent) return BUDGET_NO_AGENT;
+  if (agent.max_tool_calls != null) return agent.max_tool_calls;
+  const hasNonReadOnlyTool = agent.tools.some((toolName) => !READ_ONLY_SET.has(toolName));
+  return hasNonReadOnlyTool ? BUDGET_NON_READ_ONLY : BUDGET_READ_ONLY;
+}
+
+// Synthetic system note appended to the cap-hit summary call. Verbatim from
+// the v1.8.2 spec — do not paraphrase: the model is more reliable when the
+// instruction is short, declarative, and identical across calls.
+const CAP_HIT_SUMMARY_NOTE = (limit: number) =>
+  `You've reached the tool budget (${limit} calls). Produce the best answer you can with what you have. Do not call more tools.`;
+
+// True only for persisted cap-hit sentinels (system rows whose kind is 'cap_hit').
+function isCapHitSentinel(m: Message): boolean {
+  if (m.role !== 'system') return false;
+  const meta: unknown = m.metadata;
+  if (meta === null || typeof meta !== 'object') return false;
+  return (meta as { kind?: unknown }).kind === 'cap_hit';
+}
 
 export interface InferenceFrame {
   type:
@@ -29,12 +76,22 @@ export interface InferenceFrame {
   chat_id?: string;
   tool_message_id?: string;
   tool_call_id?: string;
-  role?: 'assistant' | 'tool' | 'user';
+  // v1.8.2: 'system' added so cap-hit sentinel messages can announce themselves
+  // through the normal message_started → delta → message_complete sequence.
+  role?: 'assistant' | 'tool' | 'user' | 'system';
   content?: string;
   tool_call?: ToolCall;
   output?: unknown;
   truncated?: boolean;
   error?: string;
+  // v1.8.2: structured error reason. Set on `type: 'error'` so the UI can
+  // surface a specific message; `error` stays the human-readable text.
+  reason?: ErrorReason;
+  // v1.8.2: piggybacks on `message_complete` so static or terminally-resolved
+  // messages can carry their persisted metadata to the live stream without a
+  // refetch (sentinels carry { kind: 'cap_hit', ...
}; failed messages carry + // { kind: 'error', ... }). + metadata?: MessageMetadata | null; tokens_used?: number | null; ctx_used?: number | null; ctx_max?: number | null; @@ -135,6 +192,11 @@ export function buildMessagesPayload( out.push({ role: 'system', content: m.content }); continue; } + // v1.8.2: cap-hit sentinels are UI-only — never send them to the LLM. The + // synthetic "you've reached the tool budget" note lives only inside the + // summary call's messages array and is never persisted, so on Continue + // the model resumes with a clean context. + if (isCapHitSentinel(m)) continue; if (m.role === 'assistant' && m.status === 'streaming') continue; if (m.role === 'assistant' && m.status === 'cancelled') continue; if (m.role === 'tool') { @@ -193,7 +255,7 @@ async function loadContext( const history = await sql` SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, - tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at + tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata FROM messages WHERE chat_id = ${chatId} ORDER BY created_at ASC, id ASC @@ -379,7 +441,10 @@ interface TurnArgs { sessionId: string; chatId: string; assistantMessageId: string; - depth: number; + // v1.8.2: cumulative tool calls executed this run. Compared against the + // resolved budget at the top of each turn. Replaces the older `depth` + // counter (which counted iterations, not invocations). + toolsUsed: number; signal: AbortSignal | undefined; } @@ -480,13 +545,32 @@ async function handleAbortOrError( const { sessionId, chatId, assistantMessageId } = args; const isAbort = err instanceof Error && err.name === 'AbortError'; const finalStatus = isAbort ? 'cancelled' : 'failed'; - await ctx.sql` - UPDATE messages - SET status = ${finalStatus}, - content = ${accumulated}, - finished_at = clock_timestamp() - WHERE id = ${assistantMessageId} - `; + const errMsg = err instanceof Error ? 
err.message : String(err); + // v1.8.2: persist a structured error metadata blob on genuine failures so + // the bubble can render the reason on reload without re-deriving from the + // (one-shot) WS error frame. User-initiated abort skips this — there's no + // "reason" to surface for a stop the user already explicitly chose. + const errorMetadata: MessageMetadata | null = isAbort + ? null + : { kind: 'error', error_reason: 'llm_provider_error', error_text: errMsg }; + if (errorMetadata) { + await ctx.sql` + UPDATE messages + SET status = ${finalStatus}, + content = ${accumulated}, + finished_at = clock_timestamp(), + metadata = ${ctx.sql.json(errorMetadata as never)} + WHERE id = ${assistantMessageId} + `; + } else { + await ctx.sql` + UPDATE messages + SET status = ${finalStatus}, + content = ${accumulated}, + finished_at = clock_timestamp() + WHERE id = ${assistantMessageId} + `; + } const [failSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>` UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId} @@ -494,9 +578,10 @@ async function handleAbortOrError( `; ctx.publishUser({ type: 'session_updated', session_id: sessionId, project_id: failSessRow!.project_id, name: failSessRow!.name, updated_at: failSessRow!.updated_at }); // v1.8 mobile-tabs: cancellation is a user-initiated stop, treat as idle; - // genuine errors flip the dot red. - ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: isAbort ? 'idle' : 'error', at: new Date().toISOString() }); + // genuine errors flip the dot red. v1.8.2: error path also carries a + // machine-readable `reason` so the UI can render specifics inline. 
if (isAbort) { + ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() }); ctx.publish(sessionId, { type: 'message_complete', message_id: assistantMessageId, @@ -504,12 +589,19 @@ async function handleAbortOrError( }); ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled'); } else { - const errMsg = err instanceof Error ? err.message : String(err); + ctx.publishUser({ + type: 'chat_status', + chat_id: chatId, + status: 'error', + at: new Date().toISOString(), + reason: 'llm_provider_error', + }); ctx.publish(sessionId, { type: 'error', message_id: assistantMessageId, chat_id: chatId, error: errMsg, + reason: 'llm_provider_error', }); ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed'); } @@ -523,7 +615,7 @@ async function executeToolPhase( session: Session, projectRoot: string ): Promise { - const { sessionId, chatId, assistantMessageId, depth, signal } = args; + const { sessionId, chatId, assistantMessageId, toolsUsed, signal } = args; const { content, toolCalls, promptTokens, completionTokens, nCtx } = result; const [updated] = await ctx.sql< @@ -607,7 +699,10 @@ async function executeToolPhase( sessionId, chatId, assistantMessageId: nextAssistant!.id, - depth: depth + 1, + // v1.8.2: charge this turn's actual tool invocations against the budget. + // One assistant message can emit multiple tool_calls, so we add the run + // count, not 1. The next turn's budget check sees the cumulative total. 
+    toolsUsed: toolsUsed + result.toolCalls.length,
     signal,
   });
 }
@@ -671,25 +766,7 @@ async function runAssistantTurn(
   ctx: InferenceContext,
   args: TurnArgs,
 ): Promise<void> {
-  const { sessionId, chatId, assistantMessageId, depth } = args;
-
-  if (depth > MAX_TOOL_LOOP_DEPTH) {
-    await ctx.sql`
-      UPDATE messages
-      SET status = 'failed',
-          content = ${'tool loop depth exceeded'},
-          finished_at = clock_timestamp()
-      WHERE id = ${assistantMessageId}
-    `;
-    ctx.publish(sessionId, {
-      type: 'error',
-      message_id: assistantMessageId,
-      chat_id: chatId,
-      error: 'tool loop depth exceeded',
-    });
-    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'error', at: new Date().toISOString() });
-    return;
-  }
+  const { sessionId, chatId } = args;
 
   const loaded = await loadContext(ctx.sql, sessionId, chatId);
   if (!loaded) {
@@ -704,6 +781,17 @@ async function runAssistantTurn(
   const agent = session.agent_id
     ? await getAgentById(project.path, session.agent_id)
     : null;
+
+  // v1.8.2: cap-hit replaces the older "tool loop depth exceeded" failure.
+  // When we've already burned the budget *before* this turn even runs, we
+  // skip straight to the summary flow — the in-flight assistant message slot
+  // gets reused for the wrap-up reply instead of being marked failed.
+  const budget = resolveToolBudget(agent);
+  if (args.toolsUsed >= budget) {
+    await runCapHitSummary(ctx, args, session, project, history, agent, budget);
+    return;
+  }
+
   const messages = buildMessagesPayload(session, project, history, agent);
 
   const state: StreamPhaseState = { accumulated: '', startedAt: null };
@@ -730,7 +818,273 @@ export async function runInference(
   assistantMessageId: string,
   signal?: AbortSignal
 ): Promise<void> {
-  return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, depth: 0, signal });
+  // v1.8.2: every fresh inference (initial send, regenerate, force_send,
+  // continue) starts with a clean budget. Tool-call accumulation across
+  // Continue invocations is what the hard ceiling guards against, not the
+  // per-call budget.
+  return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, toolsUsed: 0, signal });
+}
+
+// v1.8.2: cap-hit summary flow. Called instead of erroring when the loop
+// hits its budget. Reuses the in-flight assistant message slot to stream a
+// short wrap-up reply with the synthetic note appended and tools disabled,
+// then always inserts a cap_hit sentinel afterward (regardless of summary
+// outcome) so the UI can show a Continue affordance. `budget` is the resolved
+// limit; `args.toolsUsed` is the real invocation count, which can overshoot
+// the limit when one assistant turn emits several tool calls.
+async function runCapHitSummary(
+  ctx: InferenceContext,
+  args: TurnArgs,
+  session: Session,
+  project: Project,
+  history: Message[],
+  agent: Agent | null,
+  budget: number,
+): Promise<void> {
+  const { sessionId, chatId, assistantMessageId, signal } = args;
+
+  const messages = buildMessagesPayload(session, project, history, agent);
+  messages.push({ role: 'system', content: CAP_HIT_SUMMARY_NOTE(budget) });
+
+  const startedRow = await ctx.sql<{ started_at: string }[]>`
+    UPDATE messages
+    SET started_at = clock_timestamp()
+    WHERE id = ${assistantMessageId}
+    RETURNING started_at
+  `;
+  const startedAt = startedRow[0]?.started_at ?? null;
+
+  ctx.publish(sessionId, {
+    type: 'message_started',
+    message_id: assistantMessageId,
+    chat_id: chatId,
+    role: 'assistant',
+  });
+
+  let accumulated = '';
+  let pendingFlushTimer: NodeJS.Timeout | null = null;
+  let flushPromise: Promise<unknown> = Promise.resolve();
+  const flushNow = () => {
+    if (pendingFlushTimer) {
+      clearTimeout(pendingFlushTimer);
+      pendingFlushTimer = null;
+    }
+    const snapshot = accumulated;
+    // Partial flushes are best-effort: log and swallow a failed UPDATE so the
+    // chain never rejects. The `finally` below awaits it, and a rejection there
+    // would skip finalization, the sentinel, and the closing status frame.
+    flushPromise = flushPromise
+      .then(() => ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`)
+      .catch((err: unknown) => {
+        ctx.log.error({ err, sessionId, assistantMessageId }, 'cap-hit summary flush failed');
+      });
+  };
+  const scheduleFlush = () => {
+    if (pendingFlushTimer) return;
+    pendingFlushTimer = setTimeout(() => {
+      pendingFlushTimer = null;
+      flushNow();
+    }, DB_FLUSH_INTERVAL_MS);
+  };
+
+  let summaryOk = false;
+  let summarySoftCancelled = false;
+  let summaryError: string | null = null;
+  let result: StreamResult | null = null;
+  try {
+    result = await streamCompletion(
+      ctx,
+      session.model,
+      messages,
+      { tools: null, temperature: agent?.temperature },
+      (delta) => {
+        accumulated += delta;
+        ctx.publish(sessionId, {
+          type: 'delta',
+          message_id: assistantMessageId,
+          chat_id: chatId,
+          content: delta,
+        });
+        scheduleFlush();
+      },
+      signal,
+    );
+    summaryOk = true;
+  } catch (err) {
+    if (err instanceof Error && err.name === 'AbortError') {
+      summarySoftCancelled = true;
+    } else {
+      summaryError = err instanceof Error ? err.message : String(err);
+    }
+  } finally {
+    if (pendingFlushTimer) {
+      clearTimeout(pendingFlushTimer);
+      pendingFlushTimer = null;
+    }
+    await flushPromise;
+  }
+
+  // Finalize the summary message based on the three outcomes. The sentinel
+  // is inserted regardless so the user always has the Continue affordance —
+  // even on a partial / failed summary the chat history shows where the
+  // budget was hit.
+  if (summaryOk && result) {
+    const [updated] = await ctx.sql<
+      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
+    >`
+      UPDATE messages
+      SET content = ${result.content},
+          status = 'complete',
+          tokens_used = ${result.completionTokens},
+          ctx_used = ${result.promptTokens},
+          ctx_max = ${result.nCtx},
+          finished_at = clock_timestamp()
+      WHERE id = ${assistantMessageId}
+      RETURNING tokens_used, ctx_used, ctx_max, finished_at
+    `;
+    ctx.publish(sessionId, {
+      type: 'message_complete',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+      tokens_used: updated?.tokens_used ?? null,
+      ctx_used: updated?.ctx_used ?? null,
+      ctx_max: updated?.ctx_max ?? null,
+      started_at: startedAt,
+      finished_at: updated?.finished_at ?? null,
+      model: session.model,
+    });
+  } else if (summarySoftCancelled) {
+    await ctx.sql`
+      UPDATE messages
+      SET content = ${accumulated},
+          status = 'cancelled',
+          finished_at = clock_timestamp()
+      WHERE id = ${assistantMessageId}
+    `;
+    ctx.publish(sessionId, {
+      type: 'message_complete',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+    });
+  } else {
+    const errMeta: MessageMetadata = {
+      kind: 'error',
+      error_reason: 'summary_after_cap_failed',
+      error_text: summaryError ?? 'summary failed',
+    };
+    await ctx.sql`
+      UPDATE messages
+      SET content = ${accumulated},
+          status = 'failed',
+          finished_at = clock_timestamp(),
+          metadata = ${ctx.sql.json(errMeta as never)}
+      WHERE id = ${assistantMessageId}
+    `;
+    ctx.publish(sessionId, {
+      type: 'error',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+      error: summaryError ?? 'summary failed',
+      reason: 'summary_after_cap_failed',
+    });
+  }
+
+  // Bump the session's updated_at exactly once for this turn.
+  const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
+    UPDATE sessions SET updated_at = clock_timestamp()
+    WHERE id = ${sessionId}
+    RETURNING project_id, name, updated_at
+  `;
+  ctx.publishUser({
+    type: 'session_updated',
+    session_id: sessionId,
+    project_id: sessRow!.project_id,
+    name: sessRow!.name,
+    updated_at: sessRow!.updated_at,
+  });
+
+  await insertCapHitSentinel(ctx, sessionId, chatId, agent, budget, args.toolsUsed);
+
+  // Status frame fires last so the dot color reflects the terminal state.
+  // Success → idle, abort → idle (user-driven stop), error → error+reason.
+  if (summaryOk || summarySoftCancelled) {
+    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
+  } else {
+    ctx.publishUser({
+      type: 'chat_status',
+      chat_id: chatId,
+      status: 'error',
+      at: new Date().toISOString(),
+      reason: 'summary_after_cap_failed',
+    });
+  }
+
+  ctx.log.info(
+    { sessionId, chatId, assistantMessageId, budget, summaryOk, summaryCancelled: summarySoftCancelled },
+    'inference cap-hit summary finished',
+  );
+}
+
+// Persists the cap_hit sentinel and announces it on the session stream.
+// `toolsUsed` is the number of tool calls actually executed this run; it
+// defaults to `budget` for callers that only know the limit.
+async function insertCapHitSentinel(
+  ctx: InferenceContext,
+  sessionId: string,
+  chatId: string,
+  agent: Agent | null,
+  budget: number,
+  toolsUsed: number = budget,
+): Promise<void> {
+  // Hard ceiling: count prior cap_hit sentinels in this chat. After two
+  // continues (sentinel count of 2), the next sentinel reports can_continue
+  // false and the UI disables the Continue button.
+  const priorRows = await ctx.sql<{ count: number }[]>`
+    SELECT COUNT(*)::int AS count
+    FROM messages
+    WHERE chat_id = ${chatId}
+      AND role = 'system'
+      AND metadata->>'kind' = 'cap_hit'
+  `;
+  const priorCount = priorRows[0]?.count ?? 0;
+  const canContinue = priorCount < 2;
+  const metadata: MessageMetadata = {
+    kind: 'cap_hit',
+    used: toolsUsed,
+    limit: budget,
+    agent_name: agent?.name ?? null,
+    can_continue: canContinue,
+  };
+  const content = `Reached tool budget (${toolsUsed}/${budget}). Continue to extend.`;
+
+  const [row] = await ctx.sql<{ id: string }[]>`
+    INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
+    VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)})
+    RETURNING id
+  `;
+
+  // The sentinel content is static, but we still walk the standard frame
+  // sequence (started → delta → complete) so useSessionStream's reducer
+  // appends it via the same path it uses for streaming assistant messages.
+  // The delta carries the full text in one chunk.
+  ctx.publish(sessionId, {
+    type: 'message_started',
+    message_id: row!.id,
+    chat_id: chatId,
+    role: 'system',
+  });
+  ctx.publish(sessionId, {
+    type: 'delta',
+    message_id: row!.id,
+    chat_id: chatId,
+    content,
+  });
+  ctx.publish(sessionId, {
+    type: 'message_complete',
+    message_id: row!.id,
+    chat_id: chatId,
+    metadata,
+  });
 }
 
 const COMPACT_SYSTEM_PROMPT =
diff --git a/apps/server/src/services/tools.ts b/apps/server/src/services/tools.ts
index 45bb6d4..3765712 100644
--- a/apps/server/src/services/tools.ts
+++ b/apps/server/src/services/tools.ts
@@ -308,6 +308,19 @@ export const ALL_TOOLS: ReadonlyArray> = [
   gitStatus as ToolDef,
 ];
 
+// v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is
+// fully contained in this set gets a generous default tool budget (30);
+// anything outside means the agent can mutate state and gets a tighter
+// default (10). Every tool in v1.8.2 happens to be read-only, so the
+// non-RO branch only takes effect once BooCoder lands write tools.
+export const READ_ONLY_TOOL_NAMES = [ + 'view_file', + 'list_dir', + 'grep', + 'find_files', + 'git_status', +] as const; + export const TOOLS_BY_NAME: Record> = Object.fromEntries( ALL_TOOLS.map((t) => [t.name, t]) ); diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts index c6fe7d6..68d7442 100644 --- a/apps/server/src/types/api.ts +++ b/apps/server/src/types/api.ts @@ -45,6 +45,10 @@ export interface Agent { tools: string[]; // whitelist of tool names; empty = no tools allowed model: string | null; // null means "session.model wins" source: AgentSource; + // v1.8.2: per-agent tool-loop budget. null means resolve at runtime from the + // agent's toolset (30 if all tools are read-only, 10 otherwise) or 15 for + // raw chat with no agent. + max_tool_calls: number | null; } // One entry per malformed `## Name` block. Per-block errors don't fail the @@ -100,6 +104,31 @@ export interface ToolResult { error?: string; } +// v1.8.2: structured reason codes for failed inferences. `error` carries the +// human text; `reason` is the machine-readable discriminator the UI matches +// on (with `error` as fallback when reason is absent or unrecognized). +export type ErrorReason = + | 'llm_provider_error' + | 'tool_execution_failed' + | 'summary_after_cap_failed'; + +// v1.8.2: shapes stored in messages.metadata. Discriminated on `kind`. +// cap_hit — system sentinel emitted when tool budget is exhausted +// error — attached to a failed assistant message so UI can show reason +export type MessageMetadata = + | { + kind: 'cap_hit'; + used: number; + limit: number; + agent_name: string | null; + can_continue: boolean; + } + | { + kind: 'error'; + error_reason: ErrorReason; + error_text: string; + }; + export interface Message { id: string; session_id: string; @@ -117,6 +146,9 @@ export interface Message { started_at: string | null; finished_at: string | null; created_at: string; + // v1.8.2: per-message metadata. 
See MessageMetadata for the discriminated + // shapes currently in use. + metadata: MessageMetadata | null; } export interface ModelInfo { @@ -257,11 +289,14 @@ export interface ProjectUpdatedFrame { } // v1.8 mobile-tabs: server can't know about client-side panes, so status // is keyed by chat_id. Frontend dot derives pane status from pane.activeChatId. +// v1.8.2: optional `reason` carries a machine-readable code when status is +// 'error'. UI prefers reason; falls back to no detail when absent. export interface ChatStatusFrame { type: 'chat_status'; chat_id: string; status: 'working' | 'idle' | 'error'; at: string; + reason?: ErrorReason; } export type UserStreamFrame = | ProjectCreatedFrame diff --git a/apps/web/src/api/client.ts b/apps/web/src/api/client.ts index 6db65fd..c136505 100644 --- a/apps/web/src/api/client.ts +++ b/apps/web/src/api/client.ts @@ -152,6 +152,13 @@ export const api = { `/api/chats/${chatId}/force_send`, { method: 'POST', body: JSON.stringify({ content }) } ), + // v1.8.2: extend an inference that hit the tool budget. `sentinelMessageId` + // is the cap-hit sentinel message the user clicked Continue on. + continue: (chatId: string, sentinelMessageId: string) => + request<{ assistant_message_id: string }>( + `/api/chats/${chatId}/continue`, + { method: 'POST', body: JSON.stringify({ sentinel_message_id: sentinelMessageId }) } + ), fork: (chatId: string, body: { messageId: string; name?: string }) => request(`/api/chats/${chatId}/fork`, { method: 'POST', diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index a6554ba..84b610f 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -44,6 +44,10 @@ export interface Agent { tools: string[]; model: string | null; source: AgentSource; + // v1.8.2: per-agent tool-loop budget. null means resolve at runtime from + // the agent's toolset (30 for all read-only, 10 otherwise) or 15 for raw + // chat with no agent. 
+ max_tool_calls: number | null; } export interface AgentParseError { @@ -89,6 +93,32 @@ export interface ToolResult { error?: string; } +// v1.8.2: structured reason codes that flow through error frames / metadata. +// `error` text stays human; `reason` is the discriminator the UI matches on. +export type ErrorReason = + | 'llm_provider_error' + | 'tool_execution_failed' + | 'summary_after_cap_failed'; + +// v1.8.2: shapes stored in Message.metadata. Discriminated on `kind`. +// cap_hit — sentinel emitted when the tool budget is hit; carries the +// budget + agent name + whether Continue is still allowed. +// error — attached to a failed assistant message so the bubble can show +// a specific reason on reload (WS error frame is one-shot). +export type MessageMetadata = + | { + kind: 'cap_hit'; + used: number; + limit: number; + agent_name: string | null; + can_continue: boolean; + } + | { + kind: 'error'; + error_reason: ErrorReason; + error_text: string; + }; + export interface Message { id: string; session_id: string; @@ -106,6 +136,9 @@ export interface Message { started_at: string | null; finished_at: string | null; created_at: string; + // v1.8.2: per-message metadata; see MessageMetadata. null for the vast + // majority of messages. + metadata: MessageMetadata | null; } export interface ModelInfo { @@ -225,7 +258,13 @@ export type WsFrame = ctx_max?: number | null; started_at?: string | null; finished_at?: string | null; + // v1.8.2: piggybacks the persisted metadata onto the terminal frame so + // cap-hit sentinels (and any future stamped-on-complete metadata) flow + // to the client without a refetch. + metadata?: MessageMetadata | null; } | { type: 'messages_deleted'; message_ids: string[]; chat_id?: string } | { type: 'chat_renamed'; chat_id: string; name: string } - | { type: 'error'; message_id?: string; chat_id?: string; error: string }; + // v1.8.2: `reason` discriminates structured failures (the UI prefers it + // over `error` text when present). 
+ | { type: 'error'; message_id?: string; chat_id?: string; error: string; reason?: ErrorReason }; diff --git a/apps/web/src/components/CapHitSentinel.tsx b/apps/web/src/components/CapHitSentinel.tsx new file mode 100644 index 0000000..b0ae33f --- /dev/null +++ b/apps/web/src/components/CapHitSentinel.tsx @@ -0,0 +1,90 @@ +import { useState } from 'react'; +import { AlertCircle } from 'lucide-react'; +import { toast } from 'sonner'; +import { api } from '@/api/client'; +import type { Message } from '@/api/types'; +import { Button } from '@/components/ui/button'; + +interface Props { + message: Message; + // 1-indexed position among cap-hit sentinels in this chat. The first + // cap-hit is 1, second is 2, third is 3 (hard ceiling). + capHitPosition: number; + // Only the most recent sentinel shows the Continue button. Older ones + // render text-only — they've already been continued past. + isLatest: boolean; +} + +// Hard ceiling = 3 cap-hits per chat ⇒ 2 continues max. Lives here in sync +// with insertCapHitSentinel's `canContinue = priorCount < 2` rule in +// services/inference.ts. +const MAX_CONTINUES = 2; + +export function CapHitSentinel({ message, capHitPosition, isLatest }: Props) { + const meta = message.metadata; + // Defensive parse — if the row is somehow missing metadata we still render + // the bare text rather than crashing the chat. + const isCapHit = + meta !== null && typeof meta === 'object' && meta.kind === 'cap_hit'; + + const limit = isCapHit ? meta.limit : null; + const canContinue = isCapHit ? meta.can_continue : false; + const agentName = isCapHit ? meta.agent_name : null; + // `capHitPosition` is 1-indexed; `MAX_CONTINUES - (position - 1)` is the + // number of continues remaining including this one. Clamped to ≥0. 
+ const remaining = Math.max(0, MAX_CONTINUES - (capHitPosition - 1)); + + const [continuing, setContinuing] = useState(false); + + async function handleContinue() { + if (continuing || !canContinue || !isLatest) return; + setContinuing(true); + try { + await api.chats.continue(message.chat_id, message.id); + } catch (err) { + toast.error(err instanceof Error ? err.message : 'continue failed'); + } finally { + setContinuing(false); + } + } + + // Tooltip wording from the v1.8.2 spec. Disabled state takes precedence — + // the spec text "Hard limit reached — start a new chat" matches what the + // server returns when canContinue is false. + const enabledTooltip = limit + ? `Resumes with a fresh budget of ${limit} tool calls. ${remaining} continue${remaining === 1 ? '' : 's'} remaining on this chat.` + : undefined; + const disabledTooltip = 'Hard limit reached — start a new chat'; + + return ( +
+
+ +
+
+ {isCapHit && limit !== null + ? `Reached tool budget (${limit}/${limit})${agentName ? ` — ${agentName}` : ''}.` + : 'Reached tool budget.'} +
+
+ {message.content} +
+ {isLatest && ( +
+ +
+ )} +
+
+
+ ); +} diff --git a/apps/web/src/components/MessageBubble.tsx b/apps/web/src/components/MessageBubble.tsx index 34f9156..283c26d 100644 --- a/apps/web/src/components/MessageBubble.tsx +++ b/apps/web/src/components/MessageBubble.tsx @@ -4,10 +4,10 @@ import Markdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; import { ChevronDown, ChevronRight, Copy, RefreshCw, Check, Share2, RotateCw, GitFork, Trash2 } from 'lucide-react'; import { toast } from 'sonner'; -import type { Chat, Message } from '@/api/types'; +import type { Chat, ErrorReason, Message } from '@/api/types'; import { api } from '@/api/client'; import { sessionEvents } from '@/hooks/sessionEvents'; -import { ToolCallCard } from './ToolCallCard'; +import { CapHitSentinel } from './CapHitSentinel'; import { CodeBlock } from './CodeBlock'; import { Button } from '@/components/ui/button'; import { @@ -19,6 +19,15 @@ import { DialogTitle, } from '@/components/ui/dialog'; +// v1.8.2: human labels for the machine-readable error reasons that ride on +// failed assistant messages via metadata.kind === 'error'. Kept short so the +// inline render under "message failed" stays a single muted line. +const ERROR_REASON_LABELS: Record = { + llm_provider_error: 'LLM provider error', + tool_execution_failed: 'Tool execution failed', + summary_after_cap_failed: 'Summary after tool budget hit failed', +}; + // Match path-shaped substrings ending in `.ext`. Additionally require a `/` // in the match to reduce false positives in prose (e.g. plain `foo.ts` won't // match, but `src/foo.ts` will). False positives at the edges are accepted @@ -94,6 +103,9 @@ function linkifyChildren(children: ReactNode, keyPrefix = 'l'): ReactNode { interface Props { message: Message; sessionChats?: Chat[]; + // v1.8.2: passed by MessageList's render-item pass for cap-hit sentinels. + // Only the most recent sentinel shows the Continue button. 
+ capHitInfo?: { position: number; isLatest: boolean }; } function MarkdownBody({ content }: { content: string }) { @@ -464,15 +476,34 @@ function CompactCard({ message, sessionChats }: { message: Message; sessionChats ); } -export function MessageBubble({ message, sessionChats }: Props) { +export function MessageBubble({ message, sessionChats, capHitInfo }: Props) { if (message.kind === 'compact') { return ; } - if (message.role === 'tool') { - return ; + // v1.8.2: cap-hit sentinels render as a distinct system bubble with a + // Continue button. MessageList's pre-render pass tags each sentinel with + // its position; only the latest gets the actionable button. + if ( + message.role === 'system' && + message.metadata?.kind === 'cap_hit' && + capHitInfo + ) { + return ( + + ); } + // v1.8.2: tool messages and assistant tool_calls are now rendered by + // MessageList via ToolCallLine / ToolCallGroup. Tool-role messages reach + // this point only if MessageList didn't consume them (shouldn't happen, + // but guard against it by rendering nothing rather than a stale card). + if (message.role === 'tool') return null; + if (message.role === 'user') { return (
@@ -487,14 +518,17 @@ export function MessageBubble({ message, sessionChats }: Props) { const isStreaming = message.status === 'streaming'; const failed = message.status === 'failed'; const hasContent = message.content.length > 0; - const hasToolCalls = (message.tool_calls?.length ?? 0) > 0; + // v1.8.2: if metadata stamps an error reason, surface it inline under the + // generic "message failed" line. Keeps the user's eye where it already is + // rather than introducing a separate banner. + const errorMeta = + message.metadata !== null && message.metadata.kind === 'error' + ? message.metadata + : null; return (
- {message.tool_calls?.map((tc) => ( - - ))} - {(hasContent || (!hasToolCalls && isStreaming)) && ( + {(hasContent || isStreaming) && (
{hasContent ? : null} {isStreaming && ( @@ -503,12 +537,18 @@ export function MessageBubble({ message, sessionChats }: Props) {
)} {failed && ( -
message failed
+
+ message failed + {errorMeta && ( + + {ERROR_REASON_LABELS[errorMeta.error_reason]} + {errorMeta.error_text ? ` — ${errorMeta.error_text}` : ''} + + )} +
)} {!isStreaming && } - {!isStreaming && (hasContent || hasToolCalls) && ( - - )} + {!isStreaming && hasContent && }
); } diff --git a/apps/web/src/components/MessageList.tsx b/apps/web/src/components/MessageList.tsx index 26d93d7..0e4f1c9 100644 --- a/apps/web/src/components/MessageList.tsx +++ b/apps/web/src/components/MessageList.tsx @@ -1,15 +1,128 @@ -import { useEffect, useRef } from 'react'; +import { useEffect, useMemo, useRef } from 'react'; import type { Chat, Message } from '@/api/types'; import { MessageBubble } from './MessageBubble'; +import { ToolCallGroup } from './ToolCallGroup'; +import { ToolCallLine, type ToolRun } from './ToolCallLine'; interface Props { messages: Message[]; sessionChats?: Chat[]; } +// v1.8.2: pre-render units. The single linear `messages` array gets walked +// into a render-time list where each tool_call is a first-class item and +// tool_result messages are folded onto their matching tool_run by id. +type RenderItem = + | { kind: 'message'; message: Message; capHitInfo?: { position: number; isLatest: boolean } } + | { kind: 'tool_run'; run: ToolRun; key: string } + | { kind: 'tool_group'; runs: ToolRun[]; key: string }; + +const GROUP_THRESHOLD = 3; + +function isCapHitSentinel(m: Message): boolean { + return m.role === 'system' && m.metadata?.kind === 'cap_hit'; +} + +// First pass: walk messages chronologically, expanding assistant tool_calls +// into per-call run items and folding tool_result messages onto their +// matching runs. Tool messages themselves never produce a render item. +// Assistant messages produce a text render item only when they have text; +// pure tool-call messages are "transparent" so consecutive tool runs can +// still group across them. 
+function flatten(messages: Message[]): RenderItem[] { + const items: RenderItem[] = []; + const runsByCallId = new Map(); + + for (const m of messages) { + if (m.role === 'tool') { + if (m.tool_results) { + const run = runsByCallId.get(m.tool_results.tool_call_id); + if (run) run.result = m.tool_results; + } + continue; + } + const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0; + const hasText = m.content.length > 0; + if (m.role === 'assistant' && hasToolCalls) { + if (hasText || m.status === 'streaming') { + items.push({ kind: 'message', message: m }); + } + for (const tc of m.tool_calls!) { + const run: ToolRun = { call: tc, result: null }; + runsByCallId.set(tc.id, run); + items.push({ kind: 'tool_run', run, key: tc.id }); + } + continue; + } + items.push({ kind: 'message', message: m }); + } + + return items; +} + +// Second pass: collapse runs of >=GROUP_THRESHOLD consecutive tool_run items +// of the same tool name into a single tool_group. Any other render item +// (text bubble, sentinel, user message) breaks the chain. +function group(items: RenderItem[]): RenderItem[] { + const out: RenderItem[] = []; + let i = 0; + while (i < items.length) { + const item = items[i]!; + if (item.kind !== 'tool_run') { + out.push(item); + i += 1; + continue; + } + const name = item.run.call.name; + let j = i + 1; + while ( + j < items.length && + items[j]!.kind === 'tool_run' && + (items[j] as { kind: 'tool_run'; run: ToolRun }).run.call.name === name + ) { + j += 1; + } + const run = items.slice(i, j) as Array<{ kind: 'tool_run'; run: ToolRun; key: string }>; + if (run.length >= GROUP_THRESHOLD) { + out.push({ + kind: 'tool_group', + runs: run.map((r) => r.run), + key: `group-${run[0]!.key}`, + }); + } else { + for (const r of run) out.push(r); + } + i = j; + } + return out; +} + +// Third pass: number cap-hit sentinels (1-indexed) and mark the latest. 
+// CapHitSentinel uses position to compute the "N continues remaining" +// tooltip, and isLatest to gate the Continue button (only the most recent +// sentinel is actionable). +function stampCapHits(items: RenderItem[]): RenderItem[] { + const totalCapHits = items.reduce( + (n, it) => n + (it.kind === 'message' && isCapHitSentinel(it.message) ? 1 : 0), + 0, + ); + if (totalCapHits === 0) return items; + let index = 0; + return items.map((it) => { + if (it.kind !== 'message' || !isCapHitSentinel(it.message)) return it; + index += 1; + return { + ...it, + capHitInfo: { position: index, isLatest: index === totalCapHits }, + }; + }); +} + export function MessageList({ messages, sessionChats }: Props) { const endRef = useRef(null); + const renderItems = useMemo(() => stampCapHits(group(flatten(messages))), [messages]); + useEffect(() => { endRef.current?.scrollIntoView({ block: 'end' }); }, [messages]); @@ -25,9 +138,22 @@ export function MessageList({ messages, sessionChats }: Props) { return (
- {messages.map((m) => ( - - ))} + {renderItems.map((item) => { + if (item.kind === 'message') { + return ( + + ); + } + if (item.kind === 'tool_run') { + return ; + } + return ; + })}
diff --git a/apps/web/src/components/ToolCallCard.tsx b/apps/web/src/components/ToolCallCard.tsx deleted file mode 100644 index f23c16b..0000000 --- a/apps/web/src/components/ToolCallCard.tsx +++ /dev/null @@ -1,102 +0,0 @@ -import { useState } from 'react'; -import type { ReactNode } from 'react'; -import { ChevronRight, Wrench } from 'lucide-react'; -import type { Message, ToolCall } from '@/api/types'; -import { sessionEvents } from '@/hooks/sessionEvents'; - -interface Props { - message?: Message; - toolCall?: ToolCall; -} - -// Same regex/heuristic as MessageBubble: paths ending in `.ext` with at -// least one `/`. Linkifies file paths emitted by tools like grep / find_files -// so they're clickable. -const PATH_REGEX = /([a-zA-Z0-9._/-]+\.[a-zA-Z0-9]+)/g; - -function linkifyOutput(text: string): ReactNode[] { - const out: ReactNode[] = []; - let lastIdx = 0; - let idx = 0; - for (const match of text.matchAll(PATH_REGEX)) { - const matchedText = match[0]; - const start = match.index ?? 0; - if (!matchedText.includes('/')) continue; - if (start > lastIdx) out.push(text.slice(lastIdx, start)); - out.push( - - ); - lastIdx = start + matchedText.length; - idx += 1; - } - if (lastIdx < text.length) out.push(text.slice(lastIdx)); - return out.length > 0 ? out : [text]; -} - -export function ToolCallCard({ message, toolCall }: Props) { - const [open, setOpen] = useState(false); - const tc = toolCall ?? message?.tool_calls?.[0]; - const result = message?.tool_results; - - const name = tc?.name ?? 'tool'; - const args = tc?.args ?? {}; - const error = result?.error; - const output = result?.output; - const truncated = result?.truncated; - - return ( -
- - {open && ( -
- {error ? ( -
-              {error}
-            
- ) : output !== undefined ? ( -
-              {linkifyOutput(
-                typeof output === 'string'
-                  ? output
-                  : JSON.stringify(output, null, 2)
-              )}
-            
- ) : ( -
no result yet
- )} -
- )} -
- ); -} diff --git a/apps/web/src/components/ToolCallGroup.tsx b/apps/web/src/components/ToolCallGroup.tsx new file mode 100644 index 0000000..311aa3f --- /dev/null +++ b/apps/web/src/components/ToolCallGroup.tsx @@ -0,0 +1,64 @@ +import { useState } from 'react'; +import { ChevronRight } from 'lucide-react'; +import { ToolCallLine, runStatus, type ToolRun } from './ToolCallLine'; + +interface Props { + // All runs must share the same tool name. Caller (MessageList grouping + // pass) enforces that invariant. + runs: ToolRun[]; +} + +export function ToolCallGroup({ runs }: Props) { + const [open, setOpen] = useState(false); + if (runs.length === 0) return null; + const toolName = runs[0]!.call.name; + const count = runs.length; + + // Group-level status: pending if any are still running, error if any + // finished with an error, otherwise success. Matches the visual the user + // gets when scanning a long run of greps / view_files. + let pending = 0; + let errored = 0; + for (const r of runs) { + const s = runStatus(r); + if (s === 'pending') pending += 1; + else if (s === 'error') errored += 1; + } + const summaryParts: string[] = []; + if (pending > 0) summaryParts.push(`${pending} running`); + if (errored > 0) summaryParts.push(`${errored} failed`); + const summary = summaryParts.length > 0 ? ` (${summaryParts.join(', ')})` : ''; + + return ( +
+ + {open && ( +
+ {runs.map((run, i) => ( + + ))} +
+ )} +
+ ); +} diff --git a/apps/web/src/components/ToolCallLine.tsx b/apps/web/src/components/ToolCallLine.tsx new file mode 100644 index 0000000..78fc170 --- /dev/null +++ b/apps/web/src/components/ToolCallLine.tsx @@ -0,0 +1,167 @@ +import { useState } from 'react'; +import type { ReactNode } from 'react'; +import { Check, ChevronRight, Loader2, X } from 'lucide-react'; +import type { ToolCall, ToolResult } from '@/api/types'; +import { sessionEvents } from '@/hooks/sessionEvents'; + +// v1.8.2: cap on the inline arg-summary length. Expanded view shows full +// args + full result, so this is purely a single-line render budget. +const ARG_SUMMARY_MAX = 60; + +export interface ToolRun { + call: ToolCall; + // null while the call is in flight or the matching tool result hasn't + // arrived yet on the WS stream. + result: ToolResult | null; +} + +function truncate(s: string, n: number): string { + return s.length > n ? s.slice(0, n - 1) + '…' : s; +} + +// Per-tool argument summary mapping from the v1.8.2 spec. Goal is a single +// scannable line that surfaces the *what* (path / pattern) without +// overwhelming the chat with full JSON. +export function formatToolArgs(name: string, args: Record): string { + if (name === 'view_file') { + const path = String(args.path ?? ''); + const start = args.start_line; + const end = args.end_line; + if (typeof start === 'number' && typeof end === 'number') { + return truncate(`${path}:${start}-${end}`, ARG_SUMMARY_MAX); + } + if (typeof start === 'number') { + return truncate(`${path}:${start}`, ARG_SUMMARY_MAX); + } + return truncate(path, ARG_SUMMARY_MAX); + } + if (name === 'list_dir') { + return truncate(String(args.path ?? '.'), ARG_SUMMARY_MAX); + } + if (name === 'grep') { + const pattern = String(args.pattern ?? ''); + const path = args.path ? ` ${String(args.path)}` : ''; + return truncate(`"${pattern}"${path}`, ARG_SUMMARY_MAX); + } + if (name === 'find_files') { + return truncate(String(args.pattern ?? 
''), ARG_SUMMARY_MAX); + } + if (name === 'git_status') { + return ''; + } + // Unknown tool — surface first arg value or the literal {} so the user can + // see something happened. Forward-compatible with future tools. + const keys = Object.keys(args); + if (keys.length === 0) return '{}'; + const first = keys[0]!; + return truncate(`${first}: ${String(args[first])}`, ARG_SUMMARY_MAX); +} + +export function runStatus(run: ToolRun): 'pending' | 'success' | 'error' { + if (run.result === null) return 'pending'; + if (run.result.error) return 'error'; + return 'success'; +} + +// Path-shaped paths in tool output text get a click handler so users can +// jump to the file. Same heuristic as MessageBubble.linkifyPaths. +const PATH_REGEX = /([a-zA-Z0-9._/-]+\.[a-zA-Z0-9]+)/g; +function linkifyOutput(text: string): ReactNode[] { + const out: ReactNode[] = []; + let lastIdx = 0; + let idx = 0; + for (const match of text.matchAll(PATH_REGEX)) { + const matchedText = match[0]; + const start = match.index ?? 0; + if (!matchedText.includes('/')) continue; + if (start > lastIdx) out.push(text.slice(lastIdx, start)); + out.push( + + ); + lastIdx = start + matchedText.length; + idx += 1; + } + if (lastIdx < text.length) out.push(text.slice(lastIdx)); + return out.length > 0 ? out : [text]; +} + +interface Props { + run: ToolRun; + // When rendered inside a ToolCallGroup the line is already nested under a + // shared header, so the leading arrow is dropped to avoid double indent. + insideGroup?: boolean; +} + +export function ToolCallLine({ run, insideGroup }: Props) { + const [open, setOpen] = useState(false); + const status = runStatus(run); + const args = run.call.args ?? {}; + const summary = formatToolArgs(run.call.name, args); + + return ( +
+ + {open && ( +
+
+            {JSON.stringify(args, null, 2)}
+          
+ {run.result && ( +
+              {run.result.error ? (
+                {run.result.error}
+              ) : (
+                linkifyOutput(
+                  typeof run.result.output === 'string'
+                    ? run.result.output
+                    : JSON.stringify(run.result.output, null, 2)
+                )
+              )}
+              {run.result.truncated && (
+                
— output truncated —
+ )} +
+ )} +
+ )} +
+ ); +} diff --git a/apps/web/src/hooks/sessionEvents.ts b/apps/web/src/hooks/sessionEvents.ts index 0a9da51..abe7566 100644 --- a/apps/web/src/hooks/sessionEvents.ts +++ b/apps/web/src/hooks/sessionEvents.ts @@ -2,7 +2,7 @@ // across hooks (e.g. AI rename arriving via WS in the session view needs to // also refresh the sidebar's session list). -import type { Chat, Project, Session } from '@/api/types'; +import type { Chat, ErrorReason, Project, Session } from '@/api/types'; import type { Attachment } from '@/lib/attachments'; export interface SessionRenamedEvent { @@ -118,11 +118,14 @@ export interface ProjectUpdatedEvent { // v1.8 mobile-tabs: broadcast on user channel from inference.ts so any device // subscribed sees a chat working/idle/error. Frontend stores per-chat; panes // derive their dot from pane.activeChatId. +// v1.8.2: optional `reason` carries a machine-readable code when status is +// 'error'. UI prefers reason for inline error rendering. export interface ChatStatusEvent { type: 'chat_status'; chat_id: string; status: 'working' | 'idle' | 'error'; at: string; + reason?: ErrorReason; } export type SessionEvent = diff --git a/apps/web/src/hooks/useSessionStream.ts b/apps/web/src/hooks/useSessionStream.ts index 42d9b1f..07d85df 100644 --- a/apps/web/src/hooks/useSessionStream.ts +++ b/apps/web/src/hooks/useSessionStream.ts @@ -29,7 +29,9 @@ function applyFrame(state: State, frame: WsFrame): State { kind: 'message', tool_calls: null, tool_results: null, - status: 'streaming', + // v1.8.2: cap-hit sentinels arrive role='system' and are static, so + // skipping the streaming dot for them keeps the UI accurate. + status: frame.role === 'system' ? 
'complete' : 'streaming', last_seq: 0, tokens_used: null, ctx_used: null, @@ -37,6 +39,7 @@ function applyFrame(state: State, frame: WsFrame): State { started_at: null, finished_at: null, created_at: new Date().toISOString(), + metadata: null, }; return { ...state, messages: [...state.messages, newMsg] }; } @@ -96,6 +99,7 @@ function applyFrame(state: State, frame: WsFrame): State { started_at: null, finished_at: null, created_at: new Date().toISOString(), + metadata: null, }; return { ...state, messages: [...state.messages, newMsg] }; } @@ -110,6 +114,10 @@ function applyFrame(state: State, frame: WsFrame): State { ...(frame.ctx_max !== undefined ? { ctx_max: frame.ctx_max } : {}), ...(frame.started_at !== undefined ? { started_at: frame.started_at } : {}), ...(frame.finished_at !== undefined ? { finished_at: frame.finished_at } : {}), + // v1.8.2: cap-hit sentinels (and future stamped metadata) ride + // in on this terminal frame so the reducer can attach it + // without waiting for a refetch. + ...(frame.metadata !== undefined ? { metadata: frame.metadata } : {}), } : m ); @@ -133,9 +141,22 @@ function applyFrame(state: State, frame: WsFrame): State { return state; } case 'error': { + // v1.8.2: when the frame carries a structured reason, stamp it onto the + // failed message's metadata so the bubble can render specifics inline + // (the WS error frame is one-shot; refresh-safe rendering needs the + // value persisted on the message). + const errorMeta = frame.reason + ? { kind: 'error' as const, error_reason: frame.reason, error_text: frame.error } + : null; const next = frame.message_id ? state.messages.map((m) => - m.id === frame.message_id ? { ...m, status: 'failed' as const } : m + m.id === frame.message_id + ? { + ...m, + status: 'failed' as const, + ...(errorMeta ? 
{ metadata: errorMeta } : {}), + } + : m ) : state.messages; return { ...state, messages: next, error: frame.error }; diff --git a/boocode_roadmap.md b/boocode_roadmap.md index 1b91555..4e71309 100644 --- a/boocode_roadmap.md +++ b/boocode_roadmap.md @@ -323,6 +323,10 @@ Full inventory in `boocode_code_review.md`. Headline items: - **codeprysm rejected** — embedding-based; node/edge taxonomy noted as reference if we ever build our own graph. - **Batch 9 decoupled from Batch 7 (2026-05-16).** AgentPicker mounts in `ChatInput.tsx` toolbar only. SettingsDrawer agent entry and Header active-agent badge moved to Batch 7. Builtin defaults shipped: six agents (Code Reviewer, Debugger, Refactorer, Architect, Security Auditor, Prompt Builder) with no `model` field — session model wins by default. +## Follow-ups (post-ship docs / cleanup) + +- **After v1.8.2 ships:** Add explicit `max_tool_calls: 30` to all 6 agents in `/data/AGENTS.md` and `/opt/boocode/AGENTS.md`. Purely for documentation/discoverability — defaults handle behavior identically (all 6 agents use only read-only tools, default is already 30). + ----- ## Workflow