v1.8.2: tool loop cap-hit summary + tool call UI compaction

Old hardcoded MAX_TOOL_LOOP_DEPTH=15 replaced by per-agent max_tool_calls (1-100, AGENTS.md frontmatter) with defaults: 30 for agents whose tools are all read-only, 10 for agents that include any non-read-only tool, 15 for raw chat. When the loop hits the cap, fire one final summary call with tools disabled, stream the wrap-up into the in-flight assistant message, then insert a system sentinel with metadata.kind='cap_hit'. The sentinel renders an amber bubble with a Continue button (latest sentinel only) that POSTs to a new /api/chats/:id/continue route to extend the run. Hard ceiling: 3 cap-hits per chat (2 continues max) — the third sentinel reports can_continue=false. Error frames carry a machine-readable reason code alongside human error text. Failed messages persist the reason via metadata.kind='error' so the bubble renders specifics on reload (WS error frame is one-shot). Tool call UI rewired: ToolCallLine renders inline (↳ name args spinner/check/✗, expand-on-tap for args+result); ToolCallGroup collapses 3+ consecutive same-tool runs into a compact card. MessageList owns a three-pass pre-render (flatten + fold tool results onto matching runs by id + group same-tool runs + number sentinels). MessageBubble drops tool rendering and adds the sentinel / error-reason branches. ToolCallCard deleted. Roadmap follow-up logged: add explicit max_tool_calls: 30 to the 6 agents in /data/AGENTS.md and /opt/boocode/AGENTS.md post-ship for discoverability (defaults handle behavior identically). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 10:31:32 +00:00
parent 5422c47928
commit 5c61cc7281
20 changed files with 1125 additions and 167 deletions
--- a/apps/server/src/routes/chats.ts
+++ b/apps/server/src/routes/chats.ts
@@ -231,7 +231,7 @@ export function registerChatRoutes(
          INSERT INTO messages (
            session_id, chat_id, role, content, kind, tool_calls, tool_results,
            status, tokens_used, ctx_used, ctx_max, started_at, finished_at,
-            created_at
+            created_at, metadata
          )
          SELECT
            ${source.session_id}, ${chat!.id}, role, content, kind,
@@ -239,7 +239,8 @@ export function registerChatRoutes(
            tokens_used, ctx_used, ctx_max, started_at, finished_at,
            clock_timestamp() + (
              ROW_NUMBER() OVER (ORDER BY created_at ASC, id ASC) * INTERVAL '1 microsecond'
-            )
+            ),
+            metadata
          FROM messages
          WHERE chat_id = ${source.id}
            AND created_at <= ${target.created_at}::timestamptz
@@ -268,7 +269,7 @@ export function registerChatRoutes(
      }
      const rows = await sql<Message[]>`
        SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
-               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
+               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
        FROM messages
        WHERE chat_id = ${req.params.id}
        ORDER BY created_at ASC, id ASC
--- a/apps/server/src/routes/messages.ts
+++ b/apps/server/src/routes/messages.ts
@@ -7,6 +7,13 @@ const SendBody = z.object({
  content: z.string().min(1).max(64_000),
 });

+// v1.8.2: body of POST /api/chats/:id/continue, which extends an inference
+// loop that hit the tool budget. This schema only requires a UUID; the route
+// handler checks the sentinel's kind and the per-chat hard ceiling.
+const ContinueBody = z.object({
+  sentinel_message_id: z.string().uuid(),
+});
+
 interface MessageHandlers {
  enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
  enqueueCompact: (sessionId: string, chatId: string, compactMessageId: string, user: string) => void;
@@ -36,7 +43,7 @@ export function registerMessageRoutes(
      }
      const rows = await sql<Message[]>`
        SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
-               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
+               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
        FROM messages
        WHERE session_id = ${req.params.id}
        ORDER BY created_at ASC, id ASC
@@ -253,6 +260,102 @@ export function registerMessageRoutes(
    }
  );

+  // POST /api/chats/:id/continue — resume a chat whose tool loop stopped at
+  // its cap. Body: { sentinel_message_id } naming the cap-hit sentinel to
+  // continue from. On success inserts an empty streaming assistant message,
+  // enqueues inference for it, and replies 202 { assistant_message_id }.
+  // Failures: 400 invalid body / not a cap-hit sentinel; 404 no open chat
+  // with this id, or no such system message in the chat; 409 chat is
+  // streaming, sentinel superseded, or hard ceiling reached.
+  app.post<{ Params: { id: string } }>(
+    '/api/chats/:id/continue',
+    async (req, reply) => {
+      const parsed = ContinueBody.safeParse(req.body);
+      if (!parsed.success) {
+        reply.code(400);
+        return { error: 'invalid body', details: parsed.error.flatten() };
+      }
+
+      const chatRows = await sql<Chat[]>`
+        SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
+      `;
+      if (chatRows.length === 0) {
+        reply.code(404);
+        return { error: 'chat not found' };
+      }
+      const chat = chatRows[0]!;
+      const sessionId = chat.session_id;
+
+      // Cap-hit sentinels are only ever inserted after a turn completes, so
+      // there must not be an active inference at this moment. If there is,
+      // the client is racing the cap-hit summary that just emitted the
+      // sentinel — bail rather than enqueue a parallel run.
+      if (handlers.hasActiveInference(chat.id)) {
+        reply.code(409);
+        return { error: 'chat is currently streaming' };
+      }
+
+      // `superseded` is true when a newer cap-hit sentinel exists in this chat
+      // (ordered by created_at, then id). Assumes metadata is json/jsonb.
+      const sentinel = await sql<
+        { metadata: { kind?: unknown; can_continue?: unknown } | null; superseded: boolean }[]
+      >`
+        SELECT m.metadata,
+               EXISTS (
+                 SELECT 1
+                 FROM messages newer
+                 WHERE newer.chat_id = ${chat.id}
+                   AND newer.role = 'system'
+                   AND newer.metadata->>'kind' = 'cap_hit'
+                   AND (newer.created_at, newer.id) > (m.created_at, m.id)
+               ) AS superseded
+        FROM messages m
+        WHERE m.id = ${parsed.data.sentinel_message_id}
+          AND m.chat_id = ${chat.id}
+          AND m.role = 'system'
+      `;
+      if (sentinel.length === 0) {
+        reply.code(404);
+        return { error: 'sentinel not found' };
+      }
+      const { metadata: meta, superseded } = sentinel[0]!;
+      if (!meta || meta.kind !== 'cap_hit') {
+        reply.code(400);
+        return { error: 'message is not a cap-hit sentinel' };
+      }
+      // Only the newest sentinel may be continued. An older sentinel's stored
+      // can_continue may still be true, so honoring it (stale tab, direct API
+      // hit) would let a chat run past the per-chat hard ceiling.
+      if (superseded) {
+        reply.code(409);
+        return { error: 'sentinel superseded by a newer cap-hit' };
+      }
+      // Server-side hard ceiling check. UI already disables the button when
+      // can_continue is false; defending against a stale tab or a direct
+      // API hit is the only reason this lives on the server too.
+      if (meta.can_continue !== true) {
+        reply.code(409);
+        return { error: 'hard limit reached for this chat' };
+      }
+
+      const result = await sql.begin(async (tx) => {
+        const [assistantMsg] = await tx<{ id: string }[]>`
+          INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
+          VALUES (${sessionId}, ${chat.id}, 'assistant', '', 'streaming', clock_timestamp())
+          RETURNING id
+        `;
+        await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`;
+        await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`;
+        return { assistant_message_id: assistantMsg!.id };
+      });
+
+      handlers.enqueueInference(sessionId, chat.id, result.assistant_message_id, 'default');
+
+      reply.code(202);
+      return result;
+    }
+  );
+
  app.post<{ Params: { id: string } }>(
    '/api/chats/:id/force_send',
    async (req, reply) => {
--- a/apps/server/src/routes/ws.ts
+++ b/apps/server/src/routes/ws.ts
@@ -23,7 +23,7 @@ export function registerWebSocket(

      const messages = await sql<Message[]>`
        SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
-               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
+               tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
        FROM messages
        WHERE session_id = ${sessionId}
        ORDER BY created_at ASC, id ASC