v1.14.0-outer-loop: explicit while loop replaces inference recursion

Converts the ad-hoc executeToolPhase → runAssistantTurn recursion into an explicit while (stepNumber < effectiveCap) loop. A step is one stream-and-tool-execute iteration; the loop terminates on non-tool finish, step-cap hit, doom-loop, budget exhaustion, abort, or synthesis success. MAX_STEPS = 200 is the hard ceiling (4x the old effective limit from budget). A per-agent "steps:" field in AGENTS.md frontmatter sets tighter caps (Refactorer: 5, Architect: 20, others: unset = bounded only by MAX_STEPS). Resolution: effectiveCap = Math.min(agent.steps ?? Infinity, MAX_STEPS). executeToolPhase no longer recurses — it returns a ToolPhaseResult struct (action: 'continue' | 'paused' | 'synthesis_done') so the caller decides whether to continue or break. "steps: 0" is handled as "no tool calls allowed" via runTextOnlyTurn (one text-only stream phase, tool calls ignored with warn log). Step-cap hits produce a sentinel summary (reuses cap_hit kind so CapHitSentinel.tsx renders without frontend changes; text distinguishes "Step limit reached" from "Tool budget exhausted"). Doom-loop check migrated to top of loop body — same predicate, same threshold (3), break instead of return. step_start parts are in the schema CHECK but not emitted as message_parts — writing before the stream phase creates a sequence-0 collision with partsFromAssistantMessage. Structured log line emitted instead. Adversarial review caught the collision pre-deploy. 332/332 server tests passing. No frontend changes. No schema changes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 20:29:21 +00:00
parent 211e903620
commit f4a97808ad
14 changed files with 756 additions and 204 deletions
--- a/apps/server/src/services/inference/sentinel-summaries.ts
+++ b/apps/server/src/services/inference/sentinel-summaries.ts
@@ -476,6 +476,202 @@ export async function runDoomLoopSummary(
  );
 }

+// v1.14.0 — wrap-up path for a turn that ran into its step limit.
+// Structurally this follows runCapHitSummary: it reuses the in-flight
+// assistant slot, streams a summary with tools disabled, and after
+// finalizing inserts the sentinel and drops chat_status. What differs is
+// the note text, which refers to the step limit instead of the tool
+// budget. The sentinel keeps metadata.kind = 'cap_hit' so the frontend
+// CapHitSentinel component can render it unchanged.
+//
+// STEP_CAP_NOTE builds the system note appended to the prompt; `steps` is
+// the number of steps used and `cap` the limit that was reached.
+const STEP_CAP_NOTE = (steps: number, cap: number): string => {
+  return `You've reached the step limit (${steps}/${cap} steps). Produce the best answer you can with what you have. Do not call more tools.`;
+};
+
+/**
+ * Streams a tools-disabled wrap-up answer into the existing assistant message
+ * after a turn has reached its step limit, then finalizes that message and
+ * emits the cap-hit sentinel plus a closing `chat_status` event.
+ *
+ * Sequence:
+ *  1. Build the prompt via `buildMessagesPayload` and append the step-limit
+ *     system note.
+ *  2. Stamp `started_at` on the assistant message and publish `message_started`.
+ *  3. Stream the completion, publishing each delta and periodically persisting
+ *     the accumulated text (best-effort, throttled by `DB_FLUSH_INTERVAL_MS`).
+ *  4. Finalize the message as `complete`, `cancelled` (AbortError) or `failed`.
+ *  5. Touch the session row, insert the cap-hit sentinel, publish `chat_status`.
+ *
+ * @param ctx - Inference context; `sql`, `publish`, `publishUser` and `log` are used.
+ * @param args - Turn arguments; `sessionId`, `chatId`, `assistantMessageId` and `signal` are read.
+ * @param session - Session whose `model` drives the summary completion.
+ * @param project - Forwarded to `buildMessagesPayload`.
+ * @param history - Forwarded to `buildMessagesPayload`.
+ * @param agent - Active agent or `null`; its `temperature`, when present, is passed to the completion.
+ * @param steps - Number of steps consumed; quoted in the step-limit note.
+ * @param cap - Step limit that was hit; quoted in the note and passed to `insertCapHitSentinel`.
+ * @returns A promise that resolves after the final log line. Errors thrown by
+ *   the stream are recorded on the message instead of being rethrown; errors
+ *   from the finalizing SQL statements and helpers still reject the promise.
+ */
+export async function runStepCapSummary(
+  ctx: InferenceContext,
+  args: TurnArgs,
+  session: Session,
+  project: Project,
+  history: Message[],
+  agent: Agent | null,
+  steps: number,
+  cap: number,
+): Promise<void> {
+  const { sessionId, chatId, assistantMessageId, signal } = args;
+
+  const messages = await buildMessagesPayload(session, project, history, agent, ctx.log);
+  messages.push({ role: 'system', content: STEP_CAP_NOTE(steps, cap) });
+
+  const startedRow = await ctx.sql<{ started_at: string }[]>`
+    UPDATE messages
+    SET started_at = clock_timestamp()
+    WHERE id = ${assistantMessageId}
+    RETURNING started_at
+  `;
+  const startedAt = startedRow[0]?.started_at ?? null;
+
+  ctx.publish(sessionId, {
+    type: 'message_started',
+    message_id: assistantMessageId,
+    chat_id: chatId,
+    role: 'assistant',
+  });
+
+  let accumulated = '';
+  let pendingFlushTimer: NodeJS.Timeout | null = null;
+  // Serializes interim content writes so they reach the database in order.
+  let flushPromise: Promise<unknown> = Promise.resolve();
+  const flushNow = () => {
+    if (pendingFlushTimer) {
+      clearTimeout(pendingFlushTimer);
+      pendingFlushTimer = null;
+    }
+    // Capture the text now; `accumulated` keeps growing while the write is queued.
+    const snapshot = accumulated;
+    flushPromise = flushPromise
+      .then(() =>
+        ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`
+      )
+      .catch((err: unknown) => {
+        // Interim flushes are best-effort: the finalizing UPDATE below writes
+        // the content again. Swallowing here keeps the chain usable for later
+        // flushes, avoids an unobserved rejection while streaming continues,
+        // and stops `await flushPromise` in `finally` from skipping finalization.
+        ctx.log.warn(
+          { err, sessionId, chatId, assistantMessageId },
+          'inference step-cap summary interim flush failed',
+        );
+      });
+  };
+  const scheduleFlush = () => {
+    // At most one timer is pending; further deltas ride along with it.
+    if (pendingFlushTimer) return;
+    pendingFlushTimer = setTimeout(() => {
+      pendingFlushTimer = null;
+      flushNow();
+    }, DB_FLUSH_INTERVAL_MS);
+  };
+
+  let summaryOk = false;
+  let summarySoftCancelled = false;
+  let summaryError: string | null = null;
+  let result: StreamResult | null = null;
+  try {
+    result = await streamCompletion(
+      ctx,
+      session.model,
+      messages,
+      // tools: null — the summary call runs with tools disabled.
+      { tools: null, temperature: agent?.temperature },
+      (delta) => {
+        accumulated += delta;
+        ctx.publish(sessionId, {
+          type: 'delta',
+          message_id: assistantMessageId,
+          chat_id: chatId,
+          content: delta,
+        });
+        scheduleFlush();
+      },
+      undefined,
+      signal,
+    );
+    summaryOk = true;
+  } catch (err) {
+    // An AbortError is treated as a cancellation, anything else as a failure.
+    if (err instanceof Error && err.name === 'AbortError') {
+      summarySoftCancelled = true;
+    } else {
+      summaryError = err instanceof Error ? err.message : String(err);
+    }
+  } finally {
+    // Drop any not-yet-fired flush and wait for queued writes so they cannot
+    // land after (and overwrite) the finalizing UPDATE below.
+    if (pendingFlushTimer) {
+      clearTimeout(pendingFlushTimer);
+      pendingFlushTimer = null;
+    }
+    await flushPromise;
+  }
+
+  if (summaryOk && result) {
+    const mctx = await modelContext.getModelContext(session.model);
+    const nCtx = mctx?.n_ctx ?? null;
+    const [updated] = await ctx.sql<
+      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
+    >`
+      UPDATE messages
+      SET content = ${result.content},
+          status = 'complete',
+          tokens_used = ${result.completionTokens},
+          ctx_used = ${result.promptTokens},
+          ctx_max = ${nCtx},
+          finished_at = clock_timestamp()
+      WHERE id = ${assistantMessageId}
+      RETURNING tokens_used, ctx_used, ctx_max, finished_at
+    `;
+    ctx.publish(sessionId, {
+      type: 'message_complete',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+      tokens_used: updated?.tokens_used ?? null,
+      ctx_used: updated?.ctx_used ?? null,
+      ctx_max: updated?.ctx_max ?? null,
+      started_at: startedAt,
+      finished_at: updated?.finished_at ?? null,
+      model: session.model,
+    });
+  } else if (summarySoftCancelled) {
+    // Keep whatever text streamed before the abort.
+    await ctx.sql`
+      UPDATE messages
+      SET content = ${accumulated},
+          status = 'cancelled',
+          finished_at = clock_timestamp()
+      WHERE id = ${assistantMessageId}
+    `;
+    ctx.publish(sessionId, {
+      type: 'message_complete',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+    });
+  } else {
+    const errMeta: MessageMetadata = {
+      kind: 'error',
+      error_reason: 'summary_after_cap_failed',
+      error_text: summaryError ?? 'step-cap summary failed',
+    };
+    await ctx.sql`
+      UPDATE messages
+      SET content = ${accumulated},
+          status = 'failed',
+          finished_at = clock_timestamp(),
+          metadata = ${ctx.sql.json(errMeta as never)}
+      WHERE id = ${assistantMessageId}
+    `;
+    ctx.publish(sessionId, {
+      type: 'error',
+      message_id: assistantMessageId,
+      chat_id: chatId,
+      error: summaryError ?? 'step-cap summary failed',
+      reason: 'summary_after_cap_failed',
+    });
+  }
+
+  const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
+    UPDATE sessions SET updated_at = clock_timestamp()
+    WHERE id = ${sessionId}
+    RETURNING project_id, name, updated_at
+  `;
+  if (sessRow) {
+    ctx.publishUser({
+      type: 'session_updated',
+      session_id: sessionId,
+      project_id: sessRow.project_id,
+      name: sessRow.name,
+      updated_at: sessRow.updated_at,
+    });
+  } else {
+    // The UPDATE matched no row; skip the event rather than dereference
+    // undefined, so the sentinel and chat_status below are still attempted.
+    ctx.log.warn(
+      { sessionId, chatId, assistantMessageId },
+      'inference step-cap summary found no session row to update',
+    );
+  }
+
+  // Reuse cap_hit sentinel so the frontend CapHitSentinel component renders
+  // it without changes. The content text distinguishes step cap from budget.
+  // NOTE(review): only `cap` is passed here — confirm insertCapHitSentinel
+  // actually produces step-limit wording for this call.
+  await insertCapHitSentinel(ctx, sessionId, chatId, agent, cap);
+
+  // A cancelled summary still returns the chat to idle; only a failed one
+  // surfaces as an error status.
+  if (summaryOk || summarySoftCancelled) {
+    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
+  } else {
+    ctx.publishUser({
+      type: 'chat_status',
+      chat_id: chatId,
+      status: 'error',
+      at: new Date().toISOString(),
+      reason: 'summary_after_cap_failed',
+    });
+  }
+
+  ctx.log.info(
+    { sessionId, chatId, assistantMessageId, steps, cap, summaryOk, summaryCancelled: summarySoftCancelled },
+    'inference step-cap summary finished',
+  );
+}
+
 async function insertDoomLoopSentinel(
  ctx: InferenceContext,
  sessionId: string,