From d58d553503fd5ab459e089f4ccb2ec776abb9dda Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Thu, 21 May 2026 17:15:02 +0000 Subject: [PATCH] v1.12.1: same-name doom-loop guard + runAssistantTurn trace logging Add detectSameNameLoop (threshold 5) to catch over-verification hangs where tool args vary but the model is stuck on one tool. Add 12 structured log points across the inference state machine (runAssistantTurn, executeToolPhase, runDoomLoopSummary) to diagnose the deterministic hang surfaced in v1.12.0 smoke testing. Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/server/src/services/inference.ts | 36 +++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index 9a22476..9e57595 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -62,6 +62,7 @@ const CAP_HIT_SUMMARY_NOTE = (limit: number) => // session/processor.ts). Threshold of 3 is the smallest value that doesn't // false-positive on a model that retries once after a transient error. export const DOOM_LOOP_THRESHOLD = 3; +export const DOOM_LOOP_SAME_NAME_THRESHOLD = 5; const DOOM_LOOP_NOTE = (name: string) => `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`; @@ -85,6 +86,18 @@ export function detectDoomLoop( return { name: ref.name, args: ref.args }; } +export function detectSameNameLoop( + recentToolCalls: ToolCall[], +): { name: string } | null { + if (recentToolCalls.length < DOOM_LOOP_SAME_NAME_THRESHOLD) return null; + const last = recentToolCalls.slice(-DOOM_LOOP_SAME_NAME_THRESHOLD); + const name = last[0]!.name; + for (let i = 1; i < last.length; i++) { + if (last[i]!.name !== name) return null; + } + return { name }; +} + function isCapHitSentinel(m: Message): boolean { return ( m.role === 'system' && @@ -959,6 +972,7 @@ async function executeToolPhase( VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp()) RETURNING id `; + ctx.log.info({ chatId, nextToolsUsed: toolsUsed + result.toolCalls.length, phase: 'executeToolPhase:before_recurse' }, 'recursing into next turn'); await runAssistantTurn(ctx, { sessionId, chatId, @@ -1043,6 +1057,7 @@ async function runAssistantTurn( args: TurnArgs, ): Promise { const { sessionId, chatId } = args; + ctx.log.info({ chatId, sessionId, toolsUsed: args.toolsUsed, recentToolCallsLen: args.recentToolCalls?.length ?? 0, phase: 'runAssistantTurn:enter' }, 'turn enter'); // v1.11: if the prior turn flagged this chat for compaction, run it first // so loadContext below reads the post-compaction history. We swallow @@ -1073,6 +1088,7 @@ async function runAssistantTurn( return; } const { session, project, history } = loaded; + ctx.log.info({ chatId, historyLen: history.length, phase: 'runAssistantTurn:loaded' }, 'context loaded'); const projectRoot = await resolveProjectRoot(project.path); // Agent resolution is per-turn so PATCH agent_id mid-conversation takes // effect on the next message. Unknown agent_id returns null silently — @@ -1090,6 +1106,7 @@ async function runAssistantTurn( await runCapHitSummary(ctx, args, session, project, history, agent, budget); return; } + ctx.log.info({ chatId, budget, toolsUsed: args.toolsUsed, phase: 'runAssistantTurn:budget_ok' }, 'budget ok'); // v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can // burn through 3 identical calls long before the 15-call budget fires). @@ -1101,6 +1118,17 @@ async function runAssistantTurn( await runDoomLoopSummary(ctx, args, session, project, history, agent, loop); return; } + ctx.log.info({ chatId, phase: 'runAssistantTurn:no_doom_loop' }, 'no doom loop'); + + const sameNameLoop = detectSameNameLoop(args.recentToolCalls); + if (sameNameLoop) { + await runDoomLoopSummary(ctx, args, session, project, history, agent, { + name: sameNameLoop.name, + args: {}, + }); + return; + } + ctx.log.info({ chatId, phase: 'runAssistantTurn:no_same_name_loop' }, 'no same-name loop'); const messages = await buildMessagesPayload(session, project, history, agent); @@ -1114,17 +1142,24 @@ async function runAssistantTurn( const webToolsEnabled = session.web_search_enabled ?? project.default_web_search_enabled ?? false; + ctx.log.info({ chatId, msgCount: messages.length, phase: 'runAssistantTurn:payload_built' }, 'payload built'); + const state: StreamPhaseState = { accumulated: '', startedAt: null }; let result: StreamResult; try { + ctx.log.info({ chatId, model: session.model, phase: 'runAssistantTurn:before_stream' }, 'calling upstream'); result = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled); } catch (err) { await handleAbortOrError(ctx, args, state.accumulated, err); return; } + ctx.log.info({ chatId, toolCallsLen: result.toolCalls.length, finishReason: result.finishReason, contentLen: result.content?.length ?? 0, phase: 'runAssistantTurn:after_stream' }, 'upstream returned'); + if (result.toolCalls.length > 0) { + ctx.log.info({ chatId, toolNames: result.toolCalls.map(tc => tc.name), phase: 'runAssistantTurn:before_tools' }, 'executing tools'); await executeToolPhase(ctx, args, result, state.startedAt, session, projectRoot); + ctx.log.info({ chatId, phase: 'runAssistantTurn:after_tools' }, 'tools complete, returning'); return; } @@ -1430,6 +1465,7 @@ async function runDoomLoopSummary( loop: { name: string; args: Record }, ): Promise { const { sessionId, chatId, assistantMessageId, signal } = args; + ctx.log.info({ chatId, loopName: loop.name, phase: 'runDoomLoopSummary:enter' }, 'doom-loop summary firing'); const messages = await buildMessagesPayload(session, project, history, agent); messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });