v1.12.1: same-name doom-loop guard + runAssistantTurn trace logging

Add detectSameNameLoop (threshold 5) to catch over-verification hangs
where tool args vary but the model is stuck on one tool. Add 12 structured
log points across the inference state machine (runAssistantTurn,
executeToolPhase, runDoomLoopSummary) to diagnose the deterministic hang
surfaced in v1.12.0 smoke testing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-21 17:15:02 +00:00
parent fce8c06932
commit d58d553503

View File

@@ -62,6 +62,7 @@ const CAP_HIT_SUMMARY_NOTE = (limit: number) =>
// session/processor.ts). Threshold of 3 is the smallest value that doesn't // session/processor.ts). Threshold of 3 is the smallest value that doesn't
// false-positive on a model that retries once after a transient error. // false-positive on a model that retries once after a transient error.
export const DOOM_LOOP_THRESHOLD = 3; export const DOOM_LOOP_THRESHOLD = 3;
export const DOOM_LOOP_SAME_NAME_THRESHOLD = 5;
const DOOM_LOOP_NOTE = (name: string) => const DOOM_LOOP_NOTE = (name: string) =>
`You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`; `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`;
@@ -85,6 +86,18 @@ export function detectDoomLoop(
return { name: ref.name, args: ref.args }; return { name: ref.name, args: ref.args };
} }
export function detectSameNameLoop(
recentToolCalls: ToolCall[],
): { name: string } | null {
if (recentToolCalls.length < DOOM_LOOP_SAME_NAME_THRESHOLD) return null;
const last = recentToolCalls.slice(-DOOM_LOOP_SAME_NAME_THRESHOLD);
const name = last[0]!.name;
for (let i = 1; i < last.length; i++) {
if (last[i]!.name !== name) return null;
}
return { name };
}
function isCapHitSentinel(m: Message): boolean { function isCapHitSentinel(m: Message): boolean {
return ( return (
m.role === 'system' && m.role === 'system' &&
@@ -959,6 +972,7 @@ async function executeToolPhase(
VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp()) VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp())
RETURNING id RETURNING id
`; `;
ctx.log.info({ chatId, nextToolsUsed: toolsUsed + result.toolCalls.length, phase: 'executeToolPhase:before_recurse' }, 'recursing into next turn');
await runAssistantTurn(ctx, { await runAssistantTurn(ctx, {
sessionId, sessionId,
chatId, chatId,
@@ -1043,6 +1057,7 @@ async function runAssistantTurn(
args: TurnArgs, args: TurnArgs,
): Promise<void> { ): Promise<void> {
const { sessionId, chatId } = args; const { sessionId, chatId } = args;
ctx.log.info({ chatId, sessionId, toolsUsed: args.toolsUsed, recentToolCallsLen: args.recentToolCalls?.length ?? 0, phase: 'runAssistantTurn:enter' }, 'turn enter');
// v1.11: if the prior turn flagged this chat for compaction, run it first // v1.11: if the prior turn flagged this chat for compaction, run it first
// so loadContext below reads the post-compaction history. We swallow // so loadContext below reads the post-compaction history. We swallow
@@ -1073,6 +1088,7 @@ async function runAssistantTurn(
return; return;
} }
const { session, project, history } = loaded; const { session, project, history } = loaded;
ctx.log.info({ chatId, historyLen: history.length, phase: 'runAssistantTurn:loaded' }, 'context loaded');
const projectRoot = await resolveProjectRoot(project.path); const projectRoot = await resolveProjectRoot(project.path);
// Agent resolution is per-turn so PATCH agent_id mid-conversation takes // Agent resolution is per-turn so PATCH agent_id mid-conversation takes
// effect on the next message. Unknown agent_id returns null silently — // effect on the next message. Unknown agent_id returns null silently —
@@ -1090,6 +1106,7 @@ async function runAssistantTurn(
await runCapHitSummary(ctx, args, session, project, history, agent, budget); await runCapHitSummary(ctx, args, session, project, history, agent, budget);
return; return;
} }
ctx.log.info({ chatId, budget, toolsUsed: args.toolsUsed, phase: 'runAssistantTurn:budget_ok' }, 'budget ok');
// v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can // v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can
// burn through 3 identical calls long before the 15-call budget fires). // burn through 3 identical calls long before the 15-call budget fires).
@@ -1101,6 +1118,17 @@ async function runAssistantTurn(
await runDoomLoopSummary(ctx, args, session, project, history, agent, loop); await runDoomLoopSummary(ctx, args, session, project, history, agent, loop);
return; return;
} }
ctx.log.info({ chatId, phase: 'runAssistantTurn:no_doom_loop' }, 'no doom loop');
const sameNameLoop = detectSameNameLoop(args.recentToolCalls);
if (sameNameLoop) {
await runDoomLoopSummary(ctx, args, session, project, history, agent, {
name: sameNameLoop.name,
args: {},
});
return;
}
ctx.log.info({ chatId, phase: 'runAssistantTurn:no_same_name_loop' }, 'no same-name loop');
const messages = await buildMessagesPayload(session, project, history, agent); const messages = await buildMessagesPayload(session, project, history, agent);
@@ -1114,17 +1142,24 @@ async function runAssistantTurn(
const webToolsEnabled = const webToolsEnabled =
session.web_search_enabled ?? project.default_web_search_enabled ?? false; session.web_search_enabled ?? project.default_web_search_enabled ?? false;
ctx.log.info({ chatId, msgCount: messages.length, phase: 'runAssistantTurn:payload_built' }, 'payload built');
const state: StreamPhaseState = { accumulated: '', startedAt: null }; const state: StreamPhaseState = { accumulated: '', startedAt: null };
let result: StreamResult; let result: StreamResult;
try { try {
ctx.log.info({ chatId, model: session.model, phase: 'runAssistantTurn:before_stream' }, 'calling upstream');
result = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled); result = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled);
} catch (err) { } catch (err) {
await handleAbortOrError(ctx, args, state.accumulated, err); await handleAbortOrError(ctx, args, state.accumulated, err);
return; return;
} }
ctx.log.info({ chatId, toolCallsLen: result.toolCalls.length, finishReason: result.finishReason, contentLen: result.content?.length ?? 0, phase: 'runAssistantTurn:after_stream' }, 'upstream returned');
if (result.toolCalls.length > 0) { if (result.toolCalls.length > 0) {
ctx.log.info({ chatId, toolNames: result.toolCalls.map(tc => tc.name), phase: 'runAssistantTurn:before_tools' }, 'executing tools');
await executeToolPhase(ctx, args, result, state.startedAt, session, projectRoot); await executeToolPhase(ctx, args, result, state.startedAt, session, projectRoot);
ctx.log.info({ chatId, phase: 'runAssistantTurn:after_tools' }, 'tools complete, returning');
return; return;
} }
@@ -1430,6 +1465,7 @@ async function runDoomLoopSummary(
loop: { name: string; args: Record<string, unknown> }, loop: { name: string; args: Record<string, unknown> },
): Promise<void> { ): Promise<void> {
const { sessionId, chatId, assistantMessageId, signal } = args; const { sessionId, chatId, assistantMessageId, signal } = args;
ctx.log.info({ chatId, loopName: loop.name, phase: 'runDoomLoopSummary:enter' }, 'doom-loop summary firing');
const messages = await buildMessagesPayload(session, project, history, agent); const messages = await buildMessagesPayload(session, project, history, agent);
messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) }); messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });