diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index 9a22476..9e57595 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -62,6 +62,7 @@ const CAP_HIT_SUMMARY_NOTE = (limit: number) => // session/processor.ts). Threshold of 3 is the smallest value that doesn't // false-positive on a model that retries once after a transient error. export const DOOM_LOOP_THRESHOLD = 3; +export const DOOM_LOOP_SAME_NAME_THRESHOLD = 5; const DOOM_LOOP_NOTE = (name: string) => `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`; @@ -85,6 +86,18 @@ export function detectDoomLoop( return { name: ref.name, args: ref.args }; } +export function detectSameNameLoop( + recentToolCalls: ToolCall[], +): { name: string } | null { + if (recentToolCalls.length < DOOM_LOOP_SAME_NAME_THRESHOLD) return null; + const last = recentToolCalls.slice(-DOOM_LOOP_SAME_NAME_THRESHOLD); + const name = last[0]!.name; + for (let i = 1; i < last.length; i++) { + if (last[i]!.name !== name) return null; + } + return { name }; +} + function isCapHitSentinel(m: Message): boolean { return ( m.role === 'system' && @@ -959,6 +972,7 @@ async function executeToolPhase( VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp()) RETURNING id `; + ctx.log.info({ chatId, nextToolsUsed: toolsUsed + result.toolCalls.length, phase: 'executeToolPhase:before_recurse' }, 'recursing into next turn'); await runAssistantTurn(ctx, { sessionId, chatId, @@ -1043,6 +1057,7 @@ async function runAssistantTurn( args: TurnArgs, ): Promise { const { sessionId, chatId } = args; + ctx.log.info({ chatId, sessionId, toolsUsed: args.toolsUsed, recentToolCallsLen: args.recentToolCalls?.length ?? 0, phase: 'runAssistantTurn:enter' }, 'turn enter'); // v1.11: if the prior turn flagged this chat for compaction, run it first // so loadContext below reads the post-compaction history. We swallow @@ -1073,6 +1088,7 @@ async function runAssistantTurn( return; } const { session, project, history } = loaded; + ctx.log.info({ chatId, historyLen: history.length, phase: 'runAssistantTurn:loaded' }, 'context loaded'); const projectRoot = await resolveProjectRoot(project.path); // Agent resolution is per-turn so PATCH agent_id mid-conversation takes // effect on the next message. Unknown agent_id returns null silently — @@ -1090,6 +1106,7 @@ async function runAssistantTurn( await runCapHitSummary(ctx, args, session, project, history, agent, budget); return; } + ctx.log.info({ chatId, budget, toolsUsed: args.toolsUsed, phase: 'runAssistantTurn:budget_ok' }, 'budget ok'); // v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can // burn through 3 identical calls long before the 15-call budget fires). @@ -1101,6 +1118,17 @@ async function runAssistantTurn( await runDoomLoopSummary(ctx, args, session, project, history, agent, loop); return; } + ctx.log.info({ chatId, phase: 'runAssistantTurn:no_doom_loop' }, 'no doom loop'); + + const sameNameLoop = detectSameNameLoop(args.recentToolCalls); + if (sameNameLoop) { + await runDoomLoopSummary(ctx, args, session, project, history, agent, { + name: sameNameLoop.name, + args: {}, + }); + return; + } + ctx.log.info({ chatId, phase: 'runAssistantTurn:no_same_name_loop' }, 'no same-name loop'); const messages = await buildMessagesPayload(session, project, history, agent); @@ -1114,17 +1142,24 @@ async function runAssistantTurn( const webToolsEnabled = session.web_search_enabled ?? project.default_web_search_enabled ?? false; + ctx.log.info({ chatId, msgCount: messages.length, phase: 'runAssistantTurn:payload_built' }, 'payload built'); + const state: StreamPhaseState = { accumulated: '', startedAt: null }; let result: StreamResult; try { + ctx.log.info({ chatId, model: session.model, phase: 'runAssistantTurn:before_stream' }, 'calling upstream'); result = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled); } catch (err) { await handleAbortOrError(ctx, args, state.accumulated, err); return; } + ctx.log.info({ chatId, toolCallsLen: result.toolCalls.length, finishReason: result.finishReason, contentLen: result.content?.length ?? 0, phase: 'runAssistantTurn:after_stream' }, 'upstream returned'); + if (result.toolCalls.length > 0) { + ctx.log.info({ chatId, toolNames: result.toolCalls.map(tc => tc.name), phase: 'runAssistantTurn:before_tools' }, 'executing tools'); await executeToolPhase(ctx, args, result, state.startedAt, session, projectRoot); + ctx.log.info({ chatId, phase: 'runAssistantTurn:after_tools' }, 'tools complete, returning'); return; } @@ -1430,6 +1465,7 @@ async function runDoomLoopSummary( loop: { name: string; args: Record }, ): Promise { const { sessionId, chatId, assistantMessageId, signal } = args; + ctx.log.info({ chatId, loopName: loop.name, phase: 'runDoomLoopSummary:enter' }, 'doom-loop summary firing'); const messages = await buildMessagesPayload(session, project, history, agent); messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });