// v#12 MistakeTracker: heterogeneous-failure recovery. Complements the
// doom-loop guard (sentinels.ts:detectDoomLoop, which only catches *identical*
// repeats) by catching a run of consecutive tool FAILURES the model isn't
// recovering from — even when each failure is a *different* error. Algorithm
// reimplemented from cline's mistake-counting pattern (NOT vendored).
//
// Pure module — mirrors sentinels.ts:detectDoomLoop. No DB, no I/O. The state
// lives loop-local in TurnArgs (reset per runInference, like recentToolCalls).

// The failure taxonomy already distinguished in tool-phase.ts:executeToolCall.
// 'api_error' is reserved for upstream-model failures surfaced as tool outcomes
// (no current emit site on apps/server, but the union mirrors the design doc
// so a future caller can record it without a type change).
export type FailureKind =
  | 'zod_reject'
  | 'tool_not_found'
  | 'exec_error'
  | 'api_error'
  | 'permission_denied';

// Smallest streak that doesn't false-positive on a model that retries once
// after a transient error. Matches DOOM_LOOP_THRESHOLD's rationale.
export const MISTAKE_THRESHOLD = 3;

export interface MistakeState {
  // The current consecutive-failure streak (any successful tool step clears it).
  run: FailureKind[];
  // How many recovery nudges have fired without an intervening success. Used to
  // escalate (stop the turn) on the second trip rather than nudging forever.
  nudges: number;
}

// Build an empty tracker: no failures recorded, no nudges outstanding. Each
// call returns a new object with its own `run` array, so states never share
// a streak.
export function freshMistakeState(): MistakeState {
  return { run: [], nudges: 0 };
}

// Record one tool step's outcome. A 'success' clears BOTH the streak and the
// nudge counter (the model recovered). A FailureKind pushes onto the streak.
//
// Mutates `state` in place and returns nothing. On success `run` is replaced
// with a new empty array (not truncated), so a previously captured reference
// to the old array keeps its contents.
export function recordStep(
  state: MistakeState,
  outcome: FailureKind | 'success',
): void {
  if (outcome === 'success') {
    state.run = [];
    state.nudges = 0;
    return;
  }
  state.run.push(outcome);
}

// Decide whether to intervene given the current streak. When the streak has
// reached MISTAKE_THRESHOLD: 'nudge' the first time (no nudge fired yet),
// 'escalate' if it trips again while a nudge is already outstanding (no
// intervening success cleared `nudges`). Below threshold → null.
//
// Pure — the caller is responsible for mutating `nudges`/`run` after acting on
// the decision (mirrors how turn.ts consumes detectDoomLoop's result).
export function detectMistakePattern(
  state: MistakeState,
): 'nudge' | 'escalate' | null {
  // Only the streak LENGTH matters here; the kinds in `run` are not inspected.
  if (state.run.length < MISTAKE_THRESHOLD) return null;
  return state.nudges === 0 ? 'nudge' : 'escalate';
}

// Model-facing guidance injected (transiently, for the next step only) when a
// nudge fires. Short + declarative for the same reliability reason as the
// cap-hit / doom-loop notes.
export const MISTAKE_RECOVERY_NOTE =
  "You've hit several different errors in a row. Stop retrying variations — re-read the tool schemas, verify file paths and arguments exist before calling, and try a fundamentally different approach.";