feat: MistakeTracker + file-provenance ledger (v2.7.4)

Two native-inference hardening features from boocode_code_review_v2 §1 #12. MistakeTracker: new pure mistake-tracker.ts tracks consecutive heterogeneous tool failures (kinds surfaced per tool from tool-phase.ts). On 3 in a row the turn loop soft-nudges (model-facing recovery guidance + mistake_recovery sentinel + reset), then escalates to stopping the turn (cap-hit-style, Continue affordance) on a re-trip. Complements doom-loop (identical repeats) + cap-hit. File-provenance ledger: compaction.ts derives a deterministic ## Files Read list from the head messages' read-tool calls and injects it into the rolling-summary prompt so provenance survives compaction (no new table; read-only). mistake_recovery sentinel: MessageMetadata arm (server + web) + MessageBubble render branch. Built by 2 parallel agents. Server 545 tests passing (23 new); build + web tsc clean. Native-inference only. Builds on v2.7.3. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 13:05:03 +00:00
parent f53d6a8afd
commit bcc89d8adc
15 changed files with 816 additions and 20 deletions
--- a/apps/server/src/services/compaction.ts
+++ b/apps/server/src/services/compaction.ts
@@ -181,6 +181,54 @@ export function select(
  };
 }

+// === file-provenance ledger (#12, Part B) ===
+
+// Read tools whose path/target arg names a file or directory that was read.
+// BooChat (apps/server) is read-only — there are no write tools, so the ledger
+// only ever has a "Files Read" side (apps/coder can add "Modified" later).
+const READ_TOOL_ARG: Record<string, string> = { // tool name → name of the arg that holds the path
+  view_file: 'path',
+  list_dir: 'path',
+  grep: 'path', // NOTE(review): presumably a search root, not a file that was read — confirm it belongs in the ledger
+  find_files: 'path', // NOTE(review): same question as grep — verify against the tool's arg schema
+};
+
+// Derive a deterministic, deduped, sorted list of file/dir paths read by the
+// HEAD messages being summarized. Pure — scans assistant tool_calls only; the
+// boundary (which messages are "head") is decided by select() at the call site.
+// We derive at compaction time rather than via a live accumulator because
+// TurnArgs resets per turn and would miss reads on non-compacting turns; the
+// head messages are the authoritative record of what was read in the window
+// being summarized. The result propagates forward as summary text across
+// compactions (the LLM merges it into ## Files Read), so a path read long ago
+// survives even after its originating messages are compacted out.
+export function deriveFilesRead(head: CompactionMessage[]): string[] {
+  const paths = new Set<string>(); // Set collapses repeated reads of the same path
+  for (const m of head) {
+    if (m.role !== 'assistant') continue; // tool_calls are only inspected on assistant rows
+    if (!m.tool_calls) continue; // assistant row without any tool calls
+    for (const tc of m.tool_calls) {
+      const argName = READ_TOOL_ARG[tc.name]; // which arg carries the path for this tool, if it is tracked
+      if (!argName) continue; // not a tracked read tool
+      const raw = (tc.args as Record<string, unknown> | null)?.[argName]; // NOTE(review): assumes args is an already-parsed object — confirm
+      if (typeof raw === 'string' && raw.trim().length > 0) { // ignore missing, non-string, and blank values
+        paths.add(raw.trim()); // store the trimmed form so whitespace variants dedupe
+      }
+    }
+  }
+  return [...paths].sort(); // default sort (UTF-16 code-unit order), so output is locale-independent
+}
+
+// Format the derived paths as a deterministic ## Files Read block for injection
+// into buildPrompt's context array. Returns null when nothing was read (so we
+// don't inject an empty section). The summarizer merges this into the rolling
+// summary's ## Files Read section per the SUMMARY_TEMPLATE instructions.
+export function buildFilesReadContext(head: CompactionMessage[]): string | null {
+  const paths = deriveFilesRead(head); // deduped, trimmed, sorted
+  if (paths.length === 0) return null; // null → process() passes an empty context array to buildPrompt
+  return ['## Files Read', ...paths.map((p) => `- ${p}`)].join('\n'); // heading line, then one "- <path>" bullet per path
+}
+
 // === prompt assembly ===

 // Build the final user message that asks the model to (re)produce the
@@ -220,15 +268,26 @@ export interface OpenAiMessage {
  tool_call_id?: string;
 }

-function isCapHitSentinel(m: CompactionMessage): boolean {
-  return m.role === 'system' && m.metadata != null && m.metadata.kind === 'cap_hit';
+// #12: mirror inference/sentinels.ts:isAnySentinel over the CompactionMessage
+// shape (which carries metadata as { kind?: string } | null, not the full
+// Message type isAnySentinel expects). All UI-only sentinels are stripped from
+// the head payload — they never go to the summarizer LLM. Keep the kind list in
+// sync with isAnySentinel in sentinels.ts.
+const SENTINEL_KINDS = new Set(['cap_hit', 'doom_loop', 'mistake_recovery']); // metadata.kind values treated as sentinels
+function isAnySentinel(m: CompactionMessage): boolean {
+  return (
+    m.role === 'system' && // only system rows can be sentinels
+    m.metadata != null && // loose != rejects both null and undefined
+    typeof m.metadata.kind === 'string' && // kind is optional on the metadata shape
+    SENTINEL_KINDS.has(m.metadata.kind)
+  );
 }

 // v1.13.6: exported for unit-test access (reasoning render coverage).
 export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
  const out: OpenAiMessage[] = [];
  for (const m of head) {
-    if (isCapHitSentinel(m)) continue;
+    if (isAnySentinel(m)) continue;
    if (m.role === 'assistant' && (m.status === 'streaming' || m.status === 'cancelled')) continue;
    if (m.kind === 'compact') {
      // Legacy compact row — pass through as system context. The new
@@ -417,7 +476,14 @@ export async function process(input: ProcessInput): Promise<void> {
  // user message carrying buildPrompt(previousSummary, []). No system prompt
  // — matches opencode (`system: []`); the template + anchor are sufficient.
  const headPayload = buildHeadPayload(sel.head);
-  const finalUser: OpenAiMessage = { role: 'user', content: buildPrompt(previousSummary, []) };
+  // #12 Part B: derive the file-provenance ledger from the head's read-tool
+  // calls and inject it as a deterministic ## Files Read context block so the
+  // summarizer merges it into the rolling summary. Empty → no injection.
+  const filesReadCtx = buildFilesReadContext(sel.head);
+  const finalUser: OpenAiMessage = {
+    role: 'user',
+    content: buildPrompt(previousSummary, filesReadCtx ? [filesReadCtx] : []),
+  };
  const payload = [...headPayload, finalUser];

  log.info(