feat: MistakeTracker + file-provenance ledger (v2.7.4)

Two native-inference hardening features from boocode_code_review_v2 §1 #12. MistakeTracker: a new pure module, mistake-tracker.ts, tracks consecutive heterogeneous tool failures (failure kinds are surfaced per tool from tool-phase.ts). After 3 failures in a row, the turn loop soft-nudges (model-facing recovery guidance + mistake_recovery sentinel + reset); on a re-trip it escalates to stopping the turn (cap-hit-style, with a Continue affordance). This complements doom-loop (identical repeats) and cap-hit. File-provenance ledger: compaction.ts derives a deterministic ## Files Read list from the head messages' read-tool calls and injects it into the rolling-summary prompt so that provenance survives compaction (no new table; read-only). mistake_recovery sentinel: adds a MessageMetadata arm (server + web) and a MessageBubble render branch. Built by 2 parallel agents. Server: 545 tests passing (23 new); build + web tsc clean. Native-inference only. Builds on v2.7.3. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 13:05:03 +00:00
parent f53d6a8afd
commit bcc89d8adc
15 changed files with 816 additions and 20 deletions
--- a/apps/server/src/services/tests/compaction.test.ts
+++ b/apps/server/src/services/tests/compaction.test.ts
@@ -7,6 +7,8 @@ import {
  select,
  buildPrompt,
  buildHeadPayload,
+  deriveFilesRead,
+  buildFilesReadContext,
  type CompactionMessage,
 } from '../compaction.js';
 import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';
@@ -321,3 +323,105 @@ describe('buildHeadPayload reasoning render', () => {
    expect(out[1]!.content).not.toContain('<reasoning>');
  });
 });
+
+// ---- buildHeadPayload sentinel stripping (#12) -------------------------------
+
+describe('buildHeadPayload strips all UI sentinels', () => { // system rows tagged through metadata.kind are expected to be absent from the payload
+  it('drops cap_hit, doom_loop, and mistake_recovery system rows', () => {
+    const out = buildHeadPayload([
+      mkMsg('user', 'do the thing'),
+      mkMsg('system', 'budget reached', { metadata: { kind: 'cap_hit' } }),
+      mkMsg('system', 'looping', { metadata: { kind: 'doom_loop' } }),
+      mkMsg('system', 'repeated errors', { metadata: { kind: 'mistake_recovery' } }),
+      mkMsg('assistant', 'answer'),
+    ]);
+    // Only the user + assistant rows survive; all three sentinels stripped.
+    expect(out).toHaveLength(2);
+    expect(out[0]!.role).toBe('user'); // relative order of the surviving rows is checked too
+    expect(out[1]!.role).toBe('assistant');
+  });
+
+  it('keeps a non-sentinel system row (e.g. compact bridge) untouched', () => {
+    const out = buildHeadPayload([
+      mkMsg('system', 'legacy compact', { kind: 'compact', metadata: null }), // top-level kind with null metadata, unlike the sentinel rows above
+      mkMsg('user', 'q'),
+    ]);
+    expect(out[0]!.role).toBe('system'); // only the first output row is inspected; the output length is not asserted here
+    expect(out[0]!.content).toBe('legacy compact');
+  });
+});
+
+// ---- file-provenance ledger (#12, Part B) -----------------------------------
+
+describe('deriveFilesRead', () => { // expected contract: path args of read-tool calls, returned as a sorted, de-duplicated string list
+  it('returns [] when the head has no read-tool calls', () => {
+    expect(deriveFilesRead([mkMsg('user', 'hi'), mkMsg('assistant', 'hello')])).toEqual([]);
+  });
+
+  it('extracts the path arg from view_file / list_dir / grep / find_files', () => {
+    const head = [
+      mkMsg('assistant', '', {
+        tool_calls: [
+          { id: 'c1', name: 'view_file', args: { path: 'src/index.ts' } },
+          { id: 'c2', name: 'list_dir', args: { path: 'src' } },
+          { id: 'c3', name: 'grep', args: { pattern: 'TODO', path: 'apps' } }, // only path is expected in the result, not pattern
+          { id: 'c4', name: 'find_files', args: { pattern: '**/*.ts', path: 'lib' } },
+        ],
+      }),
+    ];
+    expect(deriveFilesRead(head)).toEqual(['apps', 'lib', 'src', 'src/index.ts']); // sorted order, which differs from the call order above
+  });
+
+  it('dedupes and sorts paths across multiple assistant turns', () => {
+    const head = [
+      mkMsg('assistant', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: { path: 'b.ts' } }] }),
+      mkMsg('assistant', '', { tool_calls: [{ id: 'c2', name: 'view_file', args: { path: 'a.ts' } }] }),
+      mkMsg('assistant', '', { tool_calls: [{ id: 'c3', name: 'view_file', args: { path: 'b.ts' } }] }), // same path as c1, in a later row
+    ];
+    expect(deriveFilesRead(head)).toEqual(['a.ts', 'b.ts']);
+  });
+
+  it('ignores non-read tools and grep calls without a path arg', () => {
+    const head = [
+      mkMsg('assistant', '', {
+        tool_calls: [
+          { id: 'c1', name: 'web_search', args: { query: 'x' } }, // not a read tool for this ledger; expected to be ignored
+          { id: 'c2', name: 'grep', args: { pattern: 'foo' } }, // no path → root, skipped
+          { id: 'c3', name: 'view_file', args: { path: 'kept.ts' } },
+        ],
+      }),
+    ];
+    expect(deriveFilesRead(head)).toEqual(['kept.ts']);
+  });
+
+  it('ignores read-tool calls on non-assistant rows', () => {
+    const head = [
+      mkMsg('user', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: { path: 'nope.ts' } }] }), // same call shape as above, but on a user row
+    ];
+    expect(deriveFilesRead(head)).toEqual([]);
+  });
+});
+
+describe('buildFilesReadContext', () => { // expected contract: null, or a '## Files Read' heading followed by one '- path' bullet per line
+  it('returns null when nothing was read (no empty section injected)', () => {
+    expect(buildFilesReadContext([mkMsg('user', 'hi')])).toBeNull();
+  });
+
+  it('formats a ## Files Read block with sorted bullet paths', () => {
+    const head = [
+      mkMsg('assistant', '', {
+        tool_calls: [
+          { id: 'c1', name: 'view_file', args: { path: 'z.ts' } }, // deliberately listed before a.ts to exercise the sort
+          { id: 'c2', name: 'view_file', args: { path: 'a.ts' } },
+        ],
+      }),
+    ];
+    expect(buildFilesReadContext(head)).toBe('## Files Read\n- a.ts\n- z.ts'); // exact string match: newline-separated, no trailing newline
+  });
+});
+
+describe('SUMMARY_TEMPLATE includes the Files Read section (#12)', () => { // guards the template text against losing the heading
+  it('declares a ## Files Read section the model must maintain', () => {
+    expect(SUMMARY_TEMPLATE).toContain('## Files Read'); // substring check only; position within the template is not asserted
+  });
+});