feat: MistakeTracker + file-provenance ledger (v2.7.4)
Two native-inference hardening features from boocode_code_review_v2 §1 #12. MistakeTracker: new pure mistake-tracker.ts tracks consecutive heterogeneous tool failures (kinds surfaced per tool from tool-phase.ts). On 3 in a row the turn loop soft-nudges (model-facing recovery guidance + mistake_recovery sentinel + reset), then escalates to stopping the turn (cap-hit-style, Continue affordance) on a re-trip. Complements doom-loop (identical repeats) + cap-hit. File-provenance ledger: compaction.ts derives a deterministic ## Files Read list from the head messages' read-tool calls and injects it into the rolling-summary prompt so provenance survives compaction (no new table; read-only). mistake_recovery sentinel: MessageMetadata arm (server + web) + MessageBubble render branch. Built by 2 parallel agents. Server 545 tests passing (23 new); build + web tsc clean. Native-inference only. Builds on v2.7.3. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -181,6 +181,54 @@ export function select(
|
||||
};
|
||||
}
|
||||
|
||||
// === file-provenance ledger (#12, Part B) ===
|
||||
|
||||
// Read tools whose path/target arg names a file or directory that was read.
// BooChat (apps/server) is read-only — there are no write tools, so the ledger
// only ever has a "Files Read" side (apps/coder can add "Modified" later).
//
// Maps tool name -> name of the argument that carries the path. Typed as
// Readonly so the module-level table cannot be mutated through this binding.
const READ_TOOL_ARG: Readonly<Record<string, string>> = {
  view_file: 'path',
  list_dir: 'path',
  grep: 'path',
  find_files: 'path',
};
|
||||
|
||||
// Derive a deterministic, deduped, sorted list of file/dir paths read by the
// HEAD messages being summarized. Pure — scans assistant tool_calls only; the
// boundary (which messages are "head") is decided by select() at the call site.
// We derive at compaction time rather than via a live accumulator because
// TurnArgs resets per turn and would miss reads on non-compacting turns; the
// head messages are the authoritative record of what was read in the window
// being summarized. The result propagates forward as summary text across
// compactions (the LLM merges it into ## Files Read), so a path read long ago
// survives even after its originating messages are compacted out.
//
// A tool call contributes a path only when its name is an own key of
// READ_TOOL_ARG and the mapped argument is a string that is non-empty after
// trimming; the trimmed value is what gets recorded.
export function deriveFilesRead(head: CompactionMessage[]): string[] {
  const paths = new Set<string>();
  for (const m of head) {
    if (m.role !== 'assistant' || !m.tool_calls) continue;
    for (const tc of m.tool_calls) {
      // Own-property check: tc.name is not validated here, so a name such as
      // "constructor" or "__proto__" must not resolve to an inherited
      // Object.prototype member and be mistaken for an argument name.
      if (!Object.prototype.hasOwnProperty.call(READ_TOOL_ARG, tc.name)) continue;
      const argName = READ_TOOL_ARG[tc.name];
      if (!argName) continue;
      const raw = (tc.args as Record<string, unknown> | null)?.[argName];
      if (typeof raw !== 'string') continue;
      const path = raw.trim();
      if (path.length > 0) paths.add(path);
    }
  }
  // Default sort compares UTF-16 code units, so the order is locale-independent.
  return [...paths].sort();
}
|
||||
|
||||
// Format the derived paths as a deterministic ## Files Read block for injection
// into buildPrompt's context array. Returns null when nothing was read (so we
// don't inject an empty section). The summarizer merges this into the rolling
// summary's ## Files Read section per the SUMMARY_TEMPLATE instructions.
//
// Output shape: a "## Files Read" heading line followed by one "- <path>" line
// per derived path, joined with "\n" (no trailing newline).
export function buildFilesReadContext(head: CompactionMessage[]): string | null {
  const paths = deriveFilesRead(head);
  if (paths.length === 0) return null;
  return ['## Files Read', ...paths.map((p) => `- ${p}`)].join('\n');
}
|
||||
|
||||
// === prompt assembly ===
|
||||
|
||||
// Build the final user message that asks the model to (re)produce the
|
||||
@@ -220,15 +268,26 @@ export interface OpenAiMessage {
|
||||
tool_call_id?: string;
|
||||
}
|
||||
|
||||
// True for a system message whose metadata kind is exactly 'cap_hit'.
// isAnySentinel below also matches this kind; this narrower predicate is kept
// because buildHeadPayload still references it by name.
function isCapHitSentinel(m: CompactionMessage): boolean {
  return m.role === 'system' && m.metadata != null && m.metadata.kind === 'cap_hit';
}

// #12: mirror inference/sentinels.ts:isAnySentinel over the CompactionMessage
// shape (which carries metadata as { kind?: string } | null, not the full
// Message type isAnySentinel expects). All UI-only sentinels are stripped from
// the head payload — they never go to the summarizer LLM. Keep the kind list in
// sync with isAnySentinel in sentinels.ts.
const SENTINEL_KINDS: ReadonlySet<string> = new Set([
  'cap_hit',
  'doom_loop',
  'mistake_recovery',
]);

// True only for a system-role message whose metadata.kind is a string listed
// in SENTINEL_KINDS. Missing/null metadata or a non-string kind yields false.
function isAnySentinel(m: CompactionMessage): boolean {
  return (
    m.role === 'system' &&
    m.metadata != null &&
    typeof m.metadata.kind === 'string' &&
    SENTINEL_KINDS.has(m.metadata.kind)
  );
}
|
||||
|
||||
// v1.13.6: exported for unit-test access (reasoning render coverage).
|
||||
export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
|
||||
const out: OpenAiMessage[] = [];
|
||||
for (const m of head) {
|
||||
if (isCapHitSentinel(m)) continue;
|
||||
if (isAnySentinel(m)) continue;
|
||||
if (m.role === 'assistant' && (m.status === 'streaming' || m.status === 'cancelled')) continue;
|
||||
if (m.kind === 'compact') {
|
||||
// Legacy compact row — pass through as system context. The new
|
||||
@@ -417,7 +476,14 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
// user message carrying buildPrompt(previousSummary, []). No system prompt
|
||||
// — matches opencode (`system: []`); the template + anchor are sufficient.
|
||||
const headPayload = buildHeadPayload(sel.head);
|
||||
const finalUser: OpenAiMessage = { role: 'user', content: buildPrompt(previousSummary, []) };
|
||||
// #12 Part B: derive the file-provenance ledger from the head's read-tool
|
||||
// calls and inject it as a deterministic ## Files Read context block so the
|
||||
// summarizer merges it into the rolling summary. Empty → no injection.
|
||||
const filesReadCtx = buildFilesReadContext(sel.head);
|
||||
const finalUser: OpenAiMessage = {
|
||||
role: 'user',
|
||||
content: buildPrompt(previousSummary, filesReadCtx ? [filesReadCtx] : []),
|
||||
};
|
||||
const payload = [...headPayload, finalUser];
|
||||
|
||||
log.info(
|
||||
|
||||
Reference in New Issue
Block a user