v1.13.8: system-prompt prefix stability verify-and-measure
Recon during planning disproved the original v1.13.7 (DB-cache) premise: buildSystemPrompt already runs over inputs mtime-cached at the file layer (BOOCHAT.md in system-prompt.ts:25, AGENTS.md global+per-project in agents.ts:245), and DB scalars are byte-stable until edited. The output is a microsecond-scale pure-string concat with no I/O. Skills aren't in the prefix; tools live in a separate request body field alpha-sorted by v1.13.3. This batch closes the verification gap with instrumentation, not implementation: - system-prompt.ts: buildSystemPromptWithFingerprint canonical impl computes SHA-256 over the assembled prefix, runs a per-session Map<sessionId, lastHash> observer, emits PrefixFingerprint per call and PrefixDrift (with field-level changed_inputs) on hash change. buildSystemPrompt is now a thin shim returning .prompt. - agents.ts: getAgentsMtimes accessor — cache-read only, no I/O. - payload.ts: buildMessagesPayload takes optional log argument; when passed, emits prefix-fingerprint (info) + prefix-drift (warn). - turn.ts + sentinel-summaries.ts: pass ctx.log at 3 production call sites; sentinel summaries log too so any drift across cap-hit / doom-loop paths surfaces. - system-prompt.test.ts: 4 new tests (byte-identical, no-drift-on-stable, drift-fires-with-changed-inputs, cross-session-no-drift). 194/194 tests pass (was 190). Smoke: 5 messages in a fresh session produced 7 prefix-fingerprint logs (extras from buildMessagesPayload being called from sentinel summary paths), all with identical prefix_hash and prefix_length=2907, zero prefix-drift. Prefix is byte-stable in steady state. Decision: original system_prompt_cache DB table from the roadmap is permanently dropped. The v1.12.0 mtime caches at the input layer plus alpha tool ordering at the request body (v1.13.3) already address the load-bearing cache-stability surfaces. Instrumentation stays so the claim can be re-verified at any time.
This commit is contained in:
@@ -8,9 +8,19 @@
|
||||
// + container guidance (this layer, NEW in v1.12)
|
||||
// + agent.system_prompt (resolved from data/AGENTS.md by getAgentById)
|
||||
// + session.system_prompt OR project.default_system_prompt
|
||||
//
|
||||
// v1.13.8: byte-stability instrumentation. buildSystemPromptWithFingerprint
|
||||
// returns the assembled string plus a SHA-256 fingerprint and a per-session
|
||||
// drift signal. buildSystemPrompt stays a string→string shim for backward
|
||||
// compat (tests use it). No cache added — recon proved input-layer mtime
|
||||
// caches (this file + agents.ts) already deliver byte-stable inputs in
|
||||
// steady state. v1.13.8 measures that claim against production traffic
|
||||
// before any cache infrastructure earns its place.
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { readFile, stat } from 'node:fs/promises';
|
||||
import type { Agent, Project, Session } from '../types/api.js';
|
||||
import { getAgentsMtimes } from './agents.js';
|
||||
|
||||
/**
 * Builds the fixed base layer of the system prompt.
 *
 * The only variable part is the project path, which is interpolated verbatim.
 *
 * @param projectPath - Location of the project the user is working on.
 * @returns The base prompt text with `projectPath` substituted in.
 */
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
  `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
|
||||
@@ -60,11 +70,94 @@ export function _resetContainerGuidanceCacheForTests(): void {
|
||||
cachedGuidance = null;
|
||||
}
|
||||
|
||||
export async function buildSystemPrompt(
|
||||
// v1.13.8: expose the mtime currently held in the BOOCHAT cache so the
|
||||
// fingerprint log can stamp it without re-statting (no I/O race against
|
||||
// getContainerGuidance, which is the canonical mtime source).
|
||||
/**
 * Reads the mtime held in the module-level guidance cache without touching
 * the filesystem.
 *
 * @returns The cached mtime when it is a positive number; `null` when the
 *   cache is empty or holds the non-positive "missing file" sentinel.
 */
function getCachedGuidanceMtime(): number | null {
  if (!cachedGuidance) return null;
  // mtime=0 is the sentinel for "file is missing" (set in the catch above).
  // Surface it as null so the log/diff doesn't treat absence as a number.
  return cachedGuidance.mtime > 0 ? cachedGuidance.mtime : null;
}
|
||||
|
||||
// v1.13.8: fingerprint emitted per turn, observer state keyed by session.
|
||||
// Field set is intentionally small — we want the diff between two
|
||||
// fingerprints to point at the exact input that drifted, not bury the
|
||||
// signal in noise.
|
||||
/**
 * Structured record describing one assembled system-prompt prefix.
 *
 * Produced on every call to `buildSystemPromptWithFingerprint`. The field set
 * is kept small so that comparing two records points at the input that moved.
 */
export interface PrefixFingerprint {
  /** Literal discriminator identifying this record kind. */
  msg: 'prefix-fingerprint';
  /** `id` of the project the prompt was built for. */
  project_id: string;
  /** `id` of the agent, or `null` when no agent was supplied. */
  agent_id: string | null;
  /** `name` of the agent, or `null` when no agent was supplied. */
  agent_name: string | null;
  /** `id` of the session; also the key used by the per-session observer. */
  session_id: string;
  /** Hex-encoded SHA-256 digest of the assembled prompt (UTF-8 encoded). */
  prefix_hash: string;
  /** `String.length` of the assembled prompt (UTF-16 code units, not bytes). */
  prefix_length: number;
  /** Cached BOOCHAT guidance mtime; `null` when nothing is cached or the file is missing. */
  mtime_boochat: number | null;
  /** `global` value returned by `getAgentsMtimes(project.path)`. */
  mtime_agents_global: number | null;
  /** `project` value returned by `getAgentsMtimes(project.path)`. */
  mtime_agents_project: number | null;
  /** True when an agent is present and its `system_prompt` is non-empty after trimming. */
  has_agent_system_prompt: boolean;
  /** True when the resolved session-level prompt text is non-empty. */
  has_session_override: boolean;
  /** True when the resolved project-level prompt text is non-empty. */
  has_project_override: boolean;
}
|
||||
|
||||
/**
 * Structured record emitted when a session's prefix hash differs from the
 * hash recorded at that session's previous observation.
 */
export interface PrefixDrift {
  /** Literal discriminator identifying this record kind. */
  msg: 'prefix-drift';
  /** `id` of the session whose prefix changed. */
  session_id: string;
  /** Hash stored at the previous observation for this session. */
  prev_hash: string;
  /** Hash computed for the current observation. */
  new_hash: string;
  /** Prompt length stored at the previous observation. */
  prev_length: number;
  /** Prompt length of the current observation. */
  new_length: number;
  // Names of fields in PrefixFingerprint (excluding the hash + length pair
  // and the session_id key itself) whose values differ between the previous
  // observation and this one. The bug case is `changed_inputs: []` — hash
  // differs but no tracked input moved, which means assembly is
  // nondeterministic somewhere.
  changed_inputs: string[];
}
|
||||
|
||||
// Fields tracked per-session for the drift diff. Stored alongside the hash
|
||||
// so we can recompute changed_inputs without re-running buildSystemPrompt.
|
||||
/**
 * Snapshot of the tracked inputs stored with each observation.
 *
 * These are the fields compared, one by one, to produce
 * `PrefixDrift.changed_inputs`.
 */
interface ObservedInputs {
  /** `id` of the agent, or `null` when no agent was supplied. */
  agent_id: string | null;
  /** Cached BOOCHAT guidance mtime, or `null` when unavailable. */
  mtime_boochat: number | null;
  /** Global AGENTS mtime as reported by `getAgentsMtimes`. */
  mtime_agents_global: number | null;
  /** Per-project AGENTS mtime as reported by `getAgentsMtimes`. */
  mtime_agents_project: number | null;
  /** True when the agent contributes a non-blank system prompt. */
  has_agent_system_prompt: boolean;
  /** True when the session-level prompt text is non-empty. */
  has_session_override: boolean;
  /** True when the project-level prompt text is non-empty. */
  has_project_override: boolean;
}
|
||||
|
||||
/** Last observation recorded for one session in the prefix observer. */
interface ObserverEntry {
  /** Prefix hash from that observation. */
  hash: string;
  /** Prompt length from that observation. */
  length: number;
  /** Tracked inputs captured at the same time, kept for the drift diff. */
  inputs: ObservedInputs;
}
|
||||
|
||||
// Unbounded by design for v1.13.8 (instrumentation, short-lived sessions in
|
||||
// the smoke test). TODO(v1.13.x follow-up, if v1.13.8 shows the prefix is stable):
|
||||
// LRU-bound this Map at 1000 sessions when the in-process surface lives long
|
||||
// enough to matter.
|
||||
// Most recent observation per session id; module-level, so it lives for the
// lifetime of the process unless explicitly cleared.
const prefixObserver = new Map<string, ObserverEntry>();

// Test-only: clear the observer so consecutive tests don't share state.
export function _resetPrefixObserverForTests(): void {
  prefixObserver.clear();
}
|
||||
|
||||
/**
 * Lists the tracked input fields whose values differ between two observations.
 *
 * Each field is compared with strict inequality, over the keys present on
 * `curr`.
 *
 * @param prev - Inputs captured at the previous observation.
 * @param curr - Inputs captured at the current observation.
 * @returns Names of the differing fields in `curr`'s key order; empty when
 *   no tracked field changed.
 */
function computeChangedInputs(prev: ObservedInputs, curr: ObservedInputs): string[] {
  const keys = Object.keys(curr) as (keyof ObservedInputs)[];
  return keys.filter((key) => prev[key] !== curr[key]);
}
|
||||
|
||||
export async function buildSystemPromptWithFingerprint(
|
||||
project: Project,
|
||||
session: Session,
|
||||
agent: Agent | null
|
||||
): Promise<string> {
|
||||
agent: Agent | null,
|
||||
): Promise<{ prompt: string; fingerprint: PrefixFingerprint; drift: PrefixDrift | null }> {
|
||||
let out = BASE_SYSTEM_PROMPT(project.path);
|
||||
const guidance = await getContainerGuidance();
|
||||
if (guidance) {
|
||||
@@ -79,5 +172,60 @@ export async function buildSystemPrompt(
|
||||
if (userPrompt.length > 0) {
|
||||
out += '\n\n' + userPrompt;
|
||||
}
|
||||
return out;
|
||||
|
||||
const hash = createHash('sha256').update(out, 'utf8').digest('hex');
|
||||
const agentsMtimes = getAgentsMtimes(project.path);
|
||||
const inputs: ObservedInputs = {
|
||||
agent_id: agent?.id ?? null,
|
||||
mtime_boochat: getCachedGuidanceMtime(),
|
||||
mtime_agents_global: agentsMtimes.global,
|
||||
mtime_agents_project: agentsMtimes.project,
|
||||
has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
|
||||
has_session_override: sessionPrompt.length > 0,
|
||||
has_project_override: projectPrompt.length > 0,
|
||||
};
|
||||
|
||||
const fingerprint: PrefixFingerprint = {
|
||||
msg: 'prefix-fingerprint',
|
||||
project_id: project.id,
|
||||
agent_id: agent?.id ?? null,
|
||||
agent_name: agent?.name ?? null,
|
||||
session_id: session.id,
|
||||
prefix_hash: hash,
|
||||
prefix_length: out.length,
|
||||
mtime_boochat: inputs.mtime_boochat,
|
||||
mtime_agents_global: inputs.mtime_agents_global,
|
||||
mtime_agents_project: inputs.mtime_agents_project,
|
||||
has_agent_system_prompt: inputs.has_agent_system_prompt,
|
||||
has_session_override: inputs.has_session_override,
|
||||
has_project_override: inputs.has_project_override,
|
||||
};
|
||||
|
||||
let drift: PrefixDrift | null = null;
|
||||
const prev = prefixObserver.get(session.id);
|
||||
if (prev && prev.hash !== hash) {
|
||||
drift = {
|
||||
msg: 'prefix-drift',
|
||||
session_id: session.id,
|
||||
prev_hash: prev.hash,
|
||||
new_hash: hash,
|
||||
prev_length: prev.length,
|
||||
new_length: out.length,
|
||||
changed_inputs: computeChangedInputs(prev.inputs, inputs),
|
||||
};
|
||||
}
|
||||
prefixObserver.set(session.id, { hash, length: out.length, inputs });
|
||||
|
||||
return { prompt: out, fingerprint, drift };
|
||||
}
|
||||
|
||||
// Backward-compatible string-returning shim. Kept so existing callers
|
||||
// (tests, future code paths that don't want to log) work unchanged.
|
||||
/**
 * Assembles the system prompt and returns only the prompt text.
 *
 * Delegates to `buildSystemPromptWithFingerprint`, so the per-session prefix
 * observer is still updated by this call; the fingerprint and any drift
 * record it produces are discarded here.
 *
 * @param project - Project the prompt is built for.
 * @param session - Session the prompt is built for.
 * @param agent - Agent in use, or `null` when there is none.
 * @returns The assembled prompt string.
 */
export async function buildSystemPrompt(
  project: Project,
  session: Session,
  agent: Agent | null,
): Promise<string> {
  const { prompt } = await buildSystemPromptWithFingerprint(project, session, agent);
  return prompt;
}
|
||||
|
||||
Reference in New Issue
Block a user