v1.13.8: system-prompt prefix stability verify-and-measure

Recon during planning disproved the original v1.13.7 (DB-cache) premise: buildSystemPrompt already runs over inputs mtime-cached at the file layer (BOOCHAT.md in system-prompt.ts:25, AGENTS.md global+per-project in agents.ts:245), and DB scalars are byte-stable until edited. The output is microsecond pure-string concat with no I/O. Skills aren't in the prefix; tools live in a separate request body field alpha-sorted by v1.13.3.

This batch closes the verification gap with instrumentation, not implementation:

- system-prompt.ts: buildSystemPromptWithFingerprint canonical impl computes SHA-256 over the assembled prefix, runs a per-session Map<sessionId, lastHash> observer, emits PrefixFingerprint per call and PrefixDrift (with field-level changed_inputs) on hash change. buildSystemPrompt is now a thin shim returning .prompt.
- agents.ts: getAgentsMtimes accessor — cache-read only, no I/O.
- payload.ts: buildMessagesPayload takes optional log argument; when passed, emits prefix-fingerprint (info) + prefix-drift (warn).
- turn.ts + sentinel-summaries.ts: pass ctx.log at 3 production call sites; sentinel summaries log too so any drift across cap-hit / doom-loop paths surfaces.
- system-prompt.test.ts: 4 new tests (byte-identical, no-drift-on-stable, drift-fires-with-changed-inputs, cross-session-no-drift).

194/194 tests pass (was 190).

Smoke: 5 messages in a fresh session produced 7 prefix-fingerprint logs (extras from buildMessagesPayload being called from sentinel summary paths), all with identical prefix_hash and prefix_length=2907, zero prefix-drift. Prefix is byte-stable in steady-state.

Decision: original system_prompt_cache DB table from the roadmap is permanently dropped. The v1.12.0 mtime caches at the input layer plus alpha tool ordering at the request body (v1.13.3) already address the load-bearing cache-stability surfaces. Instrumentation stays so the claim can be re-verified at any time.
2026-05-22 13:42:18 +00:00
parent 0ce6115976
commit a0c8d212cb
6 changed files with 262 additions and 10 deletions
--- a/apps/server/src/services/tests/system-prompt.test.ts
+++ b/apps/server/src/services/tests/system-prompt.test.ts
@@ -6,7 +6,9 @@ import {
  loadContainerGuidance,
  getContainerGuidance,
  buildSystemPrompt,
+  buildSystemPromptWithFingerprint,
  _resetContainerGuidanceCacheForTests,
+  _resetPrefixObserverForTests,
 } from '../system-prompt.js';
 import type { Agent, Project, Session } from '../../types/api.js';

@@ -17,12 +19,14 @@ let tmpDir: string;
 beforeEach(async () => {
  tmpDir = await mkdtemp(join(tmpdir(), 'system-prompt-test-'));
  _resetContainerGuidanceCacheForTests();
+  _resetPrefixObserverForTests(); // v1.13.8: clear per-session hashes so drift state cannot leak in from an earlier test
  delete process.env['CONTAINER_GUIDANCE_FILE'];
 });

 afterEach(async () => {
  delete process.env['CONTAINER_GUIDANCE_FILE'];
  _resetContainerGuidanceCacheForTests();
+  _resetPrefixObserverForTests(); // v1.13.8: leave the module-level observer empty for whatever runs next
  await rm(tmpDir, { recursive: true, force: true });
 });

@@ -176,3 +180,75 @@ describe('buildSystemPrompt', () => {
    expect(prompt).not.toContain('--- end container guidance ---');
  });
 });
+
+// v1.13.8: byte-stability instrumentation surface.
+// These tests call buildSystemPromptWithFingerprint directly and inspect the
+// returned { prompt, fingerprint, drift } triple; no logger is involved.
+describe('buildSystemPromptWithFingerprint (v1.13.8)', () => {
+  it('returns byte-identical prompts for two consecutive calls with the same inputs', async () => {
+    const path = join(tmpDir, 'BOOCHAT.md');
+    await writeFile(path, 'stable guidance', 'utf8');
+    process.env['CONTAINER_GUIDANCE_FILE'] = path;
+
+    const session = makeSession();
+    const project = makeProject({ path: '/tmp/stable-proj' });
+    const agent = makeAgent({ system_prompt: 'be terse' });
+
+    const first = await buildSystemPromptWithFingerprint(project, session, agent);
+    const second = await buildSystemPromptWithFingerprint(project, session, agent);
+
+    expect(first.prompt).toBe(second.prompt);
+    expect(first.fingerprint.prefix_hash).toBe(second.fingerprint.prefix_hash);
+    expect(first.fingerprint.prefix_length).toBe(second.fingerprint.prefix_length);
+  });
+
+  it('emits drift=null on the first call for a fresh session, then null again when nothing changes', async () => {
+    // Point the guidance loader at a path that this test never creates.
+    process.env['CONTAINER_GUIDANCE_FILE'] = join(tmpDir, 'absent.md');
+    const session = makeSession();
+    const project = makeProject({ path: '/tmp/stable-proj' });
+
+    const first = await buildSystemPromptWithFingerprint(project, session, null);
+    expect(first.drift).toBeNull();
+
+    const second = await buildSystemPromptWithFingerprint(project, session, null);
+    expect(second.drift).toBeNull();
+    expect(second.fingerprint.prefix_hash).toBe(first.fingerprint.prefix_hash);
+  });
+
+  it('emits drift with prev/new hashes and a changed_inputs entry when an input mutates', async () => {
+    // Two BOOCHAT.md contents with different mtimes → guidance cache picks
+    // up the change → fingerprint hash flips → drift fires.
+    const path = join(tmpDir, 'BOOCHAT.md');
+    await writeFile(path, 'first', 'utf8');
+    process.env['CONTAINER_GUIDANCE_FILE'] = path;
+
+    const session = makeSession();
+    const project = makeProject({ path: '/tmp/stable-proj' });
+
+    const first = await buildSystemPromptWithFingerprint(project, session, null);
+    expect(first.drift).toBeNull();
+
+    await writeFile(path, 'second — different content', 'utf8');
+    // Force a clearly different mtime (now + 60s) instead of relying on the
+    // rewrite alone to move it.
+    const later = new Date(Date.now() + 60_000);
+    await utimes(path, later, later);
+
+    const second = await buildSystemPromptWithFingerprint(project, session, null);
+    expect(second.drift).not.toBeNull();
+    expect(second.drift!.prev_hash).toBe(first.fingerprint.prefix_hash);
+    expect(second.drift!.new_hash).toBe(second.fingerprint.prefix_hash);
+    expect(second.drift!.prev_hash).not.toBe(second.drift!.new_hash);
+    expect(second.drift!.changed_inputs).toContain('mtime_boochat');
+  });
+
+  it('does not fire drift across distinct sessions even if their hashes differ', async () => {
+    process.env['CONTAINER_GUIDANCE_FILE'] = join(tmpDir, 'absent.md');
+    // The observer is keyed by session.id, and each id is seen exactly once
+    // here, so neither call has a previous hash to compare against.
+    const sessionA = makeSession({ id: 'sess-A' });
+    const sessionB = makeSession({ id: 'sess-B', system_prompt: 'B-only override' });
+    const project = makeProject({ path: '/tmp/stable-proj' });
+
+    const a = await buildSystemPromptWithFingerprint(project, sessionA, null);
+    const b = await buildSystemPromptWithFingerprint(project, sessionB, null);
+
+    expect(a.drift).toBeNull();
+    expect(b.drift).toBeNull();
+    expect(a.fingerprint.prefix_hash).not.toBe(b.fingerprint.prefix_hash);
+  });
+});
--- a/apps/server/src/services/agents.ts
+++ b/apps/server/src/services/agents.ts
@@ -252,6 +252,22 @@ export function invalidateAgentsCache(projectPath?: string): void {
  }
 }

+// v1.13.8: cache-read accessor for the system-prompt prefix-fingerprint log.
+// Returns the AGENTS.md mtimes that getAgentsForProject() observed on its
+// last cache fill for this projectPath. Both fields are null when the cache
+// is cold (e.g. tests, fresh boot before the first inference turn). Does no
+// I/O — a fresh stat would race the cache and isn't what the fingerprint
+// wants anyway (we want what was actually used to resolve the agent).
+//
+// projectPath: cache key for the project; a falsy value (e.g. '') is looked
+//   up under '__none__'.
+// Returns { global, project } copied from the cache entry's globalMtime and
+//   projectMtime. NOTE(review): either field can also be null on a warm
+//   entry (the return type allows it) — presumably when that AGENTS.md
+//   could not be statted; confirm against the cache-fill code.
+export function getAgentsMtimes(projectPath: string): {
+  global: number | null;
+  project: number | null;
+} {
+  // NOTE(review): assumes the cache fill uses the same '__none__' fallback
+  // key for a missing project path — confirm against getAgentsForProject.
+  const key = projectPath || '__none__';
+  const entry = cache.get(key);
+  if (!entry) return { global: null, project: null };
+  return { global: entry.globalMtime, project: entry.projectMtime };
+}
+
 async function safeStat(path: string): Promise<number | null> {
  try {
    const s = await fs.stat(path);
--- a/apps/server/src/services/inference/payload.ts
+++ b/apps/server/src/services/inference/payload.ts
@@ -1,3 +1,4 @@
+import type { FastifyBaseLogger } from 'fastify';
 import type { Sql } from '../../db.js';
 import type {
  Agent,
@@ -6,7 +7,7 @@ import type {
  Session,
 } from '../../types/api.js';
 import * as compaction from '../compaction.js';
-import { buildSystemPrompt } from '../system-prompt.js';
+import { buildSystemPromptWithFingerprint } from '../system-prompt.js';
 import { isAnySentinel } from './sentinels.js';
 import { PRUNE_TRIGGER_TOKENS, prune } from './prune.js';
 import type { InferenceContext } from './turn.js';
@@ -31,14 +32,25 @@ export interface OpenAiMessage {
 // v1.12: buildSystemPrompt lives in services/system-prompt.ts. It awaits the
 // container-guidance loader, so this function is async too and every call
 // site in inference.ts awaits the result.
+// v1.13.8: optional log argument. When provided, emit prefix-fingerprint
+// per call + prefix-drift when the same session sees a hash change. Tests
+// omit it and exercise the byte-stability surface directly through
+// buildSystemPromptWithFingerprint. The observer Map in system-prompt.ts
+// updates regardless of whether log is passed.
 export async function buildMessagesPayload(
  session: Session,
  project: Project,
  history: Message[],
-  agent: Agent | null = null
+  agent: Agent | null = null,
+  log?: FastifyBaseLogger,
 ): Promise<OpenAiMessage[]> {
  const out: OpenAiMessage[] = [];
-  const systemPrompt = await buildSystemPrompt(project, session, agent);
+  const { prompt: systemPrompt, fingerprint, drift } =
+    await buildSystemPromptWithFingerprint(project, session, agent);
+  if (log) {
+    log.info(fingerprint);
+    if (drift) log.warn(drift);
+  }
  out.push({ role: 'system', content: systemPrompt });

  // Find the latest compact marker — only send messages from that point onwards
--- a/apps/server/src/services/inference/sentinel-summaries.ts
+++ b/apps/server/src/services/inference/sentinel-summaries.ts
@@ -36,7 +36,7 @@ export async function runCapHitSummary(
 ): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

-  const messages = await buildMessagesPayload(session, project, history, agent);
+  const messages = await buildMessagesPayload(session, project, history, agent, ctx.log);
  messages.push({ role: 'system', content: CAP_HIT_SUMMARY_NOTE(budget) });

  const startedRow = await ctx.sql<{ started_at: string }[]>`
@@ -298,7 +298,7 @@ export async function runDoomLoopSummary(
 ): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

-  const messages = await buildMessagesPayload(session, project, history, agent);
+  const messages = await buildMessagesPayload(session, project, history, agent, ctx.log);
  messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });

  const startedRow = await ctx.sql<{ started_at: string }[]>`
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -205,7 +205,7 @@ export async function runAssistantTurn(
    return;
  }

-  const messages = await buildMessagesPayload(session, project, history, agent);
+  const messages = await buildMessagesPayload(session, project, history, agent, ctx.log);

  // v1.11.8: resolve per-chat web-tools opt-in. Tri-state on the wire:
  //   - session.web_search_enabled = null → inherit project default
--- a/apps/server/src/services/system-prompt.ts
+++ b/apps/server/src/services/system-prompt.ts
@@ -8,9 +8,19 @@
 //   + container guidance (this layer, NEW in v1.12)
 //   + agent.system_prompt          (resolved from data/AGENTS.md by getAgentById)
 //   + session.system_prompt OR project.default_system_prompt
+//
+// v1.13.8: byte-stability instrumentation. buildSystemPromptWithFingerprint
+// returns the assembled string plus a SHA-256 fingerprint and a per-session
+// drift signal. buildSystemPrompt stays a string→string shim for backward
+// compat (tests use it). No cache added — recon proved input-layer mtime
+// caches (this file + agents.ts) already deliver byte-stable inputs in
+// steady state. v1.13.8 measures that claim against production traffic
+// before any cache infrastructure earns its place.

+import { createHash } from 'node:crypto';
 import { readFile, stat } from 'node:fs/promises';
 import type { Agent, Project, Session } from '../types/api.js';
+import { getAgentsMtimes } from './agents.js';

 const BASE_SYSTEM_PROMPT = (projectPath: string) =>
  `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -60,11 +70,94 @@ export function _resetContainerGuidanceCacheForTests(): void {
  cachedGuidance = null;
 }

-export async function buildSystemPrompt(
+// v1.13.8: expose the mtime currently held in the BOOCHAT cache so the
+// fingerprint log can stamp it without re-statting (no I/O race against
+// getContainerGuidance, which is the canonical mtime source).
+// Returns null when nothing is cached yet or when the cached mtime is not
+// positive; otherwise returns the cached mtime unchanged.
+function getCachedGuidanceMtime(): number | null {
+  if (!cachedGuidance) return null;
+  // mtime=0 is the sentinel for "file is missing" (set in the catch above).
+  // Surface it as null so the log/diff doesn't treat absence as a number.
+  return cachedGuidance.mtime > 0 ? cachedGuidance.mtime : null;
+}
+
+// v1.13.8: fingerprint emitted per turn, observer state keyed by session.
+// Field set is intentionally small — we want the diff between two
+// fingerprints to point at the exact input that drifted, not bury the
+// signal in noise.
+export interface PrefixFingerprint {
+  // Literal tag. The object is handed to the logger as-is by
+  // buildMessagesPayload. NOTE(review): presumably the logger uses this
+  // `msg` key as the message text — confirm against the logger in use.
+  msg: 'prefix-fingerprint';
+  project_id: string;
+  agent_id: string | null; // null when no agent was passed
+  agent_name: string | null; // null when no agent was passed
+  session_id: string; // also the key of the per-session observer Map
+  prefix_hash: string; // hex SHA-256 of the assembled prompt, hashed as UTF-8
+  // `.length` of the assembled prompt string, i.e. UTF-16 code units. This
+  // is not the UTF-8 byte count that was hashed, so it can be smaller than
+  // the byte size when the prompt contains non-ASCII text.
+  prefix_length: number;
+  mtime_boochat: number | null; // cached BOOCHAT.md mtime; null when nothing is cached or the file is missing
+  mtime_agents_global: number | null; // from getAgentsMtimes(project.path).global
+  mtime_agents_project: number | null; // from getAgentsMtimes(project.path).project
+  has_agent_system_prompt: boolean; // agent present and its system_prompt is non-empty after trim
+  // Presence flags only (length > 0); they do not capture the text itself.
+  // NOTE(review): presumably derived from session.system_prompt and
+  // project.default_system_prompt respectively — confirm in the builder.
+  has_session_override: boolean;
+  has_project_override: boolean;
+}
+
+// Emitted when a session's assembled prompt hashes differently from the
+// previous observation recorded for the same session id.
+export interface PrefixDrift {
+  msg: 'prefix-drift';
+  session_id: string;
+  prev_hash: string;
+  new_hash: string;
+  prev_length: number;
+  new_length: number;
+  // Names of the ObservedInputs fields whose values differ (strict !==)
+  // between the previous observation and this one. Only agent_id, the three
+  // mtimes and the three has_* presence flags are compared; project_id and
+  // agent_name are not tracked.
+  // `changed_inputs: []` means the hash moved but none of those fields did.
+  // That can indicate nondeterministic assembly, but it is also what a
+  // content-only edit looks like (e.g. a session override changed from one
+  // non-empty text to another), because the has_* flags record presence,
+  // not content. Rule that out before treating an empty list as a bug.
+  changed_inputs: string[];
+}
+
+// Fields tracked per-session for the drift diff. Stored alongside the hash
+// so we can recompute changed_inputs without re-running buildSystemPrompt.
+// Every key here is also a PrefixFingerprint field name; computeChangedInputs
+// reports these key names verbatim in PrefixDrift.changed_inputs, so adding
+// a key here automatically makes it part of the drift diff.
+interface ObservedInputs {
+  agent_id: string | null;
+  mtime_boochat: number | null;
+  mtime_agents_global: number | null;
+  mtime_agents_project: number | null;
+  has_agent_system_prompt: boolean;
+  has_session_override: boolean;
+  has_project_override: boolean;
+}
+
+// Last observation recorded for one session id in the prefix observer.
+interface ObserverEntry {
+  hash: string; // prefix_hash of the most recent build for the session
+  length: number; // prefix_length of that same build
+  inputs: ObservedInputs; // tracked inputs captured with that build, diffed on the next hash change
+}
+
+// Unbounded by design for v1.13.8 (instrumentation, short-lived sessions in
+// the smoke test). TODO(v1.13.x follow-up if v1.13.8 surfaces stable):
+// LRU-bound this Map at 1000 sessions when the in-process surface lives long
+// enough to matter.
+// Key: session.id. Value: the last hash/length/inputs seen for that session.
+// Module-level state: every build overwrites the session's entry, and the
+// only removal visible in this change is _resetPrefixObserverForTests.
+const prefixObserver = new Map<string, ObserverEntry>();
+
+// Test-only: clear the observer so consecutive tests don't share state.
+// After this call the next build for any session id is treated as a first
+// observation and reports drift = null.
+export function _resetPrefixObserverForTests(): void {
+  prefixObserver.clear();
+}
+
+// Returns the names of the ObservedInputs fields whose values differ between
+// `prev` and `curr`, in the key order of `curr`. Comparison is strict `!==`,
+// which is sufficient because every field is a primitive or null. An empty
+// array means no tracked field changed.
+function computeChangedInputs(prev: ObservedInputs, curr: ObservedInputs): string[] {
+  const out: string[] = [];
+  // Object.keys returns string[]; the cast narrows it to the known key set.
+  const keys = Object.keys(curr) as (keyof ObservedInputs)[];
+  for (const k of keys) {
+    if (prev[k] !== curr[k]) out.push(k);
+  }
+  return out;
+}
+
+export async function buildSystemPromptWithFingerprint(
  project: Project,
  session: Session,
-  agent: Agent | null
-): Promise<string> {
+  agent: Agent | null,
+): Promise<{ prompt: string; fingerprint: PrefixFingerprint; drift: PrefixDrift | null }> {
  let out = BASE_SYSTEM_PROMPT(project.path);
  const guidance = await getContainerGuidance();
  if (guidance) {
@@ -79,5 +172,60 @@ export async function buildSystemPrompt(
  if (userPrompt.length > 0) {
    out += '\n\n' + userPrompt;
  }
-  return out;
+
+  const hash = createHash('sha256').update(out, 'utf8').digest('hex');
+  const agentsMtimes = getAgentsMtimes(project.path);
+  const inputs: ObservedInputs = {
+    agent_id: agent?.id ?? null,
+    mtime_boochat: getCachedGuidanceMtime(),
+    mtime_agents_global: agentsMtimes.global,
+    mtime_agents_project: agentsMtimes.project,
+    has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
+    has_session_override: sessionPrompt.length > 0,
+    has_project_override: projectPrompt.length > 0,
+  };
+
+  const fingerprint: PrefixFingerprint = {
+    msg: 'prefix-fingerprint',
+    project_id: project.id,
+    agent_id: agent?.id ?? null,
+    agent_name: agent?.name ?? null,
+    session_id: session.id,
+    prefix_hash: hash,
+    prefix_length: out.length,
+    mtime_boochat: inputs.mtime_boochat,
+    mtime_agents_global: inputs.mtime_agents_global,
+    mtime_agents_project: inputs.mtime_agents_project,
+    has_agent_system_prompt: inputs.has_agent_system_prompt,
+    has_session_override: inputs.has_session_override,
+    has_project_override: inputs.has_project_override,
+  };
+
+  let drift: PrefixDrift | null = null;
+  const prev = prefixObserver.get(session.id);
+  if (prev && prev.hash !== hash) {
+    drift = {
+      msg: 'prefix-drift',
+      session_id: session.id,
+      prev_hash: prev.hash,
+      new_hash: hash,
+      prev_length: prev.length,
+      new_length: out.length,
+      changed_inputs: computeChangedInputs(prev.inputs, inputs),
+    };
+  }
+  prefixObserver.set(session.id, { hash, length: out.length, inputs });
+
+  return { prompt: out, fingerprint, drift };
+}
+
+// Backward-compatible string-returning shim. Kept so existing callers
+// (tests, future code paths that don't want to log) work unchanged.
+// Note: this delegates to buildSystemPromptWithFingerprint, so it still
+// records the session's hash in the prefix observer even though the
+// fingerprint and drift results are discarded here. A hash change first
+// seen through this shim is therefore never reported as drift by a later
+// fingerprinting call for the same session.
+export async function buildSystemPrompt(
+  project: Project,
+  session: Session,
+  agent: Agent | null,
+): Promise<string> {
+  const { prompt } = await buildSystemPromptWithFingerprint(project, session, agent);
+  return prompt;
 }