diff --git a/apps/server/src/services/__tests__/system-prompt.test.ts b/apps/server/src/services/__tests__/system-prompt.test.ts index 95b9528..b26d8c1 100644 --- a/apps/server/src/services/__tests__/system-prompt.test.ts +++ b/apps/server/src/services/__tests__/system-prompt.test.ts @@ -6,7 +6,9 @@ import { loadContainerGuidance, getContainerGuidance, buildSystemPrompt, + buildSystemPromptWithFingerprint, _resetContainerGuidanceCacheForTests, + _resetPrefixObserverForTests, } from '../system-prompt.js'; import type { Agent, Project, Session } from '../../types/api.js'; @@ -17,12 +19,14 @@ let tmpDir: string; beforeEach(async () => { tmpDir = await mkdtemp(join(tmpdir(), 'system-prompt-test-')); _resetContainerGuidanceCacheForTests(); + _resetPrefixObserverForTests(); delete process.env['CONTAINER_GUIDANCE_FILE']; }); afterEach(async () => { delete process.env['CONTAINER_GUIDANCE_FILE']; _resetContainerGuidanceCacheForTests(); + _resetPrefixObserverForTests(); await rm(tmpDir, { recursive: true, force: true }); }); @@ -176,3 +180,75 @@ describe('buildSystemPrompt', () => { expect(prompt).not.toContain('--- end container guidance ---'); }); }); + +// v1.13.8: byte-stability instrumentation surface. 
+describe('buildSystemPromptWithFingerprint (v1.13.8)', () => { + it('returns byte-identical prompts for two consecutive calls with the same inputs', async () => { + const path = join(tmpDir, 'BOOCHAT.md'); + await writeFile(path, 'stable guidance', 'utf8'); + process.env['CONTAINER_GUIDANCE_FILE'] = path; + + const session = makeSession(); + const project = makeProject({ path: '/tmp/stable-proj' }); + const agent = makeAgent({ system_prompt: 'be terse' }); + + const first = await buildSystemPromptWithFingerprint(project, session, agent); + const second = await buildSystemPromptWithFingerprint(project, session, agent); + + expect(first.prompt).toBe(second.prompt); + expect(first.fingerprint.prefix_hash).toBe(second.fingerprint.prefix_hash); + expect(first.fingerprint.prefix_length).toBe(second.fingerprint.prefix_length); + }); + + it('emits drift=null on the first call for a fresh session, then null again when nothing changes', async () => { + process.env['CONTAINER_GUIDANCE_FILE'] = join(tmpDir, 'absent.md'); + const session = makeSession(); + const project = makeProject({ path: '/tmp/stable-proj' }); + + const first = await buildSystemPromptWithFingerprint(project, session, null); + expect(first.drift).toBeNull(); + + const second = await buildSystemPromptWithFingerprint(project, session, null); + expect(second.drift).toBeNull(); + expect(second.fingerprint.prefix_hash).toBe(first.fingerprint.prefix_hash); + }); + + it('emits drift with prev/new hashes and a changed_inputs entry when an input mutates', async () => { + // Two BOOCHAT.md contents with different mtimes → guidance cache picks + // up the change → fingerprint hash flips → drift fires. 
+ const path = join(tmpDir, 'BOOCHAT.md'); + await writeFile(path, 'first', 'utf8'); + process.env['CONTAINER_GUIDANCE_FILE'] = path; + + const session = makeSession(); + const project = makeProject({ path: '/tmp/stable-proj' }); + + const first = await buildSystemPromptWithFingerprint(project, session, null); + expect(first.drift).toBeNull(); + + await writeFile(path, 'second — different content', 'utf8'); + const later = new Date(Date.now() + 60_000); + await utimes(path, later, later); + + const second = await buildSystemPromptWithFingerprint(project, session, null); + expect(second.drift).not.toBeNull(); + expect(second.drift!.prev_hash).toBe(first.fingerprint.prefix_hash); + expect(second.drift!.new_hash).toBe(second.fingerprint.prefix_hash); + expect(second.drift!.prev_hash).not.toBe(second.drift!.new_hash); + expect(second.drift!.changed_inputs).toContain('mtime_boochat'); + }); + + it('does not fire drift across distinct sessions even if their hashes differ', async () => { + process.env['CONTAINER_GUIDANCE_FILE'] = join(tmpDir, 'absent.md'); + const sessionA = makeSession({ id: 'sess-A' }); + const sessionB = makeSession({ id: 'sess-B', system_prompt: 'B-only override' }); + const project = makeProject({ path: '/tmp/stable-proj' }); + + const a = await buildSystemPromptWithFingerprint(project, sessionA, null); + const b = await buildSystemPromptWithFingerprint(project, sessionB, null); + + expect(a.drift).toBeNull(); + expect(b.drift).toBeNull(); + expect(a.fingerprint.prefix_hash).not.toBe(b.fingerprint.prefix_hash); + }); +}); diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts index 8d3629c..49fd3f3 100644 --- a/apps/server/src/services/agents.ts +++ b/apps/server/src/services/agents.ts @@ -252,6 +252,22 @@ export function invalidateAgentsCache(projectPath?: string): void { } } +// v1.13.8: cache-read accessor for the system-prompt prefix-fingerprint log. 
+// Returns the AGENTS.md mtimes that getAgentsForProject() observed on its +// last cache fill for this projectPath. Both fields are null when the cache +// is cold (e.g. tests, fresh boot before the first inference turn). Does no +// I/O — a fresh stat would race the cache and isn't what the fingerprint +// wants anyway (we want what was actually used to resolve the agent). +export function getAgentsMtimes(projectPath: string): { + global: number | null; + project: number | null; +} { + const key = projectPath || '__none__'; + const entry = cache.get(key); + if (!entry) return { global: null, project: null }; + return { global: entry.globalMtime, project: entry.projectMtime }; +} + async function safeStat(path: string): Promise { try { const s = await fs.stat(path); diff --git a/apps/server/src/services/inference/payload.ts b/apps/server/src/services/inference/payload.ts index ed63f5f..edba789 100644 --- a/apps/server/src/services/inference/payload.ts +++ b/apps/server/src/services/inference/payload.ts @@ -1,3 +1,4 @@ +import type { FastifyBaseLogger } from 'fastify'; import type { Sql } from '../../db.js'; import type { Agent, @@ -6,7 +7,7 @@ import type { Session, } from '../../types/api.js'; import * as compaction from '../compaction.js'; -import { buildSystemPrompt } from '../system-prompt.js'; +import { buildSystemPromptWithFingerprint } from '../system-prompt.js'; import { isAnySentinel } from './sentinels.js'; import { PRUNE_TRIGGER_TOKENS, prune } from './prune.js'; import type { InferenceContext } from './turn.js'; @@ -31,14 +32,25 @@ export interface OpenAiMessage { // v1.12: buildSystemPrompt lives in services/system-prompt.ts. It awaits the // container-guidance loader, so this function is async too and every call // site in inference.ts awaits the result. +// v1.13.8: optional log argument. When provided, emit prefix-fingerprint +// per call + prefix-drift when the same session sees a hash change. 
Tests +// omit it and exercise the byte-stability surface directly through +// buildSystemPromptWithFingerprint. The observer Map in system-prompt.ts +// updates regardless of whether log is passed. export async function buildMessagesPayload( session: Session, project: Project, history: Message[], - agent: Agent | null = null + agent: Agent | null = null, + log?: FastifyBaseLogger, ): Promise { const out: OpenAiMessage[] = []; - const systemPrompt = await buildSystemPrompt(project, session, agent); + const { prompt: systemPrompt, fingerprint, drift } = + await buildSystemPromptWithFingerprint(project, session, agent); + if (log) { + log.info(fingerprint); + if (drift) log.warn(drift); + } out.push({ role: 'system', content: systemPrompt }); // Find the latest compact marker — only send messages from that point onwards diff --git a/apps/server/src/services/inference/sentinel-summaries.ts b/apps/server/src/services/inference/sentinel-summaries.ts index 49a30df..44f2cee 100644 --- a/apps/server/src/services/inference/sentinel-summaries.ts +++ b/apps/server/src/services/inference/sentinel-summaries.ts @@ -36,7 +36,7 @@ export async function runCapHitSummary( ): Promise { const { sessionId, chatId, assistantMessageId, signal } = args; - const messages = await buildMessagesPayload(session, project, history, agent); + const messages = await buildMessagesPayload(session, project, history, agent, ctx.log); messages.push({ role: 'system', content: CAP_HIT_SUMMARY_NOTE(budget) }); const startedRow = await ctx.sql<{ started_at: string }[]>` @@ -298,7 +298,7 @@ export async function runDoomLoopSummary( ): Promise { const { sessionId, chatId, assistantMessageId, signal } = args; - const messages = await buildMessagesPayload(session, project, history, agent); + const messages = await buildMessagesPayload(session, project, history, agent, ctx.log); messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) }); const startedRow = await ctx.sql<{ started_at: string }[]>` 
diff --git a/apps/server/src/services/inference/turn.ts b/apps/server/src/services/inference/turn.ts index 3450328..10be90f 100644 --- a/apps/server/src/services/inference/turn.ts +++ b/apps/server/src/services/inference/turn.ts @@ -205,7 +205,7 @@ export async function runAssistantTurn( return; } - const messages = await buildMessagesPayload(session, project, history, agent); + const messages = await buildMessagesPayload(session, project, history, agent, ctx.log); // v1.11.8: resolve per-chat web-tools opt-in. Tri-state on the wire: // - session.web_search_enabled = null → inherit project default diff --git a/apps/server/src/services/system-prompt.ts b/apps/server/src/services/system-prompt.ts index c0bef4c..9272a47 100644 --- a/apps/server/src/services/system-prompt.ts +++ b/apps/server/src/services/system-prompt.ts @@ -8,9 +8,19 @@ // + container guidance (this layer, NEW in v1.12) // + agent.system_prompt (resolved from data/AGENTS.md by getAgentById) // + session.system_prompt OR project.default_system_prompt +// +// v1.13.8: byte-stability instrumentation. buildSystemPromptWithFingerprint +// returns the assembled string plus a SHA-256 fingerprint and a per-session +// drift signal. buildSystemPrompt stays a string-returning shim for backward +// compat (tests use it). No cache added — recon proved input-layer mtime +// caches (this file + agents.ts) already deliver byte-stable inputs in +// steady state. v1.13.8 measures that claim against production traffic +// before any cache infrastructure earns its place. +import { createHash } from 'node:crypto'; import { readFile, stat } from 'node:fs/promises'; import type { Agent, Project, Session } from '../types/api.js'; +import { getAgentsMtimes } from './agents.js'; const BASE_SYSTEM_PROMPT = (projectPath: string) => `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}.
Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -60,11 +70,131 @@ export function _resetContainerGuidanceCacheForTests(): void {
   cachedGuidance = null;
 }
 
-export async function buildSystemPrompt(
+// v1.13.8: expose the mtime currently held in the BOOCHAT cache so the
+// fingerprint log can stamp it without re-statting (no I/O race against
+// getContainerGuidance, which is the canonical mtime source).
+function getCachedGuidanceMtime(): number | null {
+  if (!cachedGuidance) return null;
+  // mtime=0 is the sentinel for "file is missing" (set in the catch above).
+  // Surface it as null so the log/diff doesn't treat absence as a number.
+  return cachedGuidance.mtime > 0 ? cachedGuidance.mtime : null;
+}
+
+// v1.13.8: fingerprint emitted per turn, observer state keyed by session.
+// Field set is intentionally small — we want the diff between two
+// fingerprints to point at the exact input that drifted, not bury the
+// signal in noise.
+// project_path and the two *_prompt_hash digests cover the free-text inputs
+// that the has_* booleans cannot: editing a prompt from one non-empty value
+// to another changes prefix_hash without flipping any boolean.
+export interface PrefixFingerprint {
+  msg: 'prefix-fingerprint';
+  project_id: string;
+  project_path: string;
+  agent_id: string | null;
+  agent_name: string | null;
+  session_id: string;
+  prefix_hash: string;
+  // String.length of the prompt (UTF-16 code units, not UTF-8 bytes).
+  prefix_length: number;
+  mtime_boochat: number | null;
+  mtime_agents_global: number | null;
+  mtime_agents_project: number | null;
+  agent_prompt_hash: string | null;
+  user_prompt_hash: string | null;
+  has_agent_system_prompt: boolean;
+  has_session_override: boolean;
+  has_project_override: boolean;
+}
+
+export interface PrefixDrift {
+  msg: 'prefix-drift';
+  session_id: string;
+  prev_hash: string;
+  new_hash: string;
+  prev_length: number;
+  new_length: number;
+  // Names of the ObservedInputs fields whose values differ between the
+  // previous observation for this session and this one. The bug case is
+  // `changed_inputs: []` — hash differs but no tracked input moved, which
+  // means assembly is nondeterministic somewhere.
+  changed_inputs: string[];
+}
+
+// Fields tracked per-session for the drift diff. Stored alongside the hash
+// so we can recompute changed_inputs without re-running buildSystemPrompt.
+interface ObservedInputs {
+  agent_id: string | null;
+  project_path: string;
+  agent_prompt_hash: string | null;
+  user_prompt_hash: string | null;
+  mtime_boochat: number | null;
+  mtime_agents_global: number | null;
+  mtime_agents_project: number | null;
+  has_agent_system_prompt: boolean;
+  has_session_override: boolean;
+  has_project_override: boolean;
+}
+
+interface ObserverEntry {
+  hash: string;
+  length: number;
+  inputs: ObservedInputs;
+}
+
+// Upper bound on sessions remembered by the observer. The Map is module-level
+// state that is only ever cleared by the test hook, so without a cap it would
+// grow by one entry per session ever seen.
+const PREFIX_OBSERVER_MAX_SESSIONS = 1000;
+
+// Last observation per session id, ordered least- to most-recently seen.
+const prefixObserver = new Map<string, ObserverEntry>();
+
+// Test-only: clear the observer so consecutive tests don't share state.
+export function _resetPrefixObserverForTests(): void {
+  prefixObserver.clear();
+}
+
+// Store the latest observation for a session and evict the least recently
+// seen session once the cap is exceeded. An evicted session simply starts
+// over: its next call reports drift=null.
+function rememberObservation(sessionId: string, entry: ObserverEntry): void {
+  // delete-then-set moves the key to the end of the Map's insertion order,
+  // which keeps the first key the least recently seen one.
+  prefixObserver.delete(sessionId);
+  prefixObserver.set(sessionId, entry);
+  if (prefixObserver.size > PREFIX_OBSERVER_MAX_SESSIONS) {
+    const oldest = prefixObserver.keys().next().value;
+    if (oldest !== undefined) prefixObserver.delete(oldest);
+  }
+}
+
+// Short SHA-256 digest of a prompt fragment, or null when the fragment is
+// empty. Only used to tell "same text" from "different text" in the logs,
+// so 16 hex characters are plenty and keep the log line compact.
+function digestOrNull(text: string): string | null {
+  if (text.length === 0) return null;
+  return createHash('sha256').update(text, 'utf8').digest('hex').slice(0, 16);
+}
+
+// List the ObservedInputs keys whose value differs between two observations.
+function computeChangedInputs(prev: ObservedInputs, curr: ObservedInputs): string[] {
+  const out: string[] = [];
+  const keys = Object.keys(curr) as (keyof ObservedInputs)[];
+  for (const k of keys) {
+    if (prev[k] !== curr[k]) out.push(k);
+  }
+  return out;
+}
+
+// Assemble the system prompt and describe it: returns the prompt string, a
+// fingerprint of this call, and a drift record when the same session was
+// previously observed with a different prefix hash (null otherwise).
+export async function buildSystemPromptWithFingerprint(
   project: Project,
   session: Session,
-  agent: Agent | null
-): Promise<string> {
+  agent: Agent | null,
+): Promise<{ prompt: string; fingerprint: PrefixFingerprint; drift: PrefixDrift | null }> {
   let out = BASE_SYSTEM_PROMPT(project.path);
   const guidance = await getContainerGuidance();
   if (guidance) {
@@ -79,5 +209,67 @@ export async function buildSystemPrompt(
   if (userPrompt.length > 0) {
     out += '\n\n' + userPrompt;
   }
-  return out;
+
+  const hash = createHash('sha256').update(out, 'utf8').digest('hex');
+  const agentsMtimes = getAgentsMtimes(project.path);
+  const inputs: ObservedInputs = {
+    agent_id: agent?.id ?? null,
+    project_path: project.path,
+    agent_prompt_hash: digestOrNull(agent ? agent.system_prompt.trim() : ''),
+    user_prompt_hash: digestOrNull(userPrompt),
+    mtime_boochat: getCachedGuidanceMtime(),
+    mtime_agents_global: agentsMtimes.global,
+    mtime_agents_project: agentsMtimes.project,
+    has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
+    has_session_override: sessionPrompt.length > 0,
+    has_project_override: projectPrompt.length > 0,
+  };
+
+  const fingerprint: PrefixFingerprint = {
+    msg: 'prefix-fingerprint',
+    project_id: project.id,
+    project_path: inputs.project_path,
+    agent_id: agent?.id ?? null,
+    agent_name: agent?.name ?? null,
+    session_id: session.id,
+    prefix_hash: hash,
+    prefix_length: out.length,
+    mtime_boochat: inputs.mtime_boochat,
+    mtime_agents_global: inputs.mtime_agents_global,
+    mtime_agents_project: inputs.mtime_agents_project,
+    agent_prompt_hash: inputs.agent_prompt_hash,
+    user_prompt_hash: inputs.user_prompt_hash,
+    has_agent_system_prompt: inputs.has_agent_system_prompt,
+    has_session_override: inputs.has_session_override,
+    has_project_override: inputs.has_project_override,
+  };
+
+  // Drift is only reported against an earlier observation of this session.
+  let drift: PrefixDrift | null = null;
+  const prev = prefixObserver.get(session.id);
+  if (prev && prev.hash !== hash) {
+    drift = {
+      msg: 'prefix-drift',
+      session_id: session.id,
+      prev_hash: prev.hash,
+      new_hash: hash,
+      prev_length: prev.length,
+      new_length: out.length,
+      changed_inputs: computeChangedInputs(prev.inputs, inputs),
+    };
+  }
+  rememberObservation(session.id, { hash, length: out.length, inputs });
+
+  return { prompt: out, fingerprint, drift };
+}
+
+// Backward-compatible string-returning shim. Kept so existing callers
+// (tests, future code paths that don't want to log) work unchanged.
+export async function buildSystemPrompt(
+  project: Project,
+  session: Session,
+  agent: Agent | null,
+): Promise<string> {
+  const { prompt } = await buildSystemPromptWithFingerprint(project, session, agent);
+  return prompt;
 }