Files
boocode/apps/server/src/services/system-prompt.ts
indifferentketchup c4079dd85c feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)
DeepSeek API:
- @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
- Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
- thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
- V4 models: deepseek-v4-flash, deepseek-v4-pro
- Wired for both chat and coder panes

Whale lifts:
- Tool input repair (schema-based type coercion, markdown link unwrapping)
- Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
- Per-MCP-server permissions (allow/ask/deny)
- token tracking UI (cache N, think N in message stats line)

Infra:
- New DB columns: messages.cache_tokens, messages.reasoning_tokens
- New WS frame fields: cache_tokens, reasoning_tokens on message_complete
- coder provider snapshot merges DeepSeek models alongside llama-swap
2026-06-08 01:24:23 +00:00

243 lines
9.2 KiB
TypeScript

// v1.12: extracted from inference.ts to give the prompt-assembly logic its
// own home + test surface. Adds the container-guidance layer (BOOCHAT.md
// baked into the Docker image, injected between the base prompt and the
// agent block).
//
// Resolution order, last-wins on conflicts:
// base prompt
// + container guidance (this layer, NEW in v1.12)
// + agent.system_prompt (resolved from data/AGENTS.md by getAgentById)
// + session.system_prompt OR project.default_system_prompt
//
// v1.13.8: byte-stability instrumentation. buildSystemPromptWithFingerprint
// returns the assembled string plus a SHA-256 fingerprint and a per-session
// drift signal. buildSystemPrompt stays a string→string shim for backward
// compat (tests use it). No cache added — recon proved input-layer mtime
// caches (this file + agents.ts) already deliver byte-stable inputs in
// steady state. v1.13.8 measures that claim against production traffic
// before any cache infrastructure earns its place.
import { createHash } from 'node:crypto';
import { readFile, stat } from 'node:fs/promises';
import type { Agent, Project, Session } from '../types/api.js';
import { getAgentsMtimes } from './agents.js';
import { resolveRoute } from './inference/provider.js';
import { loadMemoryForSession } from './memory/recall.js';
import { formatMemoryBlock } from './memory/prompt.js';
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
// v1.12 mtime-watch cache. Mirrors the safeStat pattern in services/agents.ts.
// On every call we stat the file; if the mtime matches the cached entry we
// return the cached content without re-reading. If the file is missing we
// cache { mtime: 0, content: null } so the not-found case still benefits
// from caching (one stat per call, no readFile attempt on a known-missing
// path). Because BOOCHAT.md is bind-mounted from the host, edits land
// immediately on the next chat turn — no container restart needed.
let cachedGuidance: { mtime: number; content: string | null } | null = null;
function resolveGuidancePath(): string {
return process.env['CONTAINER_GUIDANCE_FILE'] ?? '/app/BOOCHAT.md';
}
export async function loadContainerGuidance(): Promise<string | null> {
const path = resolveGuidancePath();
try {
return await readFile(path, 'utf8');
} catch {
return null;
}
}
export async function getContainerGuidance(): Promise<string | null> {
const path = resolveGuidancePath();
let mtimeMs: number;
try {
const s = await stat(path);
mtimeMs = s.mtimeMs;
} catch {
cachedGuidance = { mtime: 0, content: null };
return null;
}
if (cachedGuidance && cachedGuidance.mtime === mtimeMs) {
return cachedGuidance.content;
}
const content = await loadContainerGuidance();
cachedGuidance = { mtime: mtimeMs, content };
return content;
}
// Test-only: clear the cache so consecutive tests don't share state.
export function _resetContainerGuidanceCacheForTests(): void {
cachedGuidance = null;
}
// v1.13.8: expose the mtime currently held in the BOOCHAT cache so the
// fingerprint log can stamp it without re-statting (no I/O race against
// getContainerGuidance, which is the canonical mtime source).
function getCachedGuidanceMtime(): number | null {
if (!cachedGuidance) return null;
// mtime=0 is the sentinel for "file is missing" (set in the catch above).
// Surface it as null so the log/diff doesn't treat absence as a number.
return cachedGuidance.mtime > 0 ? cachedGuidance.mtime : null;
}
// v1.13.8: fingerprint emitted per turn, observer state keyed by session.
// Field set is intentionally small — we want the diff between two
// fingerprints to point at the exact input that drifted, not bury the
// signal in noise.
export interface PrefixFingerprint {
msg: 'prefix-fingerprint';
project_id: string;
agent_id: string | null;
agent_name: string | null;
session_id: string;
prefix_hash: string;
prefix_length: number;
mtime_boochat: number | null;
mtime_agents_global: number | null;
mtime_agents_project: number | null;
has_agent_system_prompt: boolean;
has_session_override: boolean;
has_project_override: boolean;
route: 'swap' | 'sidecar' | 'deepseek';
}
export interface PrefixDrift {
msg: 'prefix-drift';
session_id: string;
prev_hash: string;
new_hash: string;
prev_length: number;
new_length: number;
// Names of fields in PrefixFingerprint (excluding the hash + length pair
// and the session_id key itself) whose values differ between the previous
// observation and this one. The bug case is `changed_inputs: []` — hash
// differs but no tracked input moved, which means assembly is
// nondeterministic somewhere.
changed_inputs: string[];
}
// Fields tracked per-session for the drift diff. Stored alongside the hash
// so we can recompute changed_inputs without re-running buildSystemPrompt.
interface ObservedInputs {
agent_id: string | null;
mtime_boochat: number | null;
mtime_agents_global: number | null;
mtime_agents_project: number | null;
has_agent_system_prompt: boolean;
has_session_override: boolean;
has_project_override: boolean;
route: 'swap' | 'sidecar' | 'deepseek';
}
interface ObserverEntry {
hash: string;
length: number;
inputs: ObservedInputs;
}
// Unbounded by design for v1.13.8 (instrumentation, short-lived sessions in
// the smoke test). TODO(v1.13.x follow-up if v1.13.8 surfaces stable):
// LRU-bound this Map at 1000 sessions when the in-process surface lives long
// enough to matter.
const prefixObserver = new Map<string, ObserverEntry>();
// Test-only: clear the observer so consecutive tests don't share state.
export function _resetPrefixObserverForTests(): void {
prefixObserver.clear();
}
function computeChangedInputs(prev: ObservedInputs, curr: ObservedInputs): string[] {
const out: string[] = [];
const keys = Object.keys(curr) as (keyof ObservedInputs)[];
for (const k of keys) {
if (prev[k] !== curr[k]) out.push(k);
}
return out;
}
export async function buildSystemPromptWithFingerprint(
project: Project,
session: Session,
agent: Agent | null,
): Promise<{ prompt: string; fingerprint: PrefixFingerprint; drift: PrefixDrift | null }> {
let out = BASE_SYSTEM_PROMPT(project.path);
const guidance = await getContainerGuidance();
if (guidance) {
out += '\n\n--- Container guidance ---\n' + guidance + '\n--- end container guidance ---\n';
}
const memory = await loadMemoryForSession(project.path, session.id).catch(() => []);
if (memory.length > 0) {
out += '\n\n' + formatMemoryBlock(memory);
}
if (agent && agent.system_prompt.trim().length > 0) {
out += '\n\n' + agent.system_prompt.trim();
}
const sessionPrompt = session.system_prompt?.trim() ?? '';
const projectPrompt = project.default_system_prompt?.trim() ?? '';
const userPrompt = sessionPrompt || projectPrompt;
if (userPrompt.length > 0) {
out += '\n\n' + userPrompt;
}
const hash = createHash('sha256').update(out, 'utf8').digest('hex');
const agentsMtimes = getAgentsMtimes(project.path);
const inputs: ObservedInputs = {
agent_id: agent?.id ?? null,
mtime_boochat: getCachedGuidanceMtime(),
mtime_agents_global: agentsMtimes.global,
mtime_agents_project: agentsMtimes.project,
has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
has_session_override: sessionPrompt.length > 0,
has_project_override: projectPrompt.length > 0,
route: resolveRoute(agent).route,
};
const fingerprint: PrefixFingerprint = {
msg: 'prefix-fingerprint',
project_id: project.id,
agent_id: agent?.id ?? null,
agent_name: agent?.name ?? null,
session_id: session.id,
prefix_hash: hash,
prefix_length: out.length,
mtime_boochat: inputs.mtime_boochat,
mtime_agents_global: inputs.mtime_agents_global,
mtime_agents_project: inputs.mtime_agents_project,
has_agent_system_prompt: inputs.has_agent_system_prompt,
has_session_override: inputs.has_session_override,
has_project_override: inputs.has_project_override,
route: inputs.route,
};
let drift: PrefixDrift | null = null;
const prev = prefixObserver.get(session.id);
if (prev && prev.hash !== hash) {
drift = {
msg: 'prefix-drift',
session_id: session.id,
prev_hash: prev.hash,
new_hash: hash,
prev_length: prev.length,
new_length: out.length,
changed_inputs: computeChangedInputs(prev.inputs, inputs),
};
}
prefixObserver.set(session.id, { hash, length: out.length, inputs });
return { prompt: out, fingerprint, drift };
}
// Backward-compatible string-returning shim. Kept so existing callers
// (tests, future code paths that don't want to log) work unchanged.
export async function buildSystemPrompt(
project: Project,
session: Session,
agent: Agent | null,
): Promise<string> {
const { prompt } = await buildSystemPromptWithFingerprint(project, session, agent);
return prompt;
}