import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js';
import { ALL_TOOLS, resolveToolTier } from './tools.js';
import { validateExtraArgs } from './inference/llama-args-validator.js';

// v1.8.1: global agents live at /data/AGENTS.md inside the container
// (./data:/data:ro mount on the host). Per-project AGENTS.md at the project
// root overrides global by name. In-code builtins are gone — the seed file is
// the contents of the previous BUILTIN_AGENTS list, copied into /data/AGENTS.md
// once on first deploy.
const GLOBAL_AGENTS_PATH = '/data/AGENTS.md';
const CACHE_TTL_MS = 60_000;

// v1.12 Track B.3: derive from services/tools.ts ALL_TOOLS so new tools are
// auto-recognized in agent frontmatter `tools:` arrays. The previous
// hand-maintained list drifted (web_search/web_fetch from v1.11.8 + the 8
// codecontext tools were missing), silently filtering valid tool names out
// of agents that opted in. Single source of truth is tools.ts now.
let ALL_TOOL_NAMES: readonly string[] = ALL_TOOLS.map((t) => t.name);
let DEFAULT_TOOLS: string[] = [...ALL_TOOL_NAMES];

/**
 * Re-derives the known-tool-name list and the default tool list from the
 * current contents of ALL_TOOLS.
 */
export function refreshToolNames(): void {
  ALL_TOOL_NAMES = ALL_TOOLS.map((t) => t.name);
  DEFAULT_TOOLS = [...ALL_TOOL_NAMES];
}

const DEFAULT_TEMPERATURE = 0.7;

// ---- Tool glob matching (v1.15.0-mcp-multi) --------------------------------

/**
 * Simple glob match for tool names. Supports `*` as a wildcard for any
 * characters. No `?` or `**` — tool names are flat (no path separators).
 */
function simpleGlobMatch(str: string, pattern: string): boolean {
  if (pattern === '*') return true;
  if (!pattern.includes('*')) return str === pattern;
  // Escape every regex metacharacter except `*` (it is deliberately absent
  // from the character class), then expand each remaining `*` to `.*`.
  const regex = new RegExp(
    '^' + pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*') + '$',
  );
  return regex.test(str);
}

/**
 * Check if a tool name matches a set of glob patterns. Last-match-wins.
 * Patterns starting with `!` are deny rules.
 *
 * Examples:
 * - `["grep", "view_file"]` — exact-match whitelist (same as pre-v1.15)
 * - `["context7_*"]` — all tools from the context7 MCP server
 * - `["*", "!web_*"]` — all tools except web tools
 * - `[]` — nothing matches (agent gets no tools)
 */
export function matchToolGlob(toolName: string, patterns: string[]): boolean {
  let matched = false;
  for (const pattern of patterns) {
    const deny = pattern.startsWith('!');
    const glob = deny ? pattern.slice(1) : pattern;
    if (simpleGlobMatch(toolName, glob)) {
      matched = !deny;
    }
  }
  return matched;
}

/**
 * Returns true if a tools: entry is a glob pattern (contains * or starts
 * with !). Glob patterns can't be validated against the current tool list
 * since MCP tools are discovered at runtime.
 */
function isGlobPattern(entry: string): boolean {
  return entry.includes('*') || entry.startsWith('!');
}

/**
 * Lower-cases `name`, collapses every run of characters outside `a-z0-9`
 * into a single `-`, and strips leading/trailing dashes.
 */
export function slugify(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
}

// ---- AGENTS.md parser ------------------------------------------------------

interface ParsedFrontmatter {
  temperature?: number;
  top_p?: number;
  top_k?: number;
  min_p?: number;
  presence_penalty?: number;
  tools?: string[];
  description?: string;
  model?: string;
  // v1.8.2: optional per-agent tool-loop budget. Absent → inference resolves
  // from the agent's toolset at runtime.
  max_tool_calls?: number;
  // v1.14.0: optional per-agent step cap. Absent → bounded only by MAX_STEPS
  // (200) in the outer loop. Integer ≥ 0; steps: 0 means "no tool calls
  // allowed" — the model responds text-only.
  steps?: number;
  llama_extra_args?: string[];
}

/** Removes one pair of matching surrounding single or double quotes, if present. */
function stripQuotes(s: string): string {
  if (
    s.length >= 2 &&
    (s[0] === '"' || s[0] === "'") &&
    s[0] === s[s.length - 1]
  ) {
    return s.slice(1, -1);
  }
  return s;
}

type RangedSamplingKey = 'top_p' | 'top_k' | 'min_p' | 'presence_penalty';

interface RangeSpec {
  min: number;
  max: number;
  integer: boolean;
}

// Sampling parameters that share one rule: a malformed value is a hard error,
// a well-formed but out-of-range value is warned about and left unset.
const RANGED_SAMPLING_KEYS: Record<RangedSamplingKey, RangeSpec> = {
  top_p: { min: 0, max: 1, integer: false },
  top_k: { min: 0, max: 200, integer: true },
  min_p: { min: 0, max: 1, integer: false },
  presence_penalty: { min: -2, max: 2, integer: false },
};

// Keys whose value is parsed with Number(). A blank value for any of these is
// treated as "not set" because Number('') evaluates to 0.
const NUMERIC_KEYS: ReadonlySet<string> = new Set([
  'temperature',
  'top_p',
  'top_k',
  'min_p',
  'presence_penalty',
  'max_tool_calls',
  'steps',
]);

function isRangedSamplingKey(key: string): key is RangedSamplingKey {
  // hasOwnProperty rather than `in`, so keys such as "constructor" don't match.
  return Object.prototype.hasOwnProperty.call(RANGED_SAMPLING_KEYS, key);
}

/**
 * Parses an inline list value: either the bracketed form `[a, "b", c]` or the
 * loose form `a, b, c`. Entries are trimmed, unquoted, and empties dropped.
 */
function parseInlineList(raw: string): string[] {
  const inner = raw.startsWith('[') && raw.endsWith(']') ? raw.slice(1, -1) : raw;
  return inner
    .split(',')
    .map((s) => stripQuotes(s.trim()))
    .filter((s) => s.length > 0);
}

/**
 * Parses the flat `key: value` frontmatter of one agent block.
 *
 * This is a line-based reader, not a YAML parser: each non-blank line is split
 * at its first `:`; lines without a colon and unknown keys are ignored.
 * `tools` additionally accepts a block list (`tools:` followed by `- item`
 * lines).
 *
 * @param yaml Text between the opening and closing `---` fences.
 * @returns `data` with every successfully parsed field and `errors` with one
 *   message per malformed value. Out-of-range numeric values are not errors:
 *   they are logged with console.warn and left unset.
 */
function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: string[] } {
  const data: ParsedFrontmatter = {};
  const errors: string[] = [];
  // Array currently being filled by "- value" lines, or null outside a list.
  let openList: string[] | null = null;

  for (const rawLine of yaml.split('\n')) {
    const line = rawLine.trim();
    // Blank lines are skipped before the list state is reset, so they do not
    // terminate an open block list.
    if (line.length === 0) continue;

    // Block-list continuation: "- value" under a key that was set to empty.
    if (openList !== null && line.startsWith('- ')) {
      // Unquote like the inline forms do, so `- "grep"` yields `grep`.
      const item = stripQuotes(line.slice(2).trim());
      if (item.length > 0) openList.push(item);
      continue;
    }
    openList = null;

    const colonIdx = line.indexOf(':');
    if (colonIdx < 0) continue;
    const key = line.slice(0, colonIdx).trim();
    const valueRaw = line.slice(colonIdx + 1).trim();

    // A blank numeric value means "not set". Without this guard Number('')
    // would yield 0, e.g. turning a blank `steps:` into a text-only agent.
    if (valueRaw === '' && NUMERIC_KEYS.has(key)) continue;

    if (key === 'temperature') {
      const n = Number(valueRaw);
      if (Number.isFinite(n)) data.temperature = n;
      else errors.push(`temperature must be a number (got "${valueRaw}")`);
    } else if (isRangedSamplingKey(key)) {
      const spec = RANGED_SAMPLING_KEYS[key];
      const n = Number(valueRaw);
      const wellFormed = spec.integer ? Number.isInteger(n) : Number.isFinite(n);
      if (!wellFormed) {
        errors.push(
          `${key} must be ${spec.integer ? 'an integer' : 'a number'} (got "${valueRaw}")`,
        );
      } else if (n < spec.min || n > spec.max) {
        // Leave the field unset so the warning's "ignoring" is actually true.
        console.warn(
          `agents: ${key} ${n} out of range ${spec.min}-${spec.max}, ignoring (falling back to default)`,
        );
      } else {
        data[key] = n;
      }
    } else if (key === 'tools') {
      if (valueRaw === '') {
        data.tools = [];
        openList = data.tools;
      } else {
        // Bracketed "[a, b]" or loose "a, b, c".
        data.tools = parseInlineList(valueRaw);
      }
    } else if (key === 'description') {
      data.description = stripQuotes(valueRaw);
    } else if (key === 'model') {
      data.model = stripQuotes(valueRaw);
    } else if (key === 'max_tool_calls') {
      // v1.8.2: 1..100 inclusive integer. Out-of-range values are skipped
      // with a warning rather than throwing — agents shouldn't be unusable
      // because of a typo on a defaulted field. Non-numeric or non-integer
      // still hard-fails the block, matching `temperature` behavior.
      const n = Number(valueRaw);
      if (Number.isInteger(n) && n >= 1 && n <= 100) {
        data.max_tool_calls = n;
      } else if (Number.isInteger(n)) {
        console.warn(
          `agents: max_tool_calls ${n} out of range 1-100, ignoring (falling back to default)`,
        );
      } else {
        errors.push(`max_tool_calls must be an integer 1-100 (got "${valueRaw}")`);
      }
    } else if (key === 'steps') {
      // v1.14.0: per-agent step cap for the outer inference loop. Integer ≥ 0.
      // steps: 0 means "no tool calls allowed" — model responds text-only.
      // Negative integers are warned and ignored; non-integers are errors.
      const n = Number(valueRaw);
      if (Number.isInteger(n) && n >= 0) {
        data.steps = n;
      } else if (Number.isInteger(n)) {
        console.warn(
          `agents: steps ${n} is negative, ignoring (falling back to default)`,
        );
      } else {
        errors.push(`steps must be a non-negative integer (got "${valueRaw}")`);
      }
    } else if (key === 'llama_extra_args') {
      if (valueRaw === '') {
        // No block-list support — llama_extra_args uses inline list only.
        data.llama_extra_args = [];
      } else {
        const parsed = parseInlineList(valueRaw);
        try {
          validateExtraArgs(parsed);
          data.llama_extra_args = parsed;
        } catch (err) {
          errors.push(err instanceof Error ? err.message : String(err));
        }
      }
    }
    // Unknown keys silently ignored — forward-compat.
  }

  return { data, errors };
}

interface RawSection {
  name: string;
  body: string;
}

// "## Name" heading. "###"+ headings cannot match because the character after
// "##" must be whitespace, so deeper headings stay in the section body.
const SECTION_HEADING = /^##\s+(.+?)\s*$/;

/**
 * Splits the document into one section per level-2 heading. Text before the
 * first heading is discarded; each heading line itself is not part of `body`.
 */
function splitSections(content: string): RawSection[] {
  const sections: RawSection[] = [];
  let currentName: string | null = null;
  let currentLines: string[] = [];

  const flush = (): void => {
    if (currentName !== null) {
      sections.push({ name: currentName, body: currentLines.join('\n') });
    }
  };

  for (const line of content.split('\n')) {
    const heading = SECTION_HEADING.exec(line)?.[1];
    if (heading !== undefined) {
      flush();
      currentName = heading.trim();
      currentLines = [];
    } else if (currentName !== null) {
      currentLines.push(line);
    }
  }
  flush();

  return sections;
}

// Throws on malformed section — caller handles per-block error collection.
/**
 * Builds one agent record from a `## Name` section.
 *
 * The section body must start (ignoring blank lines) with a `---` fence,
 * followed by frontmatter, a closing `---` fence, and then the system prompt.
 *
 * @param section Heading text and body as produced by splitSections.
 * @returns The agent without its `source` field.
 * @throws Error when a fence is missing or the frontmatter reports errors
 *   (messages are joined with "; ").
 */
function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
  const lines = section.body.split('\n');

  // Opening "---" fence must be the first non-empty line.
  const firstContentIdx = lines.findIndex((l) => l.trim() !== '');
  const openIdx = lines[firstContentIdx]?.trim() === '---' ? firstContentIdx : -1;
  if (openIdx < 0) {
    throw new Error('missing opening --- fence after heading');
  }

  const closeIdx = lines.findIndex((l, i) => i > openIdx && l.trim() === '---');
  if (closeIdx < 0) {
    throw new Error('missing closing --- fence');
  }

  const yamlText = lines.slice(openIdx + 1, closeIdx).join('\n');
  const systemPrompt = lines.slice(closeIdx + 1).join('\n').trim();

  const { data: fm, errors: fmErrors } = parseFrontmatter(yamlText);
  if (fmErrors.length > 0) {
    throw new Error(fmErrors.join('; '));
  }

  // v1.13.15-tools: intersect with BOOCODE_TOOLS tier (ceiling, not expansion).
  // Unset → resolveToolTier returns ALL tool names → no narrowing.
  // v1.15.0-mcp-multi: glob patterns (entries containing * or starting with !)
  // pass through unvalidated — MCP tools are discovered at runtime and can't
  // be checked against ALL_TOOL_NAMES at parse time.
  const tierAllowed = new Set(resolveToolTier(process.env.BOOCODE_TOOLS));
  const filteredTools = Array.isArray(fm.tools)
    ? fm.tools.filter(
        (t) => isGlobPattern(t) || (ALL_TOOL_NAMES.includes(t) && tierAllowed.has(t)),
      )
    : DEFAULT_TOOLS.filter((t) => tierAllowed.has(t));

  return {
    id: slugify(section.name),
    name: section.name,
    description: fm.description ?? '',
    system_prompt: systemPrompt,
    temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE,
    top_p: typeof fm.top_p === 'number' ? fm.top_p : null,
    top_k: typeof fm.top_k === 'number' ? fm.top_k : null,
    min_p: typeof fm.min_p === 'number' ? fm.min_p : null,
    presence_penalty: typeof fm.presence_penalty === 'number' ? fm.presence_penalty : null,
    tools: filteredTools,
    model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
    max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
    steps: typeof fm.steps === 'number' ? fm.steps : null,
    llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
  };
}

interface ParseResult {
  agents: Omit<Agent, 'source'>[];
  errors: AgentParseError[];
}

/**
 * Parses every `## Name` block of an AGENTS.md document.
 *
 * v1.8.1: each block is parsed independently. A failure in one block does not
 * abort the rest of the file — the block is skipped, a console.warn is logged,
 * and a per-agent entry is added to `errors`.
 *
 * @param content Full text of the AGENTS.md file.
 * @returns Successfully parsed agents plus one error per skipped block.
 */
export function parseAgentsMd(content: string): ParseResult {
  const agents: Omit<Agent, 'source'>[] = [];
  const errors: AgentParseError[] = [];

  for (const section of splitSections(content)) {
    try {
      agents.push(parseAgentSection(section));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`agents: skipped "${section.name}" — ${reason}`);
      errors.push({ agent_name: section.name, reason });
    }
  }

  return { agents, errors };
}

/** True when a file at `<project>/AGENTS.md` is an agent registry, not Cursor/doc nav. */
export function isAgentRegistryMarkdown(content: string): boolean {
  const firstLine = content.trimStart().split('\n')[0]?.trim() ?? '';
  // BooCode monorepo root AGENTS.md is navigation only; registry is /data/AGENTS.md.
  return firstLine !== '# Agent navigation';
}

// ---- mtime-keyed cache + public API ----------------------------------------

interface CacheEntry {
  globalMtime: number | null;
  projectMtime: number | null;
  cachedAt: number;
  result: AgentsResponse;
}

// Keyed by projectPath; an empty path (no project, e.g. tests) is stored under
// the '__none__' key. Two files participate in cache validity (global +
// project); editing either bumps the corresponding mtime so the next read
// sees a miss without a watcher.
const cache = new Map<string, CacheEntry>();

// Key used for the "no project" case (empty projectPath).
const NO_PROJECT_CACHE_KEY = '__none__';

/** Maps a project path to its cache key; every reader and writer must use this. */
function cacheKeyFor(projectPath: string): string {
  return projectPath || NO_PROJECT_CACHE_KEY;
}

/**
 * Drops cached agent lists.
 *
 * @param projectPath When omitted, the whole cache is cleared. Otherwise only
 *   that project's entry is removed; an empty string removes the no-project
 *   entry.
 */
export function invalidateAgentsCache(projectPath?: string): void {
  if (projectPath === undefined) {
    cache.clear();
  } else {
    // Use the same key mapping as the writer: entries for '' live under
    // '__none__', so deleting the raw '' key would be a no-op.
    cache.delete(cacheKeyFor(projectPath));
  }
}

// v1.13.8: cache-read accessor for the system-prompt prefix-fingerprint log.
// Returns the AGENTS.md mtimes that getAgentsForProject() observed on its
// last cache fill for this projectPath. Both fields are null when the cache
// is cold (e.g. tests, fresh boot before the first inference turn). Does no
// I/O — a fresh stat would race the cache and isn't what the fingerprint
// wants anyway (we want what was actually used to resolve the agent).
export function getAgentsMtimes(projectPath: string): {
  global: number | null;
  project: number | null;
} {
  const entry = cache.get(cacheKeyFor(projectPath));
  if (!entry) return { global: null, project: null };
  return { global: entry.globalMtime, project: entry.projectMtime };
}

/** Returns the file's mtime in milliseconds, or null if stat fails for any reason. */
async function safeStat(path: string): Promise<number | null> {
  try {
    const s = await fs.stat(path);
    return s.mtimeMs;
  } catch {
    return null;
  }
}

/** Returns the file's UTF-8 contents, or null if the read fails for any reason. */
async function safeRead(path: string): Promise<string | null> {
  try {
    return await fs.readFile(path, 'utf8');
  } catch {
    return null;
  }
}

/**
 * Resolves the agent list for a project: agents from the global AGENTS.md,
 * overridden by same-named agents from `<projectPath>/AGENTS.md`.
 *
 * A cached result is reused while both files' mtimes are unchanged and the
 * entry is younger than CACHE_TTL_MS. Files that cannot be stat'ed or read
 * contribute no agents. A project file that isAgentRegistryMarkdown rejects
 * is ignored.
 *
 * @param projectPath Project root; an empty string means "no project".
 * @returns Merged agents plus the parse errors collected from both files.
 */
export async function getAgentsForProject(projectPath: string): Promise<AgentsResponse> {
  const projectAgentsPath = projectPath ? join(projectPath, 'AGENTS.md') : null;

  const [globalMtime, projectMtime] = await Promise.all([
    safeStat(GLOBAL_AGENTS_PATH),
    projectAgentsPath ? safeStat(projectAgentsPath) : null,
  ]);

  const cacheKey = cacheKeyFor(projectPath);
  const cached = cache.get(cacheKey);
  const now = Date.now();
  if (
    cached &&
    cached.globalMtime === globalMtime &&
    cached.projectMtime === projectMtime &&
    now - cached.cachedAt < CACHE_TTL_MS
  ) {
    return cached.result;
  }

  // Only read files whose stat succeeded.
  const [globalContent, projectContent] = await Promise.all([
    globalMtime !== null ? safeRead(GLOBAL_AGENTS_PATH) : null,
    projectAgentsPath && projectMtime !== null ? safeRead(projectAgentsPath) : null,
  ]);

  const errors: AgentParseError[] = [];
  // Insertion order: global agents first; a project agent with the same name
  // replaces the global one in place.
  const byName = new Map<string, Agent>();

  if (globalContent !== null) {
    const r = parseAgentsMd(globalContent);
    for (const a of r.agents) byName.set(a.name, { ...a, source: 'global' });
    errors.push(...r.errors);
  }

  if (projectContent !== null && isAgentRegistryMarkdown(projectContent)) {
    const r = parseAgentsMd(projectContent);
    for (const a of r.agents) byName.set(a.name, { ...a, source: 'project' });
    errors.push(...r.errors);
  }

  const result: AgentsResponse = {
    agents: Array.from(byName.values()),
    errors,
  };
  cache.set(cacheKey, { globalMtime, projectMtime, cachedAt: now, result });
  return result;
}

/**
 * Looks up a single agent by its slug id within the project's resolved list.
 *
 * @returns The first agent whose `id` equals `agentId`, or null if none does.
 */
export async function getAgentById(
  projectPath: string,
  agentId: string,
): Promise<Agent | null> {
  const { agents } = await getAgentsForProject(projectPath);
  return agents.find((a) => a.id === agentId) ?? null;
}