// boocode/apps/server/src/services/agents.ts

import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js';
import { ALL_TOOLS, resolveToolTier } from './tools.js';

// v1.8.1: global agents live at /data/AGENTS.md inside the container
// (./data:/data:ro mount on the host). Per-project AGENTS.md at the project
// root overrides global by name. In-code builtins are gone — the seed file is
// the contents of the previous BUILTIN_AGENTS list, copied into /data/AGENTS.md
// once on first deploy.
const GLOBAL_AGENTS_PATH = '/data/AGENTS.md';
// Maximum age of a cached parse result. getAgentsForProject treats an entry
// older than this as a miss even when neither file's mtime has changed.
const CACHE_TTL_MS = 60_000;

// v1.12 Track B.3: derive from services/tools.ts ALL_TOOLS so new tools are
// auto-recognized in agent frontmatter `tools:` arrays. The previous
// hand-maintained list drifted (web_search/web_fetch from v1.11.8 + the 8
// codecontext tools were missing), silently filtering valid tool names out
// of agents that opted in. Single source of truth is tools.ts now.
// Both lists start out empty and are filled by the refreshToolNames() call
// below, so start-up and later refreshes share a single code path.
let ALL_TOOL_NAMES: readonly string[] = [];
let DEFAULT_TOOLS: string[] = [];

/**
 * Re-derives the known tool names from ALL_TOOLS. DEFAULT_TOOLS receives its
 * own copy of the list, so the two arrays never alias each other.
 *
 * NOTE(review): getAgentsForProject caches parsed agents whose tool lists were
 * filtered against the names known at parse time — confirm that callers
 * invalidate that cache after a refresh.
 */
export function refreshToolNames(): void {
  const names = ALL_TOOLS.map((tool) => tool.name);
  ALL_TOOL_NAMES = names;
  DEFAULT_TOOLS = names.slice();
}
refreshToolNames();

// Temperature given to agents whose frontmatter does not set one.
const DEFAULT_TEMPERATURE = 0.7;

// ---- Tool glob matching (v1.15.0-mcp-multi) --------------------------------

/**
 * Tests a flat tool name against a glob in which `*` stands for any run of
 * characters (including none). `?` and `**` get no special treatment — tool
 * names carry no path separators.
 *
 * @param str - Tool name to test.
 * @param pattern - Glob to test against; everything except `*` is literal.
 * @returns `true` when the whole of `str` matches `pattern`.
 */
function simpleGlobMatch(str: string, pattern: string): boolean {
  // Fast paths: a bare wildcard accepts everything, and a wildcard-free
  // pattern is a plain equality check that never needs a regex.
  if (pattern === '*') return true;
  if (pattern.indexOf('*') === -1) return pattern === str;

  // Escape each literal run between wildcards so it matches verbatim, then
  // stitch the runs back together with `.*` where the wildcards were.
  const literalRuns = pattern
    .split('*')
    .map((run) => run.replace(/[.+?^${}()|[\]\\]/g, '\\$&'));
  return new RegExp('^' + literalRuns.join('.*') + '$').test(str);
}

/**
 * Decides whether `toolName` is allowed by an ordered list of glob rules.
 * Rules are applied in order and the last rule that matches wins; a rule
 * prefixed with `!` denies instead of allowing.
 *
 * Examples:
 * - `["grep", "view_file"]` — exact-match whitelist (same as pre-v1.15)
 * - `["context7_*"]` — all tools from the context7 MCP server
 * - `["*", "!web_*"]` — all tools except web tools
 * - `[]` — nothing matches (agent gets no tools)
 *
 * @param toolName - Tool name to check.
 * @param patterns - Allow/deny globs, evaluated first to last.
 * @returns `true` when the last matching rule is an allow rule; `false` when
 *   it is a deny rule or when no rule matches.
 */
export function matchToolGlob(toolName: string, patterns: string[]): boolean {
  return patterns.reduce<boolean>((allowed, rule) => {
    const isDeny = rule.startsWith('!');
    const glob = isDeny ? rule.slice(1) : rule;
    // A rule that does not match leaves the verdict reached so far untouched.
    return simpleGlobMatch(toolName, glob) ? !isDeny : allowed;
  }, false);
}

/**
 * Tells whether a `tools:` entry is a glob rule rather than a plain tool
 * name, i.e. it contains `*` or begins with `!`. Such entries cannot be
 * validated against the current tool list because MCP tools are discovered
 * at runtime.
 */
function isGlobPattern(entry: string): boolean {
  const isDenyRule = entry.charAt(0) === '!';
  const hasWildcard = entry.indexOf('*') !== -1;
  return hasWildcard || isDenyRule;
}

/**
 * Turns a display name into a lowercase id: every run of characters outside
 * `a-z0-9` becomes a single `-`, with no leading or trailing dash.
 *
 * @param name - Human-readable name, e.g. an agent heading.
 * @returns The slug; empty when `name` contains no ASCII letters or digits.
 */
export function slugify(name: string): string {
  return name
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    // Separators at either end leave empty pieces behind; dropping them is
    // what keeps dashes off the edges.
    .filter((word) => word.length > 0)
    .join('-');
}

// ---- AGENTS.md parser ------------------------------------------------------

/**
 * Typed view of the frontmatter keys parseFrontmatter recognizes. Every field
 * is optional; a key that is absent from the block is simply left unset.
 */
interface ParsedFrontmatter {
  // Value of `temperature:`; only finite numbers are stored.
  temperature?: number;
  // Entries of `tools:` — tool names and/or glob patterns, not yet validated
  // against the known tool list.
  tools?: string[];
  // Value of `description:` with one pair of surrounding quotes removed.
  description?: string;
  // Value of `model:` with one pair of surrounding quotes removed.
  model?: string;
  // v1.8.2: optional per-agent tool-loop budget. Absent → inference resolves
  // from the agent's toolset at runtime.
  max_tool_calls?: number;
  // v1.14.0: optional per-agent step cap. Absent → bounded only by MAX_STEPS
  // (200) in the outer loop. Integer ≥ 0; steps: 0 means "no tool calls
  // allowed" — the model responds text-only.
  steps?: number;
}

/**
 * Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from `s`.
 * Unquoted strings, and strings whose first and last characters differ, are
 * returned unchanged.
 */
function stripQuotes(s: string): string {
  if (
    s.length >= 2 &&
    (s[0] === '"' || s[0] === "'") &&
    s[0] === s[s.length - 1]
  ) {
    return s.slice(1, -1);
  }
  return s;
}

/**
 * Parses the small YAML subset used by agent frontmatter: one `key: value`
 * pair per line, plus a `tools` list written in any of three forms —
 * `tools: [a, b]`, the loose `tools: a, b`, or a block list of `- item` lines
 * under an empty `tools:`. Quotes around list items are removed in all three
 * forms and empty items are dropped.
 *
 * A numeric key (`temperature`, `max_tool_calls`, `steps`) with a blank value
 * is treated as unset, the way YAML reads an empty scalar as null.
 *
 * @param yaml - Text between the two `---` fences.
 * @returns `data` holding the recognized keys, and `errors` with one message
 *   per value that could not be parsed. Out-of-range `max_tool_calls` and
 *   negative `steps` are logged with console.warn and skipped rather than
 *   reported. Unknown keys are ignored.
 */
function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: string[] } {
  const data: ParsedFrontmatter = {};
  const errors: string[] = [];
  // Non-null only while the lines being read may still be `- item` entries of
  // a block-style `tools:` list.
  let openList: string[] | null = null;

  // Shared by the bracketed and the loose comma-separated list forms.
  const parseInlineList = (csv: string): string[] =>
    csv
      .split(',')
      .map((s) => stripQuotes(s.trim()))
      .filter((s) => s.length > 0);

  for (const rawLine of yaml.split('\n')) {
    const line = rawLine.trim();
    if (line.length === 0) continue;

    // Block-list continuation: "- value" under a key that was set to empty.
    // Items get the same unquoting and empty-item filtering as the inline
    // forms, so `- "grep"` and `tools: ["grep"]` yield the same entry.
    if (openList !== null && line.startsWith('- ')) {
      const item = stripQuotes(line.slice(2).trim());
      if (item.length > 0) openList.push(item);
      continue;
    }
    openList = null;

    const colonIdx = line.indexOf(':');
    if (colonIdx < 0) continue;
    const key = line.slice(0, colonIdx).trim();
    const valueRaw = line.slice(colonIdx + 1).trim();

    if (key === 'tools') {
      if (valueRaw === '') {
        openList = [];
        data.tools = openList;
      } else if (valueRaw.startsWith('[') && valueRaw.endsWith(']')) {
        data.tools = parseInlineList(valueRaw.slice(1, -1));
      } else {
        // Loose form: "tools: a, b, c"
        data.tools = parseInlineList(valueRaw);
      }
    } else if (key === 'description') {
      data.description = stripQuotes(valueRaw);
    } else if (key === 'model') {
      data.model = stripQuotes(valueRaw);
    } else if (valueRaw === '') {
      // Blank scalar → unset. Number('') is 0, so without this guard a bare
      // `steps:` would silently become the "no tool calls" setting and a bare
      // `temperature:` would become 0.
    } else if (key === 'temperature') {
      const n = Number(valueRaw);
      if (Number.isFinite(n)) data.temperature = n;
      else errors.push(`temperature must be a number (got "${valueRaw}")`);
    } else if (key === 'max_tool_calls') {
      // v1.8.2: 1..100 inclusive integer. Out-of-range values are skipped
      // with a warning rather than throwing — agents shouldn't be unusable
      // because of a typo on a defaulted field. Non-numeric or non-integer
      // still hard-fails the block, matching `temperature` behavior.
      const n = Number(valueRaw);
      if (Number.isInteger(n) && n >= 1 && n <= 100) {
        data.max_tool_calls = n;
      } else if (Number.isInteger(n)) {
        console.warn(
          `agents: max_tool_calls ${n} out of range 1-100, ignoring (falling back to default)`,
        );
      } else {
        errors.push(`max_tool_calls must be an integer 1-100 (got "${valueRaw}")`);
      }
    } else if (key === 'steps') {
      // v1.14.0: per-agent step cap for the outer inference loop. Integer ≥ 0.
      // steps: 0 means "no tool calls allowed" — model responds text-only.
      // Negative integers are warned and ignored (falls back to the MAX_STEPS
      // ceiling); non-numeric or non-integer values hard-fail the block,
      // matching the max_tool_calls pattern above.
      const n = Number(valueRaw);
      if (Number.isInteger(n) && n >= 0) {
        data.steps = n;
      } else if (Number.isInteger(n)) {
        console.warn(
          `agents: steps ${n} is negative, ignoring (falling back to default)`,
        );
      } else {
        errors.push(`steps must be a non-negative integer (got "${valueRaw}")`);
      }
    }
    // Unknown keys silently ignored — forward-compat.
  }

  return { data, errors };
}

/** One `## <name>` section of an AGENTS.md file, as produced by splitSections. */
interface RawSection {
  // Heading text that follows `## `, with surrounding whitespace removed.
  name: string;
  // Every line between this heading and the next `## ` heading (or the end
  // of the content), joined with '\n'.
  body: string;
}

/**
 * Cuts AGENTS.md content into its `## <name>` sections. Text before the
 * first such heading is discarded; deeper headings (`###` and below) stay in
 * the body of the section they appear in.
 *
 * @param content - Full text of an AGENTS.md file.
 * @returns The sections in file order; empty when there is no `## ` heading.
 */
function splitSections(content: string): RawSection[] {
  // `\s+` directly after `##` means a `###` line can never match, so no
  // separate level-3 check is needed.
  const headingRe = /^##\s+(.+?)\s*$/;
  const sections: RawSection[] = [];
  // Section currently being collected; null until the first heading is seen.
  let open: { name: string; lines: string[] } | null = null;

  const flush = (): void => {
    if (open !== null) {
      sections.push({ name: open.name, body: open.lines.join('\n') });
    }
  };

  for (const line of content.split('\n')) {
    const heading = headingRe.exec(line);
    if (heading === null) {
      if (open !== null) open.lines.push(line);
      continue;
    }
    // A new heading closes the previous section and opens the next one.
    flush();
    open = { name: heading[1]!.trim(), lines: [] };
  }
  flush();
  return sections;
}

/**
 * Builds an agent (without its `source`) from one `## Name` section: a
 * `---`-fenced frontmatter block followed by the system prompt.
 *
 * @param section - Heading name and raw body from splitSections.
 * @returns The agent, with defaults applied for keys the frontmatter omits.
 * @throws Error when the opening or closing `---` fence is missing, or when
 *   parseFrontmatter reports errors. The caller collects these per block.
 */
function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
  const bodyLines = section.body.split('\n');
  const isFence = (text: string): boolean => text.trim() === '---';

  // The first line that has any content must be the opening fence.
  const openingIdx = bodyLines.findIndex((text) => text.trim() !== '');
  if (openingIdx === -1 || !isFence(bodyLines[openingIdx]!)) {
    throw new Error('missing opening --- fence after heading');
  }

  const closingIdx = bodyLines.findIndex((text, i) => i > openingIdx && isFence(text));
  if (closingIdx === -1) {
    throw new Error('missing closing --- fence');
  }

  // Everything after the closing fence is the prompt.
  const promptText = bodyLines.slice(closingIdx + 1).join('\n').trim();

  const frontmatter = parseFrontmatter(bodyLines.slice(openingIdx + 1, closingIdx).join('\n'));
  if (frontmatter.errors.length > 0) {
    throw new Error(frontmatter.errors.join('; '));
  }
  const fm = frontmatter.data;

  // v1.13.15-tools: intersect with BOOCODE_TOOLS tier (ceiling, not expansion).
  // Unset → resolveToolTier returns ALL tool names → no narrowing.
  // v1.15.0-mcp-multi: glob patterns (entries containing * or starting with !)
  // pass through unvalidated — MCP tools are discovered at runtime and can't
  // be checked against ALL_TOOL_NAMES at parse time.
  const withinTier = new Set(resolveToolTier(process.env.BOOCODE_TOOLS));
  let tools: string[];
  if (Array.isArray(fm.tools)) {
    tools = fm.tools.filter(
      (entry) =>
        isGlobPattern(entry) || (ALL_TOOL_NAMES.includes(entry) && withinTier.has(entry)),
    );
  } else {
    // No `tools:` key at all → every known tool the tier permits.
    tools = DEFAULT_TOOLS.filter((toolName) => withinTier.has(toolName));
  }

  return {
    id: slugify(section.name),
    name: section.name,
    description: fm.description ?? '',
    system_prompt: promptText,
    temperature: fm.temperature ?? DEFAULT_TEMPERATURE,
    tools,
    // An empty model string counts as "not set".
    model: fm.model || null,
    max_tool_calls: fm.max_tool_calls ?? null,
    steps: fm.steps ?? null,
  };
}

/** Outcome of parsing one AGENTS.md file with parseAgentsMd. */
interface ParseResult {
  // Agents that parsed cleanly, in file order. `source` is not set here; the
  // caller adds it.
  agents: Omit<Agent, 'source'>[];
  // One entry per section that was skipped because it failed to parse.
  errors: AgentParseError[];
}

/**
 * Parses every `## Name` block of an AGENTS.md file independently (v1.8.1).
 * A block that fails to parse does not abort the rest of the file: it is
 * logged with console.warn, recorded in `errors`, and parsing moves on.
 *
 * @param content - Full text of an AGENTS.md file.
 * @returns The agents that parsed, plus one error per skipped block.
 */
export function parseAgentsMd(content: string): ParseResult {
  const outcome: ParseResult = { agents: [], errors: [] };

  splitSections(content).forEach((section) => {
    let agent: Omit<Agent, 'source'>;
    try {
      agent = parseAgentSection(section);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`agents: skipped "${section.name}" — ${reason}`);
      outcome.errors.push({ agent_name: section.name, reason });
      return;
    }
    outcome.agents.push(agent);
  });

  return outcome;
}

// ---- mtime-keyed cache + public API ----------------------------------------

/** One cached getAgentsForProject result plus the data used to validate it. */
interface CacheEntry {
  // mtimeMs of the global AGENTS.md observed for this fill; null when the
  // stat failed.
  globalMtime: number | null;
  // mtimeMs of the project's AGENTS.md observed for this fill; null when the
  // stat failed or no project path was given.
  projectMtime: number | null;
  // Date.now() taken during the fill; compared against CACHE_TTL_MS on lookup.
  cachedAt: number;
  // The merged agents and parse errors that were returned to the caller.
  result: AgentsResponse;
}

// Keyed by projectPath; an empty projectPath (the no-project case, e.g. tests)
// is stored under the '__none__' sentinel. Each entry also records the mtimes
// of the two files it was built from (global + project); editing either bumps
// the corresponding mtime so the next read sees a miss without a watcher.
const cache = new Map<string, CacheEntry>();

/**
 * Drops cached agent lists so that the next getAgentsForProject call re-reads
 * and re-parses AGENTS.md.
 *
 * @param projectPath - Project whose entry to drop. An empty string targets
 *   the no-project entry, which the read path stores under `'__none__'`.
 *   Omit the argument to clear every entry.
 */
export function invalidateAgentsCache(projectPath?: string): void {
  if (projectPath === undefined) {
    cache.clear();
    return;
  }
  // Normalize the key exactly like getAgentsForProject and getAgentsMtimes
  // do; deleting the raw '' key would miss the entry and leave it cached.
  cache.delete(projectPath || '__none__');
}

/**
 * v1.13.8: cache-read accessor for the system-prompt prefix-fingerprint log.
 * Reports the AGENTS.md mtimes that getAgentsForProject() recorded on its
 * last cache fill for `projectPath`.
 *
 * Performs no I/O on purpose: a fresh stat would race the cache, and the
 * fingerprint wants the values that were actually used to resolve the agent.
 *
 * @param projectPath - Project to look up; an empty string selects the
 *   no-project entry.
 * @returns The recorded mtimes. Both are null when the cache is cold (e.g.
 *   tests, fresh boot before the first inference turn).
 */
export function getAgentsMtimes(projectPath: string): {
  global: number | null;
  project: number | null;
} {
  const entry = cache.get(projectPath || '__none__');
  return {
    global: entry?.globalMtime ?? null,
    project: entry?.projectMtime ?? null,
  };
}

/**
 * Looks up the modification time of `path` without ever throwing.
 *
 * @param path - File or directory to stat.
 * @returns The mtime in milliseconds, or null when the stat fails for any
 *   reason.
 */
async function safeStat(path: string): Promise<number | null> {
  let mtimeMs: number | null = null;
  try {
    ({ mtimeMs } = await fs.stat(path));
  } catch {
    // Any stat failure is reported as "no mtime" rather than an error.
  }
  return mtimeMs;
}

/**
 * Reads `path` as UTF-8 text without ever throwing.
 *
 * @param path - File to read.
 * @returns The file's contents, or null when the read fails for any reason.
 */
async function safeRead(path: string): Promise<string | null> {
  let text: string | null;
  try {
    text = await fs.readFile(path, { encoding: 'utf8' });
  } catch {
    text = null;
  }
  return text;
}

/**
 * Resolves the agents visible to a project: those from the global AGENTS.md,
 * overlaid by the project's own AGENTS.md, where a project agent replaces a
 * global agent of the same name.
 *
 * Results are cached per project path. A cached result is reused only while
 * both files' mtimes match the ones recorded with it and the entry is younger
 * than CACHE_TTL_MS; otherwise both files are read and parsed again.
 *
 * @param projectPath - Project root; an empty string means "no project", in
 *   which case only the global file is consulted.
 * @returns The merged agents plus the parse errors from both files.
 */
export async function getAgentsForProject(projectPath: string): Promise<AgentsResponse> {
  const projectFile = projectPath ? join(projectPath, 'AGENTS.md') : null;

  // Stat first: the mtimes decide whether the cached parse is still usable.
  const [globalMtime, projectMtime] = await Promise.all([
    safeStat(GLOBAL_AGENTS_PATH),
    projectFile === null ? null : safeStat(projectFile),
  ]);

  const cacheKey = projectPath || '__none__';
  const now = Date.now();
  const hit = cache.get(cacheKey);
  if (
    hit !== undefined &&
    hit.globalMtime === globalMtime &&
    hit.projectMtime === projectMtime &&
    now - hit.cachedAt < CACHE_TTL_MS
  ) {
    return hit.result;
  }

  // A file whose stat failed is not read at all.
  const [globalText, projectText] = await Promise.all([
    globalMtime === null ? null : safeRead(GLOBAL_AGENTS_PATH),
    projectFile === null || projectMtime === null ? null : safeRead(projectFile),
  ]);

  const errors: AgentParseError[] = [];
  const byName = new Map<string, Agent>();
  // Global goes in first so that project entries overwrite it by name.
  const layers: Array<[string | null, Agent['source']]> = [
    [globalText, 'global'],
    [projectText, 'project'],
  ];
  for (const [text, source] of layers) {
    if (text === null) continue;
    const parsed = parseAgentsMd(text);
    for (const agent of parsed.agents) byName.set(agent.name, { ...agent, source });
    errors.push(...parsed.errors);
  }

  const result: AgentsResponse = { agents: [...byName.values()], errors };
  cache.set(cacheKey, { globalMtime, projectMtime, cachedAt: now, result });
  return result;
}

/**
 * Finds one agent of a project by its id.
 *
 * @param projectPath - Project root passed on to getAgentsForProject.
 * @param agentId - Id to look for.
 * @returns The first agent whose `id` equals `agentId`, or null when there is
 *   none.
 */
export async function getAgentById(
  projectPath: string,
  agentId: string,
): Promise<Agent | null> {
  const response = await getAgentsForProject(projectPath);
  for (const agent of response.agents) {
    if (agent.id === agentId) return agent;
  }
  return null;
}