feat(agents): Tier 2 — AGENTS.md + per-session picker

Six builtin defaults (Code Reviewer, Debugger, Refactorer, Architect, Security Auditor, Prompt Builder) with no model field so session.model wins. Project root AGENTS.md parsed on demand with mtime cache; when present, only its agents are shown. sessions.agent_id resolves per turn into effective system prompt, temperature, and a tool whitelist applied in inference. AgentPicker mounts in the ChatInput toolbar; SettingsDrawer agent surface deferred to Batch 7. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 20:06:51 +00:00
parent 934f739ca1
commit 92bd3b1cdf
16 changed files with 984 additions and 35 deletions
--- a/apps/server/src/services/inference.ts
+++ b/apps/server/src/services/inference.ts
@@ -1,10 +1,11 @@
 import type { FastifyBaseLogger } from 'fastify';
 import type { Sql } from '../db.js';
 import type { Config } from '../config.js';
-import type { Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
-import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas } from './tools.js';
+import type { Agent, Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
+import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas, type ToolJsonSchema } from './tools.js';
 import { PathScopeError, resolveProjectRoot } from './path_guard.js';
 import { maybeAutoNameChat } from './auto_name.js';
+import { getAgentById } from './agents.js';

 const BASE_SYSTEM_PROMPT = (projectPath: string) =>
  `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -91,16 +92,32 @@ export interface InferenceContext {
  publishUser: (frame: UserStreamFrame) => void;
 }

+// Concatenation order: base prompt, then agent.system_prompt, then
+// session.system_prompt. Nothing is replaced: each non-blank layer is trimmed and
+// appended after a blank line; the session prompt goes last as the most specific.
+export function buildSystemPrompt(
+  project: Project, // project.path is interpolated into the base prompt
+  session: Session, // session.system_prompt is appended last when non-blank
+  agent: Agent | null // null means no agent layer is added
+): string {
+  let out = BASE_SYSTEM_PROMPT(project.path); // the base prompt is always present
+  if (agent && agent.system_prompt.trim().length > 0) { // skip whitespace-only agent prompts
+    out += '\n\n' + agent.system_prompt.trim();
+  }
+  if (session.system_prompt && session.system_prompt.trim().length > 0) { // skip falsy or whitespace-only
+    out += '\n\n' + session.system_prompt.trim();
+  }
+  return out;
+}
+
 export function buildMessagesPayload(
  session: Session,
  project: Project,
-  history: Message[]
+  history: Message[],
+  agent: Agent | null = null
 ): OpenAiMessage[] {
  const out: OpenAiMessage[] = [];
-  let systemPrompt = BASE_SYSTEM_PROMPT(project.path);
-  if (session.system_prompt && session.system_prompt.trim().length > 0) {
-    systemPrompt += '\n\n' + session.system_prompt.trim();
-  }
+  const systemPrompt = buildSystemPrompt(project, session, agent);
  out.push({ role: 'system', content: systemPrompt });

  // Find the latest compact marker — only send messages from that point onwards
@@ -161,7 +178,7 @@ async function loadContext(
  chatId: string
 ): Promise<{ session: Session; project: Project; history: Message[] } | null> {
  const sessionRows = await sql<Session[]>`
-    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at
+    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
    FROM sessions WHERE id = ${sessionId}
  `;
  if (sessionRows.length === 0) return null;
@@ -217,11 +234,18 @@ interface StreamResult {
  nCtx: number | null;
 }

+interface StreamOptions { // per-call options accepted by streamCompletion
+  // null = no tools at all (used by the compact phase); [] = every tool was filtered
+  // out (rare). Both leave `tools`/`tool_choice` off the body to avoid an OpenAI 400.
+  tools: ToolJsonSchema[] | null;
+  temperature?: number; // sent only when it is a number; otherwise left off the body
+}
+
 async function streamCompletion(
  ctx: InferenceContext,
  model: string,
  messages: OpenAiMessage[],
-  includeTools: boolean,
+  opts: StreamOptions,
  onDelta: (content: string) => void,
  signal?: AbortSignal
 ): Promise<StreamResult> {
@@ -231,10 +255,13 @@ async function streamCompletion(
    stream: true,
    stream_options: { include_usage: true },
  };
-  if (includeTools) {
-    body['tools'] = toolJsonSchemas();
+  if (opts.tools && opts.tools.length > 0) {
+    body['tools'] = opts.tools;
    body['tool_choice'] = 'auto';
  }
+  if (typeof opts.temperature === 'number') {
+    body['temperature'] = opts.temperature;
+  }

  const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
    method: 'POST',
@@ -366,7 +393,8 @@ async function executeStreamPhase(
  args: TurnArgs,
  session: Session,
  messages: OpenAiMessage[],
-  state: StreamPhaseState
+  state: StreamPhaseState,
+  agent: Agent | null
 ): Promise<StreamResult> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

@@ -407,12 +435,20 @@ async function executeStreamPhase(
    }, DB_FLUSH_INTERVAL_MS);
  };

+  // Tool whitelist: with an agent set, keep only the global tools whose names
+  // appear in agent.tools; unknown names there are dropped silently by the
+  // intersection (so a list matching nothing sends no tools). No agent: send all.
+  const effectiveTools: ToolJsonSchema[] = agent
+    ? toolJsonSchemas().filter((t) => agent.tools.includes(t.function.name))
+    : toolJsonSchemas();
+  const effectiveTemperature = agent?.temperature;
+
  try {
    return await streamCompletion(
      ctx,
      session.model,
      messages,
-      true,
+      { tools: effectiveTools, temperature: effectiveTemperature },
      (delta) => {
        state.accumulated += delta;
        ctx.publish(sessionId, {
@@ -657,12 +693,18 @@ async function runAssistantTurn(
  }
  const { session, project, history } = loaded;
  const projectRoot = await resolveProjectRoot(project.path);
-  const messages = buildMessagesPayload(session, project, history);
+  // The agent is resolved on every turn, so a PATCH to agent_id mid-conversation
+  // takes effect on the next message. An unknown agent_id resolves to null
+  // silently: the session falls back to base prompt + all tools + default temperature.
+  const agent = session.agent_id
+    ? await getAgentById(project.path, session.agent_id)
+    : null;
+  const messages = buildMessagesPayload(session, project, history, agent);

  const state: StreamPhaseState = { accumulated: '', startedAt: null };
  let result: StreamResult;
  try {
-    result = await executeStreamPhase(ctx, args, session, messages, state);
+    result = await executeStreamPhase(ctx, args, session, messages, state, agent);
  } catch (err) {
    await handleAbortOrError(ctx, args, state.accumulated, err);
    return;
@@ -720,7 +762,7 @@ async function runCompact(
      ctx,
      session.model,
      messagesForSummary,
-      false,
+      { tools: null },
      (delta) => {
        content += delta;
        ctx.publish(sessionId, {