From 92bd3b1cdfcfa9832770f23ae860803a336c1ac0 Mon Sep 17 00:00:00 2001
From: indifferentketchup <samkintop@gmail.com>
Date: Sat, 16 May 2026 20:06:51 +0000
Subject: [PATCH] =?UTF-8?q?feat(agents):=20Tier=202=20=E2=80=94=20AGENTS.m?=
 =?UTF-8?q?d=20+=20per-session=20picker?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Six builtin defaults (Code Reviewer, Debugger, Refactorer, Architect,
Security Auditor, Prompt Builder) with no model field so session.model
wins. Project root AGENTS.md parsed on demand with mtime cache; when
present, only its agents are shown. sessions.agent_id resolves per turn
into effective system prompt, temperature, and a tool whitelist applied
in inference. AgentPicker mounts in the ChatInput toolbar; SettingsDrawer
agent surface deferred to Batch 7.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 AGENTS.md                                  | 197 +++++++++
 CLAUDE.md                                  |   8 +-
 apps/server/src/index.ts                   |   2 +
 apps/server/src/routes/agents.ts           |  20 +
 apps/server/src/routes/sessions.ts         |  40 +-
 apps/server/src/schema.sql                 |   5 +
 apps/server/src/services/agents.ts         | 458 +++++++++++++++++++++
 apps/server/src/services/inference.ts      |  74 +++-
 apps/server/src/types/api.ts               |  21 +
 apps/web/src/api/client.ts                 |  10 +-
 apps/web/src/api/types.ts                  |  19 +
 apps/web/src/components/AgentPicker.tsx    | 108 +++++
 apps/web/src/components/ChatInput.tsx      |  19 +-
 apps/web/src/components/Workspace.tsx      |  14 +-
 apps/web/src/components/panes/ChatPane.tsx |  14 +-
 apps/web/src/pages/Session.tsx             |  10 +-
 16 files changed, 984 insertions(+), 35 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 apps/server/src/routes/agents.ts
 create mode 100644 apps/server/src/services/agents.ts
 create mode 100644 apps/web/src/components/AgentPicker.tsx
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..19c55b7
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,197 @@
+# Agents
+
+## Code Reviewer
+---
+temperature: 0.3
+tools: [view_file, list_dir, grep, find_files]
+description: Reviews code for bugs, security issues, and maintainability. Read-only.
+---
+You review code. Find real problems, not style nits.
+
+Process:
+1. Read the file(s) in question with view_file. If a diff is provided, read surrounding context too.
+2. Use grep/find_files to check how changed symbols are used elsewhere.
+3. Cite every finding as file:line.
+
+Prioritize in order:
+1. Bugs and logic errors
+2. Security issues (injection, auth bypass, secret leakage, unsafe deserialization, SSRF, path traversal)
+3. Race conditions, error handling, resource leaks
+4. Performance issues with measurable impact
+5. Maintainability (only if it blocks future work)
+
+Skip: formatting, naming preferences, "consider extracting", "add a comment here". The user has a linter.
+
+Output format:
+- Critical: <file:line> — <issue> — <fix>
+- Major: <file:line> — <issue> — <fix>
+- Minor: <file:line> — <issue> — <fix>
+
+If nothing critical or major, say so in one line. Do not pad.
+
+
+## Debugger
+---
+temperature: 0.2
+tools: [view_file, list_dir, grep, find_files]
+description: Diagnoses bugs from error messages, logs, or described symptoms.
+---
+You diagnose bugs. Form a hypothesis, prove it with evidence from the code.
+
+Process:
+1. Restate the symptom in one line. Confirm you understand it.
+2. Read the error/stacktrace. Identify the exact frame where things go wrong.
+3. view_file on that frame. Read 50 lines around it.
+4. grep for callers, related state, recent changes that could explain it.
+5. State the root cause with file:line evidence.
+6. Propose the minimal fix. Note any side effects.
+
+Rules:
+- Never guess. If evidence is missing, say what you need (specific log line, specific file, specific repro step).
+- Distinguish symptom from cause. A null check fixes the symptom; missing init causes it.
+- Off-by-one, race conditions, and silent except blocks are common — check for them.
+- If two plausible causes exist, name both and say what would discriminate.
+
+Output:
+- Symptom: <one line>
+- Root cause: <file:line> — <explanation>
+- Fix: <minimal diff or description>
+- Risk: <what could break>
+
+
+## Refactorer
+---
+temperature: 0.3
+tools: [view_file, list_dir, grep, find_files]
+description: Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.
+---
+You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code.
+
+Process:
+1. Read the target file(s).
+2. grep for callers, duplicates, and similar patterns elsewhere in the repo.
+3. Identify the smallest refactor that delivers the goal.
+
+Prioritize:
+1. Deduplication where 3+ sites have near-identical logic
+2. Extracting a function/module when one is doing two unrelated jobs
+3. Decoupling when a change in A forces a change in B unnecessarily
+4. Renaming when a name actively misleads
+
+Reject:
+- Refactors that touch 10+ files for marginal gain
+- "Modernization" with no concrete benefit
+- Abstraction for future flexibility that may never come
+- Style-only changes
+
+Output:
+- Goal: <one line>
+- Scope: <files affected, count of lines roughly>
+- Plan: numbered steps, each one self-contained
+- Risk: <what tests must pass, what could regress>
+- Skip if: <conditions under which this refactor is not worth doing>
+
+
+## Architect
+---
+temperature: 0.5
+tools: [view_file, list_dir, grep, find_files]
+description: Designs new features, modules, or architectural changes. Outputs a build plan.
+---
+You design. You produce build plans, not code.
+
+Process:
+1. Restate the goal in your own words. Confirm constraints (perf, deploy, deps).
+2. list_dir the relevant areas. Read existing patterns — match them unless there's a reason not to.
+3. Decide: extend existing code or add new module. Justify.
+4. Sketch the data flow: inputs → transforms → outputs → side effects.
+5. Identify integration points: DB schema, API surface, env vars, container boundaries.
+6. List failure modes and how the design handles them.
+
+Rules:
+- Reuse before inventing. If a service/lib in the repo already does this, say so.
+- Prefer boring tech. New deps require justification.
+- Tailscale IPs for internal routing. No 0.0.0.0 binds.
+- Least privilege: separate read/write paths, explicit auth gates.
+- State assumptions inline. Do not ask clarifying questions mid-design unless blocked.
+
+Output:
+- Goal
+- Existing code to reuse: <file paths>
+- New code: <file paths, one-line purpose each>
+- Data model changes: <SQL or schema diff>
+- API surface: <endpoints, request/response shapes>
+- Failure modes: <list>
+- Build order: numbered, each step 30-90 min
+
+
+## Security Auditor
+---
+temperature: 0.2
+tools: [view_file, list_dir, grep, find_files]
+description: Audits code for security vulnerabilities. Read-only.
+---
+You audit for security issues. Concrete findings only, no generic warnings.
+
+Process:
+1. Identify the trust boundary: where does untrusted input enter? Where does it leave?
+2. Trace input flow with grep. Mark every transformation.
+3. Check each finding against a real attack scenario.
+
+Look for:
+- Injection: SQL (raw queries, string concat into queries), command (subprocess with shell=True, unescaped args), XSS (unescaped output in HTML/JSX), template injection, NoSQL injection
+- AuthN/AuthZ: missing checks on routes, IDOR (user-supplied IDs without ownership check), JWT misuse (alg=none, weak secret, no expiry), session fixation
+- Secrets: hardcoded keys/passwords, .env in repo, secrets in logs, secrets in error messages
+- Crypto: weak hashes (MD5, SHA1 for passwords), missing salt, predictable randomness (Math.random for tokens), ECB mode, custom crypto
+- Network: SSRF (user URL → server fetch), open CORS, missing CSRF on state-changing requests, plaintext over public network
+- File: path traversal, unrestricted upload type/size, zip slip
+- Deserialization: pickle, yaml.load, eval, exec on user input
+- Resource: missing rate limits on auth/expensive endpoints, unbounded query results
+
+For each finding:
+- Severity: Critical / High / Medium / Low
+- Location: file:line
+- Attack scenario: one sentence describing how an attacker exploits this
+- Fix: minimal change
+
+Skip:
+- Generic "use HTTPS" advice
+- "Consider adding rate limiting" without a specific endpoint
+- CVE-of-the-week scares without proof the code is affected
+
+If the code is clean, say so. Do not invent findings.
+
+
+## Prompt Builder
+---
+temperature: 0.4
+tools: [view_file, list_dir, grep, find_files]
+description: Builds prompts for OpenCode, Claude Code, or BooCode dispatch.
+---
+You write prompts that another coding agent will execute. Your output is the prompt, not the work.
+
+Process:
+1. Ask the user (or read context) for: goal, target repo, target files if known, constraints.
+2. list_dir and view_file the target area. Confirm files exist and are roughly the shape you think.
+3. Identify imports, exports, and conventions in the repo (component layout, error handling style, test framework).
+4. Write the prompt.
+
+Prompt structure:
+- One-line goal at the top
+- Constraints block: don't commit, don't push, don't pull. Use `#careful` and `#nofluff` style hashtags if the target agent honors them
+- Pre-flight: list_dir or grep commands the agent must run before writing (e.g. "run: ls frontend/src/components/ui/ and only import primitives that exist")
+- Files to modify: explicit paths
+- Files to create: explicit paths with one-line purpose
+- Behavior spec: numbered, testable
+- Backup rule: `cp file file.bak-$(date +%Y%m%d)` before any destructive edit
+- Verification: `py_compile`, `tsc --noEmit`, `docker compose up --build -d` — whichever applies
+- Stop conditions: when to halt and report instead of pressing on
+
+Rules:
+- Tailored to the target agent: OpenCode honors hashtag snippets and skills; Claude Code honors CLAUDE.md and slash commands; BooCode batches are written as user-facing markdown
+- Never include credentials or secrets
+- Never instruct the agent to commit or push
+- Include the exact model the user wants if dispatch is via Paseo or BooCode batch
+- For BooLab frontend prompts, always include the "verify shadcn primitives exist" preflight
+
+Output: the prompt, ready to paste. Nothing else.
diff --git a/CLAUDE.md b/CLAUDE.md
index 6fc5049..5281469 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -31,7 +31,7 @@ npx tsc -p apps/web/tsconfig.app.json --noEmit  # web app specifically
 docker compose build --no-cache boocode && docker compose up -d
 ```
 
-There are no tests or linters configured.
+Tests: `pnpm -C apps/server test` runs 23 vitest tests. No test harness on `apps/web` (adding it requires installing vitest as a new devDep). Vitest is pinned to `^3` because vitest 4 is incompatible with Vite 5. No linters configured.
 
 ## Architecture
 
@@ -44,7 +44,7 @@ There are no tests or linters configured.
 - **Zod** for request validation and config parsing.
 
 Key services:
-- **`services/inference.ts`** — Streams LLM responses, executes tool loops (max 5 depth), flushes to DB every 500ms. Publishes `InferenceFrame` events through the broker.
+- **`services/inference.ts`** — Streams LLM responses, executes tool loops (max depth 15, see `MAX_TOOL_LOOP_DEPTH`), flushes to DB every 500ms. Publishes `InferenceFrame` events through the broker.
 - **`services/broker.ts`** — In-memory pub/sub with two channel types: per-session (message streaming) and per-user (sidebar updates). No persistence; clients reconnect on restart.
 - **`services/tools.ts`** — Four read-only file tools exposed as OpenAI function-calling schemas. All file access goes through `path_guard.ts` which resolves against project root.
 - **`services/file_ops.ts`** — Shared file operation implementations used by both inference tools and HTTP routes.
@@ -57,6 +57,7 @@ Route registration: all routes registered in `index.ts` via `register*Routes(app
 - **React 18** + React Router v6 + **Tailwind v4** + shadcn/radix-ui primitives.
 - **Shiki** for syntax highlighting (async `codeToHtml` in `CodeBlock.tsx` and `FileViewer` in `FileBrowserPane.tsx`).
 - Path alias: `@/` maps to `src/`.
+- **Mobile interaction primitives** (post-v1.6): `useViewport` (matchMedia, breakpoints mobile <768 / tablet 768–1023 / desktop ≥1024), `useSidebarDrawer` / `useRightRailDrawer` (Context + auto-close on `useLocation().pathname` change), `useLongPress` (500ms timer, dispatches synthetic `contextmenu` on `[data-tab-id]`), `usePullToRefresh` (80px threshold, 600ms hold), `SwipeablePaneTab` (60px close, 30px vertical bail). Tap-target convention: `max-md:min-h-[44px] max-md:min-w-[44px]`. Mobile headers: `border-b px-3 sm:px-4 py-2` + `style={{ paddingTop: 'max(0.5rem, env(safe-area-inset-top))' }}`. Hamburger left, FolderTree right.
 
 Key patterns:
 - **`hooks/sessionEvents.ts`** — Module-singleton event bus (Set of listeners). Used for cross-component communication: session renames, file-open events, attachment dispatch. 9 event types in the discriminated union. When adding a new event type to the `SessionEvent` union, you must also add a case to the `applyEvent` switch in `useSidebar.ts` (even if it's a no-op `return prev`).
@@ -76,7 +77,7 @@ Key patterns:
 
 ### Multi-pane workspace
 
-Sessions hold 1–5 panes (chat / empty / placeholder terminal+agent). Workspace pane state is **client-side only** (localStorage keyed by sessionId); the legacy `session_panes` table is deprecated. Each chat lives in at most one pane; tab strip is per-pane and tracks `chatIds[]` + `activeChatIdx`. Sessions 1:N chats; chats own messages. Tab reorder via native HTML5 drag events.
+Sessions hold 1–5 panes (chat / empty / placeholder terminal+agent). Workspace pane state is **client-side only** (localStorage key `boocode.workspace.panes.<sessionId>`); the legacy `session_panes` table and its REST endpoints are deprecated — no `/api/panes/*` routes exist. Each chat lives in at most one pane; tab strip is per-pane and tracks `chatIds[]` + `activeChatIdx`. Sessions 1:N chats; chats own messages. Tab reorder via native HTML5 drag events.
 
 ## Database
 
@@ -94,6 +95,7 @@ Required: `DATABASE_URL`, `LLAMA_SWAP_URL`. Optional: `PORT` (3000), `HOST` (0.0
 
 - Sam reviews all diffs and commits manually. Do not commit unless explicitly asked.
 - Deploy: `cd /opt/boocode && docker compose up --build -d` (or `docker compose build --no-cache boocode && docker compose up -d` if you suspect a layer-cache issue).
+- Git push to Gitea: `GIT_SSH_COMMAND="ssh -i /opt/boocode/secrets/boocode_gitea -o IdentitiesOnly=yes" git push origin <branch>`. The default agent identity is rejected; the in-repo deploy key (`secrets/`, gitignored) is the working one. Transient `Connection reset by peer` retries cleanly after `sleep 5`.
 - Don't accumulate `.bak-*` files. Clean them up in the same batch or immediately after merge.
 - Fastify global JSON parser tolerates empty bodies (overridden in `index.ts`); bodyless POSTs (archive, unarchive, stop) work without setting `Content-Type` tricks on the client.
 - Event dedup discipline: for any mutation the server publishes via `broker.publishUser`, do NOT add a local `sessionEvents.emit(...)` after the API call — `useUserEvents` forwards the WS frame onto the bus. Frontend mutation handlers must be idempotent (dedup by id, no-op on already-present).
diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts
index 082a2f7..e1a3642 100644
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -14,6 +14,7 @@ import { registerChatRoutes } from './routes/chats.js';
 import { registerSidebarRoutes } from './routes/sidebar.js';
 import { registerWebSocket } from './routes/ws.js';
 import { registerModelRoutes } from './routes/models.js';
+import { registerAgentRoutes } from './routes/agents.js';
 import { createInferenceRunner } from './services/inference.js';
 import { createBroker } from './services/broker.js';
 
@@ -57,6 +58,7 @@ async function main() {
   registerSessionRoutes(app, sql, config, broker);
   registerSettingsRoutes(app, sql);
   registerModelRoutes(app, config);
+  registerAgentRoutes(app, sql);
   registerSidebarRoutes(app, sql);
   registerChatRoutes(app, sql, broker);
 
diff --git a/apps/server/src/routes/agents.ts b/apps/server/src/routes/agents.ts
new file mode 100644
index 0000000..3eab756
--- /dev/null
+++ b/apps/server/src/routes/agents.ts
@@ -0,0 +1,20 @@
+import type { FastifyInstance } from 'fastify';
+import type { Sql } from '../db.js';
+import { getAgentsForProject } from '../services/agents.js';
+
+// Registers GET /api/projects/:id/agents (agent list for a single project).
+export function registerAgentRoutes(app: FastifyInstance, sql: Sql): void {
+  app.get<{ Params: { id: string } }>(
+    '/api/projects/:id/agents',
+    async (req, reply) => {
+      const [project] = await sql<{ path: string }[]>`
+        SELECT path FROM projects WHERE id = ${req.params.id}
+      `;
+      // getAgentsForProject handles AGENTS.md presence/parse/cache; never throws.
+      if (project) return await getAgentsForProject(project.path);
+      // No row matched the id, so report the project as missing.
+      reply.code(404);
+      return { error: 'project not found' };
+    }
+  );
+}
diff --git a/apps/server/src/routes/sessions.ts b/apps/server/src/routes/sessions.ts
index 2f087ea..23e2d12 100644
--- a/apps/server/src/routes/sessions.ts
+++ b/apps/server/src/routes/sessions.ts
@@ -5,17 +5,20 @@ import type { Config } from '../config.js';
 import type { Broker } from '../services/broker.js';
 import type { Session } from '../types/api.js';
 import { getSetting } from './settings.js';
+import { getAgentsForProject } from '../services/agents.js';
 
 const CreateBody = z.object({
   name: z.string().min(1).max(200).optional(),
   model: z.string().min(1).max(200).optional(),
   system_prompt: z.string().max(8000).optional(),
+  agent_id: z.string().min(1).max(200).nullable().optional(),
 });
 
 const PatchBody = z.object({
   name: z.string().min(1).max(200).optional(),
   model: z.string().min(1).max(200).optional(),
   system_prompt: z.string().max(8000).optional(),
+  agent_id: z.string().min(1).max(200).nullable().optional(),
 });
 
 async function resolveDefaultModel(sql: Sql, config: Config): Promise<string> {
@@ -24,6 +27,13 @@ async function resolveDefaultModel(sql: Sql, config: Config): Promise<string> {
   return config.DEFAULT_MODEL;
 }
 
+// Default agent for a new session: the id of the first entry that
+// getAgentsForProject returns for the project, or null when the list is empty.
+async function resolveDefaultAgent(projectPath: string): Promise<string | null> {
+  const [firstAgent] = (await getAgentsForProject(projectPath)).agents;
+  return firstAgent?.id ?? null;
+}
+
 export function registerSessionRoutes(
   app: FastifyInstance,
   sql: Sql,
@@ -40,7 +50,7 @@ export function registerSessionRoutes(
       }
       const status = req.query.status === 'archived' ? 'archived' : 'open';
       const rows = await sql<Session[]>`
-        SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at
+        SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
         FROM sessions
         WHERE project_id = ${req.params.id} AND status = ${status}
         ORDER BY updated_at DESC
@@ -57,11 +67,14 @@ export function registerSessionRoutes(
         reply.code(400);
         return { error: 'invalid body', details: parsed.error.flatten() };
       }
-      const project = await sql`SELECT id FROM projects WHERE id = ${req.params.id}`;
+      const project = await sql<{ id: string; path: string }[]>`
+        SELECT id, path FROM projects WHERE id = ${req.params.id}
+      `;
       if (project.length === 0) {
         reply.code(404);
         return { error: 'project not found' };
       }
+      const projectPath = project[0]!.path;
 
       let model = parsed.data.model;
       if (!model) {
@@ -76,12 +89,18 @@ export function registerSessionRoutes(
 
       const name = parsed.data.name ?? 'New session';
       const systemPrompt = parsed.data.system_prompt ?? '';
+      // If the client provided agent_id (string or null), use it; otherwise
+      // resolve to the project's first agent (file-defined or builtin), or null.
+      const agentId =
+        parsed.data.agent_id !== undefined
+          ? parsed.data.agent_id
+          : await resolveDefaultAgent(projectPath);
 
       const row = await sql.begin(async (tx) => {
         const [session] = await tx<Session[]>`
-          INSERT INTO sessions (project_id, name, model, system_prompt)
-          VALUES (${req.params.id}, ${name}, ${model}, ${systemPrompt})
-          RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at
+          INSERT INTO sessions (project_id, name, model, system_prompt, agent_id)
+          VALUES (${req.params.id}, ${name}, ${model}, ${systemPrompt}, ${agentId})
+          RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
         `;
         await tx`
           INSERT INTO chats (session_id, name, status)
@@ -101,7 +120,7 @@ export function registerSessionRoutes(
 
   app.get<{ Params: { id: string } }>('/api/sessions/:id', async (req, reply) => {
     const rows = await sql<Session[]>`
-      SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at
+      SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
       FROM sessions WHERE id = ${req.params.id}
     `;
     if (rows.length === 0) {
@@ -120,6 +139,10 @@ export function registerSessionRoutes(
         return { error: 'invalid body', details: parsed.error.flatten() };
       }
       const { name, model, system_prompt } = parsed.data;
+      // agent_id is tri-state on the wire: omitted = no change, null = clear,
+      // string = set. CASE WHEN inside SET handles all three atomically.
+      const agentIdProvided = parsed.data.agent_id !== undefined;
+      const newAgentId = parsed.data.agent_id ?? null;
       // Read the prior name so the post-update publish can skip no-op renames
       // (PATCH { name: "Foo" } where the session is already "Foo"). The window
       // between SELECT and UPDATE is sub-millisecond in the same request handler;
@@ -135,9 +158,10 @@ export function registerSessionRoutes(
           name = COALESCE(${name ?? null}, name),
           model = COALESCE(${model ?? null}, model),
           system_prompt = COALESCE(${system_prompt ?? null}, system_prompt),
+          agent_id = CASE WHEN ${agentIdProvided} THEN ${newAgentId} ELSE agent_id END,
           updated_at = clock_timestamp()
         WHERE id = ${req.params.id}
-        RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at
+        RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
       `;
       if (rows.length === 0) {
         reply.code(404);
@@ -183,7 +207,7 @@ export function registerSessionRoutes(
       const rows = await sql<Session[]>`
         UPDATE sessions SET status = 'open', updated_at = clock_timestamp()
         WHERE id = ${req.params.id} AND status = 'archived'
-        RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at
+        RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
       `;
       if (rows.length === 0) {
         reply.code(404);
diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql
index c399bdf..1512fbd 100644
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -153,3 +153,8 @@ BEGIN
       CHECK (status IN ('open', 'archived'));
   END IF;
 END $$;
+
+-- v1.x-batch9: per-session agent reference. Agent definitions are not stored in
+-- the DB; they live in builtins (services/agents.ts) and a per-project AGENTS.md.
+-- agent_id is the slugified agent name. NULL means "use BooCode defaults".
+ALTER TABLE sessions ADD COLUMN IF NOT EXISTS agent_id TEXT;
diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts
new file mode 100644
index 0000000..631b160
--- /dev/null
+++ b/apps/server/src/services/agents.ts
@@ -0,0 +1,458 @@
+import { promises as fs } from 'node:fs';
+import { join } from 'node:path';
+import type { Agent, AgentsResponse } from '../types/api.js';
+
+// Universe of tool names an agent may whitelist; must mirror ALL_TOOLS in services/tools.ts. Keep in sync.
+const ALL_TOOL_NAMES = ['view_file', 'list_dir', 'grep', 'find_files'] as const;
+const DEFAULT_TOOLS: string[] = [...ALL_TOOL_NAMES];
+const DEFAULT_TEMPERATURE = 0.7;
+
+export function slugify(name: string): string {
+  // Lowercase, then collapse each run of non-[a-z0-9] characters into one dash.
+  const dashed = name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
+  // Drop dashes left at either end; an all-symbol name yields ''.
+  return dashed.replace(/^-+|-+$/g, '');
+}
+
+// Six builtin defaults. model is intentionally null so that session.model wins.
+// Fields follow the AGENTS.md format; system prompts match the repo-root AGENTS.md verbatim.
+const BUILTIN_AGENTS: Agent[] = [
+  {
+    id: 'code-reviewer',
+    name: 'Code Reviewer',
+    description: 'Reviews code for bugs, security issues, and maintainability. Read-only.',
+    temperature: 0.3,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You review code. Find real problems, not style nits.
+
+Process:
+1. Read the file(s) in question with view_file. If a diff is provided, read surrounding context too.
+2. Use grep/find_files to check how changed symbols are used elsewhere.
+3. Cite every finding as file:line.
+
+Prioritize in order:
+1. Bugs and logic errors
+2. Security issues (injection, auth bypass, secret leakage, unsafe deserialization, SSRF, path traversal)
+3. Race conditions, error handling, resource leaks
+4. Performance issues with measurable impact
+5. Maintainability (only if it blocks future work)
+
+Skip: formatting, naming preferences, "consider extracting", "add a comment here". The user has a linter.
+
+Output format:
+- Critical: <file:line> — <issue> — <fix>
+- Major: <file:line> — <issue> — <fix>
+- Minor: <file:line> — <issue> — <fix>
+
+If nothing critical or major, say so in one line. Do not pad.`,
+  },
+  {
+    id: 'debugger',
+    name: 'Debugger',
+    description: 'Diagnoses bugs from error messages, logs, or described symptoms.',
+    temperature: 0.2,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You diagnose bugs. Form a hypothesis, prove it with evidence from the code.
+
+Process:
+1. Restate the symptom in one line. Confirm you understand it.
+2. Read the error/stacktrace. Identify the exact frame where things go wrong.
+3. view_file on that frame. Read 50 lines around it.
+4. grep for callers, related state, recent changes that could explain it.
+5. State the root cause with file:line evidence.
+6. Propose the minimal fix. Note any side effects.
+
+Rules:
+- Never guess. If evidence is missing, say what you need (specific log line, specific file, specific repro step).
+- Distinguish symptom from cause. A null check fixes the symptom; missing init causes it.
+- Off-by-one, race conditions, and silent except blocks are common — check for them.
+- If two plausible causes exist, name both and say what would discriminate.
+
+Output:
+- Symptom: <one line>
+- Root cause: <file:line> — <explanation>
+- Fix: <minimal diff or description>
+- Risk: <what could break>`,
+  },
+  {
+    id: 'refactorer',
+    name: 'Refactorer',
+    description: 'Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.',
+    temperature: 0.3,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code.
+
+Process:
+1. Read the target file(s).
+2. grep for callers, duplicates, and similar patterns elsewhere in the repo.
+3. Identify the smallest refactor that delivers the goal.
+
+Prioritize:
+1. Deduplication where 3+ sites have near-identical logic
+2. Extracting a function/module when one is doing two unrelated jobs
+3. Decoupling when a change in A forces a change in B unnecessarily
+4. Renaming when a name actively misleads
+
+Reject:
+- Refactors that touch 10+ files for marginal gain
+- "Modernization" with no concrete benefit
+- Abstraction for future flexibility that may never come
+- Style-only changes
+
+Output:
+- Goal: <one line>
+- Scope: <files affected, count of lines roughly>
+- Plan: numbered steps, each one self-contained
+- Risk: <what tests must pass, what could regress>
+- Skip if: <conditions under which this refactor is not worth doing>`,
+  },
+  {
+    id: 'architect',
+    name: 'Architect',
+    description: 'Designs new features, modules, or architectural changes. Outputs a build plan.',
+    temperature: 0.5,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You design. You produce build plans, not code.
+
+Process:
+1. Restate the goal in your own words. Confirm constraints (perf, deploy, deps).
+2. list_dir the relevant areas. Read existing patterns — match them unless there's a reason not to.
+3. Decide: extend existing code or add new module. Justify.
+4. Sketch the data flow: inputs → transforms → outputs → side effects.
+5. Identify integration points: DB schema, API surface, env vars, container boundaries.
+6. List failure modes and how the design handles them.
+
+Rules:
+- Reuse before inventing. If a service/lib in the repo already does this, say so.
+- Prefer boring tech. New deps require justification.
+- Tailscale IPs for internal routing. No 0.0.0.0 binds.
+- Least privilege: separate read/write paths, explicit auth gates.
+- State assumptions inline. Do not ask clarifying questions mid-design unless blocked.
+
+Output:
+- Goal
+- Existing code to reuse: <file paths>
+- New code: <file paths, one-line purpose each>
+- Data model changes: <SQL or schema diff>
+- API surface: <endpoints, request/response shapes>
+- Failure modes: <list>
+- Build order: numbered, each step 30-90 min`,
+  },
+  {
+    id: 'security-auditor',
+    name: 'Security Auditor',
+    description: 'Audits code for security vulnerabilities. Read-only.',
+    temperature: 0.2,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You audit for security issues. Concrete findings only, no generic warnings.
+
+Process:
+1. Identify the trust boundary: where does untrusted input enter? Where does it leave?
+2. Trace input flow with grep. Mark every transformation.
+3. Check each finding against a real attack scenario.
+
+Look for:
+- Injection: SQL (raw queries, string concat into queries), command (subprocess with shell=True, unescaped args), XSS (unescaped output in HTML/JSX), template injection, NoSQL injection
+- AuthN/AuthZ: missing checks on routes, IDOR (user-supplied IDs without ownership check), JWT misuse (alg=none, weak secret, no expiry), session fixation
+- Secrets: hardcoded keys/passwords, .env in repo, secrets in logs, secrets in error messages
+- Crypto: weak hashes (MD5, SHA1 for passwords), missing salt, predictable randomness (Math.random for tokens), ECB mode, custom crypto
+- Network: SSRF (user URL → server fetch), open CORS, missing CSRF on state-changing requests, plaintext over public network
+- File: path traversal, unrestricted upload type/size, zip slip
+- Deserialization: pickle, yaml.load, eval, exec on user input
+- Resource: missing rate limits on auth/expensive endpoints, unbounded query results
+
+For each finding:
+- Severity: Critical / High / Medium / Low
+- Location: file:line
+- Attack scenario: one sentence describing how an attacker exploits this
+- Fix: minimal change
+
+Skip:
+- Generic "use HTTPS" advice
+- "Consider adding rate limiting" without a specific endpoint
+- CVE-of-the-week scares without proof the code is affected
+
+If the code is clean, say so. Do not invent findings.`,
+  },
+  {
+    id: 'prompt-builder',
+    name: 'Prompt Builder',
+    description: 'Builds prompts for OpenCode, Claude Code, or BooCode dispatch.',
+    temperature: 0.4,
+    tools: [...DEFAULT_TOOLS],
+    model: null,
+    source: 'builtin',
+    system_prompt: `You write prompts that another coding agent will execute. Your output is the prompt, not the work.
+
+Process:
+1. Ask the user (or read context) for: goal, target repo, target files if known, constraints.
+2. list_dir and view_file the target area. Confirm files exist and are roughly the shape you think.
+3. Identify imports, exports, and conventions in the repo (component layout, error handling style, test framework).
+4. Write the prompt.
+
+Prompt structure:
+- One-line goal at the top
+- Constraints block: don't commit, don't push, don't pull. Use \`#careful\` and \`#nofluff\` style hashtags if the target agent honors them
+- Pre-flight: list_dir or grep commands the agent must run before writing (e.g. "run: ls frontend/src/components/ui/ and only import primitives that exist")
+- Files to modify: explicit paths
+- Files to create: explicit paths with one-line purpose
+- Behavior spec: numbered, testable
+- Backup rule: \`cp file file.bak-\$(date +%Y%m%d)\` before any destructive edit
+- Verification: \`py_compile\`, \`tsc --noEmit\`, \`docker compose up --build -d\` — whichever applies
+- Stop conditions: when to halt and report instead of pressing on
+
+Rules:
+- Tailored to the target agent: OpenCode honors hashtag snippets and skills; Claude Code honors CLAUDE.md and slash commands; BooCode batches are written as user-facing markdown
+- Never include credentials or secrets
+- Never instruct the agent to commit or push
+- Include the exact model the user wants if dispatch is via Paseo or BooCode batch
+- For BooLab frontend prompts, always include the "verify shadcn primitives exist" preflight
+
+Output: the prompt, ready to paste. Nothing else.`,
+  },
+];
+
+// ---- AGENTS.md parser ------------------------------------------------------
+
+interface ParsedFrontmatter {
+  temperature?: number; // absent → caller falls back to DEFAULT_TEMPERATURE
+  tools?: string[]; // names as written in the file; absent → caller uses DEFAULT_TOOLS
+  description?: string; // absent → caller uses ''
+  model?: string; // absent or empty → caller stores null
+}
+
+function stripQuotes(s: string): string {
+  // Strip one pair of matching single/double quotes wrapping the value.
+  const first = s[0];
+  const last = s[s.length - 1];
+  const isQuote = first === '"' || first === "'";
+  if (s.length < 2 || !isQuote || first !== last) {
+    return s;
+  }
+  return s.slice(1, -1);
+}
+
+/**
+ * Parses the YAML subset used in AGENTS.md frontmatter (temperature, tools,
+ * description, model). Problems are collected in `errors`, never thrown.
+ */
+function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: string[] } {
+  const data: ParsedFrontmatter = {};
+  const errors: string[] = [];
+  let arrayKey: 'tools' | null = null;
+
+  for (const rawLine of yaml.split('\n')) {
+    const line = rawLine.trim();
+    if (line.length === 0) continue;
+
+    // Block-list item ("- value") under a key whose own value was empty.
+    if (arrayKey && line.startsWith('- ')) {
+      const item = stripQuotes(line.slice(2).trim());
+      if (item.length > 0) data[arrayKey]!.push(item);
+      continue;
+    }
+    arrayKey = null;
+
+    const colonIdx = line.indexOf(':');
+    if (colonIdx < 0) continue;
+    const key = line.slice(0, colonIdx).trim();
+    const valueRaw = line.slice(colonIdx + 1).trim();
+
+    if (key === 'temperature') {
+      // Number('') is 0, so an empty value has to be rejected explicitly.
+      const n = Number(valueRaw);
+      if (valueRaw !== '' && Number.isFinite(n)) data.temperature = n;
+      else errors.push(`temperature must be a number (got "${valueRaw}")`);
+    } else if (key === 'tools') {
+      if (valueRaw === '') {
+        data.tools = [];
+        arrayKey = 'tools';
+      } else {
+        // Inline forms: "tools: [a, b]" or the loose "tools: a, b, c".
+        const bracketed = valueRaw.startsWith('[') && valueRaw.endsWith(']');
+        data.tools = (bracketed ? valueRaw.slice(1, -1) : valueRaw)
+          .split(',')
+          .map((s) => stripQuotes(s.trim()))
+          .filter((s) => s.length > 0);
+      }
+    } else if (key === 'description') {
+      data.description = stripQuotes(valueRaw);
+    } else if (key === 'model') {
+      data.model = stripQuotes(valueRaw);
+    }
+    // Unknown keys silently ignored — forward-compat.
+  }
+
+  return { data, errors };
+}
+
+interface ParseResult {
+  agents: Agent[]; // sections that parsed successfully, in file order
+  error: string | null; // per-section problems joined with "; ", or null if none
+}
+
+/**
+ * Parses AGENTS.md into agents: one per "## <name>" section, each holding a
+ * `---`-fenced frontmatter block followed by the system prompt. Sections that
+ * fail validation are skipped and described in `error` (joined with "; ").
+ */
+export function parseAgentsMd(content: string): ParseResult {
+  const errors: string[] = [];
+  const agents: Agent[] = [];
+  const seenIds = new Set<string>();
+
+  // Split into per-agent sections at level-2 headings. The pattern requires
+  // whitespace right after "##", so "### ..." headings never match.
+  const sections: { name: string; body: string }[] = [];
+  let currentName: string | null = null;
+  let currentLines: string[] = [];
+
+  for (const line of content.split('\n')) {
+    const h2 = /^##\s+(.+?)\s*$/.exec(line);
+    if (h2) {
+      if (currentName !== null) {
+        sections.push({ name: currentName, body: currentLines.join('\n') });
+      }
+      currentName = h2[1]!.trim();
+      currentLines = [];
+      continue;
+    }
+    if (currentName !== null) {
+      currentLines.push(line);
+    }
+  }
+  if (currentName !== null) {
+    sections.push({ name: currentName, body: currentLines.join('\n') });
+  }
+
+  for (const section of sections) {
+    const lines = section.body.split('\n');
+    // Opening "---" fence must be the first non-empty line (blank lines allowed).
+    const firstIdx = lines.findIndex((l) => l.trim() !== '');
+    const openIdx = firstIdx >= 0 && lines[firstIdx]!.trim() === '---' ? firstIdx : -1;
+    if (openIdx < 0) {
+      errors.push(`agent "${section.name}": missing opening --- fence after heading`);
+      continue;
+    }
+    const closeIdx = lines.findIndex((l, i) => i > openIdx && l.trim() === '---');
+    if (closeIdx < 0) {
+      errors.push(`agent "${section.name}": missing closing --- fence`);
+      continue;
+    }
+    const yamlText = lines.slice(openIdx + 1, closeIdx).join('\n');
+    const systemPrompt = lines.slice(closeIdx + 1).join('\n').trim();
+
+    const { data: fm, errors: fmErrors } = parseFrontmatter(yamlText);
+    if (fmErrors.length > 0) {
+      errors.push(`agent "${section.name}": ${fmErrors.join('; ')}`);
+      continue;
+    }
+
+    // The id is stored in sessions.agent_id, so it must be non-empty and unique.
+    const id = slugify(section.name);
+    if (id.length === 0 || seenIds.has(id)) {
+      errors.push(`agent "${section.name}": heading yields empty or duplicate id "${id}"`);
+      continue;
+    }
+    seenIds.add(id);
+
+    // Unknown tool names are dropped. The default list is copied so agents
+    // never share the module-level array.
+    const filteredTools = Array.isArray(fm.tools)
+      ? fm.tools.filter((t) => (ALL_TOOL_NAMES as readonly string[]).includes(t))
+      : [...DEFAULT_TOOLS];
+
+    agents.push({
+      id,
+      name: section.name,
+      description: fm.description ?? '',
+      system_prompt: systemPrompt,
+      temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE,
+      tools: filteredTools,
+      model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
+      source: 'file',
+    });
+  }
+
+  return { agents, error: errors.length > 0 ? errors.join('; ') : null };
+}
+
+// ---- mtime-keyed cache + public API ----------------------------------------
+
+interface CacheEntry {
+  mtimeMs: number; // AGENTS.md mtime observed when `result` was computed
+  result: AgentsResponse;
+}
+
+const cache = new Map<string, CacheEntry>(); // keyed by project path
+
+// Test/admin: drop the cached parse for one project (or all when omitted).
+export function invalidateAgentsCache(projectPath?: string): void {
+  if (projectPath !== undefined) {
+    cache.delete(projectPath);
+    return;
+  }
+  cache.clear();
+}
+
+/**
+ * Returns the agents offered for a project: those from its AGENTS.md when the
+ * file parses cleanly and defines at least one, otherwise the builtins.
+ * Results are cached per project path and reused while the mtime is unchanged.
+ */
+export async function getAgentsForProject(projectPath: string): Promise<AgentsResponse> {
+  const agentsPath = join(projectPath, 'AGENTS.md');
+  let mtimeMs: number;
+  try {
+    mtimeMs = (await fs.stat(agentsPath)).mtimeMs;
+  } catch {
+    // No AGENTS.md → builtins, no parse error
+    cache.delete(projectPath);
+    return { agents: BUILTIN_AGENTS, parse_error: null };
+  }
+
+  const cached = cache.get(projectPath);
+  if (cached && cached.mtimeMs === mtimeMs) return cached.result;
+
+  let content: string;
+  try {
+    content = await fs.readFile(agentsPath, 'utf8');
+  } catch (err) {
+    // Exists but unreadable (permissions, a directory, ...): say why.
+    cache.delete(projectPath);
+    const reason = err instanceof Error ? err.message : String(err);
+    return { agents: BUILTIN_AGENTS, parse_error: `AGENTS.md could not be read: ${reason}` };
+  }
+
+  // A parse error or an agent-less file falls back to the builtins; only a
+  // cleanly parsed, non-empty file replaces them.
+  const parsed = parseAgentsMd(content);
+  const useFile = !parsed.error && parsed.agents.length > 0;
+  const result: AgentsResponse = {
+    agents: useFile ? parsed.agents : BUILTIN_AGENTS,
+    parse_error: parsed.error || null,
+  };
+
+  cache.set(projectPath, { mtimeMs, result });
+  return result;
+}
+
+// Resolves one agent by id from the project's currently effective list
+// (file-defined or builtin); yields null when no agent has that id.
+export async function getAgentById(projectPath: string, agentId: string): Promise<Agent | null> {
+  const available = (await getAgentsForProject(projectPath)).agents;
+  const match = available.find((candidate) => candidate.id === agentId);
+  return match ?? null;
+}
+
+export { BUILTIN_AGENTS };
diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts
index 5920760..704ca20 100644
--- a/apps/server/src/services/inference.ts
+++ b/apps/server/src/services/inference.ts
@@ -1,10 +1,11 @@
 import type { FastifyBaseLogger } from 'fastify';
 import type { Sql } from '../db.js';
 import type { Config } from '../config.js';
-import type { Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
-import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas } from './tools.js';
+import type { Agent, Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
+import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas, type ToolJsonSchema } from './tools.js';
 import { PathScopeError, resolveProjectRoot } from './path_guard.js';
 import { maybeAutoNameChat } from './auto_name.js';
+import { getAgentById } from './agents.js';
 
 const BASE_SYSTEM_PROMPT = (projectPath: string) =>
   `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -91,16 +92,32 @@ export interface InferenceContext {
   publishUser: (frame: UserStreamFrame) => void;
 }
 
+// Layers the effective system prompt: base prompt, then the agent's prompt,
+// then the session's own prompt. The session prompt is the most specific
+// override and goes last; blank layers are skipped.
+export function buildSystemPrompt(
+  project: Project,
+  session: Session,
+  agent: Agent | null
+): string {
+  const layers = [BASE_SYSTEM_PROMPT(project.path)];
+  const agentPrompt = agent?.system_prompt.trim() ?? '';
+  if (agentPrompt.length > 0) layers.push(agentPrompt);
+
+  const sessionPrompt = session.system_prompt?.trim() ?? '';
+  if (sessionPrompt.length > 0) layers.push(sessionPrompt);
+
+  return layers.join('\n\n');
+}
+
 export function buildMessagesPayload(
   session: Session,
   project: Project,
-  history: Message[]
+  history: Message[],
+  agent: Agent | null = null
 ): OpenAiMessage[] {
   const out: OpenAiMessage[] = [];
-  let systemPrompt = BASE_SYSTEM_PROMPT(project.path);
-  if (session.system_prompt && session.system_prompt.trim().length > 0) {
-    systemPrompt += '\n\n' + session.system_prompt.trim();
-  }
+  const systemPrompt = buildSystemPrompt(project, session, agent);
   out.push({ role: 'system', content: systemPrompt });
 
   // Find the latest compact marker — only send messages from that point onwards
@@ -161,7 +178,7 @@ async function loadContext(
   chatId: string
 ): Promise<{ session: Session; project: Project; history: Message[] } | null> {
   const sessionRows = await sql<Session[]>`
-    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at
+    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id
     FROM sessions WHERE id = ${sessionId}
   `;
   if (sessionRows.length === 0) return null;
@@ -217,11 +234,18 @@ interface StreamResult {
   nCtx: number | null;
 }
 
+interface StreamOptions {
+  // null = omit tools entirely (compact phase); [] = caller stripped all tools
+  // (rare; we still omit from the request body to avoid OpenAI 400).
+  tools: ToolJsonSchema[] | null;
+  temperature?: number;
+}
+
 async function streamCompletion(
   ctx: InferenceContext,
   model: string,
   messages: OpenAiMessage[],
-  includeTools: boolean,
+  opts: StreamOptions,
   onDelta: (content: string) => void,
   signal?: AbortSignal
 ): Promise<StreamResult> {
@@ -231,10 +255,13 @@ async function streamCompletion(
     stream: true,
     stream_options: { include_usage: true },
   };
-  if (includeTools) {
-    body['tools'] = toolJsonSchemas();
+  if (opts.tools && opts.tools.length > 0) {
+    body['tools'] = opts.tools;
     body['tool_choice'] = 'auto';
   }
+  if (typeof opts.temperature === 'number') {
+    body['temperature'] = opts.temperature;
+  }
 
   const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
     method: 'POST',
@@ -366,7 +393,8 @@ async function executeStreamPhase(
   args: TurnArgs,
   session: Session,
   messages: OpenAiMessage[],
-  state: StreamPhaseState
+  state: StreamPhaseState,
+  agent: Agent | null
 ): Promise<StreamResult> {
   const { sessionId, chatId, assistantMessageId, signal } = args;
 
@@ -407,12 +435,20 @@ async function executeStreamPhase(
     }, DB_FLUSH_INTERVAL_MS);
   };
 
+  // Tool whitelist: if an agent is set, filter the global tool list to only the
+  // tool names it allows. Unknown names in agent.tools are dropped silently
+  // (handled here by intersection). When no agent: send all tools.
+  const effectiveTools: ToolJsonSchema[] = agent
+    ? toolJsonSchemas().filter((t) => agent.tools.includes(t.function.name))
+    : toolJsonSchemas();
+  const effectiveTemperature = agent?.temperature;
+
   try {
     return await streamCompletion(
       ctx,
       session.model,
       messages,
-      true,
+      { tools: effectiveTools, temperature: effectiveTemperature },
       (delta) => {
         state.accumulated += delta;
         ctx.publish(sessionId, {
@@ -657,12 +693,18 @@ async function runAssistantTurn(
   }
   const { session, project, history } = loaded;
   const projectRoot = await resolveProjectRoot(project.path);
-  const messages = buildMessagesPayload(session, project, history);
+  // Agent resolution is per-turn so PATCH agent_id mid-conversation takes
+  // effect on the next message. Unknown agent_id returns null silently —
+  // session falls back to base prompt + all tools + default temperature.
+  const agent = session.agent_id
+    ? await getAgentById(project.path, session.agent_id)
+    : null;
+  const messages = buildMessagesPayload(session, project, history, agent);
 
   const state: StreamPhaseState = { accumulated: '', startedAt: null };
   let result: StreamResult;
   try {
-    result = await executeStreamPhase(ctx, args, session, messages, state);
+    result = await executeStreamPhase(ctx, args, session, messages, state, agent);
   } catch (err) {
     await handleAbortOrError(ctx, args, state.accumulated, err);
     return;
@@ -720,7 +762,7 @@ async function runCompact(
       ctx,
       session.model,
       messagesForSummary,
-      false,
+      { tools: null },
       (delta) => {
         content += delta;
         ctx.publish(sessionId, {
diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts
index d7ac418..8d06e97 100644
--- a/apps/server/src/types/api.ts
+++ b/apps/server/src/types/api.ts
@@ -28,6 +28,27 @@ export interface Session {
   status: SessionStatus;
   created_at: string;
   updated_at: string;
+  agent_id: string | null;
+}
+
+// Agent sources: 'builtin' = baked-in default (services/agents.ts),
+// 'file' = parsed from project's AGENTS.md.
+export type AgentSource = 'builtin' | 'file';
+
+export interface Agent {
+  id: string;            // slug of name; stable handle stored in sessions.agent_id
+  name: string;
+  description: string;
+  system_prompt: string;
+  temperature: number;
+  tools: string[];       // whitelist of tool names; empty = no tools allowed
+  model: string | null;  // null means "session.model wins"
+  source: AgentSource;
+}
+
+export interface AgentsResponse {
+  agents: Agent[];
+  parse_error: string | null;  // present (non-null) when AGENTS.md exists but failed to parse
 }
 
 // KEEP IN SYNC: apps/server/src/schema.sql chats_status_chk
diff --git a/apps/web/src/api/client.ts b/apps/web/src/api/client.ts
index 11704ba..056719a 100644
--- a/apps/web/src/api/client.ts
+++ b/apps/web/src/api/client.ts
@@ -8,6 +8,7 @@ import type {
   SidebarResponse,
   ListDirResult,
   ViewFileResult,
+  AgentsResponse,
 } from './types';
 
 export class ApiError extends Error {
@@ -93,7 +94,7 @@ export const api = {
       request<Session[]>(`/api/projects/${projectId}/sessions${status ? `?status=${status}` : ''}`),
     create: (
       projectId: string,
-      body: { name?: string; model?: string; system_prompt?: string }
+      body: { name?: string; model?: string; system_prompt?: string; agent_id?: string | null }
     ) =>
       request<Session>(`/api/projects/${projectId}/sessions`, {
         method: 'POST',
@@ -102,7 +103,7 @@ export const api = {
     get: (id: string) => request<Session>(`/api/sessions/${id}`),
     update: (
       id: string,
-      body: Partial<Pick<Session, 'name' | 'model' | 'system_prompt'>>
+      body: Partial<Pick<Session, 'name' | 'model' | 'system_prompt' | 'agent_id'>>
     ) =>
       request<Session>(`/api/sessions/${id}`, {
         method: 'PATCH',
@@ -179,6 +180,11 @@ export const api = {
 
   models: () => request<ModelInfo[]>('/api/models'),
 
+  agents: {
+    list: (projectId: string) =>
+      request<AgentsResponse>(`/api/projects/${projectId}/agents`),
+  },
+
   settings: {
     get: () => request<Record<string, unknown>>('/api/settings'),
     patch: (body: Record<string, unknown>) =>
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 4000aeb..855a106 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -27,6 +27,25 @@ export interface Session {
   status: SessionStatus;
   created_at: string;
   updated_at: string;
+  agent_id: string | null;
+}
+
+export type AgentSource = 'builtin' | 'file';
+
+export interface Agent {
+  id: string;
+  name: string;
+  description: string;
+  system_prompt: string;
+  temperature: number;
+  tools: string[];
+  model: string | null;
+  source: AgentSource;
+}
+
+export interface AgentsResponse {
+  agents: Agent[];
+  parse_error: string | null;
 }
 
 export const CHAT_STATUSES = ['open', 'archived'] as const;
diff --git a/apps/web/src/components/AgentPicker.tsx b/apps/web/src/components/AgentPicker.tsx
new file mode 100644
index 0000000..d62c239
--- /dev/null
+++ b/apps/web/src/components/AgentPicker.tsx
@@ -0,0 +1,108 @@
+import { useEffect, useState } from 'react';
+import { Check, ChevronDown } from 'lucide-react';
+import { toast } from 'sonner';
+import { api } from '@/api/client';
+import type { Agent } from '@/api/types';
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from '@/components/ui/dropdown-menu';
+
+interface Props {
+  projectId: string;
+  value: string | null;
+  onChange: (agentId: string | null) => void | Promise<void>;
+}
+
+// Toolbar dropdown listing the project's agents; reports the chosen agent id
+// (or null for "No agent") through onChange and toasts if that call fails.
+export function AgentPicker({ projectId, value, onChange }: Props) {
+  const [agents, setAgents] = useState<Agent[] | null>(null);
+  const [error, setError] = useState<string | null>(null);
+  const [open, setOpen] = useState(false);
+
+  // Load on mount (and on projectId change) so the trigger shows the agent
+  // name, not the raw id. AGENTS.md parse errors surface as one toast per load.
+  useEffect(() => {
+    let cancelled = false;
+    setAgents(null);
+    setError(null);
+    api.agents
+      .list(projectId)
+      .then((res) => {
+        if (cancelled) return;
+        setAgents(res.agents);
+        if (res.parse_error) toast.error(`AGENTS.md parse error: ${res.parse_error}`);
+      })
+      .catch((err) => {
+        if (cancelled) return;
+        setError(err instanceof Error ? err.message : 'failed to load agents');
+      });
+    return () => { cancelled = true; };
+  }, [projectId]);
+
+  const selectedAgent = agents?.find((a) => a.id === value) ?? null;
+  const triggerLabel = value === null ? 'No agent' : selectedAgent?.name ?? value;
+
+  // onChange may reject (e.g. the request fails): toast it instead of dropping it.
+  const select = async (agentId: string | null): Promise<void> => {
+    try {
+      await onChange(agentId);
+    } catch (err) {
+      toast.error(err instanceof Error ? err.message : 'failed to change agent');
+    }
+  };
+
+  return (
+    <DropdownMenu open={open} onOpenChange={setOpen}>
+      <DropdownMenuTrigger asChild>
+        <button
+          type="button"
+          className="text-xs text-muted-foreground hover:text-foreground flex items-center gap-1 px-1.5 py-0.5 rounded hover:bg-muted/60"
+          title={selectedAgent?.description ?? undefined}
+        >
+          <span className="truncate max-w-[160px]">{triggerLabel}</span>
+          <ChevronDown className="size-3 opacity-70" />
+        </button>
+      </DropdownMenuTrigger>
+      <DropdownMenuContent align="start" className="max-h-80 overflow-y-auto w-72">
+        {error && (
+          <div className="px-2 py-1.5 text-xs text-destructive">{error}</div>
+        )}
+        {agents === null && !error && (
+          <div className="px-2 py-1.5 text-xs text-muted-foreground">Loading…</div>
+        )}
+        {agents !== null && (
+          <>
+            <DropdownMenuItem onSelect={() => void select(null)} className="text-xs">
+              <Check className={`size-3 ${value === null ? 'opacity-100' : 'opacity-0'}`} />
+              <span className="font-medium">No agent</span>
+            </DropdownMenuItem>
+            {agents.length > 0 && <DropdownMenuSeparator />}
+            {agents.map((a) => (
+              <DropdownMenuItem
+                key={a.id}
+                onSelect={() => void select(a.id)}
+                className="text-xs flex-col items-start gap-0.5"
+              >
+                <div className="flex items-center gap-1.5">
+                  <Check className={`size-3 ${a.id === value ? 'opacity-100' : 'opacity-0'}`} />
+                  <span className="font-medium">{a.name}</span>
+                </div>
+                {a.description && (
+                  <span className="text-muted-foreground pl-[18px] truncate w-full">
+                    {a.description}
+                  </span>
+                )}
+              </DropdownMenuItem>
+            ))}
+          </>
+        )}
+      </DropdownMenuContent>
+    </DropdownMenu>
+  );
+}
diff --git a/apps/web/src/components/ChatInput.tsx b/apps/web/src/components/ChatInput.tsx
index c3da3af..540154b 100644
--- a/apps/web/src/components/ChatInput.tsx
+++ b/apps/web/src/components/ChatInput.tsx
@@ -15,6 +15,7 @@ import { AttachmentChip } from '@/components/AttachmentChip';
 import { AttachmentPreviewModal } from '@/components/AttachmentPreviewModal';
 import { FileMentionPopover } from '@/components/FileMentionPopover';
 import { DropOverlay } from '@/components/DropOverlay';
+import { AgentPicker } from '@/components/AgentPicker';
 import { api } from '@/api/client';
 import { sessionEvents } from '@/hooks/sessionEvents';
 import { useViewport } from '@/hooks/useViewport';
@@ -24,11 +25,15 @@ const MAX_ATTACHMENTS = 10;
 interface Props {
   disabled?: boolean;
   projectId: string;
+  // Batch 9: optional so callers that pre-date the agent picker still compile.
+  // When omitted, the toolbar row is hidden entirely.
+  agentId?: string | null;
+  onAgentChange?: (agentId: string | null) => void | Promise<void>;
   onSend: (content: string) => void | Promise<void>;
   onForceSend?: (content: string) => void | Promise<void>;
 }
 
-export function ChatInput({ disabled, projectId, onSend, onForceSend }: Props) {
+export function ChatInput({ disabled, projectId, agentId, onAgentChange, onSend, onForceSend }: Props) {
   const { isMobile } = useViewport();
   const [value, setValue] = useState('');
   const [busy, setBusy] = useState(false);
@@ -420,6 +425,18 @@ export function ChatInput({ disabled, projectId, onSend, onForceSend }: Props) {
           ))}
         </div>
       )}
+      {/* Batch 9 toolbar — agent picker. Sits above the input row so it
+          doesn't compete with the send button for vertical alignment.
+          When Batch 7 lands, ModelPicker and the + button join this row. */}
+      {onAgentChange && (
+        <div className="px-4 pt-2 flex items-center gap-1.5">
+          <AgentPicker
+            projectId={projectId}
+            value={agentId ?? null}
+            onChange={onAgentChange}
+          />
+        </div>
+      )}
       <div className="px-4 py-3 flex items-end gap-2">
         <Textarea
           ref={textareaRef}
diff --git a/apps/web/src/components/Workspace.tsx b/apps/web/src/components/Workspace.tsx
index cf92dc7..5bd5fa0 100644
--- a/apps/web/src/components/Workspace.tsx
+++ b/apps/web/src/components/Workspace.tsx
@@ -20,9 +20,12 @@ import { cn } from '@/lib/utils';
 interface Props {
   sessionId: string;
   projectId: string;
+  // Batch 9: threaded down to ChatPane → ChatInput → AgentPicker.
+  agentId?: string | null;
+  onAgentChange?: (agentId: string | null) => void | Promise<void>;
 }
 
-export function Workspace({ sessionId, projectId }: Props) {
+export function Workspace({ sessionId, projectId, agentId, onAgentChange }: Props) {
   const {
     panes,
     activePaneIdx,
@@ -219,7 +222,14 @@ export function Workspace({ sessionId, projectId }: Props) {
 
             <div className="flex-1 min-h-0 overflow-hidden">
               {pane.kind === 'chat' && pane.chatId ? (
-                <ChatPane sessionId={sessionId} chatId={pane.chatId} projectId={projectId} sessionChats={chats} />
+                <ChatPane
+                  sessionId={sessionId}
+                  chatId={pane.chatId}
+                  projectId={projectId}
+                  agentId={agentId}
+                  onAgentChange={onAgentChange}
+                  sessionChats={chats}
+                />
               ) : (
                 <SessionLandingPage
                   sessionId={sessionId}
diff --git a/apps/web/src/components/panes/ChatPane.tsx b/apps/web/src/components/panes/ChatPane.tsx
index 95abe8d..8a91791 100644
--- a/apps/web/src/components/panes/ChatPane.tsx
+++ b/apps/web/src/components/panes/ChatPane.tsx
@@ -18,10 +18,13 @@ interface Props {
   sessionId: string;
   chatId: string;
   projectId: string;
+  // Batch 9: optional, threaded down to ChatInput's agent picker.
+  agentId?: string | null;
+  onAgentChange?: (agentId: string | null) => void | Promise<void>;
   sessionChats?: import('@/api/types').Chat[];
 }
 
-export function ChatPane({ sessionId, chatId, projectId, sessionChats }: Props) {
+export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange, sessionChats }: Props) {
   const stream = useSessionStream(sessionId);
   const lastErrorRef = useRef<string | null>(null);
   const [queue, setQueue] = useState<string[]>([]);
@@ -167,7 +170,14 @@ export function ChatPane({ sessionId, chatId, projectId, sessionChats }: Props)
 
       <div className="relative">
         <ChatContextPopover stats={contextStats} />
-        <ChatInput disabled={false} projectId={projectId} onSend={handleSend} onForceSend={streaming ? handleForceSend : undefined} />
+        <ChatInput
+          disabled={false}
+          projectId={projectId}
+          agentId={agentId}
+          onAgentChange={onAgentChange}
+          onSend={handleSend}
+          onForceSend={streaming ? handleForceSend : undefined}
+        />
       </div>
     </div>
   );
diff --git a/apps/web/src/pages/Session.tsx b/apps/web/src/pages/Session.tsx
index 440af55..102241c 100644
--- a/apps/web/src/pages/Session.tsx
+++ b/apps/web/src/pages/Session.tsx
@@ -193,7 +193,15 @@ export function Session() {
       </header>
 
       {id && session && (
-        <Workspace sessionId={id} projectId={session.project_id} />
+        <Workspace
+          sessionId={id}
+          projectId={session.project_id}
+          agentId={session.agent_id}
+          onAgentChange={async (agent_id) => {
+            const updated = await api.sessions.update(session.id, { agent_id });
+            setSession(updated);
+          }}
+        />
       )}
     </div>
   );