v2.5.0-task-model: lightweight task model services + tasks table

Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
v2.4.1-sidecar-routing: route per-agent flags to llama-sidecar + tool gap fix
2026-05-27 21:44:39 +00:00 · 2026-05-27 19:28:08 +00:00
16 changed files with 349 additions and 80 deletions
--- a/.env.example
+++ b/.env.example
@@ -11,6 +11,11 @@ POSTGRES_PASSWORD=CHANGE_ME
 # point BooCode at a different SearXNG instance.
 SEARXNG_URL=http://100.114.205.53:8888
 # Task model: lightweight model for auto-naming, search rewrite, etc.
 # Direct llama-server instance (NOT llama-swap). Falls back to LLAMA_SWAP_URL
 # with FAST_MODEL when unset.
 # TASK_MODEL_URL=http://100.90.172.55:7995
 # v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
 # Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
 # sessions where the model only needs read-only filesystem access.
--- a/apps/server/src/config.ts
+++ b/apps/server/src/config.ts
@@ -25,6 +25,8 @@ const ConfigSchema = z.object({
  // v2.0.5: cheaper model for titles, summaries, labeling. Falls back to
  // session model (auto_name) or DEFAULT_MODEL when unset.
  FAST_MODEL: z.string().optional(),
  TASK_MODEL_URL: z.string().url().optional(),
  LLAMA_SIDECAR_URL: z.string().url().optional(),
 });
 export type Config = z.infer<typeof ConfigSchema>;
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -28,7 +28,7 @@ import { cleanupTruncations } from './services/truncate.js';
 import { loadMcpConfig } from './services/mcp-config.js';
 import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
 import { appendMcpTools } from './services/tools.js';
-import { refreshToolNames } from './services/agents.js';
+import { refreshToolNames, getAgentsForProject } from './services/agents.js';
 async function main() {
  const config = loadConfig();
@@ -91,6 +91,20 @@ async function main() {
  }
  app.addHook('onClose', async () => { await shutdownMcp(); });
  // Boot-time guard: if any agent has llama_extra_args but LLAMA_SIDECAR_URL
  // is unset, fail fast. Silent fallback would defeat per-agent flags.
  if (!config.LLAMA_SIDECAR_URL) {
    const { agents } = await getAgentsForProject('');
    const offending = agents.find(a => a.llama_extra_args && a.llama_extra_args.length > 0);
    if (offending) {
      app.log.fatal(
        { agent: offending.name },
        `Agent "${offending.name}" has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
      );
      process.exit(1);
    }
  }
  await app.register(fastifyWebsocket);
  app.get('/api/health', async () => {
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -344,6 +344,7 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k
 ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
 ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
 ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
 ALTER TABLE sessions ADD COLUMN IF NOT EXISTS tags TEXT[] DEFAULT '{}';
 -- v1.11: anchored rolling compaction.
 --   compacted_at  — marks rows that are "behind the curtain" of the latest
@@ -366,3 +367,39 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT F
 ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
 ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
 CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
 -- tasks table (provider dispatch, arena)
 -- Created only when absent: CREATE TABLE IF NOT EXISTS leaves an existing
 -- table (and its constraints) untouched.
 CREATE TABLE IF NOT EXISTS tasks (
  id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Required owner; deleting the project deletes its tasks.
  project_id        UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  -- Nullable; when set, deleting the session deletes the task.
  session_id        UUID REFERENCES sessions(id) ON DELETE CASCADE,
  -- Self-reference with no ON DELETE clause.
  -- NOTE(review): confirm what should happen to children when a parent task
  -- is deleted (e.g. via the session cascade above).
  parent_task_id    UUID REFERENCES tasks(id),
  -- Plain UUID; no foreign key is declared for it here.
  arena_id          UUID,
  -- Lifecycle state, limited to the listed values; new rows start as 'pending'.
  state             TEXT NOT NULL DEFAULT 'pending'
                    CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')),
  input             TEXT NOT NULL,
  output_summary    TEXT,
  agent             TEXT,
  model             TEXT,
  mode_id           TEXT,
  thinking_option_id TEXT,
  feature_values    JSONB,
  -- Optional; when present it must be one of the listed execution paths.
  execution_path    TEXT CHECK (execution_path IS NULL OR execution_path IN ('native','acp','pty','qwen')),
  worktree_path     TEXT,
  cost_tokens       INTEGER,
  started_at        TIMESTAMPTZ,
  ended_at          TIMESTAMPTZ,
  -- clock_timestamp() is evaluated per row rather than once per transaction.
  created_at        TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
 );
 -- Fix tasks FK to cascade on session delete (existing tables without CASCADE).
 -- CREATE TABLE IF NOT EXISTS never alters an existing table, so a tasks table
 -- that already carries a non-cascading session FK keeps it until this block
 -- swaps the constraint. The block is a no-op once the FK already cascades.
 DO $$ BEGIN
  IF EXISTS (
    SELECT 1 FROM pg_constraint
    WHERE conname = 'tasks_session_id_fkey'
      -- Constraint names are only unique per table: pin the lookup to the
      -- tasks table and to foreign keys so a same-named constraint elsewhere
      -- can neither trigger nor mask the repair.
      AND conrelid = 'tasks'::regclass
      AND contype = 'f'
      -- confdeltype 'c' = ON DELETE CASCADE; anything else needs replacing.
      AND confdeltype != 'c'
  ) THEN
    ALTER TABLE tasks DROP CONSTRAINT tasks_session_id_fkey;
    ALTER TABLE tasks ADD CONSTRAINT tasks_session_id_fkey
      FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
  END IF;
 END $$;
--- a/apps/server/src/services/tests/provider.test.ts
+++ b/apps/server/src/services/tests/provider.test.ts
@@ -0,0 +1,58 @@
 import { describe, expect, it } from 'vitest';
 import { resolveRoute, upstreamModel } from '../inference/provider.js';
 // resolveRoute: a missing or empty llama_extra_args list keeps the agent on
 // llama-swap; a non-empty list selects the sidecar and carries the flags along.
 describe('resolveRoute', () => {
  const expectedSwap = { route: 'swap', flags: null };

  it('routes to swap when agent is null', () => {
    const routing = resolveRoute(null);
    expect(routing).toEqual(expectedSwap);
  });

  it('routes to swap when agent has no llama_extra_args', () => {
    const routing = resolveRoute({ llama_extra_args: null });
    expect(routing).toEqual(expectedSwap);
  });

  it('routes to swap when agent has empty llama_extra_args', () => {
    const routing = resolveRoute({ llama_extra_args: [] });
    expect(routing).toEqual(expectedSwap);
  });

  it('routes to sidecar when agent has llama_extra_args', () => {
    const { route, flags } = resolveRoute({ llama_extra_args: ['--top-k', '20'] });
    expect(route).toBe('sidecar');
    expect(flags).toEqual(['--top-k', '20']);
  });
 });
 // upstreamModel: checks that a model handle is produced for both routes and
 // that asking for the sidecar without LLAMA_SIDECAR_URL throws.
 describe('upstreamModel', () => {
  const MODEL_ID = 'test-model';
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
  const fullConfig = {
    LLAMA_SWAP_URL: 'http://localhost:8401',
    LLAMA_SIDECAR_URL: 'http://localhost:8402',
  };
  // Fresh object per call so no test can observe another test's agent.
  const agentWithFlags = () => ({ llama_extra_args: ['--top-k', '20'] });

  it('returns a model for swap route (no agent)', () => {
    const handle = upstreamModel(swapConfig, MODEL_ID);
    expect(handle).toBeDefined();
    expect((handle as any).modelId).toBe('test-model');
  });

  it('returns a model for swap route (agent without extra args)', () => {
    const handle = upstreamModel(swapConfig, MODEL_ID, { llama_extra_args: null });
    expect(handle).toBeDefined();
  });

  it('returns a model for sidecar route', () => {
    const handle = upstreamModel(fullConfig, MODEL_ID, agentWithFlags());
    expect(handle).toBeDefined();
    expect((handle as any).modelId).toBe('test-model');
  });

  it('throws when sidecar route requested but URL missing', () => {
    const attempt = () => upstreamModel(swapConfig, MODEL_ID, agentWithFlags());
    expect(attempt).toThrow(/LLAMA_SIDECAR_URL/);
  });

  it('routes to swap for empty llama_extra_args array', () => {
    const handle = upstreamModel(swapConfig, MODEL_ID, { llama_extra_args: [] });
    expect(handle).toBeDefined();
  });
 });
--- a/apps/server/src/services/auto_name.ts
+++ b/apps/server/src/services/auto_name.ts
@@ -1,9 +1,10 @@
 import type { InferenceContext } from './inference/index.js';
 import { taskModelCompletion } from './task-model.js';
 const NAMING_SYSTEM_PROMPT =
-  'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
+  'You name chat sessions. Reply with ONLY the title. 4 to 6 words. No quotes, no punctuation, no prefix.';
-const MAX_TITLE_CHARS = 60;
+const MAX_TITLE_CHARS = 80;
 function cleanTitle(raw: string): string {
  let name = raw.trim();
@@ -18,27 +19,7 @@ function cleanTitle(raw: string): string {
  return name;
 }
-interface NamingResponse {
+// TODO: wire suggestTags after task model validation
  choices?: Array<{
    message?: {
      content?: string;
      reasoning_content?: string;
    };
  }>;
 }
 function pickTitleSource(data: NamingResponse): string {
  const choice = data.choices?.[0]?.message;
  if (!choice) return '';
  if (choice.content && choice.content.trim().length > 0) return choice.content;
  const reasoning = choice.reasoning_content ?? '';
  if (reasoning.length === 0) return '';
  const lines = reasoning
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
  return lines[lines.length - 1] ?? '';
 }
 export async function maybeAutoNameChat(
  ctx: InferenceContext,
@@ -64,13 +45,6 @@ export async function maybeAutoNameChat(
  if (!chat) return;
  if (chat.name !== null && chat.name !== '') return;
  const sessionRows = await ctx.sql<{ model: string }[]>`
    SELECT model FROM sessions WHERE id = ${sessionId}
  `;
  // v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
  const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
  if (!model) return;
  const assistantMsg = await ctx.sql<{ content: string }[]>`
    SELECT content FROM messages
    WHERE chat_id = ${chatId}
@@ -84,32 +58,12 @@ export async function maybeAutoNameChat(
  const assistantText = assistantMsg[0].content.slice(0, 2000);
-  const body = {
+  const raw = await taskModelCompletion({
-    model,
+    system: NAMING_SYSTEM_PROMPT,
-    messages: [
+    user: assistantText,
-      { role: 'system', content: NAMING_SYSTEM_PROMPT },
+    maxTokens: 30,
      {
        role: 'user',
        content: assistantText,
      },
    ],
    max_tokens: 30,
    temperature: 0.3,
    stream: false,
    chat_template_kwargs: { enable_thinking: false },
  };
  const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  if (!res.ok) {
    const text = await res.text().catch(() => '');
    throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`);
  }
  const data = (await res.json()) as NamingResponse;
  const raw = pickTitleSource(data);
  const name = cleanTitle(raw);
  if (!name) {
    ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -1,37 +1,84 @@
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
 import type { LanguageModel } from 'ai';
 // TODO: Per-agent llama-server flag overrides now exist (llama_extra_args,
 // forwarded to llama-sidecar via the X-Agent-Flags header); route them
 // through validateExtraArgs (./llama-args-validator.ts) before forwarding.
 // v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
 // config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
 // upstream without touching env vars. No apiKey — llama-swap is unauth in our
 // Tailscale topology and exposing it over the public internet is gated by
 // Authelia at the Caddy layer, not by API keys.
 //
 // v2.4.1-sidecar: when the agent has llama_extra_args, route through
 // llama-sidecar instead. A fresh provider is created per call (not cached)
 // because the X-Agent-Flags header varies per agent. The llama-swap path
 // stays cached since it has no per-request headers.
-const cache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
+const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
-function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
+function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
-  let provider = cache.get(baseURL);
+  let provider = swapCache.get(baseURL);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
      // v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
      // omits `stream_options.include_usage` from the request body. Without
      // it, llama.cpp / llama-swap never emits the trailing usage block, so
      // `result.usage` resolves with inputTokens=outputTokens=undefined and
      // tokens_used / ctx_used land as NULL in every messages row. Setting
      // true here re-enables the per-stream usage payload across all models
      // served via the llama-swap provider.
      includeUsage: true,
    });
-    cache.set(baseURL, provider);
+    swapCache.set(baseURL, provider);
  }
  return provider;
 }
-export function upstreamModel(baseURL: string, modelId: string): LanguageModel {
+function sidecarProvider(
-  return getProvider(baseURL).chatModel(modelId);
+  baseURL: string,
  flags: string[],
 ): ReturnType<typeof createOpenAICompatible> {
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
 }
 /** Upstream that serves a request: the shared llama-swap or the per-agent llama-sidecar. */
 export type InferenceRoute = 'swap' | 'sidecar';
 /** Result of resolveRoute: the chosen upstream plus the agent flags to forward to it. */
 export interface RoutingInfo {
  route: InferenceRoute;
  // resolveRoute sets this to the agent's llama_extra_args on the sidecar
  // route and to null on the swap route.
  flags: string[] | null;
 }
 /** The only part of an agent that routing looks at. */
 interface AgentLike {
  llama_extra_args: string[] | null;
 }
 /** The only config fields routing needs; the sidecar URL is optional. */
 interface ConfigLike {
  LLAMA_SWAP_URL: string;
  LLAMA_SIDECAR_URL?: string;
 }
 /**
  * Pick the upstream for an agent.
  *
  * @param agent - Agent whose `llama_extra_args` decide the route, or null.
  * @returns `sidecar` together with the agent's flags when the list is
  *   non-empty; otherwise `swap` with `flags: null`.
  */
 export function resolveRoute(agent: AgentLike | null): RoutingInfo {
  const extraArgs = agent?.llama_extra_args ?? null;
  const hasExtraArgs = extraArgs !== null && extraArgs.length > 0;
  if (!hasExtraArgs) {
    return { route: 'swap', flags: null };
  }
  return { route: 'sidecar', flags: extraArgs };
 }
 /**
  * Build the chat model handle for a request.
  *
  * Agents that resolve to the swap route use the cached llama-swap provider.
  * Agents that resolve to the sidecar route get a sidecar provider built from
  * LLAMA_SIDECAR_URL and their flags.
  *
  * @param config - Supplies LLAMA_SWAP_URL and, optionally, LLAMA_SIDECAR_URL.
  * @param modelId - Model identifier passed to the provider's chatModel().
  * @param agent - Optional agent; omitted or null means the swap route.
  * @throws Error when the sidecar route is selected but LLAMA_SIDECAR_URL is unset.
  */
 export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
 ): LanguageModel {
  const routing = resolveRoute(agent ?? null);
  if (routing.route !== 'sidecar') {
    return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
  }
  const sidecarUrl = config.LLAMA_SIDECAR_URL;
  if (!sidecarUrl) {
    throw new Error(
      `Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
    );
  }
  // resolveRoute always pairs the sidecar route with a non-null flags list.
  return sidecarProvider(sidecarUrl, routing.flags!).chatModel(modelId);
 }
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -157,7 +157,8 @@ export async function streamCompletion(
  opts: StreamOptions,
  onDelta: (content: string) => void,
  onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
-  signal?: AbortSignal
+  signal?: AbortSignal,
  agent?: Agent | null,
 ): Promise<StreamResult> {
  const aiMessages = toModelMessages(messages);
  const hasTools = opts.tools !== null && opts.tools.length > 0;
@@ -195,7 +196,7 @@ export async function streamCompletion(
  };
  const result = streamText({
-    model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
+    model: upstreamModel(ctx.config, model, agent ?? null),
    messages: aiMessages,
    ...(aiTools
      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
@@ -458,7 +459,8 @@ export async function executeStreamPhase(
          }, USAGE_THROTTLE_MS - elapsed);
        }
      },
-      signal
+      signal,
      agent,
    );
  } finally {
    if (pendingFlushTimer) {
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -14,6 +14,7 @@ import type {
 import { ALL_TOOLS } from '../tools.js';
 import { resolveProjectRoot } from '../path_guard.js';
 import { maybeAutoNameChat } from '../auto_name.js';
 import { rewriteSearchQuery } from '../task-search-rewrite.js';
 import { getAgentById } from '../agents.js';
 import * as compaction from '../compaction.js';
 import type { Broker } from '../broker.js';
@@ -254,6 +255,16 @@ export async function runAssistantTurn(
    const webToolsEnabled =
      iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
    if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
      const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
      if (lastUserMsg?.content) {
        const hint = await rewriteSearchQuery(lastUserMsg.content);
        if (hint && messages[0]?.role === 'system' && messages[0].content) {
          messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
        }
      }
    }
    const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
    const state: StreamPhaseState = { accumulated: '', startedAt: null };
    let result: StreamResult;
--- a/apps/server/src/services/system-prompt.ts
+++ b/apps/server/src/services/system-prompt.ts
@@ -21,6 +21,7 @@ import { createHash } from 'node:crypto';
 import { readFile, stat } from 'node:fs/promises';
 import type { Agent, Project, Session } from '../types/api.js';
 import { getAgentsMtimes } from './agents.js';
 import { resolveRoute } from './inference/provider.js';
 const BASE_SYSTEM_PROMPT = (projectPath: string) =>
  `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -98,6 +99,7 @@ export interface PrefixFingerprint {
  has_agent_system_prompt: boolean;
  has_session_override: boolean;
  has_project_override: boolean;
  route: 'swap' | 'sidecar';
 }
 export interface PrefixDrift {
@@ -125,6 +127,7 @@ interface ObservedInputs {
  has_agent_system_prompt: boolean;
  has_session_override: boolean;
  has_project_override: boolean;
  route: 'swap' | 'sidecar';
 }
 interface ObserverEntry {
@@ -183,6 +186,7 @@ export async function buildSystemPromptWithFingerprint(
    has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
    has_session_override: sessionPrompt.length > 0,
    has_project_override: projectPrompt.length > 0,
    route: resolveRoute(agent).route,
  };
  const fingerprint: PrefixFingerprint = {
@@ -199,6 +203,7 @@ export async function buildSystemPromptWithFingerprint(
    has_agent_system_prompt: inputs.has_agent_system_prompt,
    has_session_override: inputs.has_session_override,
    has_project_override: inputs.has_project_override,
    route: inputs.route,
  };
  let drift: PrefixDrift | null = null;
--- a/apps/server/src/services/task-model.ts
+++ b/apps/server/src/services/task-model.ts
@@ -0,0 +1,68 @@
 import { loadConfig, type Config } from '../config.js';
 // Upper bound for one task-model request; the fetch is aborted after this.
 const TIMEOUT_MS = 10_000;

 /**
  * Run one short, non-streaming chat completion against the task model.
  *
  * The endpoint and model name come from resolveEndpoint(). The request
  * disables thinking via chat_template_kwargs and is aborted after TIMEOUT_MS.
  *
  * @param opts.system - System prompt.
  * @param opts.user - User message content.
  * @param opts.maxTokens - Completion token cap (default 30).
  * @param opts.temperature - Sampling temperature (default 0.3).
  * @param opts.fallbackModel - Passed through to resolveEndpoint().
  * @returns The trimmed message content. When content is empty, the last
  *   non-empty line of reasoning_content. An empty string on a non-2xx
  *   response, a missing choice, or any request/parse failure (failures are
  *   logged with console.warn instead of being thrown).
  */
 export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
 }): Promise<string> {
  const config = loadConfig();
  const maxTokens = opts.maxTokens ?? 30;
  const temperature = opts.temperature ?? 0.3;
  const { url, model } = resolveEndpoint(config, opts.fallbackModel);

  // Accept base URLs with or without a trailing slash or a `/v1` suffix, so a
  // value such as `http://host:8401/v1` does not turn into `/v1/v1/...`.
  const trimmedBase = url.replace(/\/+$/, '');
  const apiBase = trimmedBase.endsWith('/v1') ? trimmedBase : `${trimmedBase}/v1`;

  try {
    const res = await fetch(`${apiBase}/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        max_tokens: maxTokens,
        temperature,
        stream: false,
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });
    if (!res.ok) {
      const text = await res.text().catch(() => '');
      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
      return '';
    }
    const data = (await res.json()) as {
      choices?: Array<{
        message?: { content?: string; reasoning_content?: string };
      }>;
    };
    const choice = data.choices?.[0]?.message;
    if (!choice) return '';
    const content = (choice.content ?? '').trim();
    if (content.length > 0) return content;
    // Some models answer only in reasoning_content; use its last non-empty line.
    const reasoning = choice.reasoning_content ?? '';
    if (reasoning.length === 0) return '';
    const lines = reasoning.split('\n').map((l) => l.trim()).filter((l) => l.length > 0);
    return lines[lines.length - 1] ?? '';
  } catch (err) {
    // Best-effort helper: timeouts, network errors and bad JSON all end here.
    console.warn('task-model: request failed', err);
    return '';
  }
 }
 /**
  * Choose where task-model requests go and which model name they carry.
  *
  * With TASK_MODEL_URL set, that server is used with a fixed model name.
  * Otherwise requests go to LLAMA_SWAP_URL using FAST_MODEL, then the caller's
  * fallbackModel, then DEFAULT_MODEL.
  */
 function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
 ): { url: string; model: string } {
  const dedicatedUrl = config.TASK_MODEL_URL;
  if (!dedicatedUrl) {
    const swapModel = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
    return { url: config.LLAMA_SWAP_URL, model: swapModel };
  }
  return { url: dedicatedUrl, model: 'gemma-3-270m-it' };
 }
--- a/apps/server/src/services/task-search-rewrite.ts
+++ b/apps/server/src/services/task-search-rewrite.ts
@@ -0,0 +1,19 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'You rewrite user messages into concise web search queries. Reply with ONLY the search query. 3 to 6 words. No quotes, no explanation.';
 // Longest prefix of the user message sent to the task model.
 const MAX_INPUT_CHARS = 500;
 // Length of the user-message prefix used when the model returns nothing.
 const FALLBACK_CHARS = 60;

 /**
  * Turn a user message into a short, single-line web search query.
  *
  * The result is meant to be embedded in other text, so it is normalized:
  * whitespace runs (including newlines) collapse to single spaces and one pair
  * of wrapping quotes is removed.
  *
  * @param userMessage - Raw user message.
  * @returns The model's query. When the model yields nothing, the first
  *   FALLBACK_CHARS characters of the whitespace-collapsed message. An empty
  *   string for a blank message (no model call is made).
  */
 export async function rewriteSearchQuery(userMessage: string): Promise<string> {
  const flattened = userMessage.replace(/\s+/g, ' ').trim();
  if (flattened.length === 0) return '';

  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: userMessage.slice(0, MAX_INPUT_CHARS),
    maxTokens: 20,
    temperature: 0.2,
  });

  const singleLine = result.replace(/\s+/g, ' ').trim();
  // Small models often ignore "No quotes"; drop one matching wrapping pair.
  const wrapped = /^(["'`])(.*)\1$/.exec(singleLine);
  const query = (wrapped ? wrapped[2] ?? '' : singleLine).trim();
  if (query.length > 0) return query;

  return flattened.slice(0, FALLBACK_CHARS).trim();
 }
--- a/apps/server/src/services/task-summary.ts
+++ b/apps/server/src/services/task-summary.ts
@@ -0,0 +1,24 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';
 // Hard cap on the text sent to the task model.
 const MAX_INPUT_CHARS = 1000;
 // Number of trailing messages considered for the summary.
 const RECENT_MESSAGES = 6;

 /**
  * Summarize the tail of a conversation in one sentence via the task model.
  *
  * The last RECENT_MESSAGES messages are rendered as `role: content` lines.
  * When that text exceeds MAX_INPUT_CHARS, each message is cut to an equal
  * share of the budget first, so one long early message cannot push the most
  * recent turns out of the input. The hard cap is applied afterwards.
  *
  * @param messages - Conversation messages, oldest first.
  * @returns The model's summary, or an empty string when there are no
  *   messages (no model call is made) or the model yields nothing.
  */
 export async function oneLineSummary(
  messages: Array<{ role: string; content: string }>,
 ): Promise<string> {
  const recent = messages.slice(-RECENT_MESSAGES);
  if (recent.length === 0) return '';

  const render = (limit: number): string =>
    recent.map((m) => `${m.role}: ${m.content.slice(0, limit)}`).join('\n');

  let input = render(Number.MAX_SAFE_INTEGER);
  if (input.length > MAX_INPUT_CHARS) {
    const perMessage = Math.floor(MAX_INPUT_CHARS / recent.length);
    input = render(perMessage).slice(0, MAX_INPUT_CHARS);
  }

  return taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 30,
    temperature: 0.3,
  });
 }
--- a/apps/server/src/services/task-tags.ts
+++ b/apps/server/src/services/task-tags.ts
@@ -0,0 +1,22 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'You tag chat sessions. Reply with 1 to 3 lowercase tags separated by commas. Tags should describe the topic. No explanation. Examples: "docker, deployment", "python, debugging", "react, styling".';
 // The prompt asks for at most this many tags; enforced on the parsed reply.
 const MAX_TAGS = 3;
 // Longer fragments are treated as prose rather than tags and dropped.
 const MAX_TAG_CHARS = 30;

 /**
  * Ask the task model for topic tags describing one exchange.
  *
  * Only the first 300 characters of each side are sent. The reply is split on
  * commas and newlines. Each piece is stripped of wrapping quotes (the prompt's
  * own examples are quoted, and models tend to copy that), lowercased, and
  * de-duplicated. At most MAX_TAGS tags are kept, in reply order.
  *
  * @param userMessage - The user's message.
  * @param assistantReply - The assistant's reply to it.
  * @returns Zero to MAX_TAGS tags; an empty array when the model yields nothing.
  */
 export async function suggestTags(
  userMessage: string,
  assistantReply: string,
 ): Promise<string[]> {
  const input = `User: ${userMessage.slice(0, 300)}\nAssistant: ${assistantReply.slice(0, 300)}`;
  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 30,
    temperature: 0.3,
  });
  if (result.length === 0) return [];

  const tags = new Set<string>();
  for (const piece of result.split(/[,\n]/)) {
    const tag = piece
      .trim()
      .replace(/^["'`]+|["'`]+$/g, '')
      .trim()
      .toLowerCase();
    if (tag.length === 0 || tag.length > MAX_TAG_CHARS) continue;
    tags.add(tag);
    if (tags.size === MAX_TAGS) break;
  }
  return [...tags];
 }
--- a/data/AGENTS.md
+++ b/data/AGENTS.md
@@ -7,7 +7,7 @@ top_p: 0.95
 top_k: 20
 min_p: 0.0
 presence_penalty: 0.0
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Reviews code for bugs, security issues, and maintainability. Read-only.
 ---
 You review code. Find real problems, not style nits.
@@ -46,7 +46,7 @@ top_p: 0.95
 top_k: 20
 min_p: 0.0
 presence_penalty: 0.0
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Diagnoses bugs from error messages, logs, or described symptoms.
 ---
 You diagnose bugs. Form a hypothesis, prove it with evidence from the code.
@@ -72,7 +72,7 @@ top_k: 20
 min_p: 0.0
 presence_penalty: 0.0
 steps: 5
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.
 ---
 You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code.
@@ -115,7 +115,7 @@ top_k: 20
 min_p: 0.0
 presence_penalty: 1.5
 steps: 20
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Designs new features, modules, or architectural changes. Outputs a build plan.
 ---
 You design. You produce build plans, not code.
@@ -157,7 +157,7 @@ top_p: 0.95
 top_k: 20
 min_p: 0.0
 presence_penalty: 0.0
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Audits code for security vulnerabilities. Read-only.
 ---
 You audit for security issues. Concrete findings only, no generic warnings.
@@ -240,7 +240,7 @@ top_p: 0.95
 top_k: 20
 min_p: 0.0
 presence_penalty: 0.0
-tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
+tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
 description: Discovers and maps unfamiliar codebases. Reads architecture, traces data flow, identifies key symbols.
 ---
 You map codebases. Start broad, then drill into specifics.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,6 +11,7 @@ services:
      CONTAINER_GUIDANCE_FILE: /app/BOOCHAT.md
      DATABASE_URL: postgres://boocode:${POSTGRES_PASSWORD}@boocode_db:5432/boochat
      BOOCODER_URL: http://100.114.205.53:9502
      LLAMA_SIDECAR_URL: http://100.101.41.16:8402
    volumes:
      - /opt:/opt
      - /opt/projects:/opt/projects:rw