v2.5.0-task-model: lightweight task model services + tasks table

Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-27 21:44:39 +00:00
parent bcfc94fa47
commit fcc7c5a86e
8 changed files with 194 additions and 54 deletions
--- a/.env.example
+++ b/.env.example
@@ -11,6 +11,11 @@ POSTGRES_PASSWORD=CHANGE_ME
 # point BooCode at a different SearXNG instance.
 SEARXNG_URL=http://100.114.205.53:8888
 # Task model: lightweight model for cheap LLM calls (auto-naming, search
 # rewrite, tags, summaries). Must be a direct llama-server instance (NOT
 # llama-swap). When unset, task calls go to LLAMA_SWAP_URL using FAST_MODEL.
 # TASK_MODEL_URL=http://100.90.172.55:7995
 # v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
 # Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
 # sessions where the model only needs read-only filesystem access.
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -344,6 +344,7 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k
 -- Project-level defaults; both are NOT NULL with an explicit default value.
 ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
 ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
 -- Nullable with no default: a NULL session flag lets the turn loop fall back to
 -- projects.default_web_search_enabled (and finally to false).
 ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
 -- Session tags as a text array. Defaults to the empty array, but the column is
 -- still nullable because no NOT NULL is declared.
 ALTER TABLE sessions ADD COLUMN IF NOT EXISTS tags TEXT[] DEFAULT '{}';
 -- v1.11: anchored rolling compaction.
 --   compacted_at  — marks rows that are "behind the curtain" of the latest
@@ -366,3 +367,39 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT F
 ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
 ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
 CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
 -- tasks table (provider dispatch, arena)
 -- One row per task. Created idempotently (IF NOT EXISTS), so changing this
 -- definition does not alter a table that already exists.
 CREATE TABLE IF NOT EXISTS tasks (
  id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Owning project (required); rows are removed when the project is deleted.
  project_id        UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  -- Optional session link; rows are removed when the session is deleted.
  session_id        UUID REFERENCES sessions(id) ON DELETE CASCADE,
  -- Optional self-reference. No ON DELETE action is given, so the PostgreSQL
  -- default (NO ACTION) applies to deletes of a referenced parent row.
  parent_task_id    UUID REFERENCES tasks(id),
  -- Plain UUID with no foreign key declared here.
  -- NOTE(review): presumably groups tasks belonging to one arena run; confirm.
  arena_id          UUID,
  -- Lifecycle state; starts as 'pending' and is limited to the values below.
  state             TEXT NOT NULL DEFAULT 'pending'
                    CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')),
  -- Required task input text; output_summary stays NULL until something sets it.
  input             TEXT NOT NULL,
  output_summary    TEXT,
  -- Free-form, nullable descriptors; no constraints are enforced on them here.
  agent             TEXT,
  model             TEXT,
  mode_id           TEXT,
  thinking_option_id TEXT,
  feature_values    JSONB,
  -- NULL, or exactly one of the four listed execution paths.
  execution_path    TEXT CHECK (execution_path IS NULL OR execution_path IN ('native','acp','pty','qwen')),
  worktree_path     TEXT,
  cost_tokens       INTEGER,
  -- started_at / ended_at are nullable and have no defaults.
  started_at        TIMESTAMPTZ,
  ended_at          TIMESTAMPTZ,
  -- clock_timestamp() is the wall-clock time at insert, not the transaction start.
  created_at        TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
 );
 -- Fix tasks FK to cascade on session delete (existing tables without CASCADE).
 -- The catalog lookup is scoped to the tasks table and to foreign-key
 -- constraints: pg_constraint.conname is not unique, so matching on the name
 -- alone could fire because of a same-named constraint on another relation and
 -- then needlessly drop/re-create (or fail to drop) the constraint on tasks.
 -- confdeltype = 'c' means the FK already has ON DELETE CASCADE; in that case
 -- the block is a no-op, which keeps the migration idempotent.
 DO $$ BEGIN
  IF EXISTS (
    SELECT 1 FROM pg_constraint
    WHERE conname = 'tasks_session_id_fkey'
    AND conrelid = 'tasks'::regclass
    AND contype = 'f'
    AND confdeltype != 'c'
  ) THEN
    ALTER TABLE tasks DROP CONSTRAINT tasks_session_id_fkey;
    ALTER TABLE tasks ADD CONSTRAINT tasks_session_id_fkey
      FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
  END IF;
 END $$;
--- a/apps/server/src/services/auto_name.ts
+++ b/apps/server/src/services/auto_name.ts
@@ -1,9 +1,10 @@
 import type { InferenceContext } from './inference/index.js';
 import { taskModelCompletion } from './task-model.js';
 const NAMING_SYSTEM_PROMPT =
-  'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
+  'You name chat sessions. Reply with ONLY the title. 4 to 6 words. No quotes, no punctuation, no prefix.';
-const MAX_TITLE_CHARS = 60;
+const MAX_TITLE_CHARS = 80;
 function cleanTitle(raw: string): string {
  let name = raw.trim();
@@ -18,27 +19,7 @@ function cleanTitle(raw: string): string {
  return name;
 }
-interface NamingResponse {
+// TODO: wire suggestTags after task model validation
  choices?: Array<{
    message?: {
      content?: string;
      reasoning_content?: string;
    };
  }>;
 }
 /**
  * Picks the raw text a chat title should be derived from.
  *
  * The first choice's `content` wins whenever it holds anything besides
  * whitespace; it is returned as-is (untrimmed). Otherwise the last non-blank
  * line of `reasoning_content` is returned, trimmed.
  *
  * @param data Parsed chat-completion response.
  * @returns The title source text, or '' when the response has no usable text.
  */
 function pickTitleSource(data: NamingResponse): string {
  const message = data.choices?.[0]?.message;
  if (!message) {
    return '';
  }
  const { content } = message;
  if (content && content.trim().length > 0) {
    return content;
  }
  // No direct answer: scan the reasoning text bottom-up and take the first
  // line that still has characters after trimming.
  const reasoningLines = (message.reasoning_content ?? '').split('\n');
  for (let i = reasoningLines.length - 1; i >= 0; i -= 1) {
    const candidate = (reasoningLines[i] ?? '').trim();
    if (candidate.length > 0) {
      return candidate;
    }
  }
  return '';
 }
 export async function maybeAutoNameChat(
  ctx: InferenceContext,
@@ -64,13 +45,6 @@ export async function maybeAutoNameChat(
  if (!chat) return;
  if (chat.name !== null && chat.name !== '') return;
  const sessionRows = await ctx.sql<{ model: string }[]>`
    SELECT model FROM sessions WHERE id = ${sessionId}
  `;
  // v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
  const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
  if (!model) return;
  const assistantMsg = await ctx.sql<{ content: string }[]>`
    SELECT content FROM messages
    WHERE chat_id = ${chatId}
@@ -84,32 +58,12 @@ export async function maybeAutoNameChat(
  const assistantText = assistantMsg[0].content.slice(0, 2000);
-  const body = {
+  const raw = await taskModelCompletion({
-    model,
+    system: NAMING_SYSTEM_PROMPT,
-    messages: [
+    user: assistantText,
-      { role: 'system', content: NAMING_SYSTEM_PROMPT },
+    maxTokens: 30,
      {
        role: 'user',
        content: assistantText,
      },
    ],
    max_tokens: 30,
    temperature: 0.3,
    stream: false,
    chat_template_kwargs: { enable_thinking: false },
  };
  const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  if (!res.ok) {
    const text = await res.text().catch(() => '');
    throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`);
  }
  const data = (await res.json()) as NamingResponse;
  const raw = pickTitleSource(data);
  const name = cleanTitle(raw);
  if (!name) {
    ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -14,6 +14,7 @@ import type {
 import { ALL_TOOLS } from '../tools.js';
 import { resolveProjectRoot } from '../path_guard.js';
 import { maybeAutoNameChat } from '../auto_name.js';
 import { rewriteSearchQuery } from '../task-search-rewrite.js';
 import { getAgentById } from '../agents.js';
 import * as compaction from '../compaction.js';
 import type { Broker } from '../broker.js';
@@ -254,6 +255,16 @@ export async function runAssistantTurn(
    const webToolsEnabled =
      iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
    if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
      const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
      if (lastUserMsg?.content) {
        const hint = await rewriteSearchQuery(lastUserMsg.content);
        if (hint && messages[0]?.role === 'system' && messages[0].content) {
          messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
        }
      }
    }
    const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
    const state: StreamPhaseState = { accumulated: '', startedAt: null };
    let result: StreamResult;
--- a/apps/server/src/services/task-model.ts
+++ b/apps/server/src/services/task-model.ts
@@ -0,0 +1,68 @@
 import { loadConfig, type Config } from '../config.js';
 /** Hard upper bound for a single task-model HTTP request, in milliseconds. */
 const TIMEOUT_MS = 10_000;
 /**
  * Sends one non-streaming system+user chat completion to the task model and
  * returns the text it produced.
  *
  * The endpoint and model name come from `resolveEndpoint`. Any HTTP error,
  * timeout, network failure or malformed response is logged with
  * `console.warn` and turned into an empty string; only failures from
  * `loadConfig` / `resolveEndpoint`, which run before the guarded section,
  * propagate to the caller.
  *
  * @param opts.system System prompt.
  * @param opts.user User message content.
  * @param opts.maxTokens Completion token limit (default 30).
  * @param opts.temperature Sampling temperature (default 0.3).
  * @param opts.fallbackModel Passed through to `resolveEndpoint`.
  * @returns Trimmed `content` of the first choice; if that is empty, the last
  *          non-blank line of `reasoning_content`; otherwise ''.
  */
 export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
 }): Promise<string> {
  const config = loadConfig();
  const tokenLimit = opts.maxTokens ?? 30;
  const samplingTemperature = opts.temperature ?? 0.3;
  const endpoint = resolveEndpoint(config, opts.fallbackModel);

  try {
    const payload = {
      model: endpoint.model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: tokenLimit,
      temperature: samplingTemperature,
      stream: false,
      // Ask the chat template to skip the thinking phase.
      chat_template_kwargs: { enable_thinking: false },
    };
    const response = await fetch(`${endpoint.url}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    if (!response.ok) {
      // Best effort: an unreadable error body is logged as an empty string.
      const detail = await response.text().catch(() => '');
      console.warn(`task-model: ${response.status} ${detail.slice(0, 200)}`);
      return '';
    }

    const parsed = (await response.json()) as {
      choices?: Array<{
        message?: { content?: string; reasoning_content?: string };
      }>;
    };
    const message = parsed.choices?.[0]?.message;
    if (!message) {
      return '';
    }

    const answer = (message.content ?? '').trim();
    if (answer.length > 0) {
      return answer;
    }

    // Empty content: fall back to the last non-blank line of the reasoning.
    const reasoningLines = (message.reasoning_content ?? '').split('\n');
    for (let i = reasoningLines.length - 1; i >= 0; i -= 1) {
      const candidate = (reasoningLines[i] ?? '').trim();
      if (candidate.length > 0) {
        return candidate;
      }
    }
    return '';
  } catch (err) {
    console.warn('task-model: request failed', err);
    return '';
  }
 }
 /**
  * Decides which server and model name a task-model request should use.
  *
  * - When `config.TASK_MODEL_URL` is set (non-empty), that URL is used together
  *   with the fixed model name 'gemma-3-270m-it'; `FAST_MODEL`, `fallbackModel`
  *   and `DEFAULT_MODEL` are ignored on this path.
  * - Otherwise the request goes to `config.LLAMA_SWAP_URL` with the first
  *   defined of `FAST_MODEL`, `fallbackModel`, `DEFAULT_MODEL`.
  *
  * Trailing slashes are stripped from the returned URL because callers append
  * a path that starts with '/'; a base URL such as 'http://host:7995/' would
  * otherwise produce 'http://host:7995//v1/chat/completions'.
  *
  * @param config Loaded application configuration.
  * @param fallbackModel Model to use when `FAST_MODEL` is not configured.
  * @returns Base URL (no trailing slash) and model name.
  */
 function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
 ): { url: string; model: string } {
  const withoutTrailingSlash = (base: string): string => base.replace(/\/+$/, '');
  if (config.TASK_MODEL_URL) {
    return { url: withoutTrailingSlash(config.TASK_MODEL_URL), model: 'gemma-3-270m-it' };
  }
  const model = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
  return { url: withoutTrailingSlash(config.LLAMA_SWAP_URL), model };
 }
--- a/apps/server/src/services/task-search-rewrite.ts
+++ b/apps/server/src/services/task-search-rewrite.ts
@@ -0,0 +1,19 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'You rewrite user messages into concise web search queries. Reply with ONLY the search query. 3 to 6 words. No quotes, no explanation.';
 /** Only this many leading characters of the user message are sent to the model. */
 const MAX_INPUT_CHARS = 500;
 /** Length of the user-message prefix used when the model yields nothing usable. */
 const FALLBACK_CHARS = 60;
 /**
  * Normalizes text into a single-line query: runs of whitespace (including
  * newlines) become one space, and quote characters wrapping the whole string
  * are removed.
  */
 function tidyQuery(text: string): string {
  return text
    .replace(/\s+/g, ' ')
    .trim()
    .replace(/^["'`\u201C\u201D\u2018\u2019]+|["'`\u201C\u201D\u2018\u2019]+$/g, '')
    .trim();
 }
 /**
  * Condenses a user message into a short web-search query using the task model.
  *
  * The result is always a single line without wrapping quotes, so it can be
  * embedded in a quoted hint even when the model ignores the "No quotes"
  * instruction or answers on several lines.
  *
  * @param userMessage Raw user message; only the first 500 characters are sent.
  * @returns The cleaned model answer; when that is empty, the cleaned first 60
  *          characters of `userMessage`; '' for a blank message (no model call).
  */
 export async function rewriteSearchQuery(userMessage: string): Promise<string> {
  // Nothing to rewrite: skip the model round-trip entirely.
  if (userMessage.trim().length === 0) return '';
  const input = userMessage.slice(0, MAX_INPUT_CHARS);
  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 20,
    temperature: 0.2,
  });
  const query = tidyQuery(result);
  if (query.length > 0) return query;
  // Model unavailable or silent: fall back to the start of the message itself.
  return tidyQuery(userMessage.slice(0, FALLBACK_CHARS));
 }
--- a/apps/server/src/services/task-summary.ts
+++ b/apps/server/src/services/task-summary.ts
@@ -0,0 +1,24 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';
 /** The transcript sent to the model is cut to this many leading characters. */
 const MAX_INPUT_CHARS = 1000;
 /**
  * Produces a one-sentence summary of the tail of a conversation.
  *
  * The last six messages are rendered as `role: content` lines, joined with
  * newlines, truncated to the first 1000 characters and sent to the task model.
  *
  * @param messages Conversation messages, oldest first; not modified.
  * @returns The model's summary, or '' when there are no messages (the model is
  *          not called, so it cannot invent a summary for an empty chat) or when
  *          the task model produced nothing.
  */
 export async function oneLineSummary(
  messages: ReadonlyArray<{ role: string; content: string }>,
 ): Promise<string> {
  const recent = messages.slice(-6);
  if (recent.length === 0) return '';
  const transcript = recent
    .map((m) => `${m.role}: ${m.content}`)
    .join('\n')
    .slice(0, MAX_INPUT_CHARS);
  return taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: transcript,
    maxTokens: 30,
    temperature: 0.3,
  });
 }
--- a/apps/server/src/services/task-tags.ts
+++ b/apps/server/src/services/task-tags.ts
@@ -0,0 +1,22 @@
 import { taskModelCompletion } from './task-model.js';
 const SYSTEM_PROMPT =
  'You tag chat sessions. Reply with 1 to 3 lowercase tags separated by commas. Tags should describe the topic. No explanation. Examples: "docker, deployment", "python, debugging", "react, styling".';
 /** Upper bound on returned tags; matches the "1 to 3" asked for in the prompt. */
 const MAX_TAGS = 3;
 /** Candidates longer than this are discarded rather than kept as tags. */
 const MAX_TAG_CHARS = 30;
 /**
  * Turns the raw model answer into a clean tag list: splits on commas and
  * newlines, strips surrounding quotes/whitespace, lowercases, drops empty or
  * over-long entries, removes duplicates and keeps at most MAX_TAGS tags.
  */
 function parseTags(raw: string): string[] {
  const tags = new Set<string>();
  for (const piece of raw.split(/[,\n]/)) {
    // The prompt's examples are quoted, so the model often echoes the quotes.
    const tag = piece.replace(/^["'`\s]+|["'`\s]+$/g, '').toLowerCase();
    if (tag.length === 0 || tag.length > MAX_TAG_CHARS) continue;
    tags.add(tag);
    if (tags.size === MAX_TAGS) break;
  }
  return [...tags];
 }
 /**
  * Asks the task model for topic tags describing one user/assistant exchange.
  *
  * Only the first 300 characters of each side are sent to the model.
  *
  * @param userMessage The user's message.
  * @param assistantReply The assistant's reply.
  * @returns Up to three distinct lowercase tags in model order; [] when both
  *          inputs are blank (no model call) or the model returned nothing usable.
  */
 export async function suggestTags(
  userMessage: string,
  assistantReply: string,
 ): Promise<string[]> {
  if (userMessage.trim().length === 0 && assistantReply.trim().length === 0) return [];
  const input = `User: ${userMessage.slice(0, 300)}\nAssistant: ${assistantReply.slice(0, 300)}`;
  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 30,
    temperature: 0.3,
  });
  return parseTags(result);
 }