From fcc7c5a86ec884d2046ff734d9468f9481a73021 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Wed, 27 May 2026 21:44:39 +0000 Subject: [PATCH] v2.5.0-task-model: lightweight task model services + tasks table Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) --- .env.example | 5 ++ apps/server/src/schema.sql | 37 ++++++++++ apps/server/src/services/auto_name.ts | 62 +++-------------- apps/server/src/services/inference/turn.ts | 11 +++ apps/server/src/services/task-model.ts | 68 +++++++++++++++++++ .../src/services/task-search-rewrite.ts | 19 ++++++ apps/server/src/services/task-summary.ts | 24 +++++++ apps/server/src/services/task-tags.ts | 22 ++++++ 8 files changed, 194 insertions(+), 54 deletions(-) create mode 100644 apps/server/src/services/task-model.ts create mode 100644 apps/server/src/services/task-search-rewrite.ts create mode 100644 apps/server/src/services/task-summary.ts create mode 100644 apps/server/src/services/task-tags.ts diff --git a/.env.example b/.env.example index c09d710..c23f675 100644 --- a/.env.example +++ b/.env.example @@ -11,6 +11,11 @@ POSTGRES_PASSWORD=CHANGE_ME # point BooCode at a different SearXNG instance. SEARXNG_URL=http://100.114.205.53:8888 +# Task model: lightweight model for auto-naming, search rewrite, etc. +# Direct llama-server instance (NOT llama-swap). Falls back to LLAMA_SWAP_URL +# with FAST_MODEL when unset. 
+# TASK_MODEL_URL=http://100.90.172.55:7995 + # v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM. # Unset (default) → all tools (~21k schema). Useful primarily for single-purpose # sessions where the model only needs read-only filesystem access. diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql index 8a9070d..a2487d4 100644 --- a/apps/server/src/schema.sql +++ b/apps/server/src/schema.sql @@ -344,6 +344,7 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT ''; ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false; ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN; +ALTER TABLE sessions ADD COLUMN IF NOT EXISTS tags TEXT[] DEFAULT '{}'; -- v1.11: anchored rolling compaction. -- compacted_at — marks rows that are "behind the curtain" of the latest @@ -366,3 +367,39 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT F ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL; ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE; CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at); + +-- tasks table (provider dispatch, arena) +CREATE TABLE IF NOT EXISTS tasks ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + session_id UUID REFERENCES sessions(id) ON DELETE CASCADE, + parent_task_id UUID REFERENCES tasks(id), + arena_id UUID, + state TEXT NOT NULL DEFAULT 'pending' + CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')), + input TEXT NOT NULL, + output_summary TEXT, + agent TEXT, + model TEXT, + mode_id TEXT, + thinking_option_id TEXT, + feature_values JSONB, + execution_path TEXT 
CHECK (execution_path IS NULL OR execution_path IN ('native','acp','pty','qwen')), + worktree_path TEXT, + cost_tokens INTEGER, + started_at TIMESTAMPTZ, + ended_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp() +); + +-- Fix tasks FK to cascade on session delete (existing tables without CASCADE) +DO $$ BEGIN + IF EXISTS ( + SELECT 1 FROM pg_constraint WHERE conname = 'tasks_session_id_fkey' + AND confdeltype != 'c' + ) THEN + ALTER TABLE tasks DROP CONSTRAINT tasks_session_id_fkey; + ALTER TABLE tasks ADD CONSTRAINT tasks_session_id_fkey + FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE; + END IF; +END $$; diff --git a/apps/server/src/services/auto_name.ts b/apps/server/src/services/auto_name.ts index 95c62d6..1b01952 100644 --- a/apps/server/src/services/auto_name.ts +++ b/apps/server/src/services/auto_name.ts @@ -1,9 +1,10 @@ import type { InferenceContext } from './inference/index.js'; +import { taskModelCompletion } from './task-model.js'; const NAMING_SYSTEM_PROMPT = - 'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".'; + 'You name chat sessions. Reply with ONLY the title. 4 to 6 words. 
No quotes, no punctuation, no prefix.'; -const MAX_TITLE_CHARS = 60; +const MAX_TITLE_CHARS = 80; function cleanTitle(raw: string): string { let name = raw.trim(); @@ -18,27 +19,7 @@ function cleanTitle(raw: string): string { return name; } -interface NamingResponse { - choices?: Array<{ - message?: { - content?: string; - reasoning_content?: string; - }; - }>; -} - -function pickTitleSource(data: NamingResponse): string { - const choice = data.choices?.[0]?.message; - if (!choice) return ''; - if (choice.content && choice.content.trim().length > 0) return choice.content; - const reasoning = choice.reasoning_content ?? ''; - if (reasoning.length === 0) return ''; - const lines = reasoning - .split('\n') - .map((l) => l.trim()) - .filter((l) => l.length > 0); - return lines[lines.length - 1] ?? ''; -} +// TODO: wire suggestTags after task model validation export async function maybeAutoNameChat( ctx: InferenceContext, @@ -64,13 +45,6 @@ export async function maybeAutoNameChat( if (!chat) return; if (chat.name !== null && chat.name !== '') return; - const sessionRows = await ctx.sql<{ model: string }[]>` - SELECT model FROM sessions WHERE id = ${sessionId} - `; - // v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries). - const model = ctx.config.FAST_MODEL ?? 
sessionRows[0]?.model; - if (!model) return; - const assistantMsg = await ctx.sql<{ content: string }[]>` SELECT content FROM messages WHERE chat_id = ${chatId} @@ -84,32 +58,12 @@ export async function maybeAutoNameChat( const assistantText = assistantMsg[0].content.slice(0, 2000); - const body = { - model, - messages: [ - { role: 'system', content: NAMING_SYSTEM_PROMPT }, - { - role: 'user', - content: assistantText, - }, - ], - max_tokens: 30, + const raw = await taskModelCompletion({ + system: NAMING_SYSTEM_PROMPT, + user: assistantText, + maxTokens: 30, temperature: 0.3, - stream: false, - chat_template_kwargs: { enable_thinking: false }, - }; - - const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), }); - if (!res.ok) { - const text = await res.text().catch(() => ''); - throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`); - } - const data = (await res.json()) as NamingResponse; - const raw = pickTitleSource(data); const name = cleanTitle(raw); if (!name) { ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model'); diff --git a/apps/server/src/services/inference/turn.ts b/apps/server/src/services/inference/turn.ts index b6111bc..cd5c5dd 100644 --- a/apps/server/src/services/inference/turn.ts +++ b/apps/server/src/services/inference/turn.ts @@ -14,6 +14,7 @@ import type { import { ALL_TOOLS } from '../tools.js'; import { resolveProjectRoot } from '../path_guard.js'; import { maybeAutoNameChat } from '../auto_name.js'; +import { rewriteSearchQuery } from '../task-search-rewrite.js'; import { getAgentById } from '../agents.js'; import * as compaction from '../compaction.js'; import type { Broker } from '../broker.js'; @@ -254,6 +255,16 @@ export async function runAssistantTurn( const webToolsEnabled = iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? 
false;
+    if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
+      const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
+      if (lastUserMsg?.content) {
+        const hint = await rewriteSearchQuery(lastUserMsg.content);
+        if (hint && messages[0]?.role === 'system' && messages[0].content) {
+          messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
+        }
+      }
+    }
+
     const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
     const state: StreamPhaseState = { accumulated: '', startedAt: null };
     let result: StreamResult;
diff --git a/apps/server/src/services/task-model.ts b/apps/server/src/services/task-model.ts
new file mode 100644
index 0000000..2b0810c
--- /dev/null
+++ b/apps/server/src/services/task-model.ts
@@ -0,0 +1,97 @@
+import { loadConfig, type Config } from '../config.js';
+
+/** Per-request deadline in milliseconds, enforced with AbortSignal.timeout. */
+const TIMEOUT_MS = 10_000;
+
+/** Model name sent in the request body when TASK_MODEL_URL is configured. */
+const TASK_MODEL_NAME = 'gemma-3-270m-it';
+
+/**
+ * Runs one non-streaming chat completion against the endpoint chosen by
+ * resolveEndpoint.
+ *
+ * Request failures never reject: a non-2xx status, a network error, or the
+ * timeout is logged with console.warn and reported as ''.
+ *
+ * @param opts.system - System prompt.
+ * @param opts.user - User message content.
+ * @param opts.maxTokens - Sent as max_tokens; defaults to 30.
+ * @param opts.temperature - Sampling temperature; defaults to 0.3.
+ * @param opts.fallbackModel - Model used when TASK_MODEL_URL and FAST_MODEL
+ *   are both unset.
+ * @returns The trimmed message content; if that is empty, the last non-empty
+ *   line of reasoning_content; otherwise ''.
+ */
+export async function taskModelCompletion(opts: {
+  system: string;
+  user: string;
+  maxTokens?: number;
+  temperature?: number;
+  fallbackModel?: string;
+}): Promise<string> {
+  const config = loadConfig();
+  const maxTokens = opts.maxTokens ?? 30;
+  const temperature = opts.temperature ?? 0.3;
+
+  const { url, model } = resolveEndpoint(config, opts.fallbackModel);
+  // A base URL configured with a trailing "/" must not yield "//v1/...".
+  const endpoint = `${url.replace(/\/+$/, '')}/v1/chat/completions`;
+
+  try {
+    const res = await fetch(endpoint, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        messages: [
+          { role: 'system', content: opts.system },
+          { role: 'user', content: opts.user },
+        ],
+        max_tokens: maxTokens,
+        temperature,
+        stream: false,
+        chat_template_kwargs: { enable_thinking: false },
+      }),
+      signal: AbortSignal.timeout(TIMEOUT_MS),
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => '');
+      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
+      return '';
+    }
+    const data = (await res.json()) as {
+      choices?: Array<{
+        message?: { content?: string; reasoning_content?: string };
+      }>;
+    };
+    const choice = data.choices?.[0]?.message;
+    if (!choice) return '';
+    const content = (choice.content ?? '').trim();
+    if (content.length > 0) return content;
+    // No content: fall back to the last non-empty line of the reasoning text.
+    const reasoning = choice.reasoning_content ?? '';
+    if (reasoning.length === 0) return '';
+    const lines = reasoning.split('\n').map((l) => l.trim()).filter((l) => l.length > 0);
+    return lines[lines.length - 1] ?? '';
+  } catch (err) {
+    console.warn('task-model: request failed', err);
+    return '';
+  }
+}
+
+/**
+ * Picks the endpoint and model for a task-model request.
+ *
+ * TASK_MODEL_URL, when set, wins and is paired with TASK_MODEL_NAME. Otherwise
+ * LLAMA_SWAP_URL is used with FAST_MODEL, then fallbackModel, then DEFAULT_MODEL.
+ */
+function resolveEndpoint(
+  config: Config,
+  fallbackModel?: string,
+): { url: string; model: string } {
+  if (config.TASK_MODEL_URL) {
+    return { url: config.TASK_MODEL_URL, model: TASK_MODEL_NAME };
+  }
+  const model = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
+  return { url: config.LLAMA_SWAP_URL, model };
+}
diff --git a/apps/server/src/services/task-search-rewrite.ts b/apps/server/src/services/task-search-rewrite.ts
new file mode 100644
index 0000000..0ae3d8c
--- /dev/null
+++ b/apps/server/src/services/task-search-rewrite.ts
@@ -0,0 +1,19 @@
+import { taskModelCompletion } from './task-model.js';
+
+const SYSTEM_PROMPT =
+  'You rewrite user messages into concise web search queries.
Reply with ONLY the search query. 3 to 6 words. No quotes, no explanation.';
+
+const MAX_INPUT_CHARS = 500;
+const FALLBACK_CHARS = 60;
+/** Rewrites userMessage into a short search query; falls back to its first FALLBACK_CHARS chars. */
+export async function rewriteSearchQuery(userMessage: string): Promise<string> {
+  const input = userMessage.slice(0, MAX_INPUT_CHARS);
+  const result = await taskModelCompletion({
+    system: SYSTEM_PROMPT,
+    user: input,
+    maxTokens: 20,
+    temperature: 0.2,
+  });
+  if (result.length > 0) return result;
+  return userMessage.slice(0, FALLBACK_CHARS).trim();
+}
diff --git a/apps/server/src/services/task-summary.ts b/apps/server/src/services/task-summary.ts
new file mode 100644
index 0000000..f7d6aea
--- /dev/null
+++ b/apps/server/src/services/task-summary.ts
@@ -0,0 +1,44 @@
+import { taskModelCompletion } from './task-model.js';
+
+const SYSTEM_PROMPT =
+  'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';
+
+/** Maximum number of transcript characters sent to the task model. */
+const MAX_INPUT_CHARS = 1000;
+
+/** Number of trailing messages that make up the transcript. */
+const MAX_MESSAGES = 6;
+
+/**
+ * Asks the task model for a one-sentence summary of the trailing messages.
+ *
+ * Each message is rendered as "role: content" and the entries are joined with
+ * newlines. When the result exceeds MAX_INPUT_CHARS, every entry is clipped to
+ * an equal share of the budget so one long message cannot crowd out the ones
+ * after it.
+ *
+ * @param messages - Conversation messages; only the last MAX_MESSAGES entries
+ *   are used.
+ * @returns '' when messages is empty; otherwise whatever taskModelCompletion
+ *   resolves to.
+ */
+export async function oneLineSummary(
+  messages: Array<{ role: string; content: string }>,
+): Promise<string> {
+  const lines = messages.slice(-MAX_MESSAGES).map((m) => `${m.role}: ${m.content}`);
+  // Nothing to summarize: skip the request instead of sending an empty prompt.
+  if (lines.length === 0) return '';
+  let input = lines.join('\n');
+  if (input.length > MAX_INPUT_CHARS) {
+    const share = Math.floor(MAX_INPUT_CHARS / lines.length);
+    input = lines
+      .map((line) => line.slice(0, share))
+      .join('\n')
+      .slice(0, MAX_INPUT_CHARS);
+  }
+  return taskModelCompletion({
+    system: SYSTEM_PROMPT,
+    user: input,
+    maxTokens: 30,
+    temperature: 0.3,
+  });
+}
diff --git a/apps/server/src/services/task-tags.ts b/apps/server/src/services/task-tags.ts
new file mode 100644
index 0000000..2197e98
--- /dev/null
+++ b/apps/server/src/services/task-tags.ts
@@ -0,0 +1,40 @@
+import { taskModelCompletion } from './task-model.js';
+
+const SYSTEM_PROMPT =
+  'You tag chat sessions. Reply with 1 to 3 lowercase tags separated by commas. Tags should describe the topic. No explanation. Examples: "docker, deployment", "python, debugging", "react, styling".';
+
+/** Cap on returned tags, matching the "1 to 3" requested in SYSTEM_PROMPT. */
+const MAX_TAGS = 3;
+
+/**
+ * Asks the task model for topic tags describing one user/assistant exchange.
+ *
+ * Only the first 300 characters of each side are sent. The reply is split on
+ * commas; each piece is stripped of surrounding whitespace and quote
+ * characters (the prompt's examples are quoted, so a reply may be too) and
+ * lowercased. Empty pieces, pieces longer than 30 characters, and duplicates
+ * are dropped.
+ *
+ * @param userMessage - The user's message text.
+ * @param assistantReply - The assistant's reply text.
+ * @returns At most MAX_TAGS tags, or [] when taskModelCompletion returns ''.
+ */
+export async function suggestTags(
+  userMessage: string,
+  assistantReply: string,
+): Promise<string[]> {
+  const input = `User: ${userMessage.slice(0, 300)}\nAssistant: ${assistantReply.slice(0, 300)}`;
+  const result = await taskModelCompletion({
+    system: SYSTEM_PROMPT,
+    user: input,
+    maxTokens: 30,
+    temperature: 0.3,
+  });
+  if (result.length === 0) return [];
+  const tags = result
+    .split(',')
+    .map((t) => t.replace(/^["'`\s]+|["'`\s]+$/g, '').toLowerCase())
+    .filter((t) => t.length > 0 && t.length <= 30);
+  // The reply may repeat a tag or contain more tags than were requested.
+  return [...new Set(tags)].slice(0, MAX_TAGS);
+}