v2.5.0-task-model: lightweight task model services + tasks table
Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,11 @@ POSTGRES_PASSWORD=CHANGE_ME
|
||||
# point BooCode at a different SearXNG instance.
|
||||
SEARXNG_URL=http://100.114.205.53:8888
|
||||
|
||||
# Task model: lightweight model for auto-naming, search rewrite, etc.
|
||||
# Direct llama-server instance (NOT llama-swap). Falls back to LLAMA_SWAP_URL
|
||||
# with FAST_MODEL when unset.
|
||||
# TASK_MODEL_URL=http://100.90.172.55:7995
|
||||
|
||||
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
||||
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
||||
# sessions where the model only needs read-only filesystem access.
|
||||
|
||||
@@ -344,6 +344,7 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k
|
||||
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
|
||||
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
|
||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
|
||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS tags TEXT[] DEFAULT '{}';
|
||||
|
||||
-- v1.11: anchored rolling compaction.
|
||||
-- compacted_at — marks rows that are "behind the curtain" of the latest
|
||||
@@ -366,3 +367,39 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT F
|
||||
ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
|
||||
ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
|
||||
|
||||
-- tasks table (provider dispatch, arena).
-- Rows are removed together with their project or session (ON DELETE CASCADE).
-- parent_task_id is a self-reference with no ON DELETE action; arena_id carries
-- no foreign key.
CREATE TABLE IF NOT EXISTS tasks (
  id                  UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
  project_id          UUID         NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
  session_id          UUID         REFERENCES sessions(id) ON DELETE CASCADE,
  parent_task_id      UUID         REFERENCES tasks(id),
  arena_id            UUID,
  -- Lifecycle state; restricted to the values listed in the CHECK.
  state               TEXT         NOT NULL DEFAULT 'pending'
                                   CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')),
  input               TEXT         NOT NULL,
  output_summary      TEXT,
  agent               TEXT,
  model               TEXT,
  mode_id             TEXT,
  thinking_option_id  TEXT,
  feature_values      JSONB,
  -- Optional; when present it must be one of the listed execution paths.
  execution_path      TEXT         CHECK (execution_path IS NULL OR execution_path IN ('native','acp','pty','qwen')),
  worktree_path       TEXT,
  cost_tokens         INTEGER,
  started_at          TIMESTAMPTZ,
  ended_at            TIMESTAMPTZ,
  created_at          TIMESTAMPTZ  NOT NULL DEFAULT clock_timestamp()
);
|
||||
|
||||
-- Fix tasks FK to cascade on session delete (existing tables without CASCADE).
-- The lookup is scoped to the tasks table (conrelid) and to foreign keys
-- (contype = 'f') because constraint names are not unique across tables or
-- schemas; matching on conname alone could react to an unrelated constraint.
DO $$ BEGIN
  IF EXISTS (
    SELECT 1
      FROM pg_constraint
     WHERE conname = 'tasks_session_id_fkey'
       AND conrelid = 'tasks'::regclass
       AND contype = 'f'
       AND confdeltype != 'c'  -- 'c' = ON DELETE CASCADE already in place
  ) THEN
    ALTER TABLE tasks DROP CONSTRAINT tasks_session_id_fkey;
    ALTER TABLE tasks ADD CONSTRAINT tasks_session_id_fkey
      FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
  END IF;
END $$;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import type { InferenceContext } from './inference/index.js';
|
||||
import { taskModelCompletion } from './task-model.js';
|
||||
|
||||
const NAMING_SYSTEM_PROMPT =
|
||||
'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
|
||||
'You name chat sessions. Reply with ONLY the title. 4 to 6 words. No quotes, no punctuation, no prefix.';
|
||||
|
||||
const MAX_TITLE_CHARS = 60;
|
||||
const MAX_TITLE_CHARS = 80;
|
||||
|
||||
function cleanTitle(raw: string): string {
|
||||
let name = raw.trim();
|
||||
@@ -18,27 +19,7 @@ function cleanTitle(raw: string): string {
|
||||
return name;
|
||||
}
|
||||
|
||||
interface NamingResponse {
|
||||
choices?: Array<{
|
||||
message?: {
|
||||
content?: string;
|
||||
reasoning_content?: string;
|
||||
};
|
||||
}>;
|
||||
}
|
||||
|
||||
function pickTitleSource(data: NamingResponse): string {
|
||||
const choice = data.choices?.[0]?.message;
|
||||
if (!choice) return '';
|
||||
if (choice.content && choice.content.trim().length > 0) return choice.content;
|
||||
const reasoning = choice.reasoning_content ?? '';
|
||||
if (reasoning.length === 0) return '';
|
||||
const lines = reasoning
|
||||
.split('\n')
|
||||
.map((l) => l.trim())
|
||||
.filter((l) => l.length > 0);
|
||||
return lines[lines.length - 1] ?? '';
|
||||
}
|
||||
// TODO: wire suggestTags after task model validation
|
||||
|
||||
export async function maybeAutoNameChat(
|
||||
ctx: InferenceContext,
|
||||
@@ -64,13 +45,6 @@ export async function maybeAutoNameChat(
|
||||
if (!chat) return;
|
||||
if (chat.name !== null && chat.name !== '') return;
|
||||
|
||||
const sessionRows = await ctx.sql<{ model: string }[]>`
|
||||
SELECT model FROM sessions WHERE id = ${sessionId}
|
||||
`;
|
||||
// v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
|
||||
const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
|
||||
if (!model) return;
|
||||
|
||||
const assistantMsg = await ctx.sql<{ content: string }[]>`
|
||||
SELECT content FROM messages
|
||||
WHERE chat_id = ${chatId}
|
||||
@@ -84,32 +58,12 @@ export async function maybeAutoNameChat(
|
||||
|
||||
const assistantText = assistantMsg[0].content.slice(0, 2000);
|
||||
|
||||
const body = {
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: NAMING_SYSTEM_PROMPT },
|
||||
{
|
||||
role: 'user',
|
||||
content: assistantText,
|
||||
},
|
||||
],
|
||||
max_tokens: 30,
|
||||
const raw = await taskModelCompletion({
|
||||
system: NAMING_SYSTEM_PROMPT,
|
||||
user: assistantText,
|
||||
maxTokens: 30,
|
||||
temperature: 0.3,
|
||||
stream: false,
|
||||
chat_template_kwargs: { enable_thinking: false },
|
||||
};
|
||||
|
||||
const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => '');
|
||||
throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`);
|
||||
}
|
||||
const data = (await res.json()) as NamingResponse;
|
||||
const raw = pickTitleSource(data);
|
||||
const name = cleanTitle(raw);
|
||||
if (!name) {
|
||||
ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
|
||||
|
||||
@@ -14,6 +14,7 @@ import type {
|
||||
import { ALL_TOOLS } from '../tools.js';
|
||||
import { resolveProjectRoot } from '../path_guard.js';
|
||||
import { maybeAutoNameChat } from '../auto_name.js';
|
||||
import { rewriteSearchQuery } from '../task-search-rewrite.js';
|
||||
import { getAgentById } from '../agents.js';
|
||||
import * as compaction from '../compaction.js';
|
||||
import type { Broker } from '../broker.js';
|
||||
@@ -254,6 +255,16 @@ export async function runAssistantTurn(
|
||||
const webToolsEnabled =
|
||||
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
|
||||
|
||||
if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
|
||||
const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
|
||||
if (lastUserMsg?.content) {
|
||||
const hint = await rewriteSearchQuery(lastUserMsg.content);
|
||||
if (hint && messages[0]?.role === 'system' && messages[0].content) {
|
||||
messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
|
||||
const state: StreamPhaseState = { accumulated: '', startedAt: null };
|
||||
let result: StreamResult;
|
||||
|
||||
68
apps/server/src/services/task-model.ts
Normal file
68
apps/server/src/services/task-model.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { loadConfig, type Config } from '../config.js';
|
||||
|
||||
const TIMEOUT_MS = 10_000;
|
||||
|
||||
/**
 * Run one non-streaming chat completion against the task model and return its text.
 *
 * This is a best-effort helper: it never throws. A blank prompt, a non-2xx
 * response, a timeout, a network error, or a response without usable text all
 * yield the empty string, so callers only need to check for `''`.
 *
 * @param opts.system - System prompt for the request.
 * @param opts.user - User prompt; when blank no request is made.
 * @param opts.maxTokens - Completion token cap (default 30).
 * @param opts.temperature - Sampling temperature (default 0.3).
 * @param opts.fallbackModel - Model passed to `resolveEndpoint` for endpoint/model selection.
 * @returns The trimmed completion text, or `''` when nothing usable was produced.
 */
export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
}): Promise<string> {
  // A blank prompt gives the model nothing to work from; skip the round trip
  // and report "no result" the same way a failed request would.
  if (opts.user.trim().length === 0) return '';

  const config = loadConfig();
  const maxTokens = opts.maxTokens ?? 30;
  const temperature = opts.temperature ?? 0.3;

  const { url, model } = resolveEndpoint(config, opts.fallbackModel);

  try {
    const res = await fetch(`${url}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        max_tokens: maxTokens,
        temperature,
        stream: false,
        // Requests that the chat template not emit a thinking phase.
        chat_template_kwargs: { enable_thinking: false },
      }),
      // Abort the request once TIMEOUT_MS has elapsed.
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });
    if (!res.ok) {
      const text = await res.text().catch(() => '');
      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
      return '';
    }
    const data = (await res.json()) as {
      choices?: Array<{
        message?: { content?: string; reasoning_content?: string };
      }>;
    };
    const choice = data.choices?.[0]?.message;
    if (!choice) return '';

    const content = (choice.content ?? '').trim();
    if (content.length > 0) return content;

    // No regular content: fall back to the last non-empty line of the
    // reasoning text, if the server returned any.
    const reasoning = choice.reasoning_content ?? '';
    if (reasoning.length === 0) return '';
    const lines = reasoning
      .split('\n')
      .map((l) => l.trim())
      .filter((l) => l.length > 0);
    return lines[lines.length - 1] ?? '';
  } catch (err: unknown) {
    // Covers network failures, timeouts and malformed JSON bodies.
    console.warn('task-model: request failed', err);
    return '';
  }
}
|
||||
|
||||
/**
 * Choose the base URL and model name for a task-model request.
 *
 * When `config.TASK_MODEL_URL` is set, that server is used with the fixed
 * model name `gemma-3-270m-it`. Otherwise the request goes to
 * `config.LLAMA_SWAP_URL` with the first defined of `config.FAST_MODEL`,
 * `fallbackModel` and `config.DEFAULT_MODEL`.
 *
 * Trailing slashes are removed from the returned URL so that appending a path
 * such as `/v1/chat/completions` never produces a double slash.
 *
 * @param config - Loaded server configuration.
 * @param fallbackModel - Model to use when `FAST_MODEL` is not configured.
 * @returns The base URL (without trailing slash) and the model name to send.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  const withoutTrailingSlash = (base: string): string => base.replace(/\/+$/, '');

  if (config.TASK_MODEL_URL) {
    return { url: withoutTrailingSlash(config.TASK_MODEL_URL), model: 'gemma-3-270m-it' };
  }
  const model = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
  return { url: withoutTrailingSlash(config.LLAMA_SWAP_URL), model };
}
|
||||
19
apps/server/src/services/task-search-rewrite.ts
Normal file
19
apps/server/src/services/task-search-rewrite.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { taskModelCompletion } from './task-model.js';
|
||||
|
||||
const SYSTEM_PROMPT =
|
||||
'You rewrite user messages into concise web search queries. Reply with ONLY the search query. 3 to 6 words. No quotes, no explanation.';
|
||||
|
||||
const MAX_INPUT_CHARS = 500;
|
||||
const FALLBACK_CHARS = 60;
|
||||
|
||||
/**
 * Turn a user message into a short web-search query using the task model.
 *
 * The message is truncated to `MAX_INPUT_CHARS` before being sent. If the
 * model yields nothing usable, the first `FALLBACK_CHARS` characters of the
 * message are returned instead. In both cases double quotes are removed and
 * whitespace runs (including newlines) are collapsed to single spaces, so the
 * result can be embedded in a quoted, single-line hint.
 *
 * @param userMessage - Raw text of the user's message.
 * @returns A single-line query, or `''` when `userMessage` is blank.
 */
export async function rewriteSearchQuery(userMessage: string): Promise<string> {
  // Strip double quotes and flatten whitespace so the query stays on one line.
  const tidy = (text: string): string => text.replace(/"/g, '').replace(/\s+/g, ' ').trim();

  // Nothing to rewrite: avoid a model call that could only invent a query.
  if (userMessage.trim().length === 0) return '';

  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: userMessage.slice(0, MAX_INPUT_CHARS),
    maxTokens: 20,
    temperature: 0.2,
  });

  const rewritten = tidy(result);
  if (rewritten.length > 0) return rewritten;

  // Model unavailable or returned nothing usable: fall back to the message head.
  return tidy(userMessage.slice(0, FALLBACK_CHARS));
}
|
||||
24
apps/server/src/services/task-summary.ts
Normal file
24
apps/server/src/services/task-summary.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import { taskModelCompletion } from './task-model.js';
|
||||
|
||||
const SYSTEM_PROMPT =
|
||||
'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';
|
||||
|
||||
const MAX_INPUT_CHARS = 1000;
|
||||
|
||||
/**
 * Produce a one-sentence summary of the tail of a conversation via the task model.
 *
 * Only the last six messages are considered. Messages whose content is blank
 * are dropped, the rest are rendered as `role: content` lines, and the joined
 * transcript is cut to `MAX_INPUT_CHARS` before being sent.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns The model's summary, or `''` when there is nothing to summarize or
 *   the task model produced no text.
 */
export async function oneLineSummary(
  messages: Array<{ role: string; content: string }>,
): Promise<string> {
  const transcript = messages
    .slice(-6)
    .filter((m) => m.content.trim().length > 0)
    .map((m) => `${m.role}: ${m.content}`)
    .join('\n');

  // No usable text: skip the model call rather than summarize an empty prompt.
  if (transcript.length === 0) return '';

  return taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: transcript.slice(0, MAX_INPUT_CHARS),
    maxTokens: 30,
    temperature: 0.3,
  });
}
|
||||
22
apps/server/src/services/task-tags.ts
Normal file
22
apps/server/src/services/task-tags.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { taskModelCompletion } from './task-model.js';
|
||||
|
||||
const SYSTEM_PROMPT =
|
||||
'You tag chat sessions. Reply with 1 to 3 lowercase tags separated by commas. Tags should describe the topic. No explanation. Examples: "docker, deployment", "python, debugging", "react, styling".';
|
||||
|
||||
/**
 * Ask the task model for topic tags describing a user/assistant exchange.
 *
 * Each side of the exchange is truncated to 300 characters before being sent.
 * The reply is split on commas and newlines; each piece has wrapping quotes
 * and whitespace removed and is lower-cased. Pieces that are empty or longer
 * than 30 characters are discarded, duplicates are removed, and at most three
 * tags are returned (the limit the system prompt asks the model to respect).
 *
 * @param userMessage - The user's message text.
 * @param assistantReply - The assistant's reply text.
 * @returns Up to three distinct lowercase tags; `[]` when both inputs are
 *   blank or the model produced nothing usable.
 */
export async function suggestTags(
  userMessage: string,
  assistantReply: string,
): Promise<string[]> {
  // Nothing to tag: avoid a model call on an empty exchange.
  if (userMessage.trim().length === 0 && assistantReply.trim().length === 0) return [];

  const input = `User: ${userMessage.slice(0, 300)}\nAssistant: ${assistantReply.slice(0, 300)}`;
  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 30,
    temperature: 0.3,
  });
  if (result.length === 0) return [];

  const tags = result
    .split(/[,\n]/)
    // The prompt's examples are quoted, so the model may echo quotes back.
    .map((t) => t.replace(/^["'\s]+|["'\s]+$/g, '').toLowerCase())
    .filter((t) => t.length > 0 && t.length <= 30);

  return Array.from(new Set(tags)).slice(0, 3);
}
|
||||
Reference in New Issue
Block a user