v2.5.0-task-model: lightweight task model services + tasks table
Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
68
apps/server/src/services/task-model.ts
Normal file
68
apps/server/src/services/task-model.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { loadConfig, type Config } from '../config.js';
|
||||
|
||||
// Request timeout in milliseconds; passed to AbortSignal.timeout() for the
// task-model fetch so a stalled server cannot hold up the caller indefinitely.
const TIMEOUT_MS = 10_000;
|
||||
|
||||
/**
 * Send one non-streaming chat completion to the task-model endpoint and return
 * the generated text.
 *
 * The endpoint and model name come from `resolveEndpoint`. Any failure inside
 * the request/response handling (network error, timeout, non-2xx status,
 * unparsable body) is logged with `console.warn` and reported as an empty
 * string rather than thrown. Errors raised by `loadConfig` or
 * `resolveEndpoint` are not caught here.
 *
 * @param opts.system - Content of the `system` message.
 * @param opts.user - Content of the `user` message.
 * @param opts.maxTokens - Sent as `max_tokens`; defaults to 30.
 * @param opts.temperature - Sent as `temperature`; defaults to 0.3.
 * @param opts.fallbackModel - Forwarded to `resolveEndpoint` as its fallback model.
 * @returns The trimmed message content; if that is empty, the last non-blank
 *   line of `reasoning_content`; otherwise `''`.
 */
export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
}): Promise<string> {
  const { url, model } = resolveEndpoint(loadConfig(), opts.fallbackModel);

  // Key order is kept stable so the serialized request body is unchanged.
  const payload = {
    model,
    messages: [
      { role: 'system', content: opts.system },
      { role: 'user', content: opts.user },
    ],
    max_tokens: opts.maxTokens ?? 30,
    temperature: opts.temperature ?? 0.3,
    stream: false,
    chat_template_kwargs: { enable_thinking: false },
  };

  try {
    const response = await fetch(`${url}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    if (!response.ok) {
      // Reading the error body is best-effort; a failed read logs an empty body.
      let errorBody = '';
      try {
        errorBody = await response.text();
      } catch {
        errorBody = '';
      }
      console.warn(`task-model: ${response.status} ${errorBody.slice(0, 200)}`);
      return '';
    }

    const parsed = (await response.json()) as {
      choices?: Array<{
        message?: { content?: string; reasoning_content?: string };
      }>;
    };

    const message = parsed.choices?.[0]?.message;
    if (!message) {
      return '';
    }

    const answer = (message.content ?? '').trim();
    if (answer.length > 0) {
      return answer;
    }

    // No visible content: fall back to the last non-blank line of the
    // reasoning text, scanning from the end.
    const reasoningLines = (message.reasoning_content ?? '').split('\n');
    for (const rawLine of reasoningLines.reverse()) {
      const candidate = rawLine.trim();
      if (candidate.length > 0) {
        return candidate;
      }
    }
    return '';
  } catch (err) {
    console.warn('task-model: request failed', err);
    return '';
  }
}
|
||||
|
||||
/**
 * Choose the base URL and model name for a task-model request.
 *
 * When `config.TASK_MODEL_URL` is set, that URL is used together with the fixed
 * model name `gemma-3-270m-it`. Otherwise the request targets
 * `config.LLAMA_SWAP_URL` with the first usable model name among
 * `config.FAST_MODEL`, `fallbackModel`, and finally `config.DEFAULT_MODEL`.
 *
 * Empty or whitespace-only model names count as unset, mirroring the truthiness
 * check on `TASK_MODEL_URL`, so a blank setting cannot produce a request with
 * no model. Trailing slashes are stripped from the returned URL because the
 * caller appends `/v1/chat/completions` to it.
 *
 * @param config - Loaded application config.
 * @param fallbackModel - Model to use when `config.FAST_MODEL` is unset or blank.
 * @returns The base URL (no trailing slash) and the model name to send.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  const stripTrailingSlashes = (base: string): string => base.replace(/\/+$/, '');

  if (config.TASK_MODEL_URL) {
    return { url: stripTrailingSlashes(config.TASK_MODEL_URL), model: 'gemma-3-270m-it' };
  }

  // `??` alone would accept '' as a model name; skip blank candidates instead.
  const isUsableName = (name: unknown): name is string =>
    typeof name === 'string' && name.trim().length > 0;
  const model = [config.FAST_MODEL, fallbackModel].find(isUsableName) ?? config.DEFAULT_MODEL;

  return { url: stripTrailingSlashes(config.LLAMA_SWAP_URL), model };
}
|
||||
Reference in New Issue
Block a user