Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
69 lines
2.1 KiB
TypeScript
69 lines
2.1 KiB
TypeScript
import { loadConfig, type Config } from '../config.js';
|
|
|
|
/**
 * Upper bound, in milliseconds, for one task-model HTTP request. It is passed
 * to `AbortSignal.timeout` as the `signal` of the `fetch` call in
 * `taskModelCompletion`.
 */
const TIMEOUT_MS = 10_000;
|
|
|
|
/**
 * Runs one non-streaming chat completion against the task model and returns
 * the generated text.
 *
 * The base URL and model name are chosen by `resolveEndpoint` from the loaded
 * config. The request is sent to `<base>/v1/chat/completions` with a system
 * and a user message.
 *
 * Request failures are best-effort: a non-2xx response, a network error, a
 * timeout after `TIMEOUT_MS`, or an unparsable body is logged with
 * `console.warn` and results in an empty string. `loadConfig` and
 * `resolveEndpoint` run outside that guard, so anything they throw propagates.
 *
 * @param opts.system - Content of the system message.
 * @param opts.user - Content of the user message.
 * @param opts.maxTokens - Sent as `max_tokens`; defaults to 30.
 * @param opts.temperature - Sent as `temperature`; defaults to 0.3.
 * @param opts.fallbackModel - Forwarded to `resolveEndpoint` for model selection.
 * @returns The trimmed `content` of the first choice; if that is empty, the
 *   last non-blank line of its `reasoning_content`; otherwise `''`.
 */
export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
}): Promise<string> {
  const config = loadConfig();
  const maxTokens = opts.maxTokens ?? 30;
  const temperature = opts.temperature ?? 0.3;

  const { url, model } = resolveEndpoint(config, opts.fallbackModel);

  try {
    // A base URL configured with a trailing slash would otherwise yield
    // `//v1/chat/completions`; strip trailing slashes before joining.
    const endpoint = `${url.replace(/\/+$/, '')}/v1/chat/completions`;

    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        max_tokens: maxTokens,
        temperature,
        stream: false,
        // NOTE(review): presumably asks the server's chat template to skip
        // reasoning output; the reasoning_content fallback below covers
        // responses that still put their text there. Confirm against server.
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    if (!res.ok) {
      // Log a bounded excerpt of the body; reading it is itself best-effort.
      const text = await res.text().catch(() => '');
      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
      return '';
    }

    // The body is external input: the cast only describes the expected shape,
    // so every field is re-checked at runtime before use.
    const data = (await res.json()) as {
      choices?: Array<{
        message?: { content?: unknown; reasoning_content?: unknown };
      }>;
    };
    const choice = data.choices?.[0]?.message;
    if (!choice) return '';

    const content =
      typeof choice.content === 'string' ? choice.content.trim() : '';
    if (content.length > 0) return content;

    const reasoning =
      typeof choice.reasoning_content === 'string'
        ? choice.reasoning_content
        : '';
    if (reasoning.length === 0) return '';

    // Fall back to the last non-blank line of the reasoning text.
    const lines = reasoning
      .split('\n')
      .map((l) => l.trim())
      .filter((l) => l.length > 0);
    return lines[lines.length - 1] ?? '';
  } catch (err) {
    console.warn('task-model: request failed', err);
    return '';
  }
}
|
|
|
|
/**
 * Picks the base URL and model name for a task-model request.
 *
 * When `config.TASK_MODEL_URL` is truthy, that URL is used together with the
 * fixed model name `gemma-3-270m-it`, and `fallbackModel` is ignored.
 * Otherwise the request targets `config.LLAMA_SWAP_URL`, and the model is the
 * first defined value among `config.FAST_MODEL`, `fallbackModel` and
 * `config.DEFAULT_MODEL`.
 *
 * @param config - Loaded application config.
 * @param fallbackModel - Model used when no fast model is configured.
 * @returns The base URL and model name to send the request with.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  const dedicatedUrl = config.TASK_MODEL_URL;

  // No dedicated task server configured: route through the shared endpoint.
  if (!dedicatedUrl) {
    return {
      url: config.LLAMA_SWAP_URL,
      model: config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL,
    };
  }

  return { url: dedicatedUrl, model: 'gemma-3-270m-it' };
}
|