v2.5.0-task-model: lightweight task model services + tasks table
Adds task model infrastructure for cheap LLM calls (auto-naming, search query rewriting, tags, summaries), served by a dedicated llama-server instance at TASK_MODEL_URL; when TASK_MODEL_URL is unset, calls fall back to LLAMA_SWAP_URL with FAST_MODEL. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0, when web tools are enabled, the user's message is summarized into a search-intent hint that is appended to the system prompt to improve web_search relevance. Schema: adds a tasks table (for provider dispatch and arena) and a sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
import type { InferenceContext } from './inference/index.js';
|
||||
import { taskModelCompletion } from './task-model.js';
|
||||
|
||||
const NAMING_SYSTEM_PROMPT =
|
||||
'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
|
||||
'You name chat sessions. Reply with ONLY the title. 4 to 6 words. No quotes, no punctuation, no prefix.';
|
||||
|
||||
const MAX_TITLE_CHARS = 60;
|
||||
const MAX_TITLE_CHARS = 80;
|
||||
|
||||
function cleanTitle(raw: string): string {
|
||||
let name = raw.trim();
|
||||
@@ -18,27 +19,7 @@ function cleanTitle(raw: string): string {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
 * Subset of a chat-completions response that `pickTitleSource` reads.
 * Every level is optional because the value is produced by casting parsed
 * JSON, not by validating it.
 */
interface NamingResponse {
  choices?: Array<{
    message?: {
      content?: string;
      reasoning_content?: string;
    };
  }>;
}

/**
 * Picks the raw text a chat title should be derived from.
 *
 * Uses the first choice's `content` when it is a string with at least one
 * non-whitespace character (returned as-is, untrimmed). Otherwise falls back
 * to the last non-blank line of `reasoning_content`, trimmed.
 *
 * @param data - Parsed response body. It is not validated upstream, so a
 *   nullish body or wrongly-typed fields are tolerated here.
 * @returns The candidate title text, or `''` when nothing usable is present.
 */
function pickTitleSource(data: NamingResponse): string {
  // The static type is only a cast over JSON; guard at runtime so a null body
  // or a non-string field yields '' instead of throwing a TypeError.
  const message = (data as NamingResponse | null | undefined)?.choices?.[0]?.message;
  if (!message) return '';

  const { content, reasoning_content: reasoning } = message;
  if (typeof content === 'string' && content.trim().length > 0) return content;
  if (typeof reasoning !== 'string') return '';

  // The last non-blank line of the reasoning text is used as the title source.
  const lastLine = reasoning
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .pop();
  return lastLine ?? '';
}
|
||||
// TODO: wire suggestTags after task model validation
|
||||
|
||||
export async function maybeAutoNameChat(
|
||||
ctx: InferenceContext,
|
||||
@@ -64,13 +45,6 @@ export async function maybeAutoNameChat(
|
||||
if (!chat) return;
|
||||
if (chat.name !== null && chat.name !== '') return;
|
||||
|
||||
const sessionRows = await ctx.sql<{ model: string }[]>`
|
||||
SELECT model FROM sessions WHERE id = ${sessionId}
|
||||
`;
|
||||
// v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
|
||||
const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
|
||||
if (!model) return;
|
||||
|
||||
const assistantMsg = await ctx.sql<{ content: string }[]>`
|
||||
SELECT content FROM messages
|
||||
WHERE chat_id = ${chatId}
|
||||
@@ -84,32 +58,12 @@ export async function maybeAutoNameChat(
|
||||
|
||||
const assistantText = assistantMsg[0].content.slice(0, 2000);
|
||||
|
||||
const body = {
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: NAMING_SYSTEM_PROMPT },
|
||||
{
|
||||
role: 'user',
|
||||
content: assistantText,
|
||||
},
|
||||
],
|
||||
max_tokens: 30,
|
||||
const raw = await taskModelCompletion({
|
||||
system: NAMING_SYSTEM_PROMPT,
|
||||
user: assistantText,
|
||||
maxTokens: 30,
|
||||
temperature: 0.3,
|
||||
stream: false,
|
||||
chat_template_kwargs: { enable_thinking: false },
|
||||
};
|
||||
|
||||
const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => '');
|
||||
throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`);
|
||||
}
|
||||
const data = (await res.json()) as NamingResponse;
|
||||
const raw = pickTitleSource(data);
|
||||
const name = cleanTitle(raw);
|
||||
if (!name) {
|
||||
ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
|
||||
|
||||
Reference in New Issue
Block a user