v2.5.0-task-model: lightweight task model services + tasks table

Task model infrastructure for cheap LLM calls (auto-naming, search rewrite, tags, summaries) via a dedicated llama-server instance at TASK_MODEL_URL, falling back to LLAMA_SWAP_URL with FAST_MODEL when unset. Replaces the inline fetch in auto_name.ts with taskModelCompletion. Adds search query rewriting: on step 0 when web tools are enabled, the user's message is summarized into a search intent hint appended to the system prompt, improving web_search relevance. Schema: tasks table for provider dispatch and arena, sessions.tags column. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-27 21:44:39 +00:00
parent bcfc94fa47
commit fcc7c5a86e
8 changed files with 194 additions and 54 deletions
--- a/apps/server/src/services/task-summary.ts
+++ b/apps/server/src/services/task-summary.ts
@@ -0,0 +1,47 @@
+import { taskModelCompletion } from './task-model.js';
+
+/** Instruction given to the task model: one short sentence, no decoration. */
+const SYSTEM_PROMPT =
+  'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';
+
+/** Cap, in UTF-16 code units, on the transcript text sent to the task model. */
+const MAX_INPUT_CHARS = 1000;
+
+/**
+ * Summarizes the tail of a conversation in a single sentence.
+ *
+ * The last six messages are rendered as `role: content` lines and the
+ * transcript is cut to its first {@link MAX_INPUT_CHARS} code units before
+ * being sent to the task model.
+ *
+ * @param messages - Conversation messages, oldest first (assumed; confirm with callers).
+ * @returns The text resolved by `taskModelCompletion`, or an empty string when
+ *   `messages` is empty.
+ */
+export async function oneLineSummary(
+  messages: Array<{ role: string; content: string }>,
+): Promise<string> {
+  // An empty conversation has nothing to summarize; skip the model call.
+  if (messages.length === 0) {
+    return '';
+  }
+  let input = messages
+    .slice(-6)
+    .map((m) => `${m.role}: ${m.content}`)
+    .join('\n');
+  if (input.length > MAX_INPUT_CHARS) {
+    input = input.slice(0, MAX_INPUT_CHARS);
+    // `slice` counts UTF-16 code units, so the cut can land between the two
+    // halves of a surrogate pair; drop a dangling high surrogate.
+    const lastUnit = input.charCodeAt(input.length - 1);
+    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+      input = input.slice(0, -1);
+    }
+  }
+  return taskModelCompletion({
+    system: SYSTEM_PROMPT,
+    user: input,
+    maxTokens: 30,
+    temperature: 0.3,
+  });
+}