import { loadConfig, type Config } from '../config.js';

/** Upper bound, in milliseconds, on a single completion request before it is aborted. */
const TIMEOUT_MS = 10_000;

/** The subset of a chat-completions response body that this module reads. */
interface ChatCompletionResponse {
  choices?: Array<{
    message?: { content?: unknown; reasoning_content?: unknown };
  }>;
}

/**
 * Sends one non-streaming chat-completion request (a system message plus a
 * user message) to the configured endpoint and returns the reply text.
 *
 * This function is best-effort: a non-2xx response, a timeout, a network
 * error or an unusable response body is logged with `console.warn` (where
 * applicable) and reported to the caller as an empty string rather than as
 * a rejected promise.
 *
 * @param opts.system - Content of the `system` message.
 * @param opts.user - Content of the `user` message.
 * @param opts.maxTokens - Value sent as `max_tokens`; defaults to 30.
 * @param opts.temperature - Sampling temperature; defaults to 0.3.
 * @param opts.fallbackModel - Model name used when no `TASK_MODEL_URL` and no
 *   `FAST_MODEL` are configured (see `resolveEndpoint`).
 * @returns The trimmed message content; if that is empty, the last non-blank
 *   line of `reasoning_content`; otherwise `''`.
 */
export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
}): Promise<string> {
  const config = loadConfig();
  const maxTokens = opts.maxTokens ?? 30;
  const temperature = opts.temperature ?? 0.3;
  const { url, model } = resolveEndpoint(config, opts.fallbackModel);

  try {
    // Strip trailing slashes so a base URL configured as "http://host/" does
    // not produce "http://host//v1/...". Done inside the try block so that a
    // missing URL still ends in the logged, empty-string failure path.
    const endpoint = `${url.replace(/\/+$/, '')}/v1/chat/completions`;

    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        max_tokens: maxTokens,
        temperature,
        stream: false,
        // NOTE(review): presumably disables "thinking" output on servers whose
        // chat template honors this flag — confirm against the target server.
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    if (!res.ok) {
      // Reading the error body is itself best-effort; only a short prefix is logged.
      const text = await res.text().catch(() => '');
      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
      return '';
    }

    return extractCompletionText(await res.json());
  } catch (err) {
    console.warn('task-model: request failed', err);
    return '';
  }
}

/**
 * Pulls the reply text out of a parsed chat-completions response body.
 *
 * The body comes from the network, so nothing about its shape is trusted:
 * fields that are missing or are not strings are treated as empty instead of
 * throwing.
 *
 * @param data - The parsed JSON body.
 * @returns The trimmed `content` of the first choice; if that is empty, the
 *   last non-blank line of its `reasoning_content`; otherwise `''`.
 */
function extractCompletionText(data: unknown): string {
  const message = (data as ChatCompletionResponse | null | undefined)?.choices?.[0]?.message;
  if (!message) return '';

  const content = typeof message.content === 'string' ? message.content.trim() : '';
  if (content.length > 0) return content;

  // Fall back to the reasoning text when the visible content is empty.
  const reasoning =
    typeof message.reasoning_content === 'string' ? message.reasoning_content : '';
  const lines = reasoning
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
  return lines[lines.length - 1] ?? '';
}

/**
 * Chooses the base URL and model name for a completion request.
 *
 * When `TASK_MODEL_URL` is configured, that URL is used together with the
 * fixed model name `gemma-3-270m-it`. Otherwise the request goes to
 * `LLAMA_SWAP_URL` and the model is the first defined of `FAST_MODEL`,
 * `fallbackModel` and `DEFAULT_MODEL`.
 *
 * @param config - Loaded application configuration.
 * @param fallbackModel - Optional caller-supplied model name.
 * @returns The base URL and model name to use.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  if (config.TASK_MODEL_URL) {
    return { url: config.TASK_MODEL_URL, model: 'gemma-3-270m-it' };
  }
  const model = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
  return { url: config.LLAMA_SWAP_URL, model };
}