v2.0.5: FAST_MODEL routing + tool-use summaries + Qwen dispatch + Arena

Source-level recon of QwenLM/qwen-code (Apache-2.0) informed 4 lifts:

1. FAST_MODEL config: optional env var routes cheap LLM calls (titles,
   summaries, labeling) to a smaller model on llama-swap. auto_name.ts
   uses ctx.config.FAST_MODEL ?? session.model. For example, set
   FAST_MODEL=nemotron-nano-4b to avoid loading the 35B model for
   20-token title generation.

2. Tool-use summaries (services/inference/tool-summaries.ts): utility
   that generates "git-commit-subject-style" labels for tool batches via
   a fast-model LLM call. System prompt + truncation logic ported from
   Qwen Code's toolUseSummary.ts. Exported via @boocode/server/inference
   for BooCoder's dispatcher to call after task completion.

3. Qwen as dispatchable agent: added to agent-probe.ts KNOWN_AGENTS.
   PTY dispatch builds: qwen -p "<task>" --output-format stream-json
   (NDJSON structured events over stdout). Env: OPENAI_BASE_URL and
   OPENAI_API_KEY point Qwen Code at llama-swap. The execution_path
   CHECK constraint is extended with 'qwen'.

4. Arena routes (routes/arena.ts): POST /api/arena dispatches the same
   task to N contestants (2-5, each with a different agent/model), each
   getting its own task row linked by an arena_id UUID. GET /api/arena/:id
   shows all contestants. POST /api/arena/:id/select/:task_id marks
   the winner. Schema: arena_id column added to tasks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-25 14:05:59 +00:00
parent 06116f31b3
commit e423579e99
10 changed files with 235 additions and 2 deletions

View File

@@ -22,6 +22,9 @@ const ConfigSchema = z.object({
// v1.15.0-mcp-multi: path to the MCP config JSON file. Default /data/mcp.json
// (bind-mounted alongside AGENTS.md). File missing = no MCP (opt-in).
MCP_CONFIG_PATH: z.string().optional(),
// v2.0.5: cheaper model for titles, summaries, labeling. Falls back to
// session model (auto_name) or DEFAULT_MODEL when unset.
FAST_MODEL: z.string().optional(),
});
export type Config = z.infer<typeof ConfigSchema>;

View File

@@ -67,7 +67,8 @@ export async function maybeAutoNameChat(
const sessionRows = await ctx.sql<{ model: string }[]>`
SELECT model FROM sessions WHERE id = ${sessionId}
`;
const model = sessionRows[0]?.model;
// v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
if (!model) return;
const assistantMsg = await ctx.sql<{ content: string }[]>`

View File

@@ -20,3 +20,5 @@ export type {
export type { ToolPhaseResult } from './tool-phase.js';
export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js';
export { buildMessagesPayload } from './payload.js';
export { generateToolUseSummary } from './tool-summaries.js';
export type { ToolInfo } from './tool-summaries.js';

View File

@@ -0,0 +1,81 @@
/**
* v2.0.5: Tool-use summary generation.
*
* After a batch of tool calls completes, fire a cheap LLM call to generate
* a "git-commit-subject-style" one-liner label describing what the tools
* accomplished. Ported from the Qwen Code source recon.
*/
import type { FastifyBaseLogger } from 'fastify';
/** System prompt instructing the model to answer with a single short, past-tense label. */
const TOOL_SUMMARY_SYSTEM_PROMPT = `Write a short summary label describing what these tool calls accomplished. Think git-commit-subject, not sentence. Past tense, most distinctive noun. Max 30 characters. Output ONLY the label.
Examples:
- Searched in auth/
- Fixed NPE in UserService
- Created signup endpoint
- Read config.json
- Ran failing tests`;

/** Maximum number of characters of each tool's input and output copied into the prompt. */
const INPUT_TRUNCATE = 300;

/**
 * Hard cap (in UTF-16 code units) on the returned label. The prompt asks for
 * 30 characters; this bounds whatever the model actually sends back.
 */
const MAX_SUMMARY_LENGTH = 100;

// Decorations models tend to wrap around the label; hoisted so they are built once.
const LEADING_BULLET = /^[-*•]\s+/;
const LABEL_PREFIX = /^(label|summary)\s*:\s*/i;
const EDGE_QUOTE = /^["'`‘’“”]|["'`‘’“”]$/g;

/** One completed tool call, as fed to the summarizer. */
export interface ToolInfo {
  /** Tool name, rendered on the `Tool:` line of the prompt. */
  name: string;
  /** Tool input text; only the first INPUT_TRUNCATE characters are sent. */
  input: string;
  /** Tool output text; only the first INPUT_TRUNCATE characters are sent. */
  output: string;
}

/** Cuts `label` to MAX_SUMMARY_LENGTH code units without splitting a surrogate pair. */
function capSummaryLength(label: string): string {
  if (label.length <= MAX_SUMMARY_LENGTH) return label;
  let end = MAX_SUMMARY_LENGTH;
  const lastUnit = label.charCodeAt(end - 1);
  // A high surrogate at the cut point would be left dangling; drop it too.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return label.slice(0, end).trim();
}

/**
 * Turns raw model output into a clean label, or null when nothing usable is left.
 *
 * Returns the first line that is non-empty after removing list bullets,
 * "Label:" / "Summary:" prefixes and edge quotes. The decorations are removed
 * repeatedly until the text stops changing, so any nesting order works
 * (`Label: "x"` as well as `"Label: x"`).
 */
function cleanSummaryLabel(raw: string): string | null {
  for (const line of raw.split('\n')) {
    let label = line.trim();
    let previous: string;
    do {
      previous = label;
      label = label
        .replace(LEADING_BULLET, '')
        .replace(LABEL_PREFIX, '')
        .replace(EDGE_QUOTE, '')
        .trim();
    } while (label !== previous); // every change shortens the string, so this terminates
    if (label) return capSummaryLength(label);
  }
  return null;
}

/**
 * Generates a short "git-commit-subject-style" label for a batch of tool calls
 * via a single non-streaming chat-completion request.
 *
 * Best-effort: every failure (aborted signal, network error, non-2xx status,
 * malformed response, empty label) is logged at debug level where applicable
 * and reported as `null`; the returned promise does not reject.
 *
 * @param opts.tools - Tool calls to summarize; an empty list returns null without a request.
 * @param opts.llamaSwapUrl - Base URL of the OpenAI-compatible server; trailing slashes are ignored.
 * @param opts.model - Model name sent in the request body.
 * @param opts.log - Logger used for debug-level failure reports.
 * @param opts.signal - Optional abort signal forwarded to `fetch`.
 * @returns The cleaned label (at most MAX_SUMMARY_LENGTH code units), or null.
 */
export async function generateToolUseSummary(opts: {
  tools: ToolInfo[];
  llamaSwapUrl: string;
  model: string;
  log: FastifyBaseLogger;
  signal?: AbortSignal;
}): Promise<string | null> {
  const { tools, llamaSwapUrl, model, log, signal } = opts;
  if (tools.length === 0) return null;
  if (signal?.aborted) return null;

  try {
    // Built inside the try so a malformed tool entry degrades to null like any other failure.
    const toolText = tools
      .map(
        (t) =>
          `Tool: ${t.name}\nInput: ${t.input.slice(0, INPUT_TRUNCATE)}\nOutput: ${t.output.slice(0, INPUT_TRUNCATE)}`,
      )
      .join('\n\n');

    // Tolerate a configured base URL that ends in "/" (avoids "//v1/...").
    const endpoint = `${llamaSwapUrl.replace(/\/+$/, '')}/v1/chat/completions`;

    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: TOOL_SUMMARY_SYSTEM_PROMPT },
          { role: 'user', content: toolText },
        ],
        max_tokens: 30,
        temperature: 0.2,
        stream: false,
        // Asks the chat template to skip "thinking" output; assumes the server honours it.
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal,
    });

    if (!res.ok) {
      log.debug({ status: res.status }, 'tool-summary: LLM request failed');
      // Discard the unread error body so the underlying connection can be released.
      void res.body?.cancel().catch(() => undefined);
      return null;
    }

    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: unknown } }>;
    } | null;
    const content = data?.choices?.[0]?.message?.content;
    // The payload is untrusted JSON: content may be null, missing, or not a string.
    if (typeof content !== 'string') return null;

    return cleanSummaryLabel(content.trim());
  } catch (err) {
    log.debug({ err: err instanceof Error ? err.message : String(err) }, 'tool-summary: error');
    return null;
  }
}