From e423579e99b0b2715bba45448a0a4ff7e781cc39 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Mon, 25 May 2026 14:05:59 +0000 Subject: [PATCH] v2.0.5: FAST_MODEL routing + tool-use summaries + Qwen dispatch + Arena Source-level recon of QwenLM/qwen-code (Apache-2.0) informed 4 lifts: 1. FAST_MODEL config: optional env var routes cheap LLM calls (titles, summaries, labeling) to a smaller model on llama-swap. auto_name.ts uses ctx.config.FAST_MODEL ?? session.model. Set FAST_MODEL=nemotron-nano-4b to avoid loading the 35B model for 20-token title generation. 2. Tool-use summaries (services/inference/tool-summaries.ts): utility that generates "git-commit-subject-style" labels for tool batches via a fast-model LLM call. System prompt + truncation logic ported from Qwen Code's toolUseSummary.ts. Exported via @boocode/server/inference for BooCoder's dispatcher to call after task completion. 3. Qwen as dispatchable agent: added to agent-probe.ts KNOWN_AGENTS. PTY dispatch builds: qwen -p "<task>" --output-format stream-json (NDJSON structured events over stdout). Env: OPENAI_BASE_URL + OPENAI_API_KEY point Qwen Code at llama-swap. execution_path CHECK constraint extended with 'qwen'. 4. Arena routes (routes/arena.ts): POST /api/arena dispatches the same task to N contestants (2-5, each with different agent/model), each getting its own task row linked by arena_id UUID. GET /api/arena/:id shows all contestants. POST /api/arena/:id/select/:task_id marks winner. Schema: arena_id column added to tasks. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/coder/src/config.ts | 2 + apps/coder/src/index.ts | 2 + apps/coder/src/routes/arena.ts | 122 ++++++++++++++++++ apps/coder/src/schema.sql | 14 +- apps/coder/src/services/agent-probe.ts | 1 + apps/coder/src/services/pty-dispatch.ts | 7 + apps/server/src/config.ts | 3 + apps/server/src/services/auto_name.ts | 3 +- apps/server/src/services/inference/index.ts | 2 + .../src/services/inference/tool-summaries.ts | 81 ++++++++++++ 10 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 apps/coder/src/routes/arena.ts create mode 100644 apps/server/src/services/inference/tool-summaries.ts diff --git a/apps/coder/src/config.ts b/apps/coder/src/config.ts index 586f7a8..7ecff4e 100644 --- a/apps/coder/src/config.ts +++ b/apps/coder/src/config.ts @@ -23,6 +23,8 @@ const ConfigSchema = z.object({ GITEA_TOKEN: z.string().optional(), GITEA_SSH_HOST: z.string().default('100.114.205.53:2222'), MCP_CONFIG_PATH: z.string().optional(), + // v2.0.5: cheaper model for titles, summaries, labeling. 
+ FAST_MODEL: z.string().optional(), // SSH access to the host for external agent dispatch (Phase 5) BOOCODER_SSH_HOST: z.string().default('100.114.205.53'), BOOCODER_SSH_USER: z.string().default('samkintop'), diff --git a/apps/coder/src/index.ts b/apps/coder/src/index.ts index 8632f55..9b60999 100644 --- a/apps/coder/src/index.ts +++ b/apps/coder/src/index.ts @@ -27,6 +27,7 @@ import { registerPendingRoutes } from './routes/pending.js'; import { registerTaskRoutes } from './routes/tasks.js'; import { registerInboxRoutes } from './routes/inbox.js'; import { registerStatsRoutes } from './routes/stats.js'; +import { registerArenaRoutes } from './routes/arena.js'; import { registerWebSocket } from './routes/ws.js'; // Phase 4: dispatcher + agent probe import { createDispatcher } from './services/dispatcher.js'; @@ -143,6 +144,7 @@ async function main() { registerTaskRoutes(app, sql, inferenceApi); registerInboxRoutes(app, sql); registerStatsRoutes(app, sql); + registerArenaRoutes(app, sql); registerWebSocket(app, sql, broker); // Serve static frontend (built web app). In production, the dist/ is diff --git a/apps/coder/src/routes/arena.ts b/apps/coder/src/routes/arena.ts new file mode 100644 index 0000000..9983cd5 --- /dev/null +++ b/apps/coder/src/routes/arena.ts @@ -0,0 +1,122 @@ +/** + * v2.0.5: Arena routes — competitive dispatch of the same task to multiple agents. 
+ * + * POST /api/arena — create an arena with 2-5 contestants + * GET /api/arena/:id — get all tasks in an arena + * POST /api/arena/:id/select/:task_id — mark a task as the arena winner + */ +import type { FastifyInstance } from 'fastify'; +import { z } from 'zod'; +import type { Sql } from '../db.js'; + +const ContestantSchema = z.object({ + agent: z.string().max(100).optional(), + model: z.string().max(200).optional(), +}); + +const CreateArenaBody = z.object({ + project_id: z.string().uuid(), + input: z.string().min(1).max(64_000), + contestants: z.array(ContestantSchema).min(2).max(5), +}); + +interface TaskRow { + id: string; + agent: string | null; + model: string | null; + state: string; +} + +export function registerArenaRoutes(app: FastifyInstance, sql: Sql): void { + // POST /api/arena — create a new arena + app.post('/api/arena', async (req, reply) => { + const parsed = CreateArenaBody.safeParse(req.body); + if (!parsed.success) { + reply.code(400); + return { error: 'invalid body', details: parsed.error.flatten() }; + } + + const { project_id, input, contestants } = parsed.data; + const arenaId = crypto.randomUUID(); + + const tasks: TaskRow[] = []; + for (const contestant of contestants) { + const [task] = await sql` + INSERT INTO tasks (project_id, input, agent, model, arena_id) + VALUES (${project_id}, ${input}, ${contestant.agent ?? null}, ${contestant.model ?? 
null}, ${arenaId}) + RETURNING id, agent, model, state + `; + tasks.push(task!); + } + + reply.code(201); + return { + arena_id: arenaId, + tasks: tasks.map(t => ({ + id: t.id, + agent: t.agent, + model: t.model, + state: t.state, + })), + }; + }); + + // GET /api/arena/:arena_id — list all tasks in an arena + app.get<{ Params: { arena_id: string } }>('/api/arena/:arena_id', async (req, reply) => { + const { arena_id } = req.params; + + // Validate UUID format + const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + if (!uuidRegex.test(arena_id)) { + reply.code(400); + return { error: 'invalid arena_id format' }; + } + + const tasks = await sql` + SELECT id, project_id, state, input, output_summary, agent, model, execution_path, session_id, started_at, ended_at, created_at, arena_id + FROM tasks + WHERE arena_id = ${arena_id} + ORDER BY created_at + `; + + if (tasks.length === 0) { + reply.code(404); + return { error: 'arena not found' }; + } + + return { arena_id, tasks }; + }); + + // POST /api/arena/:arena_id/select/:task_id — mark the winner + app.post<{ Params: { arena_id: string; task_id: string } }>( + '/api/arena/:arena_id/select/:task_id', + async (req, reply) => { + const { arena_id, task_id } = req.params; + + // Verify the task belongs to this arena + const rows = await sql<{ id: string; state: string; arena_id: string | null }[]>` + SELECT id, state, arena_id FROM tasks WHERE id = ${task_id} + `; + + if (rows.length === 0) { + reply.code(404); + return { error: 'task not found' }; + } + + const task = rows[0]!; + if (task.arena_id !== arena_id) { + reply.code(409); + return { error: 'task does not belong to this arena' }; + } + + // Mark as selected via output_summary prefix (lightweight — no schema change) + await sql` + UPDATE tasks + SET output_summary = COALESCE('[SELECTED] ' || output_summary, '[SELECTED]') + WHERE id = ${task_id} + `; + + return { selected: true, task_id, arena_id }; + } + ); +} diff --git 
a/apps/coder/src/schema.sql b/apps/coder/src/schema.sql index 63f8de1..2b313e7 100644 --- a/apps/coder/src/schema.sql +++ b/apps/coder/src/schema.sql @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS tasks ( ended_at TIMESTAMPTZ, created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(), CONSTRAINT tasks_state_chk CHECK (state IN ('pending', 'running', 'completed', 'failed', 'blocked', 'cancelled')), - CONSTRAINT tasks_execution_path_chk CHECK (execution_path IS NULL OR execution_path IN ('native', 'acp', 'pty')) + CONSTRAINT tasks_execution_path_chk CHECK (execution_path IS NULL OR execution_path IN ('native', 'acp', 'pty', 'qwen')) ); CREATE TABLE IF NOT EXISTS available_agents ( @@ -46,6 +46,18 @@ CREATE TABLE IF NOT EXISTS available_agents ( -- v2.0.0 Phase 4: link tasks to their inference sessions. ALTER TABLE tasks ADD COLUMN IF NOT EXISTS session_id UUID REFERENCES sessions(id); +-- v2.0.5: add 'qwen' to execution_path CHECK + arena_id column. +ALTER TABLE tasks DROP CONSTRAINT IF EXISTS tasks_execution_path_chk; +DO $$ BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'tasks_execution_path_chk') THEN + ALTER TABLE tasks ADD CONSTRAINT tasks_execution_path_chk + CHECK (execution_path IS NULL OR execution_path IN ('native', 'acp', 'pty', 'qwen')); + END IF; +END $$; + +-- v2.0.5: arena support — group tasks into competitive arenas. 
+ALTER TABLE tasks ADD COLUMN IF NOT EXISTS arena_id UUID; + -- Human inbox: tasks needing attention CREATE OR REPLACE VIEW human_inbox AS SELECT * FROM tasks WHERE state IN ('blocked', 'failed'); diff --git a/apps/coder/src/services/agent-probe.ts b/apps/coder/src/services/agent-probe.ts index 1a7059e..fa7cad5 100644 --- a/apps/coder/src/services/agent-probe.ts +++ b/apps/coder/src/services/agent-probe.ts @@ -7,6 +7,7 @@ const KNOWN_AGENTS: Array<{ name: string; supportsAcp: boolean }> = [ { name: 'goose', supportsAcp: true }, { name: 'claude', supportsAcp: false }, { name: 'pi', supportsAcp: false }, + { name: 'qwen', supportsAcp: false }, ]; /** diff --git a/apps/coder/src/services/pty-dispatch.ts b/apps/coder/src/services/pty-dispatch.ts index 278d471..d77ac49 100644 --- a/apps/coder/src/services/pty-dispatch.ts +++ b/apps/coder/src/services/pty-dispatch.ts @@ -8,6 +8,7 @@ * Supported agents: * - claude: `claude -p --model ` (print mode, reads task from stdin) * - opencode: `echo | opencode` (stdin pipe — exact flags TBD) + * - qwen: `qwen -p --output-format stream-json` (NDJSON structured output) * - goose: stub (not yet supported) * - pi: stub (not yet supported) */ @@ -51,6 +52,12 @@ function buildAgentCommand(agent: string, task: string, model?: string): string ? `echo '${escapedTask}' | opencode --model '${model}'` : `echo '${escapedTask}' | opencode`; + case 'qwen': + // Qwen Code: structured JSON output mode for parseable events + return model + ? `qwen -p '${escapedTask}' --model '${model}' --output-format stream-json` + : `qwen -p '${escapedTask}' --output-format stream-json`; + case 'goose': // Not yet verified for non-interactive use return null; diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts index 8a2ea0d..1fb641d 100644 --- a/apps/server/src/config.ts +++ b/apps/server/src/config.ts @@ -22,6 +22,9 @@ const ConfigSchema = z.object({ // v1.15.0-mcp-multi: path to the MCP config JSON file. 
Default /data/mcp.json // (bind-mounted alongside AGENTS.md). File missing = no MCP (opt-in). MCP_CONFIG_PATH: z.string().optional(), + // v2.0.5: cheaper model for titles, summaries, labeling. Falls back to + // session model (auto_name) or DEFAULT_MODEL when unset. + FAST_MODEL: z.string().optional(), }); export type Config = z.infer; diff --git a/apps/server/src/services/auto_name.ts b/apps/server/src/services/auto_name.ts index 4022d6a..95c62d6 100644 --- a/apps/server/src/services/auto_name.ts +++ b/apps/server/src/services/auto_name.ts @@ -67,7 +67,8 @@ export async function maybeAutoNameChat( const sessionRows = await ctx.sql<{ model: string }[]>` SELECT model FROM sessions WHERE id = ${sessionId} `; - const model = sessionRows[0]?.model; + // v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries). + const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model; if (!model) return; const assistantMsg = await ctx.sql<{ content: string }[]>` diff --git a/apps/server/src/services/inference/index.ts b/apps/server/src/services/inference/index.ts index c0cb907..59d031f 100644 --- a/apps/server/src/services/inference/index.ts +++ b/apps/server/src/services/inference/index.ts @@ -20,3 +20,5 @@ export type { export type { ToolPhaseResult } from './tool-phase.js'; export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js'; export { buildMessagesPayload } from './payload.js'; +export { generateToolUseSummary } from './tool-summaries.js'; +export type { ToolInfo } from './tool-summaries.js'; diff --git a/apps/server/src/services/inference/tool-summaries.ts b/apps/server/src/services/inference/tool-summaries.ts new file mode 100644 index 0000000..aa6983f --- /dev/null +++ b/apps/server/src/services/inference/tool-summaries.ts @@ -0,0 +1,81 @@ +/** + * v2.0.5: Tool-use summary generation. 
+ * + * After a batch of tool calls completes, fire a cheap LLM call to generate + * a "git-commit-subject-style" one-liner label describing what the tools + * accomplished. Ported from the Qwen Code source recon. + */ +import type { FastifyBaseLogger } from 'fastify'; + +const TOOL_SUMMARY_SYSTEM_PROMPT = `Write a short summary label describing what these tool calls accomplished. Think git-commit-subject, not sentence. Past tense, most distinctive noun. Max 30 characters. Output ONLY the label. + +Examples: +- Searched in auth/ +- Fixed NPE in UserService +- Created signup endpoint +- Read config.json +- Ran failing tests`; + +const INPUT_TRUNCATE = 300; +const MAX_SUMMARY_LENGTH = 100; + +export interface ToolInfo { + name: string; + input: string; + output: string; +} + +export async function generateToolUseSummary(opts: { + tools: ToolInfo[]; + llamaSwapUrl: string; + model: string; + log: FastifyBaseLogger; + signal?: AbortSignal; +}): Promise { + const { tools, llamaSwapUrl, model, log, signal } = opts; + if (tools.length === 0) return null; + if (signal?.aborted) return null; + + const toolText = tools + .map(t => `Tool: ${t.name}\nInput: ${t.input.slice(0, INPUT_TRUNCATE)}\nOutput: ${t.output.slice(0, INPUT_TRUNCATE)}`) + .join('\n\n'); + + try { + const res = await fetch(`${llamaSwapUrl}/v1/chat/completions`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model, + messages: [ + { role: 'system', content: TOOL_SUMMARY_SYSTEM_PROMPT }, + { role: 'user', content: toolText }, + ], + max_tokens: 30, + temperature: 0.2, + stream: false, + chat_template_kwargs: { enable_thinking: false }, + }), + signal, + }); + if (!res.ok) { + log.debug({ status: res.status }, 'tool-summary: LLM request failed'); + return null; + } + const data = await res.json() as { choices?: Array<{ message?: { content?: string } }> }; + const raw = data.choices?.[0]?.message?.content?.trim() ?? 
''; + if (!raw) return null; + // Clean: strip quotes, "Label:" prefix, cap length + let cleaned = raw.split('\n')[0]?.trim() ?? ''; + cleaned = cleaned + .replace(/^[-*•]\s+/, '') + .replace(/^["'`‘’“”]|["'`‘’“”]$/g, '') + .replace(/^(label|summary)\s*:\s*/i, '') + .trim(); + return cleaned.length > MAX_SUMMARY_LENGTH + ? cleaned.slice(0, MAX_SUMMARY_LENGTH).trim() + : cleaned || null; + } catch (err) { + log.debug({ err: err instanceof Error ? err.message : String(err) }, 'tool-summary: error'); + return null; + } +}