#!/usr/bin/env node
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { execFileSync } from "node:child_process";
import * as ledger from "./load-ledger.mjs";

// Default input files; a leading "~" is expanded when the file is read.
const DEFAULT_MODEL_TIERS = "~/.paseo/model-tiers.json";
const DEFAULT_PRESET = "~/.paseo/orchestration-preferences.json";

// Accepted values for --role, --difficulty and the legacy --budget flag.
// Element order is the order shown in validation error messages.
const ROLES = new Set(["impl", "ui", "audit", "research", "planning"]);
const DIFFICULTIES = new Set(["simple", "standard", "hard"]);
const BUDGETS = new Set(["cost_sensitive", "balanced", "quality"]);

// Normalized model ids that start with this marker are treated as local models.
const LOCAL_MODEL_MARKER = "==qwen==/";

// Quality grade -> numeric rank: S > A > B > C. L (local) shares C's rank on
// the quality axis.
const GRADE_VALUE = {
  S: 4,
  A: 3,
  B: 2,
  C: 1,
  L: 1,
};

// Lowest grade rank each difficulty is happy with; candidates below the floor
// receive an under-spec penalty.
const DIFFICULTY_FLOOR = {
  simple: 1,
  standard: 2,
  hard: 3,
};

// Routing priority profiles:
//   costWeight   - multiplier on the penalty for effective cost
//   speedWeight  - multiplier on the reward for the per-model speed signal
//   qualityBonus - reward per grade rank
//   effortBias   - moves the recommended reasoningEffort one step up (+1) or
//                  down (-1) within the model's supported levels
const PRIORITIES = {
  balanced: { costWeight: 6, speedWeight: 0, qualityBonus: 0, effortBias: 0 },
  "cost-efficiency": { costWeight: 14, speedWeight: 0, qualityBonus: 0, effortBias: -1 },
  speed: { costWeight: 4, speedWeight: 60, qualityBonus: 0, effortBias: -1 },
  quality: { costWeight: 2, speedWeight: 0, qualityBonus: 15, effortBias: 1 },
};

// Back-compat: the legacy --budget values map onto priorities.
const BUDGET_TO_PRIORITY = {
  cost_sensitive: "cost-efficiency",
  balanced: "balanced",
  quality: "quality",
};

// Expand a leading "~" or "~/" to the current user's home directory. Falsy
// input and every other path shape are returned unchanged.
function expandHome(filePath) {
  if (!filePath) return filePath;
  if (filePath === "~") return os.homedir();
  if (filePath.startsWith("~/")) return path.join(os.homedir(), filePath.slice(2));
  return filePath;
}

// Read a UTF-8 file (after "~" expansion) and parse it as JSON. Read and parse
// errors propagate to the caller.
function readJson(filePath) {
  return JSON.parse(fs.readFileSync(expandHome(filePath), "utf8"));
}

// Convert the value of a numeric flag. A missing or empty value yields
// `fallback`; anything that is not a finite, non-negative number throws, so a
// typo cannot silently turn into NaN and disable the numeric checks downstream.
function parseNumericArg(flag, raw, fallback) {
  const value = Number(raw || fallback);
  if (!Number.isFinite(value) || value < 0) {
    throw new Error(`${flag} expects a non-negative number, got: ${raw}`);
  }
  return value;
}

// Parse CLI arguments (process.argv without the node/script entries) into an
// options object pre-filled with defaults.
// Throws Error on an unknown flag or an invalid numeric value.
function parseArgs(argv) {
  const args = {
    budget: "balanced",
    contextTokens: 0,
    difficulty: "standard",
    fanout: 1,
    modelTiersPath: DEFAULT_MODEL_TIERS,
    presetPath: DEFAULT_PRESET,
    requires: [],
    residentLocal: "",
    task: "",
  };
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    // Consume the following argv entry as this flag's value (undefined at the end).
    const next = () => argv[++index];
    switch (arg) {
      case "--dry-run-samples":
        args.dryRunSamples = true;
        break;
      case "--json":
        args.json = true;
        break;
      case "--explain":
        args.explain = true;
        break;
      case "--role":
        args.role = next();
        break;
      case "--task":
        args.task = next() || "";
        break;
      case "--difficulty":
        args.difficulty = next() || "standard";
        break;
      case "--budget":
        args.budget = next() || "balanced";
        break;
      case "--priority":
        args.priority = next();
        break;
      case "--context-tokens":
        args.contextTokens = parseNumericArg(arg, next(), 0);
        break;
      case "--fanout":
        args.fanout = parseNumericArg(arg, next(), 1);
        break;
      case "--requires":
        args.requires = String(next() || "")
          .split(",")
          .map((s) => s.trim())
          .filter(Boolean);
        break;
      case "--resident-local":
        args.residentLocal = next() || "";
        break;
      case "--preset":
        args.presetPath = next();
        break;
      case "--model-tiers":
        args.modelTiersPath = next();
        break;
      case "--reserve":
        args.reserve = next() || "";
        break;
      case "--release":
        args.release = next() || "";
        break;
      case "--tokens":
        args.tokens = parseNumericArg(arg, next(), 0);
        break;
      case "--no-ledger":
        args.noLedger = true;
        break;
      case "--load-snapshot":
        args.loadSnapshot = true;
        break;
      case "--live-status":
        args.liveStatus = next() || "";
        break;
      case "--help":
      case "-h":
        args.help = true;
        break;
      default:
        throw new Error(`Unknown argument: ${arg}`);
    }
  }
  return args;
}

// Strip a single leading "opencode/" prefix from a provider string.
function normalizeModelId(provider) {
  return String(provider).replace(/^opencode\//, "");
}

// Registry lookup key: the last "/"-separated segment of the normalized id.
function modelKey(provider) {
  const parts = normalizeModelId(provider).split("/");
  return parts[parts.length - 1];
}

// True when the normalized id starts with the local-model marker.
function isLocalProvider(provider) {
  return normalizeModelId(provider).startsWith(LOCAL_MODEL_MARKER);
}

// Models flagged neverSubagent in any tier object must never be routed (the
// router only ever selects subagents). This is the guardrail the README promised.
// Returns a Set of model keys collected from every top-level registry object
// that has a truthy `neverSubagent` and a `models` array.
function neverSubagentSet(registry) {
  const set = new Set();
  for (const value of Object.values(registry)) {
    const isFlaggedTier =
      value &&
      typeof value === "object" &&
      !Array.isArray(value) &&
      value.neverSubagent &&
      Array.isArray(value.models);
    if (!isFlaggedTier) continue;
    for (const id of value.models) set.add(modelKey(id));
  }
  return set;
}

// Name of the first entry in `tiers` that lists the normalized model id, either
// directly (array entry) or under a `models` array; "unknown" when none does.
function tierName(modelId, tiers) {
  const normalized = normalizeModelId(modelId);
  for (const [name, value] of Object.entries(tiers)) {
    if (Array.isArray(value) && value.includes(normalized)) return name;
    if (value && Array.isArray(value.models) && value.models.includes(normalized)) return name;
  }
  return "unknown";
}

// Pick the pricing band that matches the request context size. Qwen Plus models
// carry an _over_256k band that roughly triples cost past 256K tokens.
// The over-256k result is the base pricing overlaid with the band's fields and
// tagged with `_band: "over_256k"`; otherwise the base entry (or {}) is returned.
function pricingForContext(key, registry, contextTokens) {
  const base = registry.pricing?.[key] || {};
  if (contextTokens > 256000 && base._over_256k) {
    return { ...base, ...base._over_256k, _band: "over_256k" };
  }
  return base;
}

// Numeric input/output/cachedRead prices, preferring the `effective_*` fields
// over the list prices and defaulting each to 0.
function effectivePricing(pricing = {}) {
  return {
    input: Number(pricing.effective_input ?? pricing.input ?? 0),
    output: Number(pricing.effective_output ?? pricing.output ?? 0),
    cachedRead: Number(pricing.effective_cached_read ?? pricing.cached_read ?? 0),
  };
}

// Output dominates real spend; weight it 3:1 over input.
function blendedCost(pricing) {
  const { input, output } = effectivePricing(pricing);
  return output * 0.75 + input * 0.25;
}

// Running score plus a human-readable trace. Every contribution is added to the
// total; only non-zero contributions are traced, formatted with a leading "+"
// for positive values and `fractionDigits` decimals.
function createScoreTally(fractionDigits) {
  const reasons = [];
  let total = 0;
  return {
    reasons,
    add(points, reason) {
      total += points;
      if (!points) return;
      const sign = points > 0 ? "+" : "";
      reasons.push(`${sign}${points.toFixed(fractionDigits)} ${reason}`);
    },
    get total() {
      return total;
    },
  };
}

// Hard filters: the reason a candidate is disqualified, or null if it survives.
function disqualify(key, attrs, request, neverSub) {
  if (neverSub.has(key)) {
    return "neverSubagent guardrail (S-tier never routed as subagent)";
  }
  // A model without attributes is kept; it is scored from a neutral baseline.
  if (!attrs) return null;
  if (attrs.routable === false) {
    const note = attrs.routable_note || "availability/license hold";
    return `not routable (${note})`;
  }

  const supported = attrs.modalities || [];
  const firstGap = request.requires.findIndex((need) => !supported.includes(need));
  if (firstGap !== -1) {
    return `missing required modality: ${request.requires[firstGap]}`;
  }

  const { contextTokens } = request;
  const overCeiling = contextTokens > 0 && attrs.ctx_max && contextTokens > attrs.ctx_max;
  return overCeiling ? `context ${contextTokens} exceeds ctx_max ${attrs.ctx_max}` : null;
}

// Soft quality and role-fit score: returns { score, reasons, grade }.
function fitScore(attrs, request) {
  const tally = createScoreTally(0);
  const done = (grade) => ({ score: tally.total, reasons: tally.reasons, grade });

  if (!attrs) {
    tally.add(50, "unknown model (neutral baseline)");
    return done("?");
  }

  // Role affinity carries most of the score (0.5 when the role is not listed).
  const affinity = attrs.roles?.[request.role] ?? 0.5;
  tally.add(affinity * 100, `role ${request.role} affinity ${affinity}`);

  // Traits found as substrings of the task/role/requires text; at most four count.
  const searchText = [request.task, request.role, request.requires.join(" ")].join(" ").toLowerCase();
  const matched = (attrs.traits || []).filter((trait) => searchText.includes(String(trait).toLowerCase()));
  if (matched.length) {
    tally.add(Math.min(matched.length, 4) * 8, `traits: ${matched.slice(0, 4).join(", ")}`);
  }

  // Under-spec models are penalized per missing grade step; headroom above the
  // floor earns nothing here.
  const gradeRank = GRADE_VALUE[attrs.grade] ?? 1;
  const wanted = DIFFICULTY_FLOOR[request.difficulty] ?? 2;
  if (gradeRank < wanted) {
    tally.add(-(wanted - gradeRank) * 40, `under-spec for ${request.difficulty} (grade ${attrs.grade})`);
  }

  // The request fits the ceiling (checked by the hard filter) but exceeds the
  // model's sweet spot.
  const pastSweetSpot =
    request.contextTokens > 0 && attrs.ctx_sweet_spot && request.contextTokens > attrs.ctx_sweet_spot;
  if (pastSweetSpot) {
    tally.add(-30, `past ctx sweet-spot ${attrs.ctx_sweet_spot}`);
  }

  // Only profiles with a non-zero qualityBonus reward the grade itself.
  const profile = PRIORITIES[request.priority] || PRIORITIES.balanced;
  if (profile.qualityBonus) {
    tally.add(gradeRank * profile.qualityBonus, `quality priority (grade ${attrs.grade})`);
  }

  return done(attrs.grade);
}

// Classify a provider string into the source name used for provider_priority.
// The rules are tried top to bottom and the first match wins, so the cloud
// gateways are listed ahead of the generic opencode markers.
function sourceOf(provider) {
  const id = String(provider);
  const rules = [
    [() => id.includes("digitalocean"), "digitalocean"],
    [() => id.includes("openrouter"), "openrouter"],
    [() => id.startsWith("reasonix/"), "reasonix"],
    [() => id.includes("==edge-"), "local-edge"],
    [() => id.includes("=="), "local"],
    [() => id.includes("opencode-go/"), "opencode-go"],
    [() => id.startsWith("claude/") || id.startsWith("codex/"), "subscription"],
    [() => id.includes("opencode/opencode/") || id.endsWith("-free"), "opencode-zen"],
  ];
  const hit = rules.find(([matches]) => matches());
  return hit ? hit[1] : "other";
}

// Economics score: provider priority, cost, speed, quota, locality/residency,
// preset order and (when a load context is supplied) live load.
// Returns { score, cost, quota, isLocal, band, reasons }.
function economics(provider, key, request, registry, presetIndex, loadCtx) {
  const tally = createScoreTally(1);
  const profile = PRIORITIES[request.priority] || PRIORITIES.balanced;
  const local = isLocalProvider(provider);

  // Per-source preference taken from registry.provider_priority.
  const src = sourceOf(provider);
  const sourceBonus = registry.provider_priority?.[src];
  if (typeof sourceBonus === "number" && sourceBonus) {
    tally.add(sourceBonus, `provider ${src}`);
  }

  // Blended cost for the pricing band selected by the request's context size.
  const pricing = pricingForContext(key, registry, request.contextTokens);
  const cost = blendedCost(pricing);
  const bandNote = pricing._band ? ` (${pricing._band})` : "";
  tally.add(-cost * profile.costWeight, `blended cost $${cost.toFixed(3)}/M${bandNote}`);

  // The speed signal only counts under profiles with a non-zero speedWeight.
  const speed = registry.speed?.[key];
  if (profile.speedWeight && typeof speed === "number") {
    tally.add(speed * profile.speedWeight, `speed ${speed}`);
  }

  // Quota defaults to 200 for local models and 0 otherwise; the reward is
  // capped at 30000.
  const quota = Number(registry.quotas_per_5h?.[key] ?? (local ? 200 : 0));
  tally.add(Math.min(quota, 30000) / 1000, `quota ${quota}/5h`);

  if (local) {
    if (request.fanout > 1) {
      tally.add(-80, `local penalized for fan-out x${request.fanout}`);
    } else {
      const localBonus = request.priority === "cost-efficiency" ? 20 : 5;
      tally.add(localBonus, "local zero-dollar serial option");
      if (request.residentLocal && key === modelKey(request.residentLocal)) {
        tally.add(25, "already resident in llama-swap (no model swap)");
      }
    }
  }

  // Tiny nudge so earlier preset entries win exact ties.
  tally.add(-presetIndex * 0.01, "preset order");

  // Live-load adjustment computed by the ledger module from in-flight counts,
  // usage against quota and host load; skipped when no load context is given.
  let score = tally.total;
  if (loadCtx) {
    const adjustment = ledger.loadAdjustment({
      src,
      isLocal: local,
      inflight: loadCtx.bySrc?.[src]?.inflight,
      usage: loadCtx.byKey?.[key]?.usage,
      quota,
      host: loadCtx.host,
      tuning: loadCtx.tuning,
    });
    score += adjustment.score;
    tally.reasons.push(...adjustment.reasons);
  }

  return { score, cost, quota, isLocal: local, band: pricing._band || "base", reasons: tally.reasons };
}

// Score one preset candidate. Disqualified candidates come back with
// eliminated: true and a score of -Infinity; survivors carry the combined
// fit + economics score and the merged reason trace.
function scoreCandidate(provider, request, registry, neverSub, presetIndex, loadCtx) {
  const key = modelKey(provider);
  const modelId = normalizeModelId(provider);
  const attrs = registry.attributes?.[key];
  const identity = { provider, modelId, key, tier: tierName(modelId, registry) };

  const reason = disqualify(key, attrs, request, neverSub);
  if (reason) {
    return {
      ...identity,
      eliminated: true,
      reason,
      score: -Infinity,
      reasons: [`ELIMINATED: ${reason}`],
    };
  }

  const fit = fitScore(attrs, request);
  const econ = economics(provider, key, request, registry, presetIndex, loadCtx);
  return {
    ...identity,
    grade: fit.grade,
    eliminated: false,
    score: fit.score + econ.score,
    fitScore: Math.round(fit.score),
    econScore: Number(econ.score.toFixed(1)),
    effectiveCostPerMTok: econ.cost,
    quotaPer5h: econ.quota,
    isLocal: econ.isLocal,
    band: econ.band,
    reasons: [...fit.reasons, ...econ.reasons],
  };
}

// Layer the registry's optional `load` block over the ledger defaults. The
// concurrency_soft map is merged key by key instead of being replaced wholesale.
function mergeTuning(registry) {
  const overrides = registry.load || {};
  const softCaps = {
    ...ledger.LOAD_DEFAULTS.concurrency_soft,
    ...(overrides.concurrency_soft || {}),
  };
  return { ...ledger.LOAD_DEFAULTS, ...overrides, concurrency_soft: softCaps };
}

// Resolve the live-load context once per routing call: merged tuning, the
// reconciled cross-process ledger snapshot, and host pressure. Returns null when
// load awareness is disabled, so the scorer falls back to stateless behavior.
function buildLoadContext(request, registry) {
  const tuning = mergeTuning(registry);
  if (request.noLedger || tuning.enabled === false) return null;
  const snap = ledger.snapshot(Date.now(), {
    windowSec: tuning.window_sec,
    ttlSec: tuning.reservation_ttl_sec,
  });
  return { byKey: snap.byKey, bySrc: snap.bySrc, host: ledger.hostLoad(), tuning };
}

// Emit the raw load snapshot as JSON for the control UI's dashboard. Self-contained
// so the UI can shell out the same way it runs a routing decision.
function printLoadSnapshot(args) {
  const registry = readJson(args.modelTiersPath);
  const tuning = mergeTuning(registry);
  const snap = ledger.snapshot(Date.now(), {
    windowSec: tuning.window_sec,
    ttlSec: tuning.reservation_ttl_sec,
  });
  console.log(
    JSON.stringify({ now: Date.now(), host: ledger.hostLoad(), byKey: snap.byKey, bySrc: snap.bySrc, tuning }),
  );
}

// Validate the request and pick a provider for its role from the preset.
// A string preset entry is returned as-is (pinned, no scoring); an array entry
// is scored candidate by candidate and the best survivor wins, with the other
// survivors returned as ordered fallbacks.
// Throws Error on an invalid role/difficulty/budget/priority, a missing or
// malformed preset entry, or when every candidate is eliminated.
function chooseProvider(request, preset, registry) {
  if (!ROLES.has(request.role)) throw new Error(`Role must be one of: ${[...ROLES].join(", ")}`);
  if (!DIFFICULTIES.has(request.difficulty)) {
    throw new Error(`Difficulty must be one of: ${[...DIFFICULTIES].join(", ")}`);
  }
  if (!BUDGETS.has(request.budget)) throw new Error(`Budget must be one of: ${[...BUDGETS].join(", ")}`);
  if (!PRIORITIES[request.priority]) {
    throw new Error(`Priority must be one of: ${Object.keys(PRIORITIES).join(", ")}`);
  }

  const roleValue = preset.providers?.[request.role];
  if (!roleValue) throw new Error(`Preset has no provider entry for role: ${request.role}`);
  if (typeof roleValue === "string") {
    return {
      provider: roleValue,
      modelId: normalizeModelId(roleValue),
      rationale: "Preset role is pinned to a single provider.",
      reasoning: resolveReasoning(roleValue, registry, request.difficulty, request.contextTokens, request.priority),
      permissions: resolvePermissions(roleValue, registry),
      fallbacks: [],
      scores: [],
    };
  }
  if (!Array.isArray(roleValue)) throw new Error(`Provider entry for ${request.role} must be a string or array`);

  const neverSub = neverSubagentSet(registry);
  const loadCtx = buildLoadContext(request, registry);
  const scored = roleValue.map((provider, index) =>
    scoreCandidate(provider, request, registry, neverSub, index, loadCtx),
  );
  // filter() returns a fresh array, so sorting it leaves `scored` in preset order.
  const survivors = scored.filter((c) => !c.eliminated).sort((a, b) => b.score - a.score);
  if (!survivors.length) {
    const why = scored.map((c) => `${c.key} (${c.reason})`).join("; ");
    throw new Error(`All candidates eliminated for role ${request.role}: ${why}`);
  }

  const winner = survivors[0];
  return {
    provider: winner.provider,
    modelId: winner.modelId,
    rationale: buildRationale(winner, request),
    reasoning: resolveReasoning(winner.provider, registry, request.difficulty, request.contextTokens, request.priority),
    permissions: resolvePermissions(winner.provider, registry),
    fallbacks: survivors.slice(1).map((s) => s.provider),
    scores: scored,
  };
}

// Resolve the recommended OpenCode reasoningEffort for the chosen model at this
// difficulty. "auto" means do not set reasoningEffort (leave the model default).
// Clamps to the model's supported levels and steps effort down under context
// pressure (reasoning consumes output headroom that a near-full window lacks).
// Returns { effort, apply }, where `apply` is registry.reasoning._apply followed
// by a bracketed list of the adjustments that were made.
function resolveReasoning(provider, registry, difficulty, contextTokens = 0, priorityName = "balanced") {
  const key = modelKey(provider);
  const r = registry.reasoning?.[key];
  if (!r) return { effort: "auto", apply: "no reasoning profile in registry; leave model default" };
  if (r.effort === "auto") return { effort: "auto", apply: "model self-manages; do not set reasoningEffort" };

  // An empty levels list carries no information; treat it like a missing one.
  const levels = Array.isArray(r.levels) && r.levels.length ? r.levels : null;
  let effort = r.by_difficulty?.[difficulty] ?? r.default;
  const notes = [];

  // Clamp to the model's supported set (e.g. OpenAI never accepts "max"). This
  // runs before the priority bias: biasing an unsupported level would start from
  // indexOf() === -1 and land on the lowest level regardless of direction.
  if (levels && !levels.includes(effort)) {
    effort = levels.includes(r.default) ? r.default : levels[levels.length - 1];
    notes.push(`clamped to supported level ${effort}`);
  }

  // Priority bias: speed/cost-efficiency lean reasoning down, quality leans it up.
  const bias = (PRIORITIES[priorityName] || PRIORITIES.balanced).effortBias;
  if (levels && bias) {
    const i = levels.indexOf(effort);
    const j = Math.max(0, Math.min(levels.length - 1, i + bias));
    if (i !== -1 && j !== i) {
      effort = levels[j];
      notes.push(`${bias > 0 ? "raised" : "lowered"} for ${priorityName} priority`);
    }
  }

  // Context-pressure downgrade: past 70% of the model's ctx_max, step down one
  // level so reasoning leaves room for output (levels are ordered low->high).
  const ctxMax = registry.attributes?.[key]?.ctx_max;
  if (levels && contextTokens > 0 && ctxMax && contextTokens > ctxMax * 0.7) {
    const i = levels.indexOf(effort);
    if (i > 0) {
      effort = levels[i - 1];
      notes.push(`stepped down for context pressure (>${Math.round(ctxMax * 0.7)})`);
    }
  }

  const apply = (registry.reasoning?._apply || "") + (notes.length ? ` [${notes.join("; ")}]` : "");
  return { effort, apply };
}

// Resolve the permission posture for the chosen provider's backend. Defaults to
// bypass/yolo (registry permissions._default); settings go to create_agent.
// The backend is the provider's first "/"-separated segment.
function resolvePermissions(provider, registry) {
  let backend = String(provider).split("/")[0];
  // oc-digitalocean/oc-openrouter/oc-sam-desktop/oc-embedding extend opencode
  if (backend.startsWith("oc-")) backend = "opencode";
  const mode = registry.permissions?._default || "bypass";
  const p = registry.permissions?.[backend];
  if (!p) {
    return { backend, mode, settings: null, note: "no permission profile for this backend; pass nothing" };
  }
  return { backend, mode, settings: p[mode] ?? null, cliBypass: p.cli_bypass || null };
}

// One-line summary of why the winning candidate was picked, including its first
// three score reasons.
function buildRationale(winner, request) {
  const parts = [
    `${winner.key} (grade ${winner.grade}) won for ${request.role}`,
    `fit ${winner.fitScore}`,
    `econ ${winner.econScore}`,
    `effective cost $${winner.effectiveCostPerMTok.toFixed(3)}/M`,
    `quota ${winner.quotaPer5h}/5h`,
  ];
  if (winner.reasons.length) parts.push(winner.reasons.slice(0, 3).join(", "));
  return parts.join("; ");
}

// Print a routing decision as plain text. With `explain`, also print how to
// apply the reasoning effort, the CLI bypass flag, and every candidate's trace.
function printHuman(result, request, presetPath, explain) {
  console.log(`role: ${request.role} difficulty: ${request.difficulty} priority: ${request.priority} fanout: ${request.fanout}`);
  console.log(`preset: ${presetPath}`);
  console.log(`pick: ${result.provider}`);
  console.log(`rationale: ${result.rationale}`);
  if (result.reasoning) {
    console.log(`reasoningEffort: ${result.reasoning.effort}`);
    if (explain && result.reasoning.apply) console.log(` apply: ${result.reasoning.apply}`);
  }
  if (result.permissions) {
    console.log(`permissions: ${result.permissions.backend} ${result.permissions.mode} -> ${JSON.stringify(result.permissions.settings)}`);
    if (explain && result.permissions.cliBypass) console.log(` cli: ${result.permissions.cliBypass}`);
  }
  if (result.fallbacks && result.fallbacks.length) {
    console.log(`fallbacks: ${result.fallbacks.join(" -> ")}`);
  }
  if (result.scores.length) {
    console.log("candidates:");
    for (const c of result.scores) {
      if (c.eliminated) {
        console.log(` - ${c.provider} ELIMINATED: ${c.reason}`);
        continue;
      }
      console.log(` - ${c.provider} score=${c.score.toFixed(2)} grade=${c.grade} fit=${c.fitScore} econ=${c.econScore} cost=${c.effectiveCostPerMTok.toFixed(3)} quota=${c.quotaPer5h} band=${c.band}`);
      if (explain) for (const r of c.reasons) console.log(` ${r}`);
    }
  }
}

// Print CLI help. Value placeholders are upper-case metavariables.
function usage() {
  console.log(`Usage: node model-router/router.mjs --role ROLE --task TEXT [options]

Options:
  --preset PATH           Active preset JSON path. Default: ${DEFAULT_PRESET}
  --model-tiers PATH      Model registry path. Default: ${DEFAULT_MODEL_TIERS}
  --difficulty LEVEL      simple, standard, or hard. Default: standard
  --budget BUDGET         cost_sensitive, balanced, or quality (legacy alias for --priority)
  --priority NAME         cost-efficiency, speed, quality, or balanced. Default: balanced
  --context-tokens N      Approximate input context size. Default: 0
  --requires LIST         Comma-separated hard modality needs, e.g. vision,computer-use
  --fanout N              Parallel agent count for this dispatch. Default: 1
  --resident-local MODEL  Local model currently loaded in llama-swap (residency bonus)
  --reserve ID            Record this pick as in-flight under ID (real dispatch)
  --release ID            Mark dispatch ID complete; no routing performed
  --tokens N              Optional token spend recorded with --reserve/--release
  --no-ledger             Ignore the shared load ledger (stateless routing)
  --load-snapshot         Print the raw load snapshot as JSON; no routing performed
  --live-status URL       Query URL for running models to auto-populate --resident-local
  --json                  Print JSON
  --explain               Print full per-candidate scoring trace
  --dry-run-samples       Run sample selections
`);
}

// Query GET /api/providers and return the first loaded local model name.
// Uses execFileSync+curl with args as array (no shell, no injection risk).
// Best-effort by design: any failure (curl error, timeout, bad JSON, unexpected
// shape) yields "" so routing proceeds without a residency bonus.
function resolveLiveStatus(statusUrl) {
  try {
    const raw = execFileSync("curl", ["-s", "--max-time", "3", statusUrl], { timeout: 4000, encoding: "utf8" });
    const data = JSON.parse(raw);
    for (const p of data.providers ?? []) {
      if (!p.ok || !p.models?.length) continue;
      const m = p.models[0];
      return typeof m === "string" ? m : (m.id || m.model || "");
    }
    return "";
  } catch {
    return "";
  }
}

// Run one routing decision: load registry and preset, build the request, pick a
// provider, optionally record the reservation, then print the result.
function runOne(args) {
  const registry = readJson(args.modelTiersPath);
  const preset = readJson(args.presetPath);
  // An explicit --resident-local wins; --live-status is only queried without it.
  let residentLocal = args.residentLocal;
  if (!residentLocal && args.liveStatus) {
    residentLocal = resolveLiveStatus(args.liveStatus);
  }
  const request = {
    role: args.role,
    task: args.task,
    difficulty: args.difficulty,
    contextTokens: args.contextTokens,
    budget: args.budget,
    priority: args.priority || BUDGET_TO_PRIORITY[args.budget] || "balanced",
    fanout: args.fanout,
    requires: args.requires,
    residentLocal,
    noLedger: args.noLedger,
  };
  const result = chooseProvider(request, preset, registry);
  // Record the pick so sibling fan-out calls see it as in-flight. Only with an
  // explicit --reserve id (a real dispatch); previews and samples never write.
  if (args.reserve && !args.noLedger) {
    ledger.reserve({
      id: args.reserve,
      key: modelKey(result.provider),
      src: sourceOf(result.provider),
      at: Date.now(),
      tokens: args.tokens,
    });
  }
  if (args.json) console.log(JSON.stringify({ request, result }, null, 2));
  else printHuman(result, request, args.presetPath, args.explain);
}

// Run three canned sample requests against the workhorse-mid preset; every other
// option is inherited from the parsed CLI args.
function runSamples(args) {
  const samples = [
    {
      ...args,
      presetPath: "~/.paseo/presets/workhorse-mid.json",
      role: "ui",
      task: "review a screenshot-heavy frontend flow with visual design risks",
      requires: ["image"],
      contextTokens: 120000,
      difficulty: "standard",
      fanout: 1,
      explain: true,
    },
    {
      ...args,
      presetPath: "~/.paseo/presets/workhorse-mid.json",
      role: "impl",
      task: "apply a mechanical OpenSpec implementation across files",
      contextTokens: 80000,
      difficulty: "simple",
      fanout: 3,
      explain: true,
    },
    {
      ...args,
      presetPath: "~/.paseo/presets/workhorse-mid.json",
      role: "research",
      task: "browse a 1M-token repo and synthesize findings",
      contextTokens: 400000,
      difficulty: "hard",
      fanout: 1,
      explain: true,
    },
  ];
  for (const sample of samples) {
    runOne(sample);
    console.log("");
  }
}

// CLI entry point: dispatch on the parsed flags in priority order
// (help, load snapshot, release, samples, single routing decision).
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) return usage();
  if (args.loadSnapshot) return printLoadSnapshot(args);
  // Release-only: mark a prior dispatch complete so it stops counting as in-flight.
  if (args.release) return ledger.release(args.release, { at: Date.now(), tokens: args.tokens });
  if (args.dryRunSamples) return runSamples(args);
  if (!args.role) throw new Error("--role is required unless --dry-run-samples is used");
  runOne(args);
}

try {
  main();
} catch (error) {
  console.error(`router error: ${error.message}`);
  process.exit(1);
}