chore: snapshot working tree - pty_exited notifications + in-flight inference WIP

feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean).

wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes.

openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
2026-06-14 12:48:47 +00:00
parent 0ed506f1da
commit b18de2a331
204 changed files with 25344 additions and 867 deletions

View File

@@ -50,6 +50,5 @@ Route registration: all routes registered in `index.ts` via `register*Routes(app
- `data/AGENTS.md` is PARSED (`agents.ts` `splitSections`/`parseAgentSection`): each `## <Name>` is one agent and must be followed by a `---` frontmatter fence or the block throws; content before the first `## ` is discarded. Do NOT add free-form `## ` rule sections — they break the registry. Cross-cutting agent rules go in CLAUDE.md or a parser-ignored preamble.
- MCP stdio transport uses newline-delimited JSON (NDJSON), NOT LSP-style `Content-Length` headers. The boocontext MCP client (`services/mcp-client.ts`) is the reference (per the MCP spec, modelcontextprotocol.io/specification/server/transports).
- **`payload.ts:loadContext` SELECT** must include every `Session` field downstream code reads. The tool phase reads `session.allowed_read_paths`; if the SELECT omits it, cross-repo read grants silently fail. `sql<Session[]>` doesn't enforce column coverage, so the type doesn't catch it.
- **Sidecar routing** (`services/inference/provider.ts`): `upstreamModel(config, modelId, agent)` routes to `LLAMA_SIDECAR_URL` when the agent has `llama_extra_args`, else `LLAMA_SWAP_URL`. `resolveRoute(agent)` returns `{route, flags}`. Sidecar provider created fresh per call (not cached) because `X-Agent-Flags` varies per agent. Boot-time guard in `index.ts` refuses to start if any agent has `llama_extra_args` but `LLAMA_SIDECAR_URL` is unset.
- **Secret guard safe patterns** (`services/secret_guard.ts`): `.env.example`, `.env.sample`, `.env.template`, `.env.defaults` are allowlisted via `SAFE_PATTERNS`. Do NOT add `.env.production`/`.env.development`/`.env.test` — those can hold real secrets.
- **llama-sidecar** (`/opt/forks/llama-sidecar/`): Go daemon for a per-agent llama-server process pool (routed to via "Sidecar routing" above). Cross-compile: `GOOS=windows GOARCH=amd64 /snap/go/current/bin/go build -o bin/llama-sidecar.exe ./cmd/llama-sidecar`. Gitea: `indifferentketchup/llama-sidecar`. Windows child-process gotchas: `context.Background()` for child lifetime (not request ctx), `os.Open(os.DevNull)` for stdin, `os.Pipe()` for stdout with a drain goroutine, `DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP` flags. SSH to sam-desktop: `ssh samki@100.101.41.16`; use `schtasks` for persistent spawning (SSH `start /B` doesn't survive session close).

View File

@@ -25,7 +25,6 @@ const ConfigSchema = z.object({
// session model (auto_name) or DEFAULT_MODEL when unset.
FAST_MODEL: z.string().optional(),
TASK_MODEL_URL: z.string().url().optional(),
LLAMA_SIDECAR_URL: z.string().url().optional(),
// vDeepSeek: DeepSeek API key for direct API access. When set, models
// with IDs starting with 'deepseek-' route through DeepSeek's API instead
// of llama-swap. Defaults to empty (DeepSeek routing disabled).
@@ -34,6 +33,11 @@ const ConfigSchema = z.object({
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
// vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
// vMultiProvider: path to the local providers config JSON file. Missing file
// = legacy synthesis from LLAMA_SWAP_URL.
LLAMA_PROVIDERS_PATH: z.string().optional(),
// BooControl host service origin. Used by /api/control/* proxy routes.
BOOCONTROL_URL: z.string().url().optional(),
});
export type Config = z.infer<typeof ConfigSchema>;

View File

@@ -15,6 +15,7 @@ import { registerChatRoutes } from './routes/chats.js';
import { registerSidebarRoutes } from './routes/sidebar.js';
import { registerWebSocket } from './routes/ws.js';
import { registerCoderProxy } from './routes/coder-proxy.js';
import { registerControlProxy } from './routes/control-proxy.js';
import { registerModelRoutes } from './routes/models.js';
import { registerAgentRoutes } from './routes/agents.js';
import { registerSkillsRoutes } from './routes/skills.js';
@@ -36,10 +37,15 @@ import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp
import { appendMcpTools } from './services/tools.js';
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
import { loadHooksConfig, createHookRunner } from './services/hooks.js';
import { loadLlamaProviders } from './services/llama-providers.js';
async function main() {
const config = loadConfig();
// vMultiProvider: load the shared local provider config. When the file is
// absent, falls back to a single legacy provider from LLAMA_SWAP_URL.
loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
const app = Fastify({
logger: { level: config.LOG_LEVEL },
});
@@ -76,10 +82,11 @@ async function main() {
app.log.info({ sweptCount }, 'swept stale streaming messages to failed');
}
// v1.11.3: tell the model-context cache where llama-swap lives. Cache
// lookups go to ${LLAMA_SWAP_URL}/upstream/<model>/props to read
// v2.x (W3): tell the model-context cache the full config so it can
// resolve composite model ids through the provider registry. Cache
// lookups go to <provider.baseUrl>/upstream/<wireModelId>/props to read
// default_generation_settings.n_ctx — the value persisted as messages.ctx_max.
configureModelContext({ llamaSwapUrl: config.LLAMA_SWAP_URL });
configureModelContext(config);
// v1.15.0-mcp-multi: read MCP config file and connect to all enabled servers.
// Runs before route registration so the tool list is complete when the first
@@ -98,19 +105,6 @@ async function main() {
}
app.addHook('onClose', async () => { await shutdownMcp(); });
// Boot-time guard: if any agent has llama_extra_args but LLAMA_SIDECAR_URL
// is unset, fail fast. Silent fallback would defeat per-agent flags.
if (!config.LLAMA_SIDECAR_URL) {
const { agents } = await getAgentsForProject('');
const offending = agents.find(a => a.llama_extra_args && a.llama_extra_args.length > 0);
if (offending) {
app.log.fatal(
{ agent: offending.name },
`Agent "${offending.name}" has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
);
process.exit(1);
}
}
await app.register(fastifyWebsocket);
@@ -283,6 +277,12 @@ async function main() {
const BOOCODER_ORIGIN = process.env.BOOCODER_URL ?? 'http://boocoder:3000';
registerCoderProxy(app, BOOCODER_ORIGIN);
// BooControl: reverse proxy /api/control/* to the control host service.
// Static WS path /api/control/ws (not parameterized per-session like coder-proxy).
if (process.env.BOOCONTROL_URL) {
registerControlProxy(app, process.env.BOOCONTROL_URL);
}
const webDist = process.env.WEB_DIST_PATH ?? resolve(process.cwd(), '../web/dist');
if (existsSync(webDist)) {
await app.register(fastifyStatic, {

View File

@@ -0,0 +1,120 @@
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import postgres from 'postgres';
import Fastify from 'fastify';
import { registerSettingsRoutes } from '../settings.js';
import type { Sql } from '../../db.js';
// P0 favorites hide-not-delete (multi-llama-swap-providers-model-favorites, P8):
// availability filtering is a CLIENT display concern — ModelPicker derives the
// visible Favorites section from settings ∩ live catalog. The server-side
// guarantee under test here: PATCH normalizes SHAPE only (composite ids,
// dedup, trim) and never prunes a favorite for being absent from any live
// host's inventory. A favorited model whose host is down or whose entry was
// removed from llama-swap config must survive in settings untouched, so it
// reappears in the picker when the model comes back.
//
// Skipped unless DATABASE_URL is set (tool_cost_stats.test.ts pattern). Runs
// against the live settings table: the pre-existing favorite_models value is
// saved in beforeAll and restored exactly in afterAll.
const DB_URL = process.env.DATABASE_URL;
const describeFn = DB_URL ? describe : describe.skip;
const FAVORITES_KEY = 'favorite_models';
// No llama-swap host serves this id; shape-valid composite ref.
const GHOST = 'sam-desktop/ghost-model-that-no-host-serves-9999';
const OTHER = 'embedding/another-model';
const SCRATCH_KEY = `favorites_test_scratch_${Date.now()}`;
// Route-level suite for favorite_models persistence. The cases are
// order-dependent: each one builds on the settings row written by the
// previous PATCH, so they must run sequentially in declaration order.
describeFn('PATCH /api/settings favorite_models — hide-not-delete (P0 P8)', () => {
  let sql: ReturnType<typeof postgres>;
  let app: ReturnType<typeof Fastify>;
  // Snapshot of the pre-existing favorites row, restored in afterAll.
  let savedFavorites: unknown = null;
  let hadFavorites = false;

  beforeAll(async () => {
    if (!DB_URL) return;
    sql = postgres(DB_URL, { max: 2, idle_timeout: 5, connect_timeout: 5, onnotice: () => {} });
    // Create ONLY the settings table (mirrors schema.sql:217). Applying the
    // full schema here races other DB-gated suites running in parallel: the
    // CREATE OR REPLACE VIEW statements momentarily perturb views (e.g.
    // tool_cost_stats) that tool_cost_stats.test.ts is querying mid-run.
    await sql`CREATE TABLE IF NOT EXISTS settings (
      key TEXT PRIMARY KEY,
      value JSONB NOT NULL
    )`;
    // Preserve the operator's real favorites for exact restore in afterAll.
    const rows = await sql<{ value: unknown }[]>`
      SELECT value FROM settings WHERE key = ${FAVORITES_KEY}
    `;
    hadFavorites = rows.length > 0;
    savedFavorites = rows[0]?.value ?? null;
    app = Fastify();
    registerSettingsRoutes(app, sql as unknown as Sql);
    await app.ready();
  });

  afterAll(async () => {
    if (!DB_URL) return;
    try {
      // Put the favorites row back exactly as it was found (or remove it if
      // it did not exist), then drop the scratch key this suite created.
      if (hadFavorites) {
        await sql`
          INSERT INTO settings (key, value)
          VALUES (${FAVORITES_KEY}, ${sql.json(savedFavorites as never)})
          ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value
        `;
      } else {
        await sql`DELETE FROM settings WHERE key = ${FAVORITES_KEY}`;
      }
      await sql`DELETE FROM settings WHERE key = ${SCRATCH_KEY}`;
    } finally {
      // Always release the server and the connection pool, even when the
      // restore above throws. `app` may be unset if beforeAll failed early.
      try {
        await app?.close();
      } finally {
        await sql.end({ timeout: 5 });
      }
    }
  });

  it('persists a favorite no live host serves — shape normalization only, no availability pruning', async () => {
    const res = await app.inject({
      method: 'PATCH',
      url: '/api/settings',
      payload: {
        // GHOST is unavailable everywhere; OTHER is shape-valid; the rest are
        // malformed (bare id, non-string, whitespace dup) and must be dropped.
        [FAVORITES_KEY]: [GHOST, OTHER, 'bare-id-no-slash', 42, ` ${OTHER} `],
      },
    });
    expect(res.statusCode).toBe(200);
    const body = res.json() as Record<string, unknown>;
    expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
  });

  it('GET returns the unavailable favorite untouched', async () => {
    const res = await app.inject({ method: 'GET', url: '/api/settings' });
    expect(res.statusCode).toBe(200);
    const body = res.json() as Record<string, unknown>;
    expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
  });

  it('unrelated settings writes leave favorites untouched', async () => {
    const res = await app.inject({
      method: 'PATCH',
      url: '/api/settings',
      payload: { [SCRATCH_KEY]: 'scratch-value' },
    });
    expect(res.statusCode).toBe(200);
    const body = res.json() as Record<string, unknown>;
    expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
    expect(body[SCRATCH_KEY]).toBe('scratch-value');
  });

  it('removal is explicit-only: a user PATCH without the ghost removes it', async () => {
    const res = await app.inject({
      method: 'PATCH',
      url: '/api/settings',
      payload: { [FAVORITES_KEY]: [OTHER] },
    });
    expect(res.statusCode).toBe(200);
    const body = res.json() as Record<string, unknown>;
    expect(body[FAVORITES_KEY]).toEqual([OTHER]);
  });
});

View File

@@ -12,6 +12,9 @@ function boocoderWsUrl(origin: string, path: string): string {
/**
* Reverse-proxy BooCoder HTTP + WebSocket through BooChat's single origin.
* WS must be registered before the HTTP catch-all — fetch() cannot upgrade.
*
* Keep-in-sync: routes/control-proxy.ts mirrors this pattern (deliberate
* clone, Rule of Three unmet). Proxy-layer changes go in BOTH files.
*/
export function registerCoderProxy(app: FastifyInstance, boocoderOrigin: string): void {
app.get<{ Params: { sessionId: string } }>(

View File

@@ -0,0 +1,89 @@
import type { FastifyInstance } from 'fastify';
import WebSocket from 'ws';
/**
 * Build the upstream WebSocket URL for the control service.
 *
 * Takes the HTTP(S) origin, switches the scheme to its WebSocket counterpart
 * (`https:` becomes `wss:`, anything else becomes `ws:`), replaces the path
 * with `path`, and clears any query string carried by the origin.
 */
function boocontrolWsUrl(origin: string, path: string): string {
  const target = new URL(origin);
  const isTls = target.protocol === 'https:';
  target.protocol = isTls ? 'wss:' : 'ws:';
  target.pathname = path;
  target.search = '';
  return target.href;
}
// Upstream response headers that must not be copied onto the proxied reply.
// fetch() hands back an already-decoded body, so the upstream's
// content-encoding / content-length no longer describe what we send; the
// framing headers are recomputed by the server for the outgoing reply.
const UNFORWARDED_RESPONSE_HEADERS = new Set([
  'transfer-encoding',
  'content-encoding',
  'content-length',
]);

// True when `code` may be placed in an outgoing close frame. 1004, 1005, 1006
// and 1015 are reserved: they are reported locally but `ws` throws a TypeError
// if asked to send them.
function isSendableCloseCode(code: number): boolean {
  const isStandard =
    code >= 1000 && code <= 1014 && code !== 1004 && code !== 1005 && code !== 1006;
  const isRegisteredOrPrivate = code >= 3000 && code <= 4999;
  return isStandard || isRegisteredOrPrivate;
}

/**
 * Reverse-proxy /api/control/* HTTP + /api/control/ws WS through BooChat's
 * single origin.
 *
 * CLAUDE.md keep-in-sync: this file mirrors routes/coder-proxy.ts. Keep the
 * two files in sync — if you change one, update the other.
 *
 * @param app Fastify instance the routes are registered on.
 * @param boocontrolOrigin Base origin of the control service; HTTP requests
 *   are forwarded to `${boocontrolOrigin}/api/...` and the WebSocket to
 *   `/api/ws/control` on the same host.
 */
export function registerControlProxy(app: FastifyInstance, boocontrolOrigin: string): void {
  // The static WS route is registered ahead of the HTTP catch-all below.
  app.get('/api/control/ws', { websocket: true }, (clientSocket, _req) => {
    const target = boocontrolWsUrl(boocontrolOrigin, '/api/ws/control');
    const upstream = new WebSocket(target);

    const closeUpstreamIfLive = (): void => {
      if (upstream.readyState === WebSocket.OPEN || upstream.readyState === WebSocket.CONNECTING) {
        upstream.close();
      }
    };

    upstream.on('open', () => {
      app.log.debug('control ws proxy: upstream connected');
    });

    upstream.on('message', (data, isBinary) => {
      if (clientSocket.readyState !== clientSocket.OPEN) return;
      clientSocket.send(data, { binary: isBinary });
    });

    upstream.on('close', (code, reason) => {
      if (clientSocket.readyState !== clientSocket.OPEN) return;
      if (isSendableCloseCode(code)) {
        clientSocket.close(code, reason.toString());
      } else if (code === 1005) {
        // Upstream closed without a status code; close the client the same way.
        clientSocket.close();
      } else {
        // Reserved codes (e.g. 1006 abnormal closure) cannot be relayed as-is.
        clientSocket.close(1011, 'upstream closed abnormally');
      }
    });

    upstream.on('error', (err) => {
      app.log.warn({ err, target }, 'control ws proxy: upstream error');
      if (clientSocket.readyState === clientSocket.OPEN) {
        clientSocket.close(1011, 'upstream error');
      }
    });

    clientSocket.on('message', (data, isBinary) => {
      // Frames that arrive before the upstream socket is open are dropped.
      if (upstream.readyState !== WebSocket.OPEN) return;
      upstream.send(data, { binary: isBinary });
    });

    clientSocket.on('close', closeUpstreamIfLive);
    clientSocket.on('error', closeUpstreamIfLive);
  });

  app.all('/api/control/*', async (req, reply) => {
    // Rewrite only the leading prefix: /api/control/foo -> /api/foo.
    const targetPath = req.url.replace('/api/control', '/api');
    const targetUrl = `${boocontrolOrigin}${targetPath}`;

    // Only content-type and authorization are forwarded upstream.
    const headers: Record<string, string> = {};
    if (req.headers['content-type']) headers['content-type'] = req.headers['content-type'] as string;
    if (req.headers['authorization']) headers['authorization'] = req.headers['authorization'] as string;

    try {
      const res = await fetch(targetUrl, {
        method: req.method as string,
        headers,
        // The parsed request body is re-serialized as JSON for the upstream.
        body: req.method !== 'GET' && req.method !== 'HEAD' ? JSON.stringify(req.body) : undefined,
      });
      reply.code(res.status);
      for (const [key, value] of res.headers) {
        if (UNFORWARDED_RESPONSE_HEADERS.has(key)) continue;
        reply.header(key, value);
      }
      const body = await res.text();
      return reply.send(body);
    } catch (err) {
      app.log.error({ err, targetUrl }, 'control proxy error');
      // Return the reply so the async handler resolves with the sent response.
      return reply.code(502).send({ error: 'control backend unavailable' });
    }
  });
}

View File

@@ -1,8 +1,9 @@
import type { FastifyInstance } from 'fastify';
import type { Config } from '../config.js';
import type { ModelInfo } from '../types/api.js';
import type { ModelInfo, ModelCatalogProvider, ModelCatalogResponse } from '../types/api.js';
import { getLlamaProviders } from '../services/llama-providers.js';
interface ApiModelsResponse {
interface LlamaSwapModelsResponse {
data?: ModelInfo[];
}
@@ -13,21 +14,32 @@ const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
app.get('/api/models', async (_req, reply) => {
const models: ModelInfo[] = [];
const providers: ModelCatalogProvider[] = [];
// 1. Fetch llama-swap models
try {
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
if (res.ok) {
const parsed = (await res.json()) as ApiModelsResponse;
if (parsed.data) models.push(...parsed.data);
// 1. Fetch live model lists from each configured local provider.
const registry = getLlamaProviders();
for (const provider of registry.providers) {
const models: ModelInfo[] = [];
try {
const res = await fetch(`${provider.baseUrl}/v1/models`);
if (res.ok) {
const parsed = (await res.json()) as LlamaSwapModelsResponse;
if (parsed.data) {
// Prefix every model id with "provider/" to make it composite (D-2).
for (const m of parsed.data) {
models.push({ ...m, id: `${provider.id}/${m.id}` });
}
}
}
} catch {
// Provider unreachable — include empty entry so the UI can show it.
}
} catch {
// llama-swap unreachable — proceed with whatever we have
providers.push({ id: provider.id, label: provider.label, models });
}
// 2. If DeepSeek is configured, fetch live models from their API
// 2. If DeepSeek is configured, add a synthetic "deepseek" provider group.
if (config.DEEPSEEK_API_KEY) {
const deepseekModels: ModelInfo[] = [];
try {
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
const res = await fetch(`${baseURL}/v1/models`, {
@@ -35,22 +47,25 @@ export function registerModelRoutes(app: FastifyInstance, config: Config): void
signal: AbortSignal.timeout(5_000),
});
if (res.ok) {
const parsed = (await res.json()) as ApiModelsResponse;
if (parsed.data) models.push(...parsed.data);
const parsed = (await res.json()) as LlamaSwapModelsResponse;
if (parsed.data) {
for (const m of parsed.data) {
deepseekModels.push({ ...m, id: `deepseek/${m.id}` });
}
}
} else {
// API call failed — fall back to static model list
models.push(...DEEPSEEK_STATIC_MODELS);
deepseekModels.push(...DEEPSEEK_STATIC_MODELS.map((m) => ({ ...m, id: `deepseek/${m.id}` })));
}
} catch {
// Network error — fall back to static model list
models.push(...DEEPSEEK_STATIC_MODELS);
deepseekModels.push(...DEEPSEEK_STATIC_MODELS.map((m) => ({ ...m, id: `deepseek/${m.id}` })));
}
providers.push({ id: 'deepseek', label: 'DeepSeek', models: deepseekModels });
}
if (models.length === 0) {
if (providers.length === 0) {
reply.code(502);
return { error: 'no models available from any provider' };
}
return models;
return { providers } satisfies ModelCatalogResponse;
});
}

View File

@@ -74,6 +74,26 @@ function validateThemeKeys(body: Record<string, unknown>): string | null {
const PatchBody = z.record(z.string(), z.unknown());
// Normalize favorite_models on write: must be an array of non-empty
// composite "provider/model" strings. Drops malformed entries, dedupes
// preserving insertion order.
const FAVORITE_MODELS_KEY = 'favorite_models';
/**
 * Normalize a `favorite_models` settings value into a clean list of composite
 * "provider/model" ids.
 *
 * Shape-only normalization: entries are never checked against any live model
 * inventory. An entry is kept when it is a string that, after trimming, has a
 * non-empty provider before the first `/` and a non-empty model after it.
 * Everything else (non-strings, blanks, bare ids, `"/model"`, `"provider/"`)
 * is dropped. Duplicates are removed, keeping the first occurrence.
 *
 * @param value Raw value from the request body; anything that is not an array
 *   yields an empty list.
 * @returns The surviving ids, trimmed, in first-seen order.
 */
export function normalizeFavoriteModels(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  // A Set iterates in insertion order, which gives dedup + ordering in one pass.
  const unique = new Set<string>();
  for (const entry of value) {
    if (typeof entry !== 'string') continue;
    const trimmed = entry.trim();
    const slashIdx = trimmed.indexOf('/');
    // slashIdx <= 0: no slash at all, or an empty provider segment.
    // slashIdx at the last index: nothing after the provider, i.e. no model.
    if (slashIdx <= 0 || slashIdx === trimmed.length - 1) continue;
    unique.add(trimmed);
  }
  return [...unique];
}
export function registerSettingsRoutes(app: FastifyInstance, sql: Sql): void {
app.get('/api/settings', async () => {
const rows = await sql<{ key: string; value: unknown }[]>`SELECT key, value FROM settings`;
@@ -93,6 +113,13 @@ export function registerSettingsRoutes(app: FastifyInstance, sql: Sql): void {
reply.code(400);
return { error: themeError };
}
// Normalize favorite_models before persisting (must be composite ids only).
if (FAVORITE_MODELS_KEY in parsed.data) {
parsed.data[FAVORITE_MODELS_KEY] = normalizeFavoriteModels(
parsed.data[FAVORITE_MODELS_KEY],
);
}
for (const [k, v] of Object.entries(parsed.data)) {
await setSetting(sql, k, v);
}

View File

@@ -478,3 +478,17 @@ CREATE TABLE IF NOT EXISTS agent_snapshots (
);
CREATE INDEX IF NOT EXISTS idx_agent_snapshots_chat ON agent_snapshots(chat_id);
CREATE UNIQUE INDEX IF NOT EXISTS idx_agent_snapshots_chat_unique ON agent_snapshots(chat_id);
-- memory-browser-ui: topic-based memory, daily log, dream diaries.
-- One row per memory entry, owned by a project; rows are removed together
-- with their project (ON DELETE CASCADE on project_id).
-- NOTE(review): `date` and `mood` are the only nullable columns — presumably
-- they apply to the daily-log / dream-diary entry kinds only; confirm against
-- the writer.
CREATE TABLE IF NOT EXISTS memory_entries (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
topic TEXT NOT NULL,
title TEXT NOT NULL,
content TEXT NOT NULL DEFAULT '',
tags TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
date DATE,
mood TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
);
-- Composite index keyed by project, then created_at descending (newest first
-- within each project).
CREATE INDEX IF NOT EXISTS idx_memory_entries_project ON memory_entries(project_id, created_at DESC);

View File

@@ -0,0 +1,97 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
// NOTE(review): the first two cases resolve an endpoint via
// resolveModelEndpoint and then call fetch() from the test body with the
// X-Boo-Source header written inline. They therefore verify the endpoint
// resolution plus the stub plumbing, not that compaction.ts / task-model.ts
// themselves attach the header — consider driving those modules directly.
describe('P4: X-Boo-Source header injection (server paths)', () => {
  type RecordedFetchCall = [string, RequestInit];

  // Swap global fetch for a stub that records every call and answers with
  // `payload` as a 200 JSON response. Returns the (live) list of recorded calls.
  function stubRecordingFetch(payload: unknown): RecordedFetchCall[] {
    const calls: RecordedFetchCall[] = [];
    vi.stubGlobal(
      'fetch',
      vi.fn((...args: Parameters<typeof fetch>) => {
        calls.push([args[0] as string, args[1] as RequestInit]);
        return Promise.resolve(
          new Response(JSON.stringify(payload), {
            status: 200,
            headers: { 'content-type': 'application/json' },
          }),
        );
      }),
    );
    return calls;
  }

  afterEach(() => {
    // Restores the real global fetch after each case.
    vi.unstubAllGlobals();
  });

  describe('compaction.ts callLlm injects X-Boo-Source: boochat', () => {
    it('includes X-Boo-Source header on direct fetch', async () => {
      const { resolveModelEndpoint } = await import('../inference/provider.js');
      const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
      const { url, headers, model: resolvedModel } = resolveModelEndpoint(
        config,
        'test-model',
      );
      const fetchCalls = stubRecordingFetch({
        choices: [{ message: { content: 'summary' } }],
        usage: { prompt_tokens: 10, completion_tokens: 5 },
      });
      await fetch(`${url}/v1/chat/completions`, {
        method: 'POST',
        headers: { ...headers, 'X-Boo-Source': 'boochat' },
        body: JSON.stringify({ model: resolvedModel, messages: [], stream: false }),
      });
      expect(fetchCalls.length).toBe(1);
      const callHeaders = fetchCalls[0][1]?.headers as Record<string, string>;
      expect(callHeaders['X-Boo-Source']).toBe('boochat');
    });
  });

  describe('task-model.ts injects X-Boo-Source: boochat', () => {
    it('includes X-Boo-Source header on direct fetch', async () => {
      const { resolveModelEndpoint } = await import('../inference/provider.js');
      const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
      const { url, headers, model: resolvedModel } = resolveModelEndpoint(
        config,
        'test-model',
      );
      const fetchCalls = stubRecordingFetch({
        choices: [{ message: { content: 'result' } }],
      });
      await fetch(`${url}/v1/chat/completions`, {
        method: 'POST',
        headers: { ...headers, 'X-Boo-Source': 'boochat' },
        body: JSON.stringify({ model: resolvedModel, messages: [], stream: false }),
      });
      expect(fetchCalls.length).toBe(1);
      const callHeaders = fetchCalls[0][1]?.headers as Record<string, string>;
      expect(callHeaders['X-Boo-Source']).toBe('boochat');
    });
  });

  describe('stream-phase-adapter.ts upstreamModel call', () => {
    it('passes boochat source to upstreamModel', async () => {
      const { upstreamModel } = await import('../inference/provider.js');
      const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
      const model = upstreamModel(config, 'sam-desktop/test-model', null, 'boochat');
      expect(model).toBeDefined();
      // The provider prefix is stripped: only the wire model id reaches the SDK model.
      expect((model as any).modelId).toBe('test-model');
    });
  });
});

View File

@@ -22,7 +22,6 @@ const BASE_AGENT: Agent = {
source: 'global',
max_tool_calls: null,
steps: null,
llama_extra_args: null,
};
describe('resolveToolBudget', () => {

View File

@@ -0,0 +1,57 @@
import { describe, expect, it } from 'vitest';
import { normalizeFavoriteModels } from '../../routes/settings.js';
// Unit coverage for the pure shape-normalizer behind PATCH /api/settings.
describe('normalizeFavoriteModels', () => {
  it('returns empty array for non-array input', () => {
    const nonArrays: unknown[] = [null, undefined, 'string', 42, {}];
    for (const candidate of nonArrays) {
      expect(normalizeFavoriteModels(candidate)).toEqual([]);
    }
  });

  it('drops malformed entries that are not strings', () => {
    const mixed = ['valid/provider', 42, null, false];
    expect(normalizeFavoriteModels(mixed)).toEqual(['valid/provider']);
  });

  it('drops entries without a slash (bare ids)', () => {
    const bareOnly = ['bare-model', 'another-bare'];
    expect(normalizeFavoriteModels(bareOnly)).toEqual([]);
  });

  it('drops empty or whitespace-only strings', () => {
    const withBlanks = ['', ' ', 'valid/provider'];
    expect(normalizeFavoriteModels(withBlanks)).toEqual(['valid/provider']);
  });

  it('dedupes preserving insertion order', () => {
    const withRepeats = ['a/foo', 'b/bar', 'a/foo', 'c/baz', 'b/bar'];
    const normalized = normalizeFavoriteModels(withRepeats);
    expect(normalized).toEqual(['a/foo', 'b/bar', 'c/baz']);
  });

  it('trims whitespace from entries', () => {
    const padded = [' a/foo ', 'b/bar'];
    expect(normalizeFavoriteModels(padded)).toEqual(['a/foo', 'b/bar']);
  });

  it('accepts valid composite ids', () => {
    const composites = [
      'sam-desktop/qwen3.6-35b',
      'embedding/gemma-4-12b',
      'deepseek/deepseek-v4-flash',
    ];
    // Already-clean input must come back unchanged.
    expect(normalizeFavoriteModels(composites)).toEqual(composites);
  });

  it('handles empty array', () => {
    const normalized = normalizeFavoriteModels([]);
    expect(normalized).toEqual([]);
  });

  it('preserves insertion order after dedup', () => {
    const shuffledWithRepeats = ['b/bar', 'a/foo', 'c/baz', 'a/foo', 'b/bar'];
    const normalized = normalizeFavoriteModels(shuffledWithRepeats);
    expect(normalized).toEqual(['b/bar', 'a/foo', 'c/baz']);
  });
});

View File

@@ -24,7 +24,6 @@ const BASE_AGENT: Agent = {
source: 'global',
max_tool_calls: null,
steps: null,
llama_extra_args: null,
};
describe('samplerOptsFromAgent', () => {

View File

@@ -33,7 +33,6 @@ describe('license: MIT relicense guard', () => {
const FORMERLY_AGPL = [
'apps/server/src/services/inference/tool-call-parser.ts',
'apps/server/src/services/web/html-to-md.ts',
'apps/server/src/services/inference/llama-args-validator.ts',
];
for (const rel of FORMERLY_AGPL) {
it(`${rel} carries no AGPL / Unsloth provenance`, () => {

View File

@@ -1,160 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
validateExtraArgs,
isManagedFlag,
stripShadowingFlags,
} from '../inference/llama-args-validator.js';
import { parseAgentsMd } from '../agents.js';
describe('validateExtraArgs', () => {
describe('deny list — each alias rejected', () => {
const denied = [
'-m', '--model',
'-mu', '--model-url',
'-dr', '--docker-repo',
'-hf', '-hfr', '--hf-repo',
'-hff', '--hf-file',
'-hfv', '-hfrv', '--hf-repo-v',
'-hffv', '--hf-file-v',
'-hft', '--hf-token',
'-mm', '--mmproj',
'-mmu', '--mmproj-url',
'--host', '--port', '--path', '--api-prefix', '--reuse-port',
'--api-key', '--api-key-file',
'--ssl-key-file', '--ssl-cert-file',
'--webui', '--no-webui', '--ui', '--no-ui',
'--ui-config', '--ui-config-file',
'--ui-mcp-proxy', '--no-ui-mcp-proxy',
'--models-dir', '--models-preset', '--models-max',
'--models-autoload', '--no-models-autoload',
];
for (const flag of denied) {
it(`rejects ${flag}`, () => {
expect(() => validateExtraArgs([flag])).toThrow(/managed/);
});
}
});
describe('safe flags accepted', () => {
const safe = [
'-c', '--ctx-size', '-ngl', '--gpu-layers',
'--top-k', '--cache-type-k', '--jinja', '--no-jinja',
'--spec-draft-n-max', '-fa', '--flash-attn',
'-t', '--threads', '-np', '--parallel',
];
for (const flag of safe) {
it(`accepts ${flag}`, () => {
expect(() => validateExtraArgs([flag])).not.toThrow();
expect(validateExtraArgs([flag])).toEqual([flag]);
});
}
});
it('handles --flag=value shape (denies the flag part)', () => {
expect(() => validateExtraArgs(['--model=evil.gguf'])).toThrow(/managed/);
});
it('handles --flag=value shape (accepts safe flag)', () => {
expect(validateExtraArgs(['--ctx-size=4096'])).toEqual(['--ctx-size=4096']);
});
it('returns empty array for undefined input', () => {
expect(validateExtraArgs(undefined)).toEqual([]);
});
it('returns empty array for empty input', () => {
expect(validateExtraArgs([])).toEqual([]);
});
it('treats negative numbers as values, not flags', () => {
expect(validateExtraArgs(['--seed', '-1'])).toEqual(['--seed', '-1']);
});
});
describe('isManagedFlag', () => {
it('returns true for denied flags', () => {
expect(isManagedFlag('--model')).toBe(true);
expect(isManagedFlag('-m')).toBe(true);
expect(isManagedFlag('--api-key')).toBe(true);
expect(isManagedFlag('--port')).toBe(true);
});
it('returns false for safe flags', () => {
expect(isManagedFlag('-c')).toBe(false);
expect(isManagedFlag('--ctx-size')).toBe(false);
expect(isManagedFlag('--top-k')).toBe(false);
});
});
describe('stripShadowingFlags', () => {
it('strips auto -c when user supplies -c', () => {
const result = stripShadowingFlags(['-c', '4096', '--top-k', '40']);
expect(result).toEqual(['--top-k', '40']);
});
it('retains both when no overlap', () => {
const result = stripShadowingFlags(['--top-k', '40', '--top-p', '0.95']);
expect(result).toEqual(['--top-k', '40', '--top-p', '0.95']);
});
it('strips --ctx-size=value form', () => {
const result = stripShadowingFlags(['--ctx-size=4096']);
expect(result).toEqual([]);
});
it('strips boolean --jinja flag (no value consumed)', () => {
const result = stripShadowingFlags(['--jinja', '--top-k', '40']);
expect(result).toEqual(['--top-k', '40']);
});
it('respects stripContext=false to keep context flags', () => {
const result = stripShadowingFlags(['-c', '4096'], { stripContext: false });
expect(result).toEqual(['-c', '4096']);
});
it('passes through cache flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--cache-type-k', 'q8_0']);
expect(result).toEqual(['--cache-type-k', 'q8_0']);
});
it('passes through spec flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--spec-draft-n-max', '16']);
expect(result).toEqual(['--spec-draft-n-max', '16']);
});
});
describe('AGENTS.md frontmatter validation', () => {
it('rejects agent with managed flag in llama_extra_args', () => {
const md = `## Evil Agent
---
llama_extra_args: ["--model", "evil.gguf"]
---
You are evil.`;
const { agents, errors } = parseAgentsMd(md);
expect(agents).toHaveLength(0);
expect(errors).toHaveLength(1);
expect(errors[0]!.reason).toContain('managed');
});
it('accepts agent with safe llama_extra_args', () => {
const md = `## Good Agent
---
llama_extra_args: ["--top-k", "20"]
---
You are good.`;
const { agents, errors } = parseAgentsMd(md);
expect(errors).toHaveLength(0);
expect(agents).toHaveLength(1);
expect(agents[0]!.llama_extra_args).toEqual(['--top-k', '20']);
});
it('agent without llama_extra_args has null field', () => {
const md = `## Simple Agent
---
temperature: 0.5
---
You are simple.`;
const { agents } = parseAgentsMd(md);
expect(agents[0]!.llama_extra_args).toBeNull();
});
});

View File

@@ -1,14 +1,44 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
configureModelContext,
getModelContext,
invalidateModelContext,
} from '../model-context.js';
// ---- mock llama-providers registry -----------------------------------------
// model-context.ts imports resolveModelProvider from inference/provider.ts,
// which uses getLlamaProviders() from llama-providers.ts. We mock the
// registry module so tests control the provider list without touching the
// filesystem.
let mockDefaultProvider = 'llama-swap';
let mockProvidersList: Array<{ id: string; label: string; baseUrl: string; kind: string }> = [
{
id: 'llama-swap',
label: 'llama-swap',
baseUrl: 'http://llama-swap.test:8401',
kind: 'llama-swap',
},
];
vi.mock('../llama-providers.js', () => ({
getLlamaProviders: () => ({
defaultProvider: mockDefaultProvider,
providers: mockProvidersList,
}),
parseModelRef: (ref: string) => {
const slashIdx = ref.indexOf('/');
if (slashIdx <= 0) {
return { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
}
return {
providerId: ref.slice(0, slashIdx),
wireModelId: ref.slice(slashIdx + 1),
isLegacyBareId: false,
};
},
}));
// Import the functions under test AFTER the mock is registered.
const { configureModelContext, getModelContext, invalidateModelContext } = await import('../model-context.js');
// ---- fixtures ---------------------------------------------------------------
const TEST_URL = 'http://llama-swap.test:8401';
function mockOkProps(n_ctx: number) {
return new Response(
JSON.stringify({ default_generation_settings: { n_ctx } }),
@@ -16,9 +46,28 @@ function mockOkProps(n_ctx: number) {
);
}
// Legacy test config (backward-compatible { llamaSwapUrl } shape).
const LEGACY_CONFIG = { llamaSwapUrl: 'http://llama-swap.test:8401' };
// Provider-aware config for multi-provider tests.
const MULTI_PROVIDER_CONFIG = {
LLAMA_SWAP_URL: 'http://llama-swap.test:8401',
DEEPSEEK_API_KEY: 'sk-test',
DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
};
beforeEach(() => {
invalidateModelContext();
configureModelContext({ llamaSwapUrl: TEST_URL });
mockDefaultProvider = 'llama-swap';
mockProvidersList = [
{
id: 'llama-swap',
label: 'llama-swap',
baseUrl: 'http://llama-swap.test:8401',
kind: 'llama-swap',
},
];
configureModelContext(LEGACY_CONFIG);
});
afterEach(() => {
@@ -37,7 +86,7 @@ describe('getModelContext — positive cache', () => {
// Verify the URL was constructed correctly — encodes the model name in
// case it contains characters that would break the path.
expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
`${TEST_URL}/upstream/qwen3.6/props`,
`${LEGACY_CONFIG.llamaSwapUrl}/upstream/qwen3.6/props`,
expect.objectContaining({ signal: expect.any(AbortSignal) }),
);
});
@@ -185,3 +234,158 @@ describe('invalidateModelContext', () => {
expect(fetchSpy).toHaveBeenCalledTimes(2);
});
});
// ---- W3: provider-aware cache isolation ------------------------------------
// Registers two llama-swap-kind providers that expose the same wire model
// name and checks that cached context sizes are kept per provider rather than
// per wire model id.
describe('getModelContext — provider-aware cache isolation (W3)', () => {
  beforeEach(() => {
    // Two providers sharing the same wire model name "qwen3.6" but on
    // different base URLs. This is the core scenario for cache isolation.
    mockProvidersList = [
      {
        id: 'provider-a',
        label: 'Provider A',
        baseUrl: 'http://provider-a.test:8401',
        kind: 'llama-swap',
      },
      {
        id: 'provider-b',
        label: 'Provider B',
        baseUrl: 'http://provider-b.test:8401',
        kind: 'llama-swap',
      },
    ];
    mockDefaultProvider = 'provider-a';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('two providers serving the same wire model name have separate cache entries', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)) // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384)); // provider-b: qwen3.6

    // Both resolve to the wire model "qwen3.6" but different providers.
    const a = await getModelContext('provider-a/qwen3.6');
    const b = await getModelContext('provider-b/qwen3.6');

    expect(a).not.toBeNull();
    expect(a!.n_ctx).toBe(32_768);
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);

    // Two separate fetches — one per provider's baseUrl.
    expect(fetchSpy).toHaveBeenCalledTimes(2);
    expect(fetchSpy.mock.calls[0]![0]).toContain('provider-a.test');
    expect(fetchSpy.mock.calls[1]![0]).toContain('provider-b.test');
  });

  it('cached entry for one provider does not leak to the other', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)); // provider-a: qwen3.6

    // Populate provider-a's cache.
    await getModelContext('provider-a/qwen3.6');
    expect(fetchSpy).toHaveBeenCalledTimes(1);

    // provider-b/qwen3.6 should NOT hit provider-a's cache — it must fetch.
    fetchSpy.mockResolvedValueOnce(mockOkProps(16_384));
    const b = await getModelContext('provider-b/qwen3.6');
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('invalidateModelContext(key) only clears the targeted provider entry', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)) // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384)) // provider-b: qwen3.6
      .mockResolvedValueOnce(mockOkProps(40_960)); // provider-a re-fetch

    await getModelContext('provider-a/qwen3.6');
    await getModelContext('provider-b/qwen3.6');

    // Invalidate only provider-a's entry.
    invalidateModelContext('provider-a/qwen3.6');

    // provider-a must re-fetch and pick up the new value.
    const a2 = await getModelContext('provider-a/qwen3.6');
    expect(a2).not.toBeNull();
    expect(a2!.n_ctx).toBe(40_960);
    expect(fetchSpy).toHaveBeenCalledTimes(3); // 2 original + 1 re-fetch

    // provider-b must still be served from its cache entry: same value as
    // before and no additional fetch. Without this check an implementation
    // that wiped every entry would still pass the assertions above.
    const b2 = await getModelContext('provider-b/qwen3.6');
    expect(b2).not.toBeNull();
    expect(b2!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(3);
  });
});
// ---- W3: bare-id resolution through default provider -----------------------
// Covers ids without a provider prefix: they go through the registry's default
// provider, share a cache entry with the explicit composite form, and DeepSeek
// ids are answered without any fetch.
describe('getModelContext — bare-id resolution through default provider (W3)', () => {
  beforeEach(() => {
    const llamaSwapEntry = {
      id: 'llama-swap',
      label: 'llama-swap',
      baseUrl: 'http://llama-swap.test:8401',
      kind: 'llama-swap',
    };
    const deepSeekEntry = {
      id: 'deepseek',
      label: 'DeepSeek',
      baseUrl: 'https://api.deepseek.com',
      kind: 'deepseek',
    };
    mockProvidersList = [llamaSwapEntry, deepSeekEntry];
    mockDefaultProvider = 'llama-swap';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('bare model id resolves through the default provider', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');
    fetchSpy.mockResolvedValueOnce(mockOkProps(8192));

    const ctx = await getModelContext('qwen3.6');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(8192);
    // The default provider is "llama-swap", so its baseUrl prefixes the URL.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      'http://llama-swap.test:8401/upstream/qwen3.6/props',
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('bare id and explicit default-provider composite share a cache entry', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');
    fetchSpy.mockResolvedValueOnce(mockOkProps(8192));

    // The bare id falls back to the default provider ("llama-swap") and the
    // composite names the same provider explicitly, so both lookups target
    // "llama-swap/qwen3.6".
    const viaBareId = await getModelContext('qwen3.6');
    const viaComposite = await getModelContext('llama-swap/qwen3.6');

    expect(viaBareId).toEqual(viaComposite);
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('bare "deepseek-*" id returns static default without fetching', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek-v4-pro');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchSpy).not.toHaveBeenCalled();
  });

  it('composite "deepseek/model" id returns static default without fetching', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek/deepseek-v4-pro');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchSpy).not.toHaveBeenCalled();
  });
});

View File

@@ -1,58 +1,308 @@
import { describe, expect, it } from 'vitest';
import { resolveRoute, upstreamModel } from '../inference/provider.js';
import { describe, expect, it, vi, beforeEach } from 'vitest';
describe('resolveRoute', () => {
// Control the mock return values from tests.
// Both bindings are mutable on purpose: the mocked registry functions read
// them on every call, so reassigning them inside a test changes what the code
// under test sees on its next lookup.
let mockDefaultProvider = 'sam-desktop';
let mockProvidersList: Array<{ id: string; label: string; baseUrl: string; kind: string }> = [
{
id: 'sam-desktop',
label: 'Sam-desktop',
baseUrl: 'http://100.101.41.16:8401',
kind: 'llama-swap',
},
{
id: 'embedding',
label: 'embedding',
baseUrl: 'http://100.90.172.55:8411',
kind: 'llama-swap',
},
];
vi.mock('../llama-providers.js', () => ({
// Returns the current values of the two mutable bindings above.
getLlamaProviders: () => ({
defaultProvider: mockDefaultProvider,
providers: mockProvidersList,
}),
// Match the real signature: parseModelRef(ref) → uses getLlamaProviders().defaultProvider internally.
// In this stand-in, text before the first '/' is the provider id and the
// remainder is the wire model id; a ref with no '/' (or a leading '/') is a
// legacy bare id attributed to the mock default provider.
parseModelRef: (ref: string) => {
const slashIdx = ref.indexOf('/');
if (slashIdx <= 0) {
return { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
}
return {
providerId: ref.slice(0, slashIdx),
wireModelId: ref.slice(slashIdx + 1),
isLegacyBareId: false,
};
},
}));
// Import the functions under test AFTER the mock is registered.
const { resolveRoute, upstreamModel, resolveModelEndpoint, resolveModelProvider, isDeepSeekModel } = await import('../inference/provider.js');
// Rebuild the default two-provider registry before every test so a test that
// appends or replaces providers cannot leak that state into the next one.
beforeEach(() => {
  const samDesktop = {
    id: 'sam-desktop',
    label: 'Sam-desktop',
    baseUrl: 'http://100.101.41.16:8401',
    kind: 'llama-swap',
  };
  const embedding = {
    id: 'embedding',
    label: 'embedding',
    baseUrl: 'http://100.90.172.55:8411',
    kind: 'llama-swap',
  };
  mockDefaultProvider = samDesktop.id;
  mockProvidersList = [samDesktop, embedding];
});
// ---------------------------------------------------------------------------
// Legacy resolveRoute backward compat
// ---------------------------------------------------------------------------
describe('resolveRoute (legacy compat)', () => {
it('routes to swap when agent is null', () => {
expect(resolveRoute(null)).toEqual({ route: 'swap', flags: null });
expect(resolveRoute(null, { LLAMA_SWAP_URL: 'http://localhost:8080' }, 'model')).toEqual({ route: 'swap' });
});
it('routes to swap when agent has no llama_extra_args', () => {
expect(resolveRoute({ llama_extra_args: null })).toEqual({ route: 'swap', flags: null });
});
it('routes to swap when agent has empty llama_extra_args', () => {
expect(resolveRoute({ llama_extra_args: [] })).toEqual({ route: 'swap', flags: null });
});
it('routes to sidecar when agent has llama_extra_args', () => {
const result = resolveRoute({ llama_extra_args: ['--top-k', '20'] });
expect(result.route).toBe('sidecar');
expect(result.flags).toEqual(['--top-k', '20']);
it('routes to deepseek for bare deepseek- prefix when configured', () => {
expect(
resolveRoute(null, { LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-123' }, 'deepseek-v4-pro'),
).toEqual({ route: 'deepseek' });
});
});
describe('upstreamModel', () => {
const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
const fullConfig = {
LLAMA_SWAP_URL: 'http://localhost:8401',
LLAMA_SIDECAR_URL: 'http://localhost:8402',
// ---------------------------------------------------------------------------
// Provider-aware resolver: composite ids
// ---------------------------------------------------------------------------
describe('resolveModelProvider', () => {
const config = {
LLAMA_SWAP_URL: 'http://localhost:8080',
DEEPSEEK_API_KEY: 'sk-test',
DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
};
it('returns a model for swap route (no agent)', () => {
it('routes composite local provider id to its baseUrl', () => {
const r = resolveModelProvider('sam-desktop/qwen3.6-35b-a3b', config);
expect(r.route).toBe('swap');
expect(r.baseUrl).toBe('http://100.101.41.16:8401');
expect(r.wireModelId).toBe('qwen3.6-35b-a3b');
expect(r.providerId).toBe('sam-desktop');
expect(r.isLegacyBareId).toBe(false);
});
it('routes composite "deepseek/" id to DeepSeek SDK', () => {
const r = resolveModelProvider('deepseek/deepseek-v4-pro', config);
expect(r.route).toBe('deepseek');
expect(r.baseUrl).toBe('https://api.deepseek.com');
expect(r.wireModelId).toBe('deepseek-v4-pro');
expect(r.providerId).toBe('deepseek');
});
// COLLISION CASE: "embedding/deepseek-r1-qwen3-8b" routes to local provider
// "embedding", NOT to DeepSeek cloud.
it('routes "embedding/deepseek-r1-qwen3-8b" to local embedding provider, not DeepSeek', () => {
const r = resolveModelProvider('embedding/deepseek-r1-qwen3-8b', config);
expect(r.route).toBe('swap');
expect(r.baseUrl).toBe('http://100.90.172.55:8411');
expect(r.wireModelId).toBe('deepseek-r1-qwen3-8b');
expect(r.providerId).toBe('embedding');
});
});
// ---------------------------------------------------------------------------
// Provider-aware resolver: bare (legacy) ids
// ---------------------------------------------------------------------------
// Ids without a provider prefix: they resolve through the registry default,
// except bare "deepseek-*" ids, which go to DeepSeek when an API key is
// configured. Also covers a composite id whose provider is not registered.
describe('resolveModelProvider — bare id legacy fallback', () => {
  const swapUrl = 'http://localhost:8080';
  const config = {
    LLAMA_SWAP_URL: swapUrl,
    DEEPSEEK_API_KEY: 'sk-test',
  };

  it('bare id resolves through defaultProvider', () => {
    const { route, providerId, wireModelId, isLegacyBareId } = resolveModelProvider(
      'qwen3.6-35b-a3b',
      config,
    );
    expect(route).toBe('swap');
    expect(providerId).toBe('sam-desktop');
    expect(wireModelId).toBe('qwen3.6-35b-a3b');
    expect(isLegacyBareId).toBe(true);
  });

  it('bare "deepseek-v4-pro" resolves to DeepSeek SDK (legacy prefix)', () => {
    const { route, wireModelId, isLegacyBareId } = resolveModelProvider('deepseek-v4-pro', config);
    expect(route).toBe('deepseek');
    expect(wireModelId).toBe('deepseek-v4-pro');
    expect(isLegacyBareId).toBe(true);
  });

  it('bare id when DEEPSEEK_API_KEY is unset stays on swap', () => {
    const keylessConfig = { LLAMA_SWAP_URL: 'http://localhost:8080' };
    const { route, wireModelId } = resolveModelProvider('deepseek-v4-pro', keylessConfig);
    expect(route).toBe('swap');
    expect(wireModelId).toBe('deepseek-v4-pro');
  });

  it('unknown composite provider falls back to LLAMA_SWAP_URL', () => {
    const { route, baseUrl, wireModelId, isLegacyBareId } = resolveModelProvider(
      'unknown-provider/model-x',
      config,
    );
    expect(route).toBe('swap');
    expect(baseUrl).toBe('http://localhost:8080');
    expect(wireModelId).toBe('model-x');
    expect(isLegacyBareId).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// upstreamModel uses the resolver
// ---------------------------------------------------------------------------
// upstreamModel must hand back a model whose modelId is the bare wire id,
// whether the caller passes a composite ref or a legacy bare id.
describe('upstreamModel', () => {
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
  // The SDK model type does not expose modelId in a way these tests can read
  // directly, so go through an untyped view.
  const modelIdOf = (model: unknown): unknown => (model as any).modelId;

  it('returns a model for local composite id', () => {
    const built = upstreamModel(swapConfig, 'sam-desktop/test-model');
    expect(built).toBeDefined();
    expect(modelIdOf(built)).toBe('test-model');
  });

  it('returns a model for bare id (legacy)', () => {
    const built = upstreamModel(swapConfig, 'test-model');
    expect(built).toBeDefined();
    expect(modelIdOf(built)).toBe('test-model');
  });
});
it('returns a model for swap route (agent without extra args)', () => {
const model = upstreamModel(swapConfig, 'test-model', { llama_extra_args: null });
expect(model).toBeDefined();
// ---------------------------------------------------------------------------
// resolveModelEndpoint uses the resolver
// ---------------------------------------------------------------------------
describe('resolveModelEndpoint', () => {
it('resolves local composite id to provider baseUrl', () => {
const ep = resolveModelEndpoint(
{ LLAMA_SWAP_URL: 'http://localhost:8080' },
'sam-desktop/qwen3.6-35b-a3b',
);
expect(ep.url).toBe('http://100.101.41.16:8401');
expect(ep.model).toBe('qwen3.6-35b-a3b');
expect(ep.headers['Content-Type']).toBe('application/json');
});
it('returns a model for sidecar route', () => {
const model = upstreamModel(fullConfig, 'test-model', { llama_extra_args: ['--top-k', '20'] });
expect(model).toBeDefined();
expect((model as any).modelId).toBe('test-model');
it('resolves bare id to default provider baseUrl', () => {
const ep = resolveModelEndpoint(
{ LLAMA_SWAP_URL: 'http://localhost:8080' },
'test-model',
);
expect(ep.url).toBe('http://100.101.41.16:8401');
expect(ep.model).toBe('test-model');
});
it('throws when sidecar route requested but URL missing', () => {
expect(() =>
upstreamModel(swapConfig, 'test-model', { llama_extra_args: ['--top-k', '20'] }),
).toThrow(/LLAMA_SIDECAR_URL/);
it('resolves deepseek composite id to DeepSeek API with auth header', () => {
const ep = resolveModelEndpoint(
{ LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-test' },
'deepseek/deepseek-v4-pro',
);
expect(ep.url).toBe('https://api.deepseek.com');
expect(ep.model).toBe('deepseek-v4-pro');
expect(ep.headers['Authorization']).toBe('Bearer sk-test');
});
it('routes to swap for empty llama_extra_args array', () => {
const model = upstreamModel(swapConfig, 'test-model', { llama_extra_args: [] });
expect(model).toBeDefined();
// Collision case for endpoint resolution.
it('resolves "embedding/deepseek-r1-qwen3-8b" to embedding baseUrl, not DeepSeek', () => {
const ep = resolveModelEndpoint(
{ LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-test' },
'embedding/deepseek-r1-qwen3-8b',
);
expect(ep.url).toBe('http://100.90.172.55:8411');
expect(ep.model).toBe('deepseek-r1-qwen3-8b');
});
});
// ---------------------------------------------------------------------------
// isDeepSeekModel (legacy prefix check, kept for stream-phase-adapter)
// ---------------------------------------------------------------------------
// The legacy check looks only at the "deepseek-" prefix, so the composite
// "deepseek/<model>" form and unrelated ids must both be rejected.
describe('isDeepSeekModel', () => {
  it('returns true for deepseek- prefix', () => {
    const verdict = isDeepSeekModel('deepseek-v4-pro');
    expect(verdict).toBe(true);
  });

  it('returns false for composite deepseek/', () => {
    const verdict = isDeepSeekModel('deepseek/deepseek-v4-pro');
    expect(verdict).toBe(false);
  });

  it('returns false for other models', () => {
    const verdict = isDeepSeekModel('qwen3.6-35b-a3b');
    expect(verdict).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// P4: upstreamModel additive source param
// ---------------------------------------------------------------------------
// The optional fourth `source` argument must be purely additive: calls that
// omit it keep working, and calls that pass it still produce a usable model.
describe('upstreamModel source param (P4)', () => {
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
  const modelRef = 'sam-desktop/test-model';

  it('accepts optional source parameter without breaking existing calls', () => {
    const withoutSource = upstreamModel(swapConfig, modelRef);
    const withSource = upstreamModel(swapConfig, modelRef, undefined, 'boochat');
    expect(withoutSource).toBeDefined();
    expect(withSource).toBeDefined();
    expect((withoutSource as any).modelId).toBe('test-model');
    expect((withSource as any).modelId).toBe('test-model');
  });

  it('creates distinct cached providers for different source values', () => {
    // NOTE(review): this only checks that each call yields a model; it does
    // not observe the provider cache, so distinctness itself is unverified.
    const models = [
      upstreamModel(swapConfig, modelRef),
      upstreamModel(swapConfig, modelRef, undefined, 'boochat'),
      upstreamModel(swapConfig, modelRef, undefined, 'boocoder'),
    ];
    for (const model of models) {
      expect(model).toBeDefined();
    }
  });
});
// ---------------------------------------------------------------------------
// P7: gateway routing (auto:* virtual models)
// ---------------------------------------------------------------------------
// auto:* virtual models: with a gateway-kind provider registered they route to
// it; without one they must resolve to gateway_error (and the public entry
// points must throw) rather than fall back to LLAMA_SWAP_URL.
describe('resolveModelProvider — gateway routing (P7)', () => {
  const config = { LLAMA_SWAP_URL: 'http://localhost:8080' };
  const gatewayEntry = {
    id: 'auto',
    label: 'Auto (gateway)',
    baseUrl: 'http://100.114.205.53:9503',
    kind: 'boocontrol-gateway',
  };
  // Appends a fresh copy of the gateway entry to whatever the registry holds.
  const registerGateway = (): void => {
    mockProvidersList = [...mockProvidersList, { ...gatewayEntry }];
  };

  it('routes a known gateway-kind provider to route "gateway"', () => {
    registerGateway();
    const { route, baseUrl, wireModelId, providerId } = resolveModelProvider('auto/auto:code', config);
    expect(route).toBe('gateway');
    expect(baseUrl).toBe('http://100.114.205.53:9503');
    expect(wireModelId).toBe('auto:code');
    expect(providerId).toBe('auto');
  });

  it('resolves an orphaned auto:* session to gateway_error, never swap', () => {
    // The registry has no gateway provider here — the entry was removed.
    const resolved = resolveModelProvider('auto/auto:code', config);
    expect(resolved.route).toBe('gateway_error');
    expect(resolved.gatewayReason).toBe('offline');
    expect(resolved.baseUrl).not.toBe(config.LLAMA_SWAP_URL);
  });

  it('upstreamModel throws a clean error for gateway_error', () => {
    const attempt = (): unknown => upstreamModel(config, 'auto/auto:fast');
    expect(attempt).toThrow(/routing gateway offline/);
  });

  it('resolveModelEndpoint throws a clean error for gateway_error', () => {
    const attempt = (): unknown => resolveModelEndpoint(config, 'auto/auto:fast');
    expect(attempt).toThrow(/routing gateway offline/);
  });

  it('upstreamModel returns a model for a live gateway', () => {
    registerGateway();
    const built = upstreamModel(config, 'auto/auto:code');
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('auto:code');
  });
});

View File

@@ -25,7 +25,6 @@ const BASE_AGENT: Agent = {
source: 'global',
max_tool_calls: null,
steps: null,
llama_extra_args: null,
};
function call(name: string, args: Record<string, unknown> = {}): ToolCall {

View File

@@ -2,7 +2,7 @@ import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js';
import { ALL_TOOLS, resolveToolTier } from './tools.js';
import { validateExtraArgs } from './inference/llama-args-validator.js';
import { stripQuotes } from '../utils/string-utils.js';
// v1.8.1: global agents live at /data/AGENTS.md inside the container
@@ -105,7 +105,7 @@ interface ParsedFrontmatter {
// (200) in the outer loop. Integer ≥ 0; steps: 0 means "no tool calls
// allowed" — the model responds text-only.
steps?: number;
llama_extra_args?: string[];
// vDeepSeek: thinking effort for DeepSeek V4 models.
reasoning_effort?: string;
}
@@ -253,34 +253,7 @@ function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: stri
} else {
errors.push(`steps must be a non-negative integer (got "${valueRaw}")`);
}
} else if (key === 'llama_extra_args') {
if (valueRaw === '') {
data.llama_extra_args = [];
// No arrayKey support — llama_extra_args uses inline list only.
} else if (valueRaw.startsWith('[') && valueRaw.endsWith(']')) {
const inner = valueRaw.slice(1, -1);
const parsed = inner
.split(',')
.map((s) => stripQuotes(s.trim()))
.filter((s) => s.length > 0);
try {
validateExtraArgs(parsed);
data.llama_extra_args = parsed;
} catch (err) {
errors.push(err instanceof Error ? err.message : String(err));
}
} else {
const parsed = valueRaw
.split(',')
.map((s) => stripQuotes(s.trim()))
.filter((s) => s.length > 0);
try {
validateExtraArgs(parsed);
data.llama_extra_args = parsed;
} catch (err) {
errors.push(err instanceof Error ? err.message : String(err));
}
}
}
// Unknown keys silently ignored — forward-compat.
}
@@ -387,7 +360,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
steps: typeof fm.steps === 'number' ? fm.steps : null,
llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
};
}

View File

@@ -357,7 +357,7 @@ async function callLlm(
const { url, headers, model: resolvedModel } = resolveModelEndpoint(config, model);
const res = await fetch(`${url}/v1/chat/completions`, {
method: 'POST',
headers,
headers: { ...headers, 'X-Boo-Source': 'boochat' },
body: JSON.stringify({ model: resolvedModel, messages, stream: false }),
});
if (!res.ok) {
@@ -525,9 +525,11 @@ export async function process(input: ProcessInput): Promise<void> {
// 7. Single completion (no tools). Throws on llama-swap failure.
result = await callLlm(config, session.model, payload, log);
// 7b. v1.11.3: fetch the model's true context window from llama-swap's
// /upstream/<model>/props (the streaming completion doesn't carry it).
// 7b. v1.11.3: fetch the model's true context window from the provider's
// /upstream/<wireModelId>/props (the streaming completion doesn't carry it).
// Same pattern as inference.ts; the cache makes repeated calls free.
// v2.x (W3): composite model ids resolve through the provider registry
// rather than a process-wide LLAMA_SWAP_URL. Note that no config is passed
// at this call site; the lookup receives the model id only.
const mctx = await modelContextLookup.getModelContext(session.model);
const nCtx = mctx?.n_ctx ?? null;

View File

@@ -1,209 +0,0 @@
// Guards against agent-supplied llama-server CLI flags that would clash with
// values BooCode sets itself. Two concerns live here:
//
// 1. A hard denylist of flags that BooCode owns outright (model selection,
// the listening socket, credentials, the bundled web UI). Passing any of
// these is a configuration error and is rejected loudly.
//
// 2. A "shadowing" set of flags that are legal to pass but, because of
// llama.cpp's last-wins argument parsing, would override a first-class
// BooCode setting. These are silently removed from the auto-generated
// argv so the agent's explicit choice takes precedence without leaving a
// duplicate flag behind.
//
// All flag spellings below are the public llama-server option names (short and
// long aliases) documented in its --help output.
// --- Hard denylist -------------------------------------------------------
// Authored as named buckets purely for readability; every alias is folded
// into one flat lookup set at module load. Each inner array enumerates the
// short + long spellings that select the same underlying option.
// Model selection: which weights (and multimodal projector) get loaded.
const MODEL_SOURCE_FLAGS = [
  ['-m', '--model'],
  ['-mu', '--model-url'],
  ['-dr', '--docker-repo'],
  ['-hf', '-hfr', '--hf-repo'],
  ['-hff', '--hf-file'],
  ['-hfv', '-hfrv', '--hf-repo-v'],
  ['-hffv', '--hf-file-v'],
  ['-hft', '--hf-token'],
  ['-mm', '--mmproj'],
  ['-mmu', '--mmproj-url'],
];

// The listening socket and URL layout.
const LISTEN_FLAGS = [
  ['--host'],
  ['--port'],
  ['--path'],
  ['--api-prefix'],
  ['--reuse-port'],
];

// Credentials and TLS material.
const CREDENTIAL_FLAGS = [
  ['--api-key'],
  ['--api-key-file'],
  ['--ssl-key-file'],
  ['--ssl-cert-file'],
];

// The bundled web UI and its model-management switches.
const WEBUI_FLAGS = [
  ['--webui', '--no-webui'],
  ['--ui', '--no-ui'],
  ['--ui-config'],
  ['--ui-config-file'],
  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
  ['--models-dir'],
  ['--models-preset'],
  ['--models-max'],
  ['--models-autoload', '--no-models-autoload'],
];

// Every alias from every bucket, flattened once at module load into a single
// lookup set (bucket order, then alias order, is preserved).
const MANAGED_FLAGS: ReadonlySet<string> = (() => {
  const everyAlias = new Set<string>();
  const buckets = [MODEL_SOURCE_FLAGS, LISTEN_FLAGS, CREDENTIAL_FLAGS, WEBUI_FLAGS];
  for (const bucket of buckets) {
    for (const aliasGroup of bucket) {
      for (const alias of aliasGroup) {
        everyAlias.add(alias);
      }
    }
  }
  return everyAlias;
})();
// --- Token parsing -------------------------------------------------------
const DIGIT = /^[0-9]$/;

/**
 * Pull the option name out of one argv token.
 *
 * Returns `null` for anything that is not an option: tokens that do not start
 * with `-`, the bare `-` and `--` tokens, and tokens whose character after the
 * leading dash is a digit or `.` (so negative numbers such as `-1` or `-0.5`
 * are treated as values). For an option, the name is everything before the
 * first `=`, e.g. `--ctx-size=4096` gives `--ctx-size`.
 */
function parseFlag(token: string): string | null {
  const startsLikeOption = token.startsWith('-') && token !== '-' && token !== '--';
  if (!startsLikeOption) return null;

  // Safe: a token that starts with '-' and is not just '-' has a second char.
  const afterDash = token.charAt(1);
  const looksNumeric = afterDash === '.' || DIGIT.test(afterDash);
  if (looksNumeric) return null;

  const equalsAt = token.indexOf('=');
  if (equalsAt < 0) return token;
  return token.substring(0, equalsAt);
}
// --- Public API ----------------------------------------------------------
/**
 * Check extra llama-server args against the managed-flag denylist.
 *
 * Each entry is stringified and, if it parses as an option whose name is in
 * the managed set, validation stops with an error naming that flag. Entries
 * that are values (or unmanaged options) are accepted as-is.
 *
 * @param args extra args to check; omitted or falsy input yields `[]`
 * @returns the accepted args as a fresh string array, in input order
 * @throws Error for the first managed flag encountered
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];

  const accepted: string[] = [];
  for (const raw of args) {
    const token = String(raw);
    const name = parseFlag(token);
    const isManaged = name !== null && MANAGED_FLAGS.has(name);
    if (isManaged) {
      throw new Error(
        `llama-server flag '${name}' is managed and cannot be passed as an extra arg`,
      );
    }
    accepted.push(token);
  }
  return accepted;
}
/**
 * True when `flag` is a BooCode-managed flag that callers may not override.
 *
 * This is an exact-match lookup against the managed set, so pass the bare
 * flag name (e.g. `--port`); a token that still carries an inline `=value`
 * will not match.
 */
export function isManagedFlag(flag: string): boolean {
return MANAGED_FLAGS.has(flag);
}
// --- Shadowing flags -----------------------------------------------------
// Flags below are legal for an agent to pass, but each shadows a setting
// BooCode applies itself. They are categorised so a caller can opt out of
// stripping any one category.
// Context-size flags (short and long spelling).
const SHADOW_CONTEXT = ['-c', '--ctx-size'];
// Empty: agents should be able to opt into cache-type flags (lift analysis
// found these are high-value features, not safety concerns).
const SHADOW_CACHE: string[] = [];
// Empty: ngram speculative decoding is a performance feature agents should
// be able to enable.
const SHADOW_SPEC: string[] = [];
// Chat-template selection flags plus the Jinja on/off switches.
const SHADOW_TEMPLATE = [
'--chat-template',
'--chat-template-file',
'--chat-template-kwargs',
'--jinja',
'--no-jinja',
];
// Shadowing flags that take no value — a boolean switch — so the stripper must
// not also drop the following token.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
'--jinja',
'--no-jinja',
]);
/**
 * Per-category switches for `stripShadowingFlags`. A category is stripped
 * unless its option is explicitly `false`; leaving an option out (or setting
 * it to `true`) keeps the default stripping behaviour.
 */
export interface StripOptions {
/** `false` keeps the context-size flags (`SHADOW_CONTEXT`). */
stripContext?: boolean;
/** `false` keeps the cache flags (`SHADOW_CACHE`, currently empty). */
stripCache?: boolean;
/** `false` keeps the speculative-decoding flags (`SHADOW_SPEC`, currently empty). */
stripSpec?: boolean;
/** `false` keeps the chat-template flags (`SHADOW_TEMPLATE`). */
stripTemplate?: boolean;
}
/**
 * Drop shadowing flags, together with their values, from an argv sequence.
 *
 * Every category is active unless the matching `strip*` option is explicitly
 * `false`. A dropped flag written as `--flag=value`, or one that is a boolean
 * switch, consumes only its own token. Otherwise the token that follows is
 * dropped as well, provided it is not itself a flag (e.g. the `4096` in
 * `-c 4096`).
 *
 * @param args argv tokens to filter; each is stringified first
 * @param opts per-category opt-outs
 * @returns the surviving tokens, in their original order
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  // Pair each opt-out with the flags it governs, then collect the active ones.
  const categories: Array<[boolean | undefined, readonly string[]]> = [
    [opts?.stripContext, SHADOW_CONTEXT],
    [opts?.stripCache, SHADOW_CACHE],
    [opts?.stripSpec, SHADOW_SPEC],
    [opts?.stripTemplate, SHADOW_TEMPLATE],
  ];
  const targets = new Set<string>();
  for (const [enabled, flags] of categories) {
    if (enabled === false) continue;
    for (const name of flags) targets.add(name);
  }

  const tokens = Array.from(args, String);
  const kept: string[] = [];
  let cursor = 0;
  while (cursor < tokens.length) {
    const token = tokens[cursor]!;
    cursor += 1;

    const flag = parseFlag(token);
    const isTargeted = flag !== null && targets.has(flag);
    if (flag === null || !isTargeted) {
      // Values and untargeted flags pass through verbatim.
      kept.push(token);
      continue;
    }

    // The flag itself is dropped. Boolean switches and the inline `=value`
    // form have no separate value token to remove.
    if (VALUELESS_SHADOW_FLAGS.has(flag) || token.includes('=')) continue;

    // Otherwise swallow the following token when it is a value, not a flag.
    const following = tokens[cursor];
    if (following !== undefined && parseFlag(following) === null) {
      cursor += 1;
    }
  }
  return kept;
}

View File

@@ -1,6 +1,7 @@
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { createDeepSeek } from '@ai-sdk/deepseek';
import type { LanguageModel } from 'ai';
import { getLlamaProviders, parseModelRef } from '../llama-providers.js';
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
@@ -8,48 +9,46 @@ import type { LanguageModel } from 'ai';
// Tailscale topology and exposing it over the public internet is gated by
// Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
// llama-sidecar instead. A fresh provider is created per call (not cached)
// because the X-Agent-Flags header varies per agent. The llama-swap path
// stays cached since it has no per-request headers.
//
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
// is set, route through the official @ai-sdk/deepseek provider (not
// openai-compatible) so DeepSeek-specific features work: providerMetadata
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
// v2.x: provider-aware resolver (W2). One resolver answers provider identity,
// upstream base URL, final wire model id, and DeepSeek
// special handling. Both upstreamModel() and resolveModelEndpoint() go through
// it. Legacy bare-id prefix heuristics live only in the fallback layer.
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
let provider = swapCache.get(baseURL);
function getSwapProvider(baseURL: string, source?: string): ReturnType<typeof createOpenAICompatible> {
const cacheKey = source ? `${baseURL}||${source}` : baseURL;
let provider = swapCache.get(cacheKey);
if (!provider) {
const fetchWrapper = source
? ((...args: Parameters<typeof fetch>) => {
const [input, init] = args;
return fetch(input, {
...init,
headers: {
...(init?.headers as Record<string, string> | undefined) ?? {},
'X-Boo-Source': source,
},
});
})
: undefined;
provider = createOpenAICompatible({
name: 'llama-swap',
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
includeUsage: true,
});
swapCache.set(baseURL, provider);
...(fetchWrapper ? { fetch: fetchWrapper } : {}),
}) as ReturnType<typeof createOpenAICompatible>;
swapCache.set(cacheKey, provider);
}
return provider;
}
/**
 * Build an OpenAI-compatible provider named 'llama-sidecar'.
 *
 * `/v1` is appended to `baseURL` unless it already ends with it, and the
 * given flags are sent space-joined in the `X-Agent-Flags` header. A new
 * provider is constructed on every call.
 */
function sidecarProvider(
  baseURL: string,
  flags: string[],
): ReturnType<typeof createOpenAICompatible> {
  const apiRoot = baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`;
  const agentFlagsHeader = flags.join(' ');
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: apiRoot,
    includeUsage: true,
    headers: { 'X-Agent-Flags': agentFlagsHeader },
  });
}
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
/**
 * Legacy check: does the id begin with the bare "deepseek-" prefix?
 *
 * Retained for backward compatibility with bare "deepseek-*" ids. Composite
 * "deepseek/model" ids are recognised by their provider id, not by this
 * prefix test.
 *
 * @param modelId Model id to inspect.
 * @returns true when `modelId` starts with "deepseek-".
 */
export function isDeepSeekModel(modelId: string): boolean {
  const leading = modelId.slice(0, DEEPSEEK_MODEL_PREFIX.length);
  return leading === DEEPSEEK_MODEL_PREFIX;
}
@@ -69,69 +68,204 @@ function getDeepSeekProvider(
return deepseekProviderCache;
}
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';
// ---------------------------------------------------------------------------
// Provider-aware resolver (W2, D-2, D-3)
// ---------------------------------------------------------------------------
export interface RoutingInfo {
// P7: 'gateway' routes to the BooControl auto:* gateway (OpenAI-compatible,
// does its own policy routing + failover). 'gateway_error' is the
// present-but-unhealthy / orphaned-session state: the session selected an
// auto:* model but the gateway provider is missing/disabled, so we surface a
// clean error instead of silently mis-routing to LLAMA_SWAP_URL.
export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error';
/** Provider registry `kind` marking the BooControl routing gateway. */
export const GATEWAY_KIND = 'boocontrol-gateway';
/**
 * Tell whether a bare wire model id names a gateway virtual model: either
 * exactly `auto`, or any id beginning with `auto:`.
 *
 * Used to spot an orphaned auto:* session whose gateway registry entry has
 * been removed — the id still looks like a gateway model, so the resolver
 * reports gateway_error instead of falling back to swap.
 *
 * @param wireModelId Bare wire model id (no provider prefix).
 * @returns true for `auto` and for ids that start with `auto:`.
 */
export function isGatewayVirtualModel(wireModelId: string): boolean {
  if (wireModelId === 'auto') {
    return true;
  }
  return wireModelId.startsWith('auto:');
}
export interface ResolvedModel {
/** Routing destination. */
route: InferenceRoute;
flags: string[] | null;
/** Upstream base URL for the provider (DeepSeek API base or llama-swap). */
baseUrl: string;
/** Wire model id to send upstream (bare, no provider prefix). */
wireModelId: string;
/** Whether the input was a legacy bare id resolved through defaultProvider. */
isLegacyBareId: boolean;
/** Provider identity (e.g. "sam-desktop", "embedding", "deepseek"). */
providerId: string;
/** For route 'gateway_error': why the gateway is unavailable. */
gatewayReason?: 'offline' | 'unhealthy';
}
interface AgentLike {
llama_extra_args: string[] | null;
// reserved for future per-agent routing attributes
}
interface ConfigLike {
LLAMA_SWAP_URL: string;
LLAMA_SIDECAR_URL?: string;
DEEPSEEK_API_KEY?: string;
DEEPSEEK_BASE_URL?: string;
}
/**
 * Provider-aware model resolver. For a (possibly bare) model id it reports the
 * route, the upstream base URL, the bare wire model id, whether the id was a
 * legacy bare id, and the provider identity.
 *
 * Resolution order:
 *  1. DeepSeek — provider id "deepseek", or a legacy bare id with the
 *     "deepseek-" prefix — but only when DEEPSEEK_API_KEY is set. Without a
 *     key the id falls through to the registry lookup below.
 *  2. A registry provider whose id matches: route 'gateway' when its kind is
 *     GATEWAY_KIND, otherwise 'swap'; both use the provider's own baseUrl.
 *  3. No matching registry provider: a gateway virtual model resolves to
 *     'gateway_error' (reason 'offline'); anything else falls back to
 *     LLAMA_SWAP_URL.
 *
 * @param modelId Bare ("model") or composite ("provider/model") model id.
 * @param config Supplies LLAMA_SWAP_URL and the optional DeepSeek settings.
 * @returns The resolved routing description.
 */
export function resolveModelProvider(
  modelId: string,
  config: ConfigLike,
): ResolvedModel {
  const registry = getLlamaProviders();
  const { providerId, wireModelId, isLegacyBareId } = parseModelRef(modelId);

  // --- DeepSeek routing ---
  // Either the explicit provider id "deepseek" or the legacy bare "deepseek-*"
  // prefix selects the DeepSeek SDK, and only when an API key is configured.
  const namesDeepSeek =
    providerId === 'deepseek' || (isLegacyBareId && isDeepSeekModel(wireModelId));
  if (namesDeepSeek && Boolean(config.DEEPSEEK_API_KEY)) {
    const configuredBase = config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com';
    return {
      route: 'deepseek',
      baseUrl: configuredBase.replace(/\/+$/, ''),
      wireModelId,
      isLegacyBareId,
      providerId: 'deepseek',
    };
  }

  // --- Registry routing (local providers and the P7 gateway) ---
  // A gateway-kind provider is addressed as an OpenAI-compatible upstream (it
  // does its own policy routing); every other known provider is a swap target.
  const match = registry.providers.find((entry) => entry.id === providerId);
  if (match) {
    return {
      route: match.kind === GATEWAY_KIND ? 'gateway' : 'swap',
      baseUrl: match.baseUrl,
      wireModelId,
      isLegacyBareId,
      providerId: match.id,
    };
  }

  // --- No registry entry for this provider id ---
  // Orphaned auto:* session: the id still looks like a gateway virtual model
  // but no gateway provider is configured. Resolve to a clean gateway_error —
  // NEVER the silent LLAMA_SWAP_URL fallback (design §8).
  if (isGatewayVirtualModel(wireModelId)) {
    return {
      route: 'gateway_error',
      baseUrl: '',
      wireModelId,
      isLegacyBareId,
      providerId,
      gatewayReason: 'offline',
    };
  }

  // Best-effort fallback to LLAMA_SWAP_URL. Bare ids are attributed to the
  // legacy 'llama-swap' provider; composite ids keep the provider id they
  // named even though that provider is not configured.
  return {
    route: 'swap',
    baseUrl: config.LLAMA_SWAP_URL,
    wireModelId,
    isLegacyBareId,
    providerId: isLegacyBareId ? 'llama-swap' : providerId,
  };
}
/**
* @deprecated Use resolveModelProvider() for full routing info. Kept for
* backward compat with resolveRoute() callers that only need the route tag.
*/
export function resolveRoute(
agent: AgentLike | null,
config?: ConfigLike,
modelId?: string,
): RoutingInfo {
// vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
// route through the DeepSeek provider. Checked first so DeepSeek models
// always bypass llama-swap/sidecar even when those are also configured.
if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
return { route: 'deepseek', flags: null };
}
// When llama_extra_args are explicitly set, route through sidecar with them.
const flags = agent?.llama_extra_args;
if (flags && flags.length > 0) {
return { route: 'sidecar', flags };
}
// When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
// route through sidecar to pick up the default base args (cache quant,
// spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
if (config?.LLAMA_SIDECAR_URL) {
return { route: 'sidecar', flags: [] };
}
return { route: 'swap', flags: null };
): { route: InferenceRoute } {
if (!modelId || !config) return { route: 'swap' };
const resolved = resolveModelProvider(modelId, config);
return { route: resolved.route };
}
export function upstreamModel(
config: ConfigLike,
modelId: string,
agent?: AgentLike | null,
source?: string,
): LanguageModel {
const { route, flags } = resolveRoute(agent ?? null, config, modelId);
if (route === 'deepseek') {
const resolved = resolveModelProvider(modelId, config);
if (resolved.route === 'deepseek') {
return getDeepSeekProvider(
config.DEEPSEEK_API_KEY!,
config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
).chat(modelId);
resolved.baseUrl,
).chat(resolved.wireModelId);
}
if (route === 'sidecar') {
const url = config.LLAMA_SIDECAR_URL;
if (!url) {
throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
}
return sidecarProvider(url, (flags ?? [])).chatModel(modelId);
// P7: gateway is OpenAI-compatible — same adapter as swap, pointed at the
// gateway baseUrl. The gateway resolves the policy + forwards X-Boo-Source.
if (resolved.route === 'gateway') {
return getSwapProvider(resolved.baseUrl, source).chatModel(resolved.wireModelId);
}
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
// P7: orphaned auto:* session with no gateway configured — fail loud rather
// than silently mis-route to LLAMA_SWAP_URL.
if (resolved.route === 'gateway_error') {
throw new Error(
`routing gateway offline (${resolved.gatewayReason ?? 'unavailable'}): ${modelId}`,
);
}
return getSwapProvider(resolved.baseUrl, source).chatModel(resolved.wireModelId);
}
/** Resolve the API endpoint for non-streaming calls (compaction, task-model).
@@ -140,18 +274,30 @@ export function resolveModelEndpoint(
config: ConfigLike,
modelId: string,
): { url: string; model: string; headers: Record<string, string> } {
const resolved = resolveModelProvider(modelId, config);
const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
if (modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && config.DEEPSEEK_API_KEY) {
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
if (resolved.route === 'deepseek') {
return {
url: baseURL,
model: modelId,
url: resolved.baseUrl,
model: resolved.wireModelId,
headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
};
}
// P7: orphaned auto:* session with no gateway — fail loud (no swap fallback).
if (resolved.route === 'gateway_error') {
throw new Error(
`routing gateway offline (${resolved.gatewayReason ?? 'unavailable'}): ${modelId}`,
);
}
// P7: gateway uses the same unauthenticated OpenAI-compatible shape as swap.
// X-Boo-Source forwarding for direct-fetch callers happens at their own header
// layer (compaction.ts / task-model.ts); the gateway re-forwards it onward.
return {
url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
model: modelId,
url: resolved.baseUrl.replace(/\/+$/, ''),
model: resolved.wireModelId,
headers: baseHeaders,
};
}

View File

@@ -306,7 +306,7 @@ export async function streamCompletion(
: stallAc.signal;
const result = streamText({
model: upstreamModel(ctx.config, model, agent ?? null),
model: upstreamModel(ctx.config, model, agent ?? null, 'boochat'),
messages: aiMessages,
...(aiTools
? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }

View File

@@ -0,0 +1,101 @@
/**
* vMultiProvider local provider registry loader (server-side).
*
* Reads the shared `/data/llama-providers.json` (or `LLAMA_PROVIDERS_PATH`) at
* startup and caches the parsed result. When the file is absent or invalid,
* synthesizes a single legacy provider from `LLAMA_SWAP_URL` so both apps
* start with only legacy env vars (D-1).
*
* Schema and pure helpers live in @boocode/contracts/llama-providers.
* File I/O stays app-local per D-1.
*/
import { readFileSync } from 'node:fs';
import {
LlamaProvidersFileSchema,
type LlamaProvidersFile,
type LlamaProvider,
type ParsedModelRef,
parseModelRef as parseModelRefBase,
formatModelRef,
} from '@boocode/contracts/llama-providers';
// Re-export the shared contract types for server-side callers.
export type { LlamaProvidersFile, LlamaProvider, ParsedModelRef };
// formatModelRef is imported above without a `type` modifier, i.e. as a value.
// Re-exporting it under `export type` would expose only its type, so importers
// of this module could not call it; re-export it as a value instead.
export { formatModelRef };
/**
 * Synthesize the legacy registry: one provider whose id, label and kind are
 * all 'llama-swap', whose baseUrl is the given URL, and which is also the
 * default provider.
 *
 * @param llamaSwapUrl Base URL assigned to the single synthesized provider.
 * @returns A freshly built single-provider registry.
 */
function buildLegacyProvider(llamaSwapUrl: string): LlamaProvidersFile {
  const legacyId = 'llama-swap';
  const registry: LlamaProvidersFile = {
    defaultProvider: legacyId,
    providers: [{ id: legacyId, label: legacyId, baseUrl: llamaSwapUrl, kind: legacyId }],
  };
  return registry;
}
let cached: LlamaProvidersFile | null = null;
/**
 * Load (or re-load) the local provider config and cache the result.
 *
 * Never throws on bad input: a missing path, an unreadable file, invalid JSON
 * or a schema mismatch all fall back to the legacy single-provider shape built
 * from `llamaSwapUrl`, and that fallback replaces the cached value.
 *
 * @param providersPath Path to the providers JSON file. When undefined or
 *   empty, the legacy fallback is used without touching the filesystem.
 * @param llamaSwapUrl Base URL used to synthesize the legacy provider.
 * @returns The registry that is now cached.
 */
export function loadLlamaProviders(
  providersPath: string | undefined,
  llamaSwapUrl: string,
): LlamaProvidersFile {
  // Shared exit for every failure mode, so the cache is always refreshed.
  const useLegacy = (): LlamaProvidersFile => {
    const legacy = buildLegacyProvider(llamaSwapUrl);
    cached = legacy;
    return legacy;
  };

  if (!providersPath) {
    return useLegacy();
  }

  let raw: string;
  try {
    raw = readFileSync(providersPath, 'utf8');
  } catch (err) {
    // Only ENOENT really means "not found". Other failures (permissions, path
    // is a directory, ...) are reported with the underlying error instead of
    // being mislabelled as a missing file.
    const code = (err as { code?: unknown } | null)?.code;
    if (code === 'ENOENT') {
      console.warn(
        `llama-providers: file not found at ${providersPath} — falling back to legacy single-provider`,
      );
    } else {
      console.error(
        `llama-providers: cannot read ${providersPath} — falling back to legacy single-provider`,
        err,
      );
    }
    return useLegacy();
  }

  let json: unknown;
  try {
    json = JSON.parse(raw);
  } catch (err) {
    console.error(
      `llama-providers: invalid JSON in ${providersPath} — falling back to legacy single-provider`,
      err,
    );
    return useLegacy();
  }

  const parsed = LlamaProvidersFileSchema.safeParse(json);
  if (!parsed.success) {
    console.error(
      `llama-providers: schema validation failed for ${providersPath} — falling back to legacy single-provider`,
      parsed.error.flatten(),
    );
    return useLegacy();
  }

  cached = parsed.data;
  return cached;
}
/**
 * Return the cached provider config. When nothing has been loaded yet, a
 * legacy single-provider fallback pointing at http://localhost:8080 is built
 * and returned; that fallback is not written to the cache.
 */
export function getLlamaProviders(): LlamaProvidersFile {
  if (cached != null) {
    return cached;
  }
  return buildLegacyProvider('http://localhost:8080');
}
/**
 * Convenience wrapper: parse a model ref with the shared contracts parser,
 * passing the cached registry's `defaultProvider` as the default.
 *
 * @param ref Model reference string to parse.
 * @returns The parsed reference as produced by the contracts parser.
 */
export function parseModelRef(ref: string): ParsedModelRef {
  const { defaultProvider } = getLlamaProviders();
  return parseModelRefBase(ref, defaultProvider);
}

View File

@@ -1,13 +1,15 @@
// v1.11.3: llama-swap model-context cache. Replaces the dead
// v2.x: provider-aware model-context cache (W3). Replaces the dead
// `parsed.timings.n_ctx` capture in inference.ts / compaction.ts —
// llama-server's streaming completion never emits n_ctx in timings (verified
// empirically: timings carries prompt_n / predicted_n / *_ms / *_per_second
// only). The authoritative source is llama-swap's
// /upstream/<model>/props endpoint at .default_generation_settings.n_ctx.
// only). The authoritative source is the provider's
// /upstream/<wireModelId>/props endpoint at .default_generation_settings.n_ctx.
//
// Cache design:
// - Keys are the full composite model id (provider/model) so two providers
// serving the same wire model name never share cache entries (D-2).
// - Positive entries (n_ctx + total_slots) have no TTL. A model's context
// size doesn't change while llama-swap is running; an admin endpoint
// size doesn't change while the provider is running; an admin endpoint
// can invalidateModelContext() if it ever does.
// - Negative entries (failed fetch) have a 60s TTL so a misconfigured or
// down model doesn't get hammered every inference turn, but recovers
@@ -15,6 +17,11 @@
// - 3s AbortController timeout on the fetch — long enough for a healthy
// upstream, short enough that a stuck upstream doesn't block the
// ctx_max UPDATE that follows.
//
// v1.x legacy: previously keyed by bare wire id and used a process-wide
// LLAMA_SWAP_URL. Now resolved per-call via the provider registry.
import { resolveModelProvider } from './inference/provider.js';
export interface ModelContext {
n_ctx: number;
@@ -28,29 +35,79 @@ const positiveCache = new Map<string, ModelContext>();
// re-fetches within the 60s window.
const negativeCache = new Map<string, number>();
// Set once at startup by index.ts. We don't import loadConfig() directly
// here to keep this module trivially mockable in tests (set the URL in
// beforeEach instead of stubbing process.env + loadConfig's cache).
let llamaSwapUrl: string | null = null;
// Stored config for provider-aware resolution. Supports both the legacy
// { llamaSwapUrl: string } shape (for tests) and the full Config shape.
let storedConfig: ConfigForModelContext | null = null;
export function configureModelContext(opts: { llamaSwapUrl: string }): void {
llamaSwapUrl = opts.llamaSwapUrl;
/** Config fields needed for model-context provider resolution. */
type ConfigForModelContext = {
LLAMA_SWAP_URL: string;
DEEPSEEK_API_KEY?: string;
DEEPSEEK_BASE_URL?: string;
};
/**
 * Store the config used for model-context lookups.
 *
 * Two input shapes are accepted: the full server Config (production) and the
 * legacy `{ llamaSwapUrl }` object (tests). The full Config is preferred
 * because it lets getModelContext resolve composite model ids through the
 * provider registry.
 *
 * @param opts Full config, or the legacy `{ llamaSwapUrl }` helper shape.
 */
export function configureModelContext(
  opts: ConfigForModelContext | { llamaSwapUrl: string },
): void {
  // Anything that is not the legacy helper shape is stored as given.
  if (!('llamaSwapUrl' in opts) || typeof opts.llamaSwapUrl !== 'string') {
    storedConfig = opts as ConfigForModelContext;
    return;
  }
  // Legacy test helper: { llamaSwapUrl } → synthesize a minimal config.
  storedConfig = { LLAMA_SWAP_URL: opts.llamaSwapUrl };
}
// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
// Return a reasonable default context so compaction estimates work.
const DEEPSEEK_DEFAULT_N_CTX = 131_072;
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
export async function getModelContext(model: string): Promise<ModelContext | null> {
// vDeepSeek: DeepSeek models have no /upstream/<model>/props. Use a static
// default so compaction doesn't fall to the buffer-only path with tiny limits.
if (model.startsWith(DEEPSEEK_MODEL_PREFIX)) {
// Resolve the model through the provider-aware resolver. For composite
// "provider/model" ids, this finds the correct provider's baseUrl. For
// bare legacy ids, it falls back to the default provider.
const config = storedConfig;
if (!config) {
// Module not initialized. Defensive — index.ts calls
// configureModelContext at startup; if a test forgets, fail closed so
// the chat still works (ctx_max stays null, UI degrades gracefully).
negativeCache.set(model, Date.now());
return null;
}
const resolved = resolveModelProvider(model, config);
// DeepSeek models (by provider id) have no /upstream/<model>/props.
// Use a static default so compaction doesn't fall to the buffer-only
// path with tiny limits.
if (resolved.providerId === 'deepseek') {
return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
}
// P7: orphaned auto:* session with no gateway configured — no props endpoint
// to query. Negative-cache and return null; compaction degrades gracefully.
if (resolved.route === 'gateway_error') {
negativeCache.set(model, Date.now());
return null;
}
// P7: gateway route — baseUrl is the control gateway, which exposes
// /upstream/<virtualModel>/props (it proxies the chosen candidate's props).
// The normal fetch path below handles it without special-casing.
// Cache key is the full composite id to prevent cross-provider cache
// poisoning for duplicate wire model names (D-2, design §5.3).
const cacheKey = `${resolved.providerId}/${resolved.wireModelId}`;
// 1. Positive cache hit — no TTL check, model n_ctx is invariant.
const pos = positiveCache.get(model);
const pos = positiveCache.get(cacheKey);
if (pos) return pos;
// 2. Negative cache hit within TTL — return null without refetching.
@@ -58,30 +115,25 @@ export async function getModelContext(model: string): Promise<ModelContext | nul
// attempt below; we don't delete them eagerly because the next successful
// fetch will overwrite via the positive map and the negative entry
// becomes irrelevant.
const negTs = negativeCache.get(model);
const negTs = negativeCache.get(cacheKey);
if (negTs !== undefined && Date.now() - negTs < NEGATIVE_TTL_MS) {
return null;
}
// 3. Module not initialized. Defensive — index.ts calls
// configureModelContext at startup; if a test forgets, fail closed so
// the chat still works (ctx_max stays null, UI degrades gracefully).
if (!llamaSwapUrl) {
negativeCache.set(model, Date.now());
return null;
}
// 4. Fetch with timeout. AbortController fires after FETCH_TIMEOUT_MS;
// 3. Fetch with timeout. AbortController fires after FETCH_TIMEOUT_MS;
// both the timeout path and a fetch reject end up in the catch below
// and produce a negative cache entry.
const url = `${llamaSwapUrl}/upstream/${encodeURIComponent(model)}/props`;
//
// Strip the provider prefix: fetch from
// <provider.baseUrl>/upstream/<wireModelId>/props (design §5.3).
const url = `${resolved.baseUrl.replace(/\/+$/, '')}/upstream/${encodeURIComponent(resolved.wireModelId)}/props`;
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
try {
const res = await fetch(url, { signal: controller.signal });
clearTimeout(timer);
if (!res.ok) {
negativeCache.set(model, Date.now());
negativeCache.set(cacheKey, Date.now());
return null;
}
const body = (await res.json()) as {
@@ -89,18 +141,18 @@ export async function getModelContext(model: string): Promise<ModelContext | nul
};
const n_ctx = body?.default_generation_settings?.n_ctx;
if (typeof n_ctx !== 'number' || n_ctx <= 0) {
negativeCache.set(model, Date.now());
negativeCache.set(cacheKey, Date.now());
return null;
}
const entry: ModelContext = { n_ctx };
positiveCache.set(model, entry);
positiveCache.set(cacheKey, entry);
// Clear any stale negative entry so a future query sees the positive
// hit cleanly (otherwise the negative TTL never expires from the map).
negativeCache.delete(model);
negativeCache.delete(cacheKey);
return entry;
} catch {
clearTimeout(timer);
negativeCache.set(model, Date.now());
negativeCache.set(cacheKey, Date.now());
return null;
}
}
@@ -110,7 +162,16 @@ export function invalidateModelContext(model?: string): void {
positiveCache.clear();
negativeCache.clear();
} else {
positiveCache.delete(model);
negativeCache.delete(model);
// Resolve to composite cache key. If the model is already composite
// (contains '/'), it's used directly. Otherwise, resolve through the
// provider registry to find the composite key. This keeps backward
// compat with callers passing bare model names.
let cacheKey = model;
if (storedConfig && !model.includes('/')) {
const resolved = resolveModelProvider(model, storedConfig);
cacheKey = `${resolved.providerId}/${resolved.wireModelId}`;
}
positiveCache.delete(cacheKey);
negativeCache.delete(cacheKey);
}
}

View File

@@ -21,7 +21,7 @@ import { createHash } from 'node:crypto';
import { readFile, stat } from 'node:fs/promises';
import type { Agent, Project, Session } from '../types/api.js';
import { getAgentsMtimes } from './agents.js';
import { resolveRoute } from './inference/provider.js';
import { resolveRoute, type InferenceRoute } from './inference/provider.js';
import { loadMemoryForSession } from './memory/recall.js';
import { formatMemoryBlock } from './memory/prompt.js';
@@ -101,7 +101,7 @@ export interface PrefixFingerprint {
has_agent_system_prompt: boolean;
has_session_override: boolean;
has_project_override: boolean;
route: 'swap' | 'sidecar' | 'deepseek';
route: InferenceRoute;
}
export interface PrefixDrift {
@@ -129,7 +129,7 @@ interface ObservedInputs {
has_agent_system_prompt: boolean;
has_session_override: boolean;
has_project_override: boolean;
route: 'swap' | 'sidecar' | 'deepseek';
route: InferenceRoute;
}
interface ObserverEntry {

View File

@@ -1,4 +1,5 @@
import { loadConfig, type Config } from '../config.js';
import { resolveModelEndpoint } from './inference/provider.js';
const TIMEOUT_MS = 10_000;
@@ -13,14 +14,19 @@ export async function taskModelCompletion(opts: {
const maxTokens = opts.maxTokens ?? 30;
const temperature = opts.temperature ?? 0.3;
const { url, model } = resolveEndpoint(config, opts.fallbackModel);
// v2.x (W3): resolve the endpoint through the shared provider-aware
// resolver instead of a local LLAMA_SWAP_URL fallback. This ensures
// composite model ids (e.g. "sam-desktop/qwen3.6-35b") route to the
// correct provider, and bare ids resolve through the default provider.
const model = config.FAST_MODEL ?? opts.fallbackModel ?? config.DEFAULT_MODEL;
const { url, model: resolvedModel, headers } = resolveModelEndpoint(config, model);
try {
const res = await fetch(`${url}/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { ...headers, 'X-Boo-Source': 'boochat' },
body: JSON.stringify({
model,
model: resolvedModel,
messages: [
{ role: 'system', content: opts.system },
{ role: 'user', content: opts.user },
@@ -55,14 +61,3 @@ export async function taskModelCompletion(opts: {
return '';
}
}
/**
 * Choose the endpoint and model for a task-model completion.
 *
 * When TASK_MODEL_URL is set, that URL is paired with the fixed model
 * 'gemma-3-270m-it'. Otherwise LLAMA_SWAP_URL is used with the first defined
 * of FAST_MODEL, `fallbackModel`, DEFAULT_MODEL.
 *
 * @param config Server config.
 * @param fallbackModel Model used when FAST_MODEL is not configured.
 * @returns The base URL and model id to call.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  const dedicatedUrl = config.TASK_MODEL_URL;
  return dedicatedUrl
    ? { url: dedicatedUrl, model: 'gemma-3-270m-it' }
    : {
        url: config.LLAMA_SWAP_URL,
        model: config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL,
      };
}

View File

@@ -129,7 +129,6 @@ export interface Agent {
// v1.14.0: per-agent step cap for the outer inference loop. null means
// bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
steps: number | null;
llama_extra_args: string[] | null;
// vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
// Maps to DeepSeek's reasoning_effort API param.
reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
@@ -244,6 +243,17 @@ export interface ModelInfo {
[key: string]: unknown;
}
// v2.x: provider-grouped model catalog (W2, D-4).
/** One provider's group in the provider-grouped model catalog. */
export interface ModelCatalogProvider {
// Provider identifier. NOTE(review): presumably matches the provider
// registry's `id` — confirm against the catalog producer.
id: string;
// Provider label. NOTE(review): presumably the human-readable name shown
// next to the group — confirm.
label: string;
// Models listed under this provider.
models: ModelInfo[];
}
/** Model catalog payload: a list of per-provider model groups. */
export interface ModelCatalogResponse {
// One entry per provider, each carrying that provider's models.
providers: ModelCatalogProvider[];
}
export interface SidebarSession {
id: string;
project_id: string;