chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
@@ -50,6 +50,5 @@ Route registration: all routes registered in `index.ts` via `register*Routes(app
|
||||
- `data/AGENTS.md` is PARSED (`agents.ts` `splitSections`/`parseAgentSection`): each `## <Name>` is one agent and must be followed by a `---` frontmatter fence or the block throws; content before the first `## ` is discarded. Do NOT add free-form `## ` rule sections — they break the registry. Cross-cutting agent rules go in CLAUDE.md or a parser-ignored preamble.
|
||||
- MCP stdio transport uses newline-delimited JSON (NDJSON), NOT LSP-style `Content-Length` headers. The boocontext MCP client (`services/mcp-client.ts`) is the reference (per the MCP spec, modelcontextprotocol.io/specification/server/transports).
|
||||
- **`payload.ts:loadContext` SELECT** must include every `Session` field downstream code reads. The tool phase reads `session.allowed_read_paths`; if the SELECT omits it, cross-repo read grants silently fail. `sql<Session[]>` doesn't enforce column coverage, so the type doesn't catch it.
|
||||
- **Sidecar routing** (`services/inference/provider.ts`): `upstreamModel(config, modelId, agent)` routes to `LLAMA_SIDECAR_URL` when the agent has `llama_extra_args`, else `LLAMA_SWAP_URL`. `resolveRoute(agent)` returns `{route, flags}`. Sidecar provider created fresh per call (not cached) because `X-Agent-Flags` varies per agent. Boot-time guard in `index.ts` refuses to start if any agent has `llama_extra_args` but `LLAMA_SIDECAR_URL` is unset.
|
||||
- **Secret guard safe patterns** (`services/secret_guard.ts`): `.env.example`, `.env.sample`, `.env.template`, `.env.defaults` are allowlisted via `SAFE_PATTERNS`. Do NOT add `.env.production`/`.env.development`/`.env.test` — those can hold real secrets.
|
||||
- **llama-sidecar** (`/opt/forks/llama-sidecar/`): Go daemon for a per-agent llama-server process pool (routed to via "Sidecar routing" above). Cross-compile: `GOOS=windows GOARCH=amd64 /snap/go/current/bin/go build -o bin/llama-sidecar.exe ./cmd/llama-sidecar`. Gitea: `indifferentketchup/llama-sidecar`. Windows child-process gotchas: `context.Background()` for child lifetime (not request ctx), `os.Open(os.DevNull)` for stdin, `os.Pipe()` for stdout with a drain goroutine, `DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP` flags. SSH to sam-desktop: `ssh samki@100.101.41.16`; use `schtasks` for persistent spawning (SSH `start /B` doesn't survive session close).
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ const ConfigSchema = z.object({
|
||||
// session model (auto_name) or DEFAULT_MODEL when unset.
|
||||
FAST_MODEL: z.string().optional(),
|
||||
TASK_MODEL_URL: z.string().url().optional(),
|
||||
LLAMA_SIDECAR_URL: z.string().url().optional(),
|
||||
// vDeepSeek: DeepSeek API key for direct API access. When set, models
|
||||
// with IDs starting with 'deepseek-' route through DeepSeek's API instead
|
||||
// of llama-swap. Defaults to empty (DeepSeek routing disabled).
|
||||
@@ -34,6 +33,11 @@ const ConfigSchema = z.object({
|
||||
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||
// vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
|
||||
HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
|
||||
// vMultiProvider: path to the local providers config JSON file. Missing file
|
||||
// = legacy synthesis from LLAMA_SWAP_URL.
|
||||
LLAMA_PROVIDERS_PATH: z.string().optional(),
|
||||
// BooControl host service origin. Used by /api/control/* proxy routes.
|
||||
BOOCONTROL_URL: z.string().url().optional(),
|
||||
});
|
||||
|
||||
export type Config = z.infer<typeof ConfigSchema>;
|
||||
|
||||
@@ -15,6 +15,7 @@ import { registerChatRoutes } from './routes/chats.js';
|
||||
import { registerSidebarRoutes } from './routes/sidebar.js';
|
||||
import { registerWebSocket } from './routes/ws.js';
|
||||
import { registerCoderProxy } from './routes/coder-proxy.js';
|
||||
import { registerControlProxy } from './routes/control-proxy.js';
|
||||
import { registerModelRoutes } from './routes/models.js';
|
||||
import { registerAgentRoutes } from './routes/agents.js';
|
||||
import { registerSkillsRoutes } from './routes/skills.js';
|
||||
@@ -36,10 +37,15 @@ import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp
|
||||
import { appendMcpTools } from './services/tools.js';
|
||||
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
||||
import { loadHooksConfig, createHookRunner } from './services/hooks.js';
|
||||
import { loadLlamaProviders } from './services/llama-providers.js';
|
||||
|
||||
async function main() {
|
||||
const config = loadConfig();
|
||||
|
||||
// vMultiProvider: load the shared local provider config. When the file is
|
||||
// absent, falls back to a single legacy provider from LLAMA_SWAP_URL.
|
||||
loadLlamaProviders(config.LLAMA_PROVIDERS_PATH, config.LLAMA_SWAP_URL);
|
||||
|
||||
const app = Fastify({
|
||||
logger: { level: config.LOG_LEVEL },
|
||||
});
|
||||
@@ -76,10 +82,11 @@ async function main() {
|
||||
app.log.info({ sweptCount }, 'swept stale streaming messages to failed');
|
||||
}
|
||||
|
||||
// v1.11.3: tell the model-context cache where llama-swap lives. Cache
|
||||
// lookups go to ${LLAMA_SWAP_URL}/upstream/<model>/props to read
|
||||
// v2.x (W3): tell the model-context cache the full config so it can
|
||||
// resolve composite model ids through the provider registry. Cache
|
||||
// lookups go to <provider.baseUrl>/upstream/<wireModelId>/props to read
|
||||
// default_generation_settings.n_ctx — the value persisted as messages.ctx_max.
|
||||
configureModelContext({ llamaSwapUrl: config.LLAMA_SWAP_URL });
|
||||
configureModelContext(config);
|
||||
|
||||
// v1.15.0-mcp-multi: read MCP config file and connect to all enabled servers.
|
||||
// Runs before route registration so the tool list is complete when the first
|
||||
@@ -98,19 +105,6 @@ async function main() {
|
||||
}
|
||||
app.addHook('onClose', async () => { await shutdownMcp(); });
|
||||
|
||||
// Boot-time guard: if any agent has llama_extra_args but LLAMA_SIDECAR_URL
|
||||
// is unset, fail fast. Silent fallback would defeat per-agent flags.
|
||||
if (!config.LLAMA_SIDECAR_URL) {
|
||||
const { agents } = await getAgentsForProject('');
|
||||
const offending = agents.find(a => a.llama_extra_args && a.llama_extra_args.length > 0);
|
||||
if (offending) {
|
||||
app.log.fatal(
|
||||
{ agent: offending.name },
|
||||
`Agent "${offending.name}" has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
await app.register(fastifyWebsocket);
|
||||
|
||||
@@ -283,6 +277,12 @@ async function main() {
|
||||
const BOOCODER_ORIGIN = process.env.BOOCODER_URL ?? 'http://boocoder:3000';
|
||||
registerCoderProxy(app, BOOCODER_ORIGIN);
|
||||
|
||||
// BooControl: reverse proxy /api/control/* to the control host service.
|
||||
// Static WS path /api/control/ws (not parameterized per-session like coder-proxy).
|
||||
if (process.env.BOOCONTROL_URL) {
|
||||
registerControlProxy(app, process.env.BOOCONTROL_URL);
|
||||
}
|
||||
|
||||
const webDist = process.env.WEB_DIST_PATH ?? resolve(process.cwd(), '../web/dist');
|
||||
if (existsSync(webDist)) {
|
||||
await app.register(fastifyStatic, {
|
||||
|
||||
120
apps/server/src/routes/__tests__/settings-favorites.test.ts
Normal file
120
apps/server/src/routes/__tests__/settings-favorites.test.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import postgres from 'postgres';
|
||||
import Fastify from 'fastify';
|
||||
import { registerSettingsRoutes } from '../settings.js';
|
||||
import type { Sql } from '../../db.js';
|
||||
|
||||
// P0 favorites hide-not-delete (multi-llama-swap-providers-model-favorites, P8):
|
||||
// availability filtering is a CLIENT display concern — ModelPicker derives the
|
||||
// visible Favorites section from settings ∩ live catalog. The server-side
|
||||
// guarantee under test here: PATCH normalizes SHAPE only (composite ids,
|
||||
// dedup, trim) and never prunes a favorite for being absent from any live
|
||||
// host's inventory. A favorited model whose host is down or whose entry was
|
||||
// removed from llama-swap config must survive in settings untouched, so it
|
||||
// reappears in the picker when the model comes back.
|
||||
//
|
||||
// Skipped unless DATABASE_URL is set (tool_cost_stats.test.ts pattern). Runs
|
||||
// against the live settings table: the pre-existing favorite_models value is
|
||||
// saved in beforeAll and restored exactly in afterAll.
|
||||
|
||||
const DB_URL = process.env.DATABASE_URL;
|
||||
const describeFn = DB_URL ? describe : describe.skip;
|
||||
|
||||
const FAVORITES_KEY = 'favorite_models';
|
||||
// No llama-swap host serves this id; shape-valid composite ref.
|
||||
const GHOST = 'sam-desktop/ghost-model-that-no-host-serves-9999';
|
||||
const OTHER = 'embedding/another-model';
|
||||
const SCRATCH_KEY = `favorites_test_scratch_${Date.now()}`;
|
||||
|
||||
describeFn('PATCH /api/settings favorite_models — hide-not-delete (P0 P8)', () => {
|
||||
let sql: ReturnType<typeof postgres>;
|
||||
let app: ReturnType<typeof Fastify>;
|
||||
let savedFavorites: unknown = null;
|
||||
let hadFavorites = false;
|
||||
|
||||
beforeAll(async () => {
|
||||
if (!DB_URL) return;
|
||||
sql = postgres(DB_URL, { max: 2, idle_timeout: 5, connect_timeout: 5, onnotice: () => {} });
|
||||
|
||||
// Create ONLY the settings table (mirrors schema.sql:217). Applying the
|
||||
// full schema here races other DB-gated suites running in parallel: the
|
||||
// CREATE OR REPLACE VIEW statements momentarily perturb views (e.g.
|
||||
// tool_cost_stats) that tool_cost_stats.test.ts is querying mid-run.
|
||||
await sql`CREATE TABLE IF NOT EXISTS settings (
|
||||
key TEXT PRIMARY KEY,
|
||||
value JSONB NOT NULL
|
||||
)`;
|
||||
|
||||
// Preserve the operator's real favorites for exact restore in afterAll.
|
||||
const rows = await sql<{ value: unknown }[]>`
|
||||
SELECT value FROM settings WHERE key = ${FAVORITES_KEY}
|
||||
`;
|
||||
hadFavorites = rows.length > 0;
|
||||
savedFavorites = rows[0]?.value ?? null;
|
||||
|
||||
app = Fastify();
|
||||
registerSettingsRoutes(app, sql as unknown as Sql);
|
||||
await app.ready();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (!DB_URL) return;
|
||||
if (hadFavorites) {
|
||||
await sql`
|
||||
INSERT INTO settings (key, value)
|
||||
VALUES (${FAVORITES_KEY}, ${sql.json(savedFavorites as never)})
|
||||
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value
|
||||
`;
|
||||
} else {
|
||||
await sql`DELETE FROM settings WHERE key = ${FAVORITES_KEY}`;
|
||||
}
|
||||
await sql`DELETE FROM settings WHERE key = ${SCRATCH_KEY}`;
|
||||
await app.close();
|
||||
await sql.end({ timeout: 5 });
|
||||
});
|
||||
|
||||
it('persists a favorite no live host serves — shape normalization only, no availability pruning', async () => {
|
||||
const res = await app.inject({
|
||||
method: 'PATCH',
|
||||
url: '/api/settings',
|
||||
payload: {
|
||||
// GHOST is unavailable everywhere; OTHER is shape-valid; the rest are
|
||||
// malformed (bare id, non-string, whitespace dup) and must be dropped.
|
||||
[FAVORITES_KEY]: [GHOST, OTHER, 'bare-id-no-slash', 42, ` ${OTHER} `],
|
||||
},
|
||||
});
|
||||
expect(res.statusCode).toBe(200);
|
||||
const body = res.json() as Record<string, unknown>;
|
||||
expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
|
||||
});
|
||||
|
||||
it('GET returns the unavailable favorite untouched', async () => {
|
||||
const res = await app.inject({ method: 'GET', url: '/api/settings' });
|
||||
expect(res.statusCode).toBe(200);
|
||||
const body = res.json() as Record<string, unknown>;
|
||||
expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
|
||||
});
|
||||
|
||||
it('unrelated settings writes leave favorites untouched', async () => {
|
||||
const res = await app.inject({
|
||||
method: 'PATCH',
|
||||
url: '/api/settings',
|
||||
payload: { [SCRATCH_KEY]: 'scratch-value' },
|
||||
});
|
||||
expect(res.statusCode).toBe(200);
|
||||
const body = res.json() as Record<string, unknown>;
|
||||
expect(body[FAVORITES_KEY]).toEqual([GHOST, OTHER]);
|
||||
expect(body[SCRATCH_KEY]).toBe('scratch-value');
|
||||
});
|
||||
|
||||
it('removal is explicit-only: a user PATCH without the ghost removes it', async () => {
|
||||
const res = await app.inject({
|
||||
method: 'PATCH',
|
||||
url: '/api/settings',
|
||||
payload: { [FAVORITES_KEY]: [OTHER] },
|
||||
});
|
||||
expect(res.statusCode).toBe(200);
|
||||
const body = res.json() as Record<string, unknown>;
|
||||
expect(body[FAVORITES_KEY]).toEqual([OTHER]);
|
||||
});
|
||||
});
|
||||
@@ -12,6 +12,9 @@ function boocoderWsUrl(origin: string, path: string): string {
|
||||
/**
|
||||
* Reverse-proxy BooCoder HTTP + WebSocket through BooChat's single origin.
|
||||
* WS must be registered before the HTTP catch-all — fetch() cannot upgrade.
|
||||
*
|
||||
* Keep-in-sync: routes/control-proxy.ts mirrors this pattern (deliberate
|
||||
* clone, Rule of Three unmet). Proxy-layer changes go in BOTH files.
|
||||
*/
|
||||
export function registerCoderProxy(app: FastifyInstance, boocoderOrigin: string): void {
|
||||
app.get<{ Params: { sessionId: string } }>(
|
||||
|
||||
89
apps/server/src/routes/control-proxy.ts
Normal file
89
apps/server/src/routes/control-proxy.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import WebSocket from 'ws';
|
||||
|
||||
/**
 * Build the WebSocket URL for a path on the BooControl host.
 *
 * The scheme is `wss:` when `origin` is `https:` and `ws:` otherwise. The
 * origin's own path is replaced by `path`, and any query string on the origin
 * is dropped.
 *
 * @param origin - Base URL of the BooControl service (e.g. `http://host:9000`).
 * @param path - Absolute path of the upstream WebSocket endpoint.
 * @returns The serialized WebSocket URL.
 */
function boocontrolWsUrl(origin: string, path: string): string {
  const target = new URL(origin);

  if (target.protocol === 'https:') {
    target.protocol = 'wss:';
  } else {
    target.protocol = 'ws:';
  }

  target.pathname = path;
  target.search = '';

  return target.href;
}
|
||||
|
||||
/**
 * Upstream response headers that must not be copied onto the proxied reply.
 *
 * `fetch()` returns an already-decoded body, so the upstream's
 * `content-encoding` / `content-length` describe bytes that no longer exist;
 * forwarding them makes clients try to decompress plain text or truncate the
 * payload. The remaining entries are hop-by-hop headers that only apply to
 * the upstream connection.
 */
const CONTROL_PROXY_SKIPPED_HEADERS: ReadonlySet<string> = new Set([
  'transfer-encoding',
  'content-encoding',
  'content-length',
  'connection',
  'keep-alive',
]);

/**
 * Map a close code received from the upstream socket to one that may legally
 * be sent in a close frame.
 *
 * Codes such as 1005 (no status) and 1006 (abnormal closure) are reported
 * locally but are reserved: `ws` throws when asked to send them, and that
 * throw would escape the event listener. 1005 is relayed as a normal closure
 * (1000); any other unsendable code becomes 1011 (internal error).
 */
function controlProxyCloseCode(code: number): number {
  const sendable =
    (code >= 1000 && code <= 1014 && code !== 1004 && code !== 1005 && code !== 1006) ||
    (code >= 3000 && code <= 4999);
  if (sendable) return code;
  return code === 1005 ? 1000 : 1011;
}

/**
 * Reverse-proxy /api/control/* HTTP + /api/control/ws WS through BooChat's
 * single origin.
 *
 * The WebSocket route is registered before the HTTP catch-all. HTTP requests
 * are forwarded with `fetch()` to `<boocontrolOrigin>/api/...`; only the
 * `content-type` and `authorization` request headers are passed along.
 *
 * CLAUDE.md keep-in-sync: this file mirrors routes/coder-proxy.ts. Keep the
 * two files in sync — if you change one, update the other.
 *
 * @param app - Fastify instance to register the routes on.
 * @param boocontrolOrigin - Base URL of the BooControl host service.
 */
export function registerControlProxy(app: FastifyInstance, boocontrolOrigin: string): void {
  app.get('/api/control/ws', { websocket: true }, (clientSocket, _req) => {
    const target = boocontrolWsUrl(boocontrolOrigin, '/api/ws/control');
    const upstream = new WebSocket(target);

    upstream.on('open', () => {
      app.log.debug('control ws proxy: upstream connected');
    });

    // upstream -> client
    upstream.on('message', (data, isBinary) => {
      if (clientSocket.readyState !== clientSocket.OPEN) return;
      clientSocket.send(data, { binary: isBinary });
    });

    upstream.on('close', (code, reason) => {
      if (clientSocket.readyState === clientSocket.OPEN) {
        // Reserved codes (1005/1006/...) cannot be sent in a close frame;
        // translate them instead of letting close() throw.
        clientSocket.close(controlProxyCloseCode(code), reason.toString());
      }
    });

    upstream.on('error', (err) => {
      app.log.warn({ err, target }, 'control ws proxy: upstream error');
      if (clientSocket.readyState === clientSocket.OPEN) {
        clientSocket.close(1011, 'upstream error');
      }
    });

    // client -> upstream. Frames that arrive before the upstream socket is
    // open are dropped, not queued.
    clientSocket.on('message', (data, isBinary) => {
      if (upstream.readyState !== WebSocket.OPEN) return;
      upstream.send(data, { binary: isBinary });
    });

    // Tear down the upstream leg whenever the client leg goes away, including
    // while the upstream connection is still being established.
    clientSocket.on('close', () => {
      if (upstream.readyState === WebSocket.OPEN || upstream.readyState === WebSocket.CONNECTING) {
        upstream.close();
      }
    });

    clientSocket.on('error', () => {
      if (upstream.readyState === WebSocket.OPEN || upstream.readyState === WebSocket.CONNECTING) {
        upstream.close();
      }
    });
  });

  app.all('/api/control/*', async (req, reply) => {
    // Only the first occurrence is rewritten: /api/control/x -> /api/x.
    const targetPath = req.url.replace('/api/control', '/api');
    const targetUrl = `${boocontrolOrigin}${targetPath}`;
    const headers: Record<string, string> = {};
    if (req.headers['content-type']) headers['content-type'] = req.headers['content-type'] as string;
    if (req.headers['authorization']) headers['authorization'] = req.headers['authorization'] as string;

    try {
      const res = await fetch(targetUrl, {
        method: req.method as string,
        headers,
        // NOTE(review): the parsed body is always re-serialized as JSON, so
        // non-JSON request bodies are not forwarded verbatim.
        body: req.method !== 'GET' && req.method !== 'HEAD' ? JSON.stringify(req.body) : undefined,
      });
      reply.code(res.status);
      for (const [key, value] of res.headers) {
        if (CONTROL_PROXY_SKIPPED_HEADERS.has(key.toLowerCase())) continue;
        reply.header(key, value);
      }
      const body = await res.text();
      return reply.send(body);
    } catch (err) {
      app.log.error({ err, targetUrl }, 'control proxy error');
      return reply.code(502).send({ error: 'control backend unavailable' });
    }
  });
}
|
||||
@@ -1,8 +1,9 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import type { Config } from '../config.js';
|
||||
import type { ModelInfo } from '../types/api.js';
|
||||
import type { ModelInfo, ModelCatalogProvider, ModelCatalogResponse } from '../types/api.js';
|
||||
import { getLlamaProviders } from '../services/llama-providers.js';
|
||||
|
||||
interface ApiModelsResponse {
|
||||
interface LlamaSwapModelsResponse {
|
||||
data?: ModelInfo[];
|
||||
}
|
||||
|
||||
@@ -13,21 +14,32 @@ const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
|
||||
|
||||
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
|
||||
app.get('/api/models', async (_req, reply) => {
|
||||
const models: ModelInfo[] = [];
|
||||
const providers: ModelCatalogProvider[] = [];
|
||||
|
||||
// 1. Fetch llama-swap models
|
||||
try {
|
||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
||||
if (res.ok) {
|
||||
const parsed = (await res.json()) as ApiModelsResponse;
|
||||
if (parsed.data) models.push(...parsed.data);
|
||||
// 1. Fetch live model lists from each configured local provider.
|
||||
const registry = getLlamaProviders();
|
||||
for (const provider of registry.providers) {
|
||||
const models: ModelInfo[] = [];
|
||||
try {
|
||||
const res = await fetch(`${provider.baseUrl}/v1/models`);
|
||||
if (res.ok) {
|
||||
const parsed = (await res.json()) as LlamaSwapModelsResponse;
|
||||
if (parsed.data) {
|
||||
// Prefix every model id with "provider/" to make it composite (D-2).
|
||||
for (const m of parsed.data) {
|
||||
models.push({ ...m, id: `${provider.id}/${m.id}` });
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Provider unreachable — include empty entry so the UI can show it.
|
||||
}
|
||||
} catch {
|
||||
// llama-swap unreachable — proceed with whatever we have
|
||||
providers.push({ id: provider.id, label: provider.label, models });
|
||||
}
|
||||
|
||||
// 2. If DeepSeek is configured, fetch live models from their API
|
||||
// 2. If DeepSeek is configured, add a synthetic "deepseek" provider group.
|
||||
if (config.DEEPSEEK_API_KEY) {
|
||||
const deepseekModels: ModelInfo[] = [];
|
||||
try {
|
||||
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
const res = await fetch(`${baseURL}/v1/models`, {
|
||||
@@ -35,22 +47,25 @@ export function registerModelRoutes(app: FastifyInstance, config: Config): void
|
||||
signal: AbortSignal.timeout(5_000),
|
||||
});
|
||||
if (res.ok) {
|
||||
const parsed = (await res.json()) as ApiModelsResponse;
|
||||
if (parsed.data) models.push(...parsed.data);
|
||||
const parsed = (await res.json()) as LlamaSwapModelsResponse;
|
||||
if (parsed.data) {
|
||||
for (const m of parsed.data) {
|
||||
deepseekModels.push({ ...m, id: `deepseek/${m.id}` });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// API call failed — fall back to static model list
|
||||
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||
deepseekModels.push(...DEEPSEEK_STATIC_MODELS.map((m) => ({ ...m, id: `deepseek/${m.id}` })));
|
||||
}
|
||||
} catch {
|
||||
// Network error — fall back to static model list
|
||||
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||
deepseekModels.push(...DEEPSEEK_STATIC_MODELS.map((m) => ({ ...m, id: `deepseek/${m.id}` })));
|
||||
}
|
||||
providers.push({ id: 'deepseek', label: 'DeepSeek', models: deepseekModels });
|
||||
}
|
||||
|
||||
if (models.length === 0) {
|
||||
if (providers.length === 0) {
|
||||
reply.code(502);
|
||||
return { error: 'no models available from any provider' };
|
||||
}
|
||||
return models;
|
||||
return { providers } satisfies ModelCatalogResponse;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -74,6 +74,26 @@ function validateThemeKeys(body: Record<string, unknown>): string | null {
|
||||
|
||||
const PatchBody = z.record(z.string(), z.unknown());
|
||||
|
||||
// Normalize favorite_models on write: must be an array of non-empty
|
||||
// composite "provider/model" strings. Drops malformed entries, dedupes
|
||||
// preserving insertion order.
|
||||
const FAVORITE_MODELS_KEY = 'favorite_models';
|
||||
|
||||
/**
 * Normalize a `favorite_models` settings value to a clean list of composite
 * model ids.
 *
 * Only shape is checked — nothing here consults which models are currently
 * available. An entry is kept when it is a string that, after trimming, has a
 * non-empty provider segment before its first `/` and a non-empty remainder
 * after it (the remainder may itself contain further slashes). Duplicates are
 * removed; the first occurrence decides the position.
 *
 * @param value - Raw value from the request body; anything that is not an
 *   array yields an empty list.
 * @returns Trimmed, de-duplicated composite ids in insertion order.
 */
export function normalizeFavoriteModels(value: unknown): string[] {
  if (!Array.isArray(value)) return [];

  const seen = new Set<string>();
  const out: string[] = [];

  for (const entry of value) {
    if (typeof entry !== 'string') continue;

    const trimmed = entry.trim();
    const slash = trimmed.indexOf('/');
    // Reject bare ids (no slash), an empty provider ("/model") and an empty
    // model ("provider/") — none of them is a "provider/model" reference.
    if (slash <= 0 || slash === trimmed.length - 1) continue;

    if (seen.has(trimmed)) continue;
    seen.add(trimmed);
    out.push(trimmed);
  }

  return out;
}
|
||||
|
||||
export function registerSettingsRoutes(app: FastifyInstance, sql: Sql): void {
|
||||
app.get('/api/settings', async () => {
|
||||
const rows = await sql<{ key: string; value: unknown }[]>`SELECT key, value FROM settings`;
|
||||
@@ -93,6 +113,13 @@ export function registerSettingsRoutes(app: FastifyInstance, sql: Sql): void {
|
||||
reply.code(400);
|
||||
return { error: themeError };
|
||||
}
|
||||
// Normalize favorite_models before persisting (must be composite ids only).
|
||||
if (FAVORITE_MODELS_KEY in parsed.data) {
|
||||
parsed.data[FAVORITE_MODELS_KEY] = normalizeFavoriteModels(
|
||||
parsed.data[FAVORITE_MODELS_KEY],
|
||||
);
|
||||
}
|
||||
|
||||
for (const [k, v] of Object.entries(parsed.data)) {
|
||||
await setSetting(sql, k, v);
|
||||
}
|
||||
|
||||
@@ -478,3 +478,17 @@ CREATE TABLE IF NOT EXISTS agent_snapshots (
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_agent_snapshots_chat ON agent_snapshots(chat_id);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_agent_snapshots_chat_unique ON agent_snapshots(chat_id);
|
||||
|
||||
-- memory-browser-ui: topic-based memory, daily log, dream diaries.
|
||||
CREATE TABLE IF NOT EXISTS memory_entries (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
||||
topic TEXT NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
content TEXT NOT NULL DEFAULT '',
|
||||
tags TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
|
||||
date DATE,
|
||||
mood TEXT,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_memory_entries_project ON memory_entries(project_id, created_at DESC);
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
|
||||
describe('P4: X-Boo-Source header injection (server paths)', () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
describe('compaction.ts callLlm injects X-Boo-Source: boochat', () => {
|
||||
it('includes X-Boo-Source header on direct fetch', async () => {
|
||||
const { resolveModelEndpoint } = await import('../inference/provider.js');
|
||||
const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
|
||||
|
||||
const { url, headers, model: resolvedModel } = resolveModelEndpoint(
|
||||
config,
|
||||
'test-model',
|
||||
);
|
||||
|
||||
const fetchCalls: Array<[string, RequestInit]> = [];
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
vi.fn((...args: Parameters<typeof fetch>) => {
|
||||
fetchCalls.push([args[0] as string, args[1] as RequestInit]);
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
choices: [{ message: { content: 'summary' } }],
|
||||
usage: { prompt_tokens: 10, completion_tokens: 5 },
|
||||
}),
|
||||
{ status: 200, headers: { 'content-type': 'application/json' } },
|
||||
),
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
await fetch(`${url}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { ...headers, 'X-Boo-Source': 'boochat' },
|
||||
body: JSON.stringify({ model: resolvedModel, messages: [], stream: false }),
|
||||
});
|
||||
|
||||
expect(fetchCalls.length).toBe(1);
|
||||
const callHeaders = fetchCalls[0][1]?.headers as Record<string, string>;
|
||||
expect(callHeaders['X-Boo-Source']).toBe('boochat');
|
||||
});
|
||||
});
|
||||
|
||||
describe('task-model.ts injects X-Boo-Source: boochat', () => {
|
||||
it('includes X-Boo-Source header on direct fetch', async () => {
|
||||
const { resolveModelEndpoint } = await import('../inference/provider.js');
|
||||
const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
|
||||
|
||||
const { url, headers, model: resolvedModel } = resolveModelEndpoint(
|
||||
config,
|
||||
'test-model',
|
||||
);
|
||||
|
||||
const fetchCalls: Array<[string, RequestInit]> = [];
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
vi.fn((...args: Parameters<typeof fetch>) => {
|
||||
fetchCalls.push([args[0] as string, args[1] as RequestInit]);
|
||||
return Promise.resolve(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
choices: [{ message: { content: 'result' } }],
|
||||
}),
|
||||
{ status: 200, headers: { 'content-type': 'application/json' } },
|
||||
),
|
||||
);
|
||||
}),
|
||||
);
|
||||
|
||||
await fetch(`${url}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { ...headers, 'X-Boo-Source': 'boochat' },
|
||||
body: JSON.stringify({ model: resolvedModel, messages: [], stream: false }),
|
||||
});
|
||||
|
||||
expect(fetchCalls.length).toBe(1);
|
||||
const callHeaders = fetchCalls[0][1]?.headers as Record<string, string>;
|
||||
expect(callHeaders['X-Boo-Source']).toBe('boochat');
|
||||
});
|
||||
});
|
||||
|
||||
describe('stream-phase-adapter.ts upstreamModel call', () => {
|
||||
it('passes boochat source to upstreamModel', async () => {
|
||||
const { upstreamModel } = await import('../inference/provider.js');
|
||||
const config = { LLAMA_SWAP_URL: 'http://localhost:8401' };
|
||||
|
||||
const model = upstreamModel(config, 'sam-desktop/test-model', null, 'boochat');
|
||||
expect(model).toBeDefined();
|
||||
expect((model as any).modelId).toBe('test-model');
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -22,7 +22,6 @@ const BASE_AGENT: Agent = {
|
||||
source: 'global',
|
||||
max_tool_calls: null,
|
||||
steps: null,
|
||||
llama_extra_args: null,
|
||||
};
|
||||
|
||||
describe('resolveToolBudget', () => {
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { normalizeFavoriteModels } from '../../routes/settings.js';
|
||||
|
||||
describe('normalizeFavoriteModels', () => {
|
||||
it('returns empty array for non-array input', () => {
|
||||
expect(normalizeFavoriteModels(null)).toEqual([]);
|
||||
expect(normalizeFavoriteModels(undefined)).toEqual([]);
|
||||
expect(normalizeFavoriteModels('string')).toEqual([]);
|
||||
expect(normalizeFavoriteModels(42)).toEqual([]);
|
||||
expect(normalizeFavoriteModels({})).toEqual([]);
|
||||
});
|
||||
|
||||
it('drops malformed entries that are not strings', () => {
|
||||
expect(normalizeFavoriteModels(['valid/provider', 42, null, false])).toEqual(['valid/provider']);
|
||||
});
|
||||
|
||||
it('drops entries without a slash (bare ids)', () => {
|
||||
expect(normalizeFavoriteModels(['bare-model', 'another-bare'])).toEqual([]);
|
||||
});
|
||||
|
||||
it('drops empty or whitespace-only strings', () => {
|
||||
expect(normalizeFavoriteModels(['', ' ', 'valid/provider'])).toEqual(['valid/provider']);
|
||||
});
|
||||
|
||||
it('dedupes preserving insertion order', () => {
|
||||
const result = normalizeFavoriteModels([
|
||||
'a/foo',
|
||||
'b/bar',
|
||||
'a/foo',
|
||||
'c/baz',
|
||||
'b/bar',
|
||||
]);
|
||||
expect(result).toEqual(['a/foo', 'b/bar', 'c/baz']);
|
||||
});
|
||||
|
||||
it('trims whitespace from entries', () => {
|
||||
expect(normalizeFavoriteModels([' a/foo ', 'b/bar'])).toEqual(['a/foo', 'b/bar']);
|
||||
});
|
||||
|
||||
it('accepts valid composite ids', () => {
|
||||
const input = [
|
||||
'sam-desktop/qwen3.6-35b',
|
||||
'embedding/gemma-4-12b',
|
||||
'deepseek/deepseek-v4-flash',
|
||||
];
|
||||
expect(normalizeFavoriteModels(input)).toEqual(input);
|
||||
});
|
||||
|
||||
it('handles empty array', () => {
|
||||
expect(normalizeFavoriteModels([])).toEqual([]);
|
||||
});
|
||||
|
||||
it('preserves insertion order after dedup', () => {
|
||||
const input = ['b/bar', 'a/foo', 'c/baz', 'a/foo', 'b/bar'];
|
||||
expect(normalizeFavoriteModels(input)).toEqual(['b/bar', 'a/foo', 'c/baz']);
|
||||
});
|
||||
});
|
||||
@@ -24,7 +24,6 @@ const BASE_AGENT: Agent = {
|
||||
source: 'global',
|
||||
max_tool_calls: null,
|
||||
steps: null,
|
||||
llama_extra_args: null,
|
||||
};
|
||||
|
||||
describe('samplerOptsFromAgent', () => {
|
||||
|
||||
@@ -33,7 +33,6 @@ describe('license: MIT relicense guard', () => {
|
||||
const FORMERLY_AGPL = [
|
||||
'apps/server/src/services/inference/tool-call-parser.ts',
|
||||
'apps/server/src/services/web/html-to-md.ts',
|
||||
'apps/server/src/services/inference/llama-args-validator.ts',
|
||||
];
|
||||
for (const rel of FORMERLY_AGPL) {
|
||||
it(`${rel} carries no AGPL / Unsloth provenance`, () => {
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
validateExtraArgs,
|
||||
isManagedFlag,
|
||||
stripShadowingFlags,
|
||||
} from '../inference/llama-args-validator.js';
|
||||
import { parseAgentsMd } from '../agents.js';
|
||||
|
||||
/**
 * Spec for `validateExtraArgs` as pinned by the assertions below:
 *  - a managed flag (bare or in `--flag=value` form) makes it throw an error
 *    whose message matches /managed/;
 *  - any other flag list is returned unchanged;
 *  - `undefined` and `[]` both yield `[]`.
 */
describe('validateExtraArgs', () => {
  describe('deny list — each alias rejected', () => {
    // One generated test per alias, registered in this exact order.
    const denied = [
      '-m', '--model',
      '-mu', '--model-url',
      '-dr', '--docker-repo',
      '-hf', '-hfr', '--hf-repo',
      '-hff', '--hf-file',
      '-hfv', '-hfrv', '--hf-repo-v',
      '-hffv', '--hf-file-v',
      '-hft', '--hf-token',
      '-mm', '--mmproj',
      '-mmu', '--mmproj-url',
      '--host', '--port', '--path', '--api-prefix', '--reuse-port',
      '--api-key', '--api-key-file',
      '--ssl-key-file', '--ssl-cert-file',
      '--webui', '--no-webui', '--ui', '--no-ui',
      '--ui-config', '--ui-config-file',
      '--ui-mcp-proxy', '--no-ui-mcp-proxy',
      '--models-dir', '--models-preset', '--models-max',
      '--models-autoload', '--no-models-autoload',
    ];

    denied.forEach((flag) => {
      it(`rejects ${flag}`, () => {
        const attempt = () => validateExtraArgs([flag]);
        expect(attempt).toThrow(/managed/);
      });
    });
  });

  describe('safe flags accepted', () => {
    const safe = [
      '-c', '--ctx-size', '-ngl', '--gpu-layers',
      '--top-k', '--cache-type-k', '--jinja', '--no-jinja',
      '--spec-draft-n-max', '-fa', '--flash-attn',
      '-t', '--threads', '-np', '--parallel',
    ];

    safe.forEach((flag) => {
      it(`accepts ${flag}`, () => {
        // Must neither throw nor alter the argument list.
        expect(() => validateExtraArgs([flag])).not.toThrow();
        expect(validateExtraArgs([flag])).toEqual([flag]);
      });
    });
  });

  it('handles --flag=value shape (denies the flag part)', () => {
    const attempt = () => validateExtraArgs(['--model=evil.gguf']);
    expect(attempt).toThrow(/managed/);
  });

  it('handles --flag=value shape (accepts safe flag)', () => {
    const args = ['--ctx-size=4096'];
    expect(validateExtraArgs(args)).toEqual(['--ctx-size=4096']);
  });

  it('returns empty array for undefined input', () => {
    const validated = validateExtraArgs(undefined);
    expect(validated).toEqual([]);
  });

  it('returns empty array for empty input', () => {
    const validated = validateExtraArgs([]);
    expect(validated).toEqual([]);
  });

  it('treats negative numbers as values, not flags', () => {
    // '-1' starts with a dash but is the value of --seed here.
    const validated = validateExtraArgs(['--seed', '-1']);
    expect(validated).toEqual(['--seed', '-1']);
  });
});
|
||||
|
||||
/**
 * Spec for `isManagedFlag`: true for the denied flags listed below, false for
 * the safe ones.
 */
describe('isManagedFlag', () => {
  it('returns true for denied flags', () => {
    for (const flag of ['--model', '-m', '--api-key', '--port']) {
      expect(isManagedFlag(flag)).toBe(true);
    }
  });

  it('returns false for safe flags', () => {
    for (const flag of ['-c', '--ctx-size', '--top-k']) {
      expect(isManagedFlag(flag)).toBe(false);
    }
  });
});
|
||||
|
||||
/**
 * Spec for `stripShadowingFlags` as pinned by the assertions below: context
 * flags (`-c`, `--ctx-size=…`) and `--jinja` are removed from the list, the
 * `stripContext: false` option keeps context flags, and cache / spec flags
 * pass through untouched.
 */
describe('stripShadowingFlags', () => {
  it('strips auto -c when user supplies -c', () => {
    // The flag and its value are both dropped.
    expect(stripShadowingFlags(['-c', '4096', '--top-k', '40'])).toEqual(['--top-k', '40']);
  });

  it('retains both when no overlap', () => {
    const input = ['--top-k', '40', '--top-p', '0.95'];
    expect(stripShadowingFlags(input)).toEqual(['--top-k', '40', '--top-p', '0.95']);
  });

  it('strips --ctx-size=value form', () => {
    expect(stripShadowingFlags(['--ctx-size=4096'])).toEqual([]);
  });

  it('strips boolean --jinja flag (no value consumed)', () => {
    // '--top-k' follows --jinja and must survive together with its value.
    expect(stripShadowingFlags(['--jinja', '--top-k', '40'])).toEqual(['--top-k', '40']);
  });

  it('respects stripContext=false to keep context flags', () => {
    const kept = stripShadowingFlags(['-c', '4096'], { stripContext: false });
    expect(kept).toEqual(['-c', '4096']);
  });

  it('passes through cache flags (no longer shadowed)', () => {
    expect(stripShadowingFlags(['--cache-type-k', 'q8_0'])).toEqual(['--cache-type-k', 'q8_0']);
  });

  it('passes through spec flags (no longer shadowed)', () => {
    expect(stripShadowingFlags(['--spec-draft-n-max', '16'])).toEqual(['--spec-draft-n-max', '16']);
  });
});
|
||||
|
||||
/**
 * Exercises `parseAgentsMd` on single-agent documents to check how the
 * `llama_extra_args` frontmatter key is handled: managed flags produce a parse
 * error, safe flags are surfaced on the agent, and an absent key yields `null`.
 */
describe('AGENTS.md frontmatter validation', () => {
  // Builds a one-agent AGENTS.md document: heading, fenced frontmatter, prompt.
  const agentDoc = (name: string, frontmatter: string, prompt: string): string =>
    [`## ${name}`, '---', frontmatter, '---', prompt].join('\n');

  it('rejects agent with managed flag in llama_extra_args', () => {
    const md = agentDoc('Evil Agent', 'llama_extra_args: ["--model", "evil.gguf"]', 'You are evil.');
    const parsed = parseAgentsMd(md);
    expect(parsed.agents).toHaveLength(0);
    expect(parsed.errors).toHaveLength(1);
    expect(parsed.errors[0]!.reason).toContain('managed');
  });

  it('accepts agent with safe llama_extra_args', () => {
    const md = agentDoc('Good Agent', 'llama_extra_args: ["--top-k", "20"]', 'You are good.');
    const parsed = parseAgentsMd(md);
    expect(parsed.errors).toHaveLength(0);
    expect(parsed.agents).toHaveLength(1);
    expect(parsed.agents[0]!.llama_extra_args).toEqual(['--top-k', '20']);
  });

  it('agent without llama_extra_args has null field', () => {
    const md = agentDoc('Simple Agent', 'temperature: 0.5', 'You are simple.');
    const parsed = parseAgentsMd(md);
    expect(parsed.agents[0]!.llama_extra_args).toBeNull();
  });
});
|
||||
@@ -1,14 +1,44 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
configureModelContext,
|
||||
getModelContext,
|
||||
invalidateModelContext,
|
||||
} from '../model-context.js';
|
||||
|
||||
// ---- mock llama-providers registry -----------------------------------------
|
||||
// model-context.ts imports resolveModelProvider from inference/provider.ts,
|
||||
// which uses getLlamaProviders() from llama-providers.ts. We mock the
|
||||
// registry module so tests control the provider list without touching the
|
||||
// filesystem.
|
||||
|
||||
// Mutable registry state. The mocked module reads these lazily on every call,
// so tests can reassign them to change what the registry reports.
let mockDefaultProvider = 'llama-swap';
let mockProvidersList: Array<{ id: string; label: string; baseUrl: string; kind: string }> = [
  { id: 'llama-swap', label: 'llama-swap', baseUrl: 'http://llama-swap.test:8401', kind: 'llama-swap' },
];

vi.mock('../llama-providers.js', () => {
  // Reports whatever the test has currently assigned to the mock state.
  const getLlamaProviders = () => ({
    defaultProvider: mockDefaultProvider,
    providers: mockProvidersList,
  });

  // "<provider>/<model>" splits at the first slash; a ref with no slash (or a
  // leading slash) is treated as a bare id owned by the default provider.
  const parseModelRef = (ref: string) => {
    const cut = ref.indexOf('/');
    return cut > 0
      ? { providerId: ref.slice(0, cut), wireModelId: ref.slice(cut + 1), isLegacyBareId: false }
      : { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
  };

  return { getLlamaProviders, parseModelRef };
});
|
||||
|
||||
// Import the functions under test AFTER the mock is registered.
|
||||
const { configureModelContext, getModelContext, invalidateModelContext } = await import('../model-context.js');
|
||||
|
||||
// ---- fixtures ---------------------------------------------------------------
|
||||
|
||||
const TEST_URL = 'http://llama-swap.test:8401';
|
||||
|
||||
function mockOkProps(n_ctx: number) {
|
||||
return new Response(
|
||||
JSON.stringify({ default_generation_settings: { n_ctx } }),
|
||||
@@ -16,9 +46,28 @@ function mockOkProps(n_ctx: number) {
|
||||
);
|
||||
}
|
||||
|
||||
// Legacy test config (backward-compatible { llamaSwapUrl } shape).
|
||||
const LEGACY_CONFIG = { llamaSwapUrl: 'http://llama-swap.test:8401' };
|
||||
|
||||
// Provider-aware config for multi-provider tests.
|
||||
const MULTI_PROVIDER_CONFIG = {
|
||||
LLAMA_SWAP_URL: 'http://llama-swap.test:8401',
|
||||
DEEPSEEK_API_KEY: 'sk-test',
|
||||
DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
invalidateModelContext();
|
||||
configureModelContext({ llamaSwapUrl: TEST_URL });
|
||||
mockDefaultProvider = 'llama-swap';
|
||||
mockProvidersList = [
|
||||
{
|
||||
id: 'llama-swap',
|
||||
label: 'llama-swap',
|
||||
baseUrl: 'http://llama-swap.test:8401',
|
||||
kind: 'llama-swap',
|
||||
},
|
||||
];
|
||||
configureModelContext(LEGACY_CONFIG);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -37,7 +86,7 @@ describe('getModelContext — positive cache', () => {
|
||||
// Verify the URL was constructed correctly — encodes the model name in
|
||||
// case it contains characters that would break the path.
|
||||
expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
|
||||
`${TEST_URL}/upstream/qwen3.6/props`,
|
||||
`${LEGACY_CONFIG.llamaSwapUrl}/upstream/qwen3.6/props`,
|
||||
expect.objectContaining({ signal: expect.any(AbortSignal) }),
|
||||
);
|
||||
});
|
||||
@@ -185,3 +234,158 @@ describe('invalidateModelContext', () => {
|
||||
expect(fetchSpy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ---- W3: provider-aware cache isolation ------------------------------------
|
||||
|
||||
/**
 * Cache-isolation spec: two providers expose the same wire model name
 * ("qwen3.6") on different base URLs. Each composite id must get its own
 * fetch, its own cached value, and its own invalidation.
 */
describe('getModelContext — provider-aware cache isolation (W3)', () => {
  beforeEach(() => {
    // Two providers sharing the same wire model name "qwen3.6" but on
    // different base URLs. This is the core scenario for cache isolation.
    mockProvidersList = [
      {
        id: 'provider-a',
        label: 'Provider A',
        baseUrl: 'http://provider-a.test:8401',
        kind: 'llama-swap',
      },
      {
        id: 'provider-b',
        label: 'Provider B',
        baseUrl: 'http://provider-b.test:8401',
        kind: 'llama-swap',
      },
    ];
    mockDefaultProvider = 'provider-a';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('two providers serving the same wire model name have separate cache entries', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)) // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384)); // provider-b: qwen3.6

    // Both resolve to the wire model "qwen3.6" but different providers.
    const a = await getModelContext('provider-a/qwen3.6');
    const b = await getModelContext('provider-b/qwen3.6');

    expect(a).not.toBeNull();
    expect(a!.n_ctx).toBe(32_768);
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);

    // Two separate fetches — one per provider's baseUrl.
    expect(fetchSpy).toHaveBeenCalledTimes(2);
    expect(fetchSpy.mock.calls[0]![0]).toContain('provider-a.test');
    expect(fetchSpy.mock.calls[1]![0]).toContain('provider-b.test');
  });

  it('cached entry for one provider does not leak to the other', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)); // provider-a: qwen3.6

    // Populate provider-a's cache.
    await getModelContext('provider-a/qwen3.6');
    expect(fetchSpy).toHaveBeenCalledTimes(1);

    // provider-b/qwen3.6 should NOT hit provider-a's cache — it must fetch.
    fetchSpy.mockResolvedValueOnce(mockOkProps(16_384));
    const b = await getModelContext('provider-b/qwen3.6');
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('invalidateModelContext(key) only clears the targeted provider entry', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768)) // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384)) // provider-b: qwen3.6
      .mockResolvedValueOnce(mockOkProps(40_960)); // provider-a re-fetch

    await getModelContext('provider-a/qwen3.6');
    await getModelContext('provider-b/qwen3.6');

    // Invalidate only provider-a's entry.
    invalidateModelContext('provider-a/qwen3.6');

    // provider-a must re-fetch and observe the new value.
    const a2 = await getModelContext('provider-a/qwen3.6');
    expect(a2).not.toBeNull();
    expect(a2!.n_ctx).toBe(40_960);
    expect(fetchSpy).toHaveBeenCalledTimes(3); // 2 original + 1 re-fetch

    // provider-b must still be served from cache: same value as before and no
    // additional fetch. Without this check the test would also pass if the
    // keyed invalidation cleared every entry.
    const b2 = await getModelContext('provider-b/qwen3.6');
    expect(b2).not.toBeNull();
    expect(b2!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(3);
  });
});
|
||||
|
||||
// ---- W3: bare-id resolution through default provider -----------------------
|
||||
|
||||
/**
 * Bare-id spec: with a llama-swap default provider and a DeepSeek provider
 * registered, a model id without a provider prefix must resolve through the
 * default provider, share its cache entry with the explicit composite form,
 * and DeepSeek ids must resolve to a fixed context size without any fetch.
 */
describe('getModelContext — bare-id resolution through default provider (W3)', () => {
  beforeEach(() => {
    const llamaSwap = {
      id: 'llama-swap',
      label: 'llama-swap',
      baseUrl: 'http://llama-swap.test:8401',
      kind: 'llama-swap',
    };
    const deepseek = {
      id: 'deepseek',
      label: 'DeepSeek',
      baseUrl: 'https://api.deepseek.com',
      kind: 'deepseek',
    };
    mockProvidersList = [llamaSwap, deepseek];
    mockDefaultProvider = 'llama-swap';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('bare model id resolves through the default provider', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(8192));

    const ctx = await getModelContext('qwen3.6');
    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(8192);

    // The default provider is "llama-swap", so its baseUrl must be the one hit.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      'http://llama-swap.test:8401/upstream/qwen3.6/props',
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('bare id and explicit default-provider composite share a cache entry', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(8192));

    // "qwen3.6" falls back to the default provider ("llama-swap"), which is
    // the same target the explicit "llama-swap/qwen3.6" names — so the second
    // lookup must be answered from cache.
    const viaBareId = await getModelContext('qwen3.6');
    const viaComposite = await getModelContext('llama-swap/qwen3.6');

    expect(viaBareId).toEqual(viaComposite);
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('bare "deepseek-*" id returns static default without fetching', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek-v4-pro');
    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchSpy).not.toHaveBeenCalled();
  });

  it('composite "deepseek/model" id returns static default without fetching', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek/deepseek-v4-pro');
    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchSpy).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
@@ -1,58 +1,308 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveRoute, upstreamModel } from '../inference/provider.js';
|
||||
import { describe, expect, it, vi, beforeEach } from 'vitest';
|
||||
|
||||
describe('resolveRoute', () => {
|
||||
// Control the mock return values from tests.
|
||||
type MockProvider = { id: string; label: string; baseUrl: string; kind: string };

/** Returns a fresh copy of the two-provider registry each test starts from. */
function baselineMockProviders(): MockProvider[] {
  return [
    {
      id: 'sam-desktop',
      label: 'Sam-desktop',
      baseUrl: 'http://100.101.41.16:8401',
      kind: 'llama-swap',
    },
    {
      id: 'embedding',
      label: 'embedding',
      baseUrl: 'http://100.90.172.55:8411',
      kind: 'llama-swap',
    },
  ];
}

// Mutable registry state read lazily by the mocked module below.
let mockDefaultProvider = 'sam-desktop';
let mockProvidersList: MockProvider[] = baselineMockProviders();

vi.mock('../llama-providers.js', () => {
  const getLlamaProviders = () => ({
    defaultProvider: mockDefaultProvider,
    providers: mockProvidersList,
  });

  // Match the real signature: parseModelRef(ref) → uses getLlamaProviders().defaultProvider internally.
  // A ref with no slash (or a leading slash) counts as a bare legacy id.
  const parseModelRef = (ref: string) => {
    const cut = ref.indexOf('/');
    return cut > 0
      ? { providerId: ref.slice(0, cut), wireModelId: ref.slice(cut + 1), isLegacyBareId: false }
      : { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
  };

  return { getLlamaProviders, parseModelRef };
});

// Import the functions under test AFTER the mock is registered.
const { resolveRoute, upstreamModel, resolveModelEndpoint, resolveModelProvider, isDeepSeekModel } = await import('../inference/provider.js');

// Restore the baseline registry so a test that reassigns the mock state
// cannot affect the next one.
beforeEach(() => {
  mockDefaultProvider = 'sam-desktop';
  mockProvidersList = baselineMockProviders();
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Legacy resolveRoute backward compat
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('resolveRoute (legacy compat)', () => {
|
||||
it('routes to swap when agent is null', () => {
|
||||
expect(resolveRoute(null)).toEqual({ route: 'swap', flags: null });
|
||||
expect(resolveRoute(null, { LLAMA_SWAP_URL: 'http://localhost:8080' }, 'model')).toEqual({ route: 'swap' });
|
||||
});
|
||||
|
||||
it('routes to swap when agent has no llama_extra_args', () => {
|
||||
expect(resolveRoute({ llama_extra_args: null })).toEqual({ route: 'swap', flags: null });
|
||||
});
|
||||
|
||||
it('routes to swap when agent has empty llama_extra_args', () => {
|
||||
expect(resolveRoute({ llama_extra_args: [] })).toEqual({ route: 'swap', flags: null });
|
||||
});
|
||||
|
||||
it('routes to sidecar when agent has llama_extra_args', () => {
|
||||
const result = resolveRoute({ llama_extra_args: ['--top-k', '20'] });
|
||||
expect(result.route).toBe('sidecar');
|
||||
expect(result.flags).toEqual(['--top-k', '20']);
|
||||
it('routes to deepseek for bare deepseek- prefix when configured', () => {
|
||||
expect(
|
||||
resolveRoute(null, { LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-123' }, 'deepseek-v4-pro'),
|
||||
).toEqual({ route: 'deepseek' });
|
||||
});
|
||||
});
|
||||
|
||||
describe('upstreamModel', () => {
|
||||
const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
|
||||
const fullConfig = {
|
||||
LLAMA_SWAP_URL: 'http://localhost:8401',
|
||||
LLAMA_SIDECAR_URL: 'http://localhost:8402',
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider-aware resolver: composite ids
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('resolveModelProvider', () => {
|
||||
const config = {
|
||||
LLAMA_SWAP_URL: 'http://localhost:8080',
|
||||
DEEPSEEK_API_KEY: 'sk-test',
|
||||
DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
|
||||
};
|
||||
|
||||
it('returns a model for swap route (no agent)', () => {
|
||||
it('routes composite local provider id to its baseUrl', () => {
|
||||
const r = resolveModelProvider('sam-desktop/qwen3.6-35b-a3b', config);
|
||||
expect(r.route).toBe('swap');
|
||||
expect(r.baseUrl).toBe('http://100.101.41.16:8401');
|
||||
expect(r.wireModelId).toBe('qwen3.6-35b-a3b');
|
||||
expect(r.providerId).toBe('sam-desktop');
|
||||
expect(r.isLegacyBareId).toBe(false);
|
||||
});
|
||||
|
||||
it('routes composite "deepseek/" id to DeepSeek SDK', () => {
|
||||
const r = resolveModelProvider('deepseek/deepseek-v4-pro', config);
|
||||
expect(r.route).toBe('deepseek');
|
||||
expect(r.baseUrl).toBe('https://api.deepseek.com');
|
||||
expect(r.wireModelId).toBe('deepseek-v4-pro');
|
||||
expect(r.providerId).toBe('deepseek');
|
||||
});
|
||||
|
||||
// COLLISION CASE: "embedding/deepseek-r1-qwen3-8b" routes to local provider
|
||||
// "embedding", NOT to DeepSeek cloud.
|
||||
it('routes "embedding/deepseek-r1-qwen3-8b" to local embedding provider, not DeepSeek', () => {
|
||||
const r = resolveModelProvider('embedding/deepseek-r1-qwen3-8b', config);
|
||||
expect(r.route).toBe('swap');
|
||||
expect(r.baseUrl).toBe('http://100.90.172.55:8411');
|
||||
expect(r.wireModelId).toBe('deepseek-r1-qwen3-8b');
|
||||
expect(r.providerId).toBe('embedding');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider-aware resolver: bare (legacy) ids
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Spec for `resolveModelProvider` on ids without a registered provider prefix:
 * bare ids go to the default provider, bare "deepseek-*" ids go to DeepSeek
 * only when an API key is configured, and an unknown composite provider falls
 * back to LLAMA_SWAP_URL.
 */
describe('resolveModelProvider — bare id legacy fallback', () => {
  const swapUrl = 'http://localhost:8080';
  const config = {
    LLAMA_SWAP_URL: swapUrl,
    DEEPSEEK_API_KEY: 'sk-test',
  };

  it('bare id resolves through defaultProvider', () => {
    const resolved = resolveModelProvider('qwen3.6-35b-a3b', config);
    expect(resolved.route).toBe('swap');
    expect(resolved.providerId).toBe('sam-desktop');
    expect(resolved.wireModelId).toBe('qwen3.6-35b-a3b');
    expect(resolved.isLegacyBareId).toBe(true);
  });

  it('bare "deepseek-v4-pro" resolves to DeepSeek SDK (legacy prefix)', () => {
    const resolved = resolveModelProvider('deepseek-v4-pro', config);
    expect(resolved.route).toBe('deepseek');
    expect(resolved.wireModelId).toBe('deepseek-v4-pro');
    expect(resolved.isLegacyBareId).toBe(true);
  });

  it('bare id when DEEPSEEK_API_KEY is unset stays on swap', () => {
    // Same id as above, but the config carries no DeepSeek key.
    const resolved = resolveModelProvider('deepseek-v4-pro', { LLAMA_SWAP_URL: 'http://localhost:8080' });
    expect(resolved.route).toBe('swap');
    expect(resolved.wireModelId).toBe('deepseek-v4-pro');
  });

  it('unknown composite provider falls back to LLAMA_SWAP_URL', () => {
    const resolved = resolveModelProvider('unknown-provider/model-x', config);
    expect(resolved.route).toBe('swap');
    expect(resolved.baseUrl).toBe('http://localhost:8080');
    expect(resolved.wireModelId).toBe('model-x');
    expect(resolved.isLegacyBareId).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// upstreamModel uses the resolver
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Spec for `upstreamModel`: both a composite id and a bare id must produce a
 * model object whose `modelId` is the wire model id (provider prefix removed).
 */
describe('upstreamModel', () => {
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };

  it('returns a model for local composite id', () => {
    const built = upstreamModel(swapConfig, 'sam-desktop/test-model');
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('test-model');
  });

  it('returns a model for bare id (legacy)', () => {
    const built = upstreamModel(swapConfig, 'test-model');
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('test-model');
  });
});
|
||||
|
||||
it('returns a model for swap route (agent without extra args)', () => {
|
||||
const model = upstreamModel(swapConfig, 'test-model', { llama_extra_args: null });
|
||||
expect(model).toBeDefined();
|
||||
// ---------------------------------------------------------------------------
|
||||
// resolveModelEndpoint uses the resolver
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('resolveModelEndpoint', () => {
|
||||
it('resolves local composite id to provider baseUrl', () => {
|
||||
const ep = resolveModelEndpoint(
|
||||
{ LLAMA_SWAP_URL: 'http://localhost:8080' },
|
||||
'sam-desktop/qwen3.6-35b-a3b',
|
||||
);
|
||||
expect(ep.url).toBe('http://100.101.41.16:8401');
|
||||
expect(ep.model).toBe('qwen3.6-35b-a3b');
|
||||
expect(ep.headers['Content-Type']).toBe('application/json');
|
||||
});
|
||||
|
||||
it('returns a model for sidecar route', () => {
|
||||
const model = upstreamModel(fullConfig, 'test-model', { llama_extra_args: ['--top-k', '20'] });
|
||||
expect(model).toBeDefined();
|
||||
expect((model as any).modelId).toBe('test-model');
|
||||
it('resolves bare id to default provider baseUrl', () => {
|
||||
const ep = resolveModelEndpoint(
|
||||
{ LLAMA_SWAP_URL: 'http://localhost:8080' },
|
||||
'test-model',
|
||||
);
|
||||
expect(ep.url).toBe('http://100.101.41.16:8401');
|
||||
expect(ep.model).toBe('test-model');
|
||||
});
|
||||
|
||||
it('throws when sidecar route requested but URL missing', () => {
|
||||
expect(() =>
|
||||
upstreamModel(swapConfig, 'test-model', { llama_extra_args: ['--top-k', '20'] }),
|
||||
).toThrow(/LLAMA_SIDECAR_URL/);
|
||||
it('resolves deepseek composite id to DeepSeek API with auth header', () => {
|
||||
const ep = resolveModelEndpoint(
|
||||
{ LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-test' },
|
||||
'deepseek/deepseek-v4-pro',
|
||||
);
|
||||
expect(ep.url).toBe('https://api.deepseek.com');
|
||||
expect(ep.model).toBe('deepseek-v4-pro');
|
||||
expect(ep.headers['Authorization']).toBe('Bearer sk-test');
|
||||
});
|
||||
|
||||
it('routes to swap for empty llama_extra_args array', () => {
|
||||
const model = upstreamModel(swapConfig, 'test-model', { llama_extra_args: [] });
|
||||
expect(model).toBeDefined();
|
||||
// Collision case for endpoint resolution.
|
||||
it('resolves "embedding/deepseek-r1-qwen3-8b" to embedding baseUrl, not DeepSeek', () => {
|
||||
const ep = resolveModelEndpoint(
|
||||
{ LLAMA_SWAP_URL: 'http://localhost:8080', DEEPSEEK_API_KEY: 'sk-test' },
|
||||
'embedding/deepseek-r1-qwen3-8b',
|
||||
);
|
||||
expect(ep.url).toBe('http://100.90.172.55:8411');
|
||||
expect(ep.model).toBe('deepseek-r1-qwen3-8b');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isDeepSeekModel (legacy prefix check, kept for stream-phase-adapter)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Spec for `isDeepSeekModel`: only the bare "deepseek-" prefix counts; the
 * composite "deepseek/" form and unrelated model ids do not.
 */
describe('isDeepSeekModel', () => {
  it('returns true for deepseek- prefix', () => {
    const verdict = isDeepSeekModel('deepseek-v4-pro');
    expect(verdict).toBe(true);
  });

  it('returns false for composite deepseek/', () => {
    const verdict = isDeepSeekModel('deepseek/deepseek-v4-pro');
    expect(verdict).toBe(false);
  });

  it('returns false for other models', () => {
    const verdict = isDeepSeekModel('qwen3.6-35b-a3b');
    expect(verdict).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// P4: upstreamModel additive source param
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Spec for the optional fourth `source` argument of `upstreamModel`: calls
 * with and without it must both succeed and yield the same wire model id.
 */
describe('upstreamModel source param (P4)', () => {
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
  const modelRef = 'sam-desktop/test-model';

  it('accepts optional source parameter without breaking existing calls', () => {
    const withoutSource = upstreamModel(swapConfig, modelRef);
    const withSource = upstreamModel(swapConfig, modelRef, undefined, 'boochat');

    expect(withoutSource).toBeDefined();
    expect(withSource).toBeDefined();
    expect((withoutSource as any).modelId).toBe('test-model');
    expect((withSource as any).modelId).toBe('test-model');
  });

  it('creates distinct cached providers for different source values', () => {
    // NOTE(review): only construction is asserted here; nothing checks that
    // the underlying providers actually differ per source value.
    const modelNoSource = upstreamModel(swapConfig, modelRef);
    const modelBoochat = upstreamModel(swapConfig, modelRef, undefined, 'boochat');
    const modelBoocoder = upstreamModel(swapConfig, modelRef, undefined, 'boocoder');

    expect(modelNoSource).toBeDefined();
    expect(modelBoochat).toBeDefined();
    expect(modelBoocoder).toBeDefined();
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// P7: gateway routing (auto:* virtual models)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Spec for gateway routing of `auto/auto:*` virtual models: with a
 * `boocontrol-gateway` provider registered the route is "gateway"; without
 * one the route is "gateway_error" and both `upstreamModel` and
 * `resolveModelEndpoint` throw a "routing gateway offline" error.
 */
describe('resolveModelProvider — gateway routing (P7)', () => {
  const config = { LLAMA_SWAP_URL: 'http://localhost:8080' };

  // Appends a gateway-kind provider to whatever the registry currently holds.
  const registerGateway = (): void => {
    mockProvidersList = [
      ...mockProvidersList,
      { id: 'auto', label: 'Auto (gateway)', baseUrl: 'http://100.114.205.53:9503', kind: 'boocontrol-gateway' },
    ];
  };

  it('routes a known gateway-kind provider to route "gateway"', () => {
    registerGateway();
    const resolved = resolveModelProvider('auto/auto:code', config);
    expect(resolved.route).toBe('gateway');
    expect(resolved.baseUrl).toBe('http://100.114.205.53:9503');
    expect(resolved.wireModelId).toBe('auto:code');
    expect(resolved.providerId).toBe('auto');
  });

  it('resolves an orphaned auto:* session to gateway_error, never swap', () => {
    // No gateway provider in the registry — the entry was removed.
    const resolved = resolveModelProvider('auto/auto:code', config);
    expect(resolved.route).toBe('gateway_error');
    expect(resolved.gatewayReason).toBe('offline');
    expect(resolved.baseUrl).not.toBe(config.LLAMA_SWAP_URL);
  });

  it('upstreamModel throws a clean error for gateway_error', () => {
    const attempt = () => upstreamModel(config, 'auto/auto:fast');
    expect(attempt).toThrow(/routing gateway offline/);
  });

  it('resolveModelEndpoint throws a clean error for gateway_error', () => {
    const attempt = () => resolveModelEndpoint(config, 'auto/auto:fast');
    expect(attempt).toThrow(/routing gateway offline/);
  });

  it('upstreamModel returns a model for a live gateway', () => {
    registerGateway();
    const built = upstreamModel(config, 'auto/auto:code');
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('auto:code');
  });
});
|
||||
|
||||
@@ -25,7 +25,6 @@ const BASE_AGENT: Agent = {
|
||||
source: 'global',
|
||||
max_tool_calls: null,
|
||||
steps: null,
|
||||
llama_extra_args: null,
|
||||
};
|
||||
|
||||
function call(name: string, args: Record<string, unknown> = {}): ToolCall {
|
||||
|
||||
@@ -2,7 +2,7 @@ import { promises as fs } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js';
|
||||
import { ALL_TOOLS, resolveToolTier } from './tools.js';
|
||||
import { validateExtraArgs } from './inference/llama-args-validator.js';
|
||||
|
||||
import { stripQuotes } from '../utils/string-utils.js';
|
||||
|
||||
// v1.8.1: global agents live at /data/AGENTS.md inside the container
|
||||
@@ -105,7 +105,7 @@ interface ParsedFrontmatter {
|
||||
// (200) in the outer loop. Integer ≥ 0; steps: 0 means "no tool calls
|
||||
// allowed" — the model responds text-only.
|
||||
steps?: number;
|
||||
llama_extra_args?: string[];
|
||||
|
||||
// vDeepSeek: thinking effort for DeepSeek V4 models.
|
||||
reasoning_effort?: string;
|
||||
}
|
||||
@@ -253,34 +253,7 @@ function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: stri
|
||||
} else {
|
||||
errors.push(`steps must be a non-negative integer (got "${valueRaw}")`);
|
||||
}
|
||||
} else if (key === 'llama_extra_args') {
|
||||
if (valueRaw === '') {
|
||||
data.llama_extra_args = [];
|
||||
// No arrayKey support — llama_extra_args uses inline list only.
|
||||
} else if (valueRaw.startsWith('[') && valueRaw.endsWith(']')) {
|
||||
const inner = valueRaw.slice(1, -1);
|
||||
const parsed = inner
|
||||
.split(',')
|
||||
.map((s) => stripQuotes(s.trim()))
|
||||
.filter((s) => s.length > 0);
|
||||
try {
|
||||
validateExtraArgs(parsed);
|
||||
data.llama_extra_args = parsed;
|
||||
} catch (err) {
|
||||
errors.push(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
} else {
|
||||
const parsed = valueRaw
|
||||
.split(',')
|
||||
.map((s) => stripQuotes(s.trim()))
|
||||
.filter((s) => s.length > 0);
|
||||
try {
|
||||
validateExtraArgs(parsed);
|
||||
data.llama_extra_args = parsed;
|
||||
} catch (err) {
|
||||
errors.push(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
// Unknown keys silently ignored — forward-compat.
|
||||
}
|
||||
@@ -387,7 +360,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
|
||||
model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null,
|
||||
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
|
||||
steps: typeof fm.steps === 'number' ? fm.steps : null,
|
||||
llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
|
||||
|
||||
reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -357,7 +357,7 @@ async function callLlm(
|
||||
const { url, headers, model: resolvedModel } = resolveModelEndpoint(config, model);
|
||||
const res = await fetch(`${url}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
headers: { ...headers, 'X-Boo-Source': 'boochat' },
|
||||
body: JSON.stringify({ model: resolvedModel, messages, stream: false }),
|
||||
});
|
||||
if (!res.ok) {
|
||||
@@ -525,9 +525,11 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
// 7. Single completion (no tools). Throws on llama-swap failure.
|
||||
result = await callLlm(config, session.model, payload, log);
|
||||
|
||||
// 7b. v1.11.3: fetch the model's true context window from llama-swap's
|
||||
// /upstream/<model>/props (the streaming completion doesn't carry it).
|
||||
// 7b. v1.11.3: fetch the model's true context window from the provider's
|
||||
// /upstream/<wireModelId>/props (the streaming completion doesn't carry it).
|
||||
// Same pattern as inference.ts; the cache makes repeated calls free.
|
||||
// v2.x (W3): pass config so composite model ids resolve through the
|
||||
// provider registry instead of a process-wide LLAMA_SWAP_URL.
|
||||
const mctx = await modelContextLookup.getModelContext(session.model);
|
||||
const nCtx = mctx?.n_ctx ?? null;
|
||||
|
||||
|
||||
@@ -1,209 +0,0 @@
|
||||
// Guards against agent-supplied llama-server CLI flags that would clash with
|
||||
// values BooCode sets itself. Two concerns live here:
|
||||
//
|
||||
// 1. A hard denylist of flags that BooCode owns outright (model selection,
|
||||
// the listening socket, credentials, the bundled web UI). Passing any of
|
||||
// these is a configuration error and is rejected loudly.
|
||||
//
|
||||
// 2. A "shadowing" set of flags that are legal to pass but, because of
|
||||
// llama.cpp's last-wins argument parsing, would override a first-class
|
||||
// BooCode setting. These are silently removed from the auto-generated
|
||||
// argv so the agent's explicit choice takes precedence without leaving a
|
||||
// duplicate flag behind.
|
||||
//
|
||||
// All flag spellings below are the public llama-server option names (short and
|
||||
// long aliases) documented in its --help output.
|
||||
|
||||
// --- Hard denylist -------------------------------------------------------
|
||||
|
||||
// Authored as named buckets purely for readability; every alias is folded
|
||||
// into one flat lookup set at module load. Each inner array enumerates the
|
||||
// short + long spellings that select the same underlying option.
|
||||
/** A bucket of alias groups; each inner list holds every spelling of one option. */
type AliasGroups = ReadonlyArray<readonly string[]>;

// Model selection flags.
const MODEL_SOURCE_FLAGS: AliasGroups = [
  ['-m', '--model'],
  ['-mu', '--model-url'],
  ['-dr', '--docker-repo'],
  ['-hf', '-hfr', '--hf-repo'],
  ['-hff', '--hf-file'],
  ['-hfv', '-hfrv', '--hf-repo-v'],
  ['-hffv', '--hf-file-v'],
  ['-hft', '--hf-token'],
  ['-mm', '--mmproj'],
  ['-mmu', '--mmproj-url'],
];

// Listening-socket flags.
const LISTEN_FLAGS: AliasGroups = [
  ['--host'],
  ['--port'],
  ['--path'],
  ['--api-prefix'],
  ['--reuse-port'],
];

// Credential flags.
const CREDENTIAL_FLAGS: AliasGroups = [
  ['--api-key'],
  ['--api-key-file'],
  ['--ssl-key-file'],
  ['--ssl-cert-file'],
];

// Bundled web UI flags.
const WEBUI_FLAGS: AliasGroups = [
  ['--webui', '--no-webui'],
  ['--ui', '--no-ui'],
  ['--ui-config'],
  ['--ui-config-file'],
  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
  ['--models-dir'],
  ['--models-preset'],
  ['--models-max'],
  ['--models-autoload', '--no-models-autoload'],
];

// Flat lookup of every managed spelling, built once at module load.
const MANAGED_FLAGS: ReadonlySet<string> = (() => {
  const names = new Set<string>();
  for (const bucket of [MODEL_SOURCE_FLAGS, LISTEN_FLAGS, CREDENTIAL_FLAGS, WEBUI_FLAGS]) {
    for (const aliases of bucket) {
      for (const alias of aliases) names.add(alias);
    }
  }
  return names;
})();
|
||||
|
||||
// --- Token parsing -------------------------------------------------------
|
||||
|
||||
const DIGIT = /^[0-9]$/;

/**
 * Return the option name carried by one argv token, or `null` when the token
 * is not an option.
 *
 * Rules, in order:
 *  - anything not starting with `-` is a plain value;
 *  - a lone `-` or `--` is not an option;
 *  - `-` followed by a digit or `.` is a negative number (`-1`, `-.5`), not an option;
 *  - otherwise the name is everything before the first `=` (or the whole
 *    token when there is no `=`), so `--ctx-size=4096` gives `--ctx-size`.
 */
function parseFlag(token: string): string | null {
  const isBareDash = token === '-' || token === '--';
  if (token[0] !== '-' || isBareDash) return null;

  // The token has at least two characters here, so charAt(1) is a real character.
  const lead = token.charAt(1);
  const looksNumeric = lead === '.' || DIGIT.test(lead);
  if (looksNumeric) return null;

  const cut = token.indexOf('=');
  return cut < 0 ? token : token.substring(0, cut);
}
|
||||
|
||||
// --- Public API ----------------------------------------------------------
|
||||
|
||||
/**
 * Check extra llama-server args against the managed-flag denylist.
 *
 * @param args - Tokens to check; omitted or empty input yields an empty array.
 * @returns A fresh string[] holding every token (stringified), in input order.
 * @throws Error naming the first token whose flag is in MANAGED_FLAGS.
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];

  const accepted: string[] = [];
  for (const raw of args) {
    const text = String(raw);
    const name = parseFlag(text);
    // Plain values (name === null) are never rejected; only managed flag names are.
    if (name !== null && MANAGED_FLAGS.has(name)) {
      throw new Error(
        `llama-server flag '${name}' is managed and cannot be passed as an extra arg`,
      );
    }
    accepted.push(text);
  }
  return accepted;
}

/** Report whether `flag` (an exact flag spelling) is in the managed-flag denylist. */
export function isManagedFlag(flag: string): boolean {
  const managed = MANAGED_FLAGS.has(flag);
  return managed;
}
|
||||
|
||||
// --- Shadowing flags -----------------------------------------------------
|
||||
|
||||
// Flags below are legal for an agent to pass, but each shadows a setting
|
||||
// BooCode applies itself. They are categorised so a caller can opt out of
|
||||
// stripping any one category.
|
||||
|
||||
// Context-size flags.
const SHADOW_CONTEXT = ['-c', '--ctx-size'];

// Deliberately empty: agents are allowed to opt into cache-type flags.
const SHADOW_CACHE: string[] = [];

// Deliberately empty: agents are allowed to enable ngram speculative decoding.
const SHADOW_SPEC: string[] = [];

// Boolean template switches; listed once and reused by both sets below.
const JINJA_SWITCHES = ['--jinja', '--no-jinja'];

// Chat-template flags, value-taking ones first, then the boolean switches.
const SHADOW_TEMPLATE = [
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  ...JINJA_SWITCHES,
];

// Shadowing flags that take no value, so the stripper must leave the
// following token alone when it removes one of these.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set(JINJA_SWITCHES);
|
||||
|
||||
/** Per-category switches for stripShadowingFlags; a category is stripped unless set to `false`. */
export interface StripOptions {
  stripContext?: boolean;
  stripCache?: boolean;
  stripSpec?: boolean;
  stripTemplate?: boolean;
}

/**
 * Drop shadowing flags, together with their values, from an argv sequence.
 *
 * @param args - Tokens to filter; each is stringified before inspection.
 * @param opts - Set a `strip*` field to `false` to keep that category.
 * @returns A new array with the surviving tokens in their original order.
 *
 * A removed flag also removes the next token when that token is its value,
 * i.e. when the flag is not a boolean switch, carries no inline `=value`,
 * and the next token is not itself a flag (e.g. `-c 4096`).
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  const categories: Array<[enabled: boolean, flags: readonly string[]]> = [
    [opts?.stripContext !== false, SHADOW_CONTEXT],
    [opts?.stripCache !== false, SHADOW_CACHE],
    [opts?.stripSpec !== false, SHADOW_SPEC],
    [opts?.stripTemplate !== false, SHADOW_TEMPLATE],
  ];
  const doomed = new Set<string>(
    categories.flatMap(([enabled, flags]) => (enabled ? flags : [])),
  );

  const argv = [...args].map((entry) => String(entry));
  const survivors: string[] = [];

  let cursor = 0;
  while (cursor < argv.length) {
    const current = argv[cursor]!;
    cursor += 1;

    const name = parseFlag(current);
    if (name === null || !doomed.has(name)) {
      // Plain value or a flag outside the stripped categories: keep verbatim.
      survivors.push(current);
      continue;
    }

    // Boolean switches and `--flag=value` occupy a single token.
    if (VALUELESS_SHADOW_FLAGS.has(name) || current.includes('=')) continue;

    // Otherwise also consume the following token when it reads as a value.
    const lookahead = argv[cursor];
    if (lookahead !== undefined && parseFlag(lookahead) === null) {
      cursor += 1;
    }
  }

  return survivors;
}
|
||||
@@ -1,6 +1,7 @@
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||
import { createDeepSeek } from '@ai-sdk/deepseek';
|
||||
import type { LanguageModel } from 'ai';
|
||||
import { getLlamaProviders, parseModelRef } from '../llama-providers.js';
|
||||
|
||||
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
|
||||
@@ -8,48 +9,46 @@ import type { LanguageModel } from 'ai';
|
||||
// Tailscale topology and exposing it over the public internet is gated by
|
||||
// Authelia at the Caddy layer, not by API keys.
|
||||
//
|
||||
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
|
||||
// llama-sidecar instead. A fresh provider is created per call (not cached)
|
||||
// because the X-Agent-Flags header varies per agent. The llama-swap path
|
||||
// stays cached since it has no per-request headers.
|
||||
//
|
||||
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
|
||||
// is set, route through the official @ai-sdk/deepseek provider (not
|
||||
// openai-compatible) so DeepSeek-specific features work: providerMetadata
|
||||
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
|
||||
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
|
||||
// v2.x: provider-aware resolver (W2). One resolver answers provider identity,
|
||||
// upstream base URL, final wire model id, and DeepSeek
|
||||
// special handling. Both upstreamModel() and resolveModelEndpoint() go through
|
||||
// it. Legacy bare-id prefix heuristics live only in the fallback layer.
|
||||
|
||||
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
||||
|
||||
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
|
||||
let provider = swapCache.get(baseURL);
|
||||
function getSwapProvider(baseURL: string, source?: string): ReturnType<typeof createOpenAICompatible> {
|
||||
const cacheKey = source ? `${baseURL}||${source}` : baseURL;
|
||||
let provider = swapCache.get(cacheKey);
|
||||
if (!provider) {
|
||||
const fetchWrapper = source
|
||||
? ((...args: Parameters<typeof fetch>) => {
|
||||
const [input, init] = args;
|
||||
return fetch(input, {
|
||||
...init,
|
||||
headers: {
|
||||
...(init?.headers as Record<string, string> | undefined) ?? {},
|
||||
'X-Boo-Source': source,
|
||||
},
|
||||
});
|
||||
})
|
||||
: undefined;
|
||||
provider = createOpenAICompatible({
|
||||
name: 'llama-swap',
|
||||
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||
includeUsage: true,
|
||||
});
|
||||
swapCache.set(baseURL, provider);
|
||||
...(fetchWrapper ? { fetch: fetchWrapper } : {}),
|
||||
}) as ReturnType<typeof createOpenAICompatible>;
|
||||
swapCache.set(cacheKey, provider);
|
||||
}
|
||||
return provider;
|
||||
}
|
||||
|
||||
function sidecarProvider(
|
||||
baseURL: string,
|
||||
flags: string[],
|
||||
): ReturnType<typeof createOpenAICompatible> {
|
||||
return createOpenAICompatible({
|
||||
name: 'llama-sidecar',
|
||||
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||
includeUsage: true,
|
||||
headers: {
|
||||
'X-Agent-Flags': flags.join(' '),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';

/**
 * Legacy check for bare "deepseek-*" model ids, kept for backward
 * compatibility. Composite "deepseek/model" ids are recognised by their
 * provider id instead, not by this prefix test.
 *
 * @returns true when `modelId` begins with the "deepseek-" prefix.
 */
export function isDeepSeekModel(modelId: string): boolean {
  const head = modelId.slice(0, DEEPSEEK_MODEL_PREFIX.length);
  return head === DEEPSEEK_MODEL_PREFIX;
}
|
||||
@@ -69,69 +68,204 @@ function getDeepSeekProvider(
|
||||
return deepseekProviderCache;
|
||||
}
|
||||
|
||||
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider-aware resolver (W2, D-2, D-3)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface RoutingInfo {
|
||||
// P7: 'gateway' routes to the BooControl auto:* gateway (OpenAI-compatible,
|
||||
// does its own policy routing + failover). 'gateway_error' is the
|
||||
// present-but-unhealthy / orphaned-session state: the session selected an
|
||||
// auto:* model but the gateway provider is missing/disabled, so we surface a
|
||||
// clean error instead of silently mis-routing to LLAMA_SWAP_URL.
|
||||
export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error';
|
||||
|
||||
/** Provider registry `kind` value that marks the BooControl routing gateway. */
export const GATEWAY_KIND = 'boocontrol-gateway';

/**
 * Tell whether a bare wire model id names a gateway virtual model: exactly
 * `auto`, or anything starting with `auto:`.
 *
 * The resolver uses this to spot an orphaned auto:* session whose gateway
 * registry entry is gone, so it can report gateway_error rather than fall
 * back to swap.
 */
export function isGatewayVirtualModel(wireModelId: string): boolean {
  const KEYWORD = 'auto';
  if (!wireModelId.startsWith(KEYWORD)) return false;
  // Either the id is the keyword alone, or the keyword is followed by ':'.
  return wireModelId.length === KEYWORD.length || wireModelId.charAt(KEYWORD.length) === ':';
}
|
||||
|
||||
export interface ResolvedModel {
|
||||
/** Routing destination. */
|
||||
route: InferenceRoute;
|
||||
flags: string[] | null;
|
||||
/** Upstream base URL for the provider (DeepSeek API base or llama-swap). */
|
||||
baseUrl: string;
|
||||
/** Wire model id to send upstream (bare, no provider prefix). */
|
||||
wireModelId: string;
|
||||
/** Whether the input was a legacy bare id resolved through defaultProvider. */
|
||||
isLegacyBareId: boolean;
|
||||
/** Provider identity (e.g. "sam-desktop", "embedding", "deepseek"). */
|
||||
providerId: string;
|
||||
/** For route 'gateway_error': why the gateway is unavailable. */
|
||||
gatewayReason?: 'offline' | 'unhealthy';
|
||||
}
|
||||
|
||||
interface AgentLike {
|
||||
llama_extra_args: string[] | null;
|
||||
// reserved for future per-agent routing attributes
|
||||
}
|
||||
|
||||
interface ConfigLike {
|
||||
LLAMA_SWAP_URL: string;
|
||||
LLAMA_SIDECAR_URL?: string;
|
||||
DEEPSEEK_API_KEY?: string;
|
||||
DEEPSEEK_BASE_URL?: string;
|
||||
}
|
||||
|
||||
/**
 * Resolve a model id (bare or composite "provider/model") to its routing
 * target: route tag, upstream base URL, bare wire model id and provider id.
 *
 * Resolution order:
 *  1. DeepSeek — only when DEEPSEEK_API_KEY is set, for provider id
 *     "deepseek" or a legacy bare id carrying the "deepseek-" prefix.
 *  2. A provider found in the registry — 'gateway' when its kind is
 *     GATEWAY_KIND, otherwise 'swap', both using the provider's own baseUrl.
 *  3. No registry match and the wire id looks like a gateway virtual model —
 *     'gateway_error' with reason 'offline' and an empty baseUrl.
 *  4. Anything else — 'swap' against config.LLAMA_SWAP_URL. Bare ids are
 *     reported under provider id 'llama-swap'; composite ids keep the
 *     provider id they were written with.
 */
export function resolveModelProvider(
  modelId: string,
  config: ConfigLike,
): ResolvedModel {
  const registry = getLlamaProviders();
  const { providerId, wireModelId, isLegacyBareId } = parseModelRef(modelId);

  // Every result shares wireModelId / isLegacyBareId; only these three vary.
  const target = (
    route: InferenceRoute,
    baseUrl: string,
    resolvedProviderId: string,
  ): ResolvedModel => ({
    route,
    baseUrl,
    wireModelId,
    isLegacyBareId,
    providerId: resolvedProviderId,
  });

  // --- DeepSeek ---
  if (config.DEEPSEEK_API_KEY) {
    const namedExplicitly = providerId === 'deepseek';
    const legacyPrefixed = isLegacyBareId && isDeepSeekModel(wireModelId);
    if (namedExplicitly || legacyPrefixed) {
      const deepseekBase = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(
        /\/+$/,
        '',
      );
      return target('deepseek', deepseekBase, 'deepseek');
    }
  }

  // --- Registry providers (gateway or plain swap) ---
  const match = registry.providers.find((candidate) => candidate.id === providerId);
  if (match) {
    const route: InferenceRoute = match.kind === GATEWAY_KIND ? 'gateway' : 'swap';
    return target(route, match.baseUrl, match.id);
  }

  // --- No registry entry ---
  // An auto:* id with no provider must surface as an error, never as a
  // silent LLAMA_SWAP_URL fallback.
  if (isGatewayVirtualModel(wireModelId)) {
    return { ...target('gateway_error', '', providerId), gatewayReason: 'offline' };
  }

  // Best-effort fallback to the legacy llama-swap URL.
  return target('swap', config.LLAMA_SWAP_URL, isLegacyBareId ? 'llama-swap' : providerId);
}
|
||||
|
||||
/**
|
||||
* @deprecated Use resolveModelProvider() for full routing info. Kept for
|
||||
* backward compat with resolveRoute() callers that only need the route tag.
|
||||
*/
|
||||
export function resolveRoute(
|
||||
agent: AgentLike | null,
|
||||
config?: ConfigLike,
|
||||
modelId?: string,
|
||||
): RoutingInfo {
|
||||
// vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
|
||||
// route through the DeepSeek provider. Checked first so DeepSeek models
|
||||
// always bypass llama-swap/sidecar even when those are also configured.
|
||||
if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
|
||||
return { route: 'deepseek', flags: null };
|
||||
}
|
||||
// When llama_extra_args are explicitly set, route through sidecar with them.
|
||||
const flags = agent?.llama_extra_args;
|
||||
if (flags && flags.length > 0) {
|
||||
return { route: 'sidecar', flags };
|
||||
}
|
||||
// When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
|
||||
// route through sidecar to pick up the default base args (cache quant,
|
||||
// spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
|
||||
if (config?.LLAMA_SIDECAR_URL) {
|
||||
return { route: 'sidecar', flags: [] };
|
||||
}
|
||||
return { route: 'swap', flags: null };
|
||||
): { route: InferenceRoute } {
|
||||
if (!modelId || !config) return { route: 'swap' };
|
||||
const resolved = resolveModelProvider(modelId, config);
|
||||
return { route: resolved.route };
|
||||
}
|
||||
|
||||
export function upstreamModel(
|
||||
config: ConfigLike,
|
||||
modelId: string,
|
||||
agent?: AgentLike | null,
|
||||
source?: string,
|
||||
): LanguageModel {
|
||||
const { route, flags } = resolveRoute(agent ?? null, config, modelId);
|
||||
if (route === 'deepseek') {
|
||||
const resolved = resolveModelProvider(modelId, config);
|
||||
if (resolved.route === 'deepseek') {
|
||||
return getDeepSeekProvider(
|
||||
config.DEEPSEEK_API_KEY!,
|
||||
config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
|
||||
).chat(modelId);
|
||||
resolved.baseUrl,
|
||||
).chat(resolved.wireModelId);
|
||||
}
|
||||
if (route === 'sidecar') {
|
||||
const url = config.LLAMA_SIDECAR_URL;
|
||||
if (!url) {
|
||||
throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
|
||||
}
|
||||
return sidecarProvider(url, (flags ?? [])).chatModel(modelId);
|
||||
|
||||
// P7: gateway is OpenAI-compatible — same adapter as swap, pointed at the
|
||||
// gateway baseUrl. The gateway resolves the policy + forwards X-Boo-Source.
|
||||
if (resolved.route === 'gateway') {
|
||||
return getSwapProvider(resolved.baseUrl, source).chatModel(resolved.wireModelId);
|
||||
}
|
||||
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
|
||||
|
||||
// P7: orphaned auto:* session with no gateway configured — fail loud rather
|
||||
// than silently mis-route to LLAMA_SWAP_URL.
|
||||
if (resolved.route === 'gateway_error') {
|
||||
throw new Error(
|
||||
`routing gateway offline (${resolved.gatewayReason ?? 'unavailable'}): ${modelId}`,
|
||||
);
|
||||
}
|
||||
|
||||
return getSwapProvider(resolved.baseUrl, source).chatModel(resolved.wireModelId);
|
||||
}
|
||||
|
||||
/** Resolve the API endpoint for non-streaming calls (compaction, task-model).
|
||||
@@ -140,18 +274,30 @@ export function resolveModelEndpoint(
|
||||
config: ConfigLike,
|
||||
modelId: string,
|
||||
): { url: string; model: string; headers: Record<string, string> } {
|
||||
const resolved = resolveModelProvider(modelId, config);
|
||||
const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
|
||||
if (modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && config.DEEPSEEK_API_KEY) {
|
||||
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
|
||||
if (resolved.route === 'deepseek') {
|
||||
return {
|
||||
url: baseURL,
|
||||
model: modelId,
|
||||
url: resolved.baseUrl,
|
||||
model: resolved.wireModelId,
|
||||
headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||
};
|
||||
}
|
||||
|
||||
// P7: orphaned auto:* session with no gateway — fail loud (no swap fallback).
|
||||
if (resolved.route === 'gateway_error') {
|
||||
throw new Error(
|
||||
`routing gateway offline (${resolved.gatewayReason ?? 'unavailable'}): ${modelId}`,
|
||||
);
|
||||
}
|
||||
|
||||
// P7: gateway uses the same unauthenticated OpenAI-compatible shape as swap.
|
||||
// X-Boo-Source forwarding for direct-fetch callers happens at their own header
|
||||
// layer (compaction.ts / task-model.ts); the gateway re-forwards it onward.
|
||||
return {
|
||||
url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
|
||||
model: modelId,
|
||||
url: resolved.baseUrl.replace(/\/+$/, ''),
|
||||
model: resolved.wireModelId,
|
||||
headers: baseHeaders,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -306,7 +306,7 @@ export async function streamCompletion(
|
||||
: stallAc.signal;
|
||||
|
||||
const result = streamText({
|
||||
model: upstreamModel(ctx.config, model, agent ?? null),
|
||||
model: upstreamModel(ctx.config, model, agent ?? null, 'boochat'),
|
||||
messages: aiMessages,
|
||||
...(aiTools
|
||||
? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
|
||||
|
||||
101
apps/server/src/services/llama-providers.ts
Normal file
101
apps/server/src/services/llama-providers.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* vMultiProvider local provider registry loader (server-side).
|
||||
*
|
||||
* Reads the shared `/data/llama-providers.json` (or `LLAMA_PROVIDERS_PATH`) at
|
||||
* startup and caches the parsed result. When the file is absent or invalid,
|
||||
* synthesizes a single legacy provider from `LLAMA_SWAP_URL` so both apps
|
||||
* start with only legacy env vars (D-1).
|
||||
*
|
||||
* Schema and pure helpers live in @boocode/contracts/llama-providers.
|
||||
* File I/O stays app-local per D-1.
|
||||
*/
|
||||
import { readFileSync } from 'node:fs';
|
||||
import {
|
||||
LlamaProvidersFileSchema,
|
||||
type LlamaProvidersFile,
|
||||
type LlamaProvider,
|
||||
type ParsedModelRef,
|
||||
parseModelRef as parseModelRefBase,
|
||||
formatModelRef,
|
||||
} from '@boocode/contracts/llama-providers';
|
||||
|
||||
// Type-only re-exports: these names are erased at compile time.
export type { LlamaProvidersFile, LlamaProvider, ParsedModelRef };
// formatModelRef is imported above as a runtime value. Re-exporting it under
// `export type` would stop importers from calling it, so it gets a value export.
export { formatModelRef };
|
||||
|
||||
/**
 * Build the fallback registry: one provider whose id, label and kind are all
 * 'llama-swap', pointing at `llamaSwapUrl`, and marked as the default provider.
 */
function buildLegacyProvider(llamaSwapUrl: string): LlamaProvidersFile {
  const LEGACY_ID = 'llama-swap';
  const legacy: LlamaProvider = {
    id: LEGACY_ID,
    label: LEGACY_ID,
    baseUrl: llamaSwapUrl,
    kind: LEGACY_ID,
  };
  return { defaultProvider: LEGACY_ID, providers: [legacy] };
}
|
||||
|
||||
let cached: LlamaProvidersFile | null = null;
|
||||
|
||||
/**
 * Load (or re-load) the local provider config and store it in the module cache.
 *
 * Never throws on bad input: when the path is unset, the file cannot be read,
 * the JSON is malformed, or schema validation fails, the cache is set to the
 * legacy single-provider shape built from `llamaSwapUrl`.
 *
 * @param providersPath - Path of the providers JSON file; falsy means "use legacy".
 * @param llamaSwapUrl - Base URL used for the synthesized legacy provider.
 * @returns The config that is now cached.
 */
export function loadLlamaProviders(
  providersPath: string | undefined,
  llamaSwapUrl: string,
): LlamaProvidersFile {
  // Single place that installs the legacy fallback into the cache.
  const useLegacy = (): LlamaProvidersFile => {
    const legacy = buildLegacyProvider(llamaSwapUrl);
    cached = legacy;
    return legacy;
  };

  if (!providersPath) return useLegacy();

  let raw: string;
  try {
    raw = readFileSync(providersPath, 'utf8');
  } catch (err: unknown) {
    // Only ENOENT really means "file not found". Other failures (permissions,
    // path is a directory, …) are reported with the underlying error so a
    // misconfiguration is not mistaken for a missing optional file.
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    if (code === 'ENOENT') {
      console.warn(
        `llama-providers: file not found at ${providersPath} — falling back to legacy single-provider`,
      );
    } else {
      console.error(
        `llama-providers: cannot read ${providersPath} — falling back to legacy single-provider`,
        err,
      );
    }
    return useLegacy();
  }

  let json: unknown;
  try {
    json = JSON.parse(raw);
  } catch (err: unknown) {
    console.error(
      `llama-providers: invalid JSON in ${providersPath} — falling back to legacy single-provider`,
      err,
    );
    return useLegacy();
  }

  const parsed = LlamaProvidersFileSchema.safeParse(json);
  if (!parsed.success) {
    console.error(
      `llama-providers: schema validation failed for ${providersPath} — falling back to legacy single-provider`,
      parsed.error.flatten(),
    );
    return useLegacy();
  }

  cached = parsed.data;
  return parsed.data;
}
|
||||
|
||||
/**
 * Return the cached provider config. When nothing has been loaded yet, a
 * legacy single-provider config pointing at http://localhost:8080 is built
 * and returned; that fallback is not stored in the cache.
 */
export function getLlamaProviders(): LlamaProvidersFile {
  if (cached !== null) return cached;
  return buildLegacyProvider('http://localhost:8080');
}
|
||||
|
||||
/**
 * Convenience wrapper: parse a model ref using the default provider of the
 * currently cached provider config.
 */
export function parseModelRef(ref: string): ParsedModelRef {
  const { defaultProvider } = getLlamaProviders();
  return parseModelRefBase(ref, defaultProvider);
}
|
||||
@@ -1,13 +1,15 @@
|
||||
// v1.11.3: llama-swap model-context cache. Replaces the dead
|
||||
// v2.x: provider-aware model-context cache (W3). Replaces the dead
|
||||
// `parsed.timings.n_ctx` capture in inference.ts / compaction.ts —
|
||||
// llama-server's streaming completion never emits n_ctx in timings (verified
|
||||
// empirically: timings carries prompt_n / predicted_n / *_ms / *_per_second
|
||||
// only). The authoritative source is llama-swap's
|
||||
// /upstream/<model>/props endpoint at .default_generation_settings.n_ctx.
|
||||
// only). The authoritative source is the provider's
|
||||
// /upstream/<wireModelId>/props endpoint at .default_generation_settings.n_ctx.
|
||||
//
|
||||
// Cache design:
|
||||
// - Keys are the full composite model id (provider/model) so two providers
|
||||
// serving the same wire model name never share cache entries (D-2).
|
||||
// - Positive entries (n_ctx + total_slots) have no TTL. A model's context
|
||||
// size doesn't change while llama-swap is running; an admin endpoint
|
||||
// size doesn't change while the provider is running; an admin endpoint
|
||||
// can invalidateModelContext() if it ever does.
|
||||
// - Negative entries (failed fetch) have a 60s TTL so a misconfigured or
|
||||
// down model doesn't get hammered every inference turn, but recovers
|
||||
@@ -15,6 +17,11 @@
|
||||
// - 3s AbortController timeout on the fetch — long enough for a healthy
|
||||
// upstream, short enough that a stuck upstream doesn't block the
|
||||
// ctx_max UPDATE that follows.
|
||||
//
|
||||
// v1.x legacy: previously keyed by bare wire id and used a process-wide
|
||||
// LLAMA_SWAP_URL. Now resolved per-call via the provider registry.
|
||||
|
||||
import { resolveModelProvider } from './inference/provider.js';
|
||||
|
||||
export interface ModelContext {
|
||||
n_ctx: number;
|
||||
@@ -28,29 +35,79 @@ const positiveCache = new Map<string, ModelContext>();
|
||||
// re-fetches within the 60s window.
|
||||
const negativeCache = new Map<string, number>();
|
||||
|
||||
// Set once at startup by index.ts. We don't import loadConfig() directly
|
||||
// here to keep this module trivially mockable in tests (set the URL in
|
||||
// beforeEach instead of stubbing process.env + loadConfig's cache).
|
||||
let llamaSwapUrl: string | null = null;
|
||||
// Stored config for provider-aware resolution. Supports both the legacy
|
||||
// { llamaSwapUrl: string } shape (for tests) and the full Config shape.
|
||||
let storedConfig: ConfigForModelContext | null = null;
|
||||
|
||||
export function configureModelContext(opts: { llamaSwapUrl: string }): void {
|
||||
llamaSwapUrl = opts.llamaSwapUrl;
|
||||
/** Config fields needed for model-context provider resolution. */
|
||||
type ConfigForModelContext = {
|
||||
LLAMA_SWAP_URL: string;
|
||||
DEEPSEEK_API_KEY?: string;
|
||||
DEEPSEEK_BASE_URL?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Configure the module for model-context lookups.
|
||||
*
|
||||
* Accepts either the full server Config (production) or the legacy
|
||||
* `{ llamaSwapUrl }` shape (tests). The full Config is preferred so
|
||||
* getModelContext can resolve composite model ids through the provider
|
||||
* registry.
|
||||
*/
|
||||
export function configureModelContext(
|
||||
opts: ConfigForModelContext | { llamaSwapUrl: string },
|
||||
): void {
|
||||
// Legacy test helper: { llamaSwapUrl } → synthesize a minimal config.
|
||||
if ('llamaSwapUrl' in opts && typeof opts.llamaSwapUrl === 'string') {
|
||||
storedConfig = { LLAMA_SWAP_URL: opts.llamaSwapUrl };
|
||||
return;
|
||||
}
|
||||
storedConfig = opts as ConfigForModelContext;
|
||||
}
|
||||
|
||||
// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
|
||||
// Return a reasonable default context so compaction estimates work.
|
||||
const DEEPSEEK_DEFAULT_N_CTX = 131_072;
|
||||
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
|
||||
|
||||
export async function getModelContext(model: string): Promise<ModelContext | null> {
|
||||
// vDeepSeek: DeepSeek models have no /upstream/<model>/props. Use a static
|
||||
// default so compaction doesn't fall to the buffer-only path with tiny limits.
|
||||
if (model.startsWith(DEEPSEEK_MODEL_PREFIX)) {
|
||||
// Resolve the model through the provider-aware resolver. For composite
|
||||
// "provider/model" ids, this finds the correct provider's baseUrl. For
|
||||
// bare legacy ids, it falls back to the default provider.
|
||||
const config = storedConfig;
|
||||
if (!config) {
|
||||
// Module not initialized. Defensive — index.ts calls
|
||||
// configureModelContext at startup; if a test forgets, fail closed so
|
||||
// the chat still works (ctx_max stays null, UI degrades gracefully).
|
||||
negativeCache.set(model, Date.now());
|
||||
return null;
|
||||
}
|
||||
|
||||
const resolved = resolveModelProvider(model, config);
|
||||
|
||||
// DeepSeek models (by provider id) have no /upstream/<model>/props.
|
||||
// Use a static default so compaction doesn't fall to the buffer-only
|
||||
// path with tiny limits.
|
||||
if (resolved.providerId === 'deepseek') {
|
||||
return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
|
||||
}
|
||||
|
||||
// P7: orphaned auto:* session with no gateway configured — no props endpoint
|
||||
// to query. Negative-cache and return null; compaction degrades gracefully.
|
||||
if (resolved.route === 'gateway_error') {
|
||||
negativeCache.set(model, Date.now());
|
||||
return null;
|
||||
}
|
||||
|
||||
// P7: gateway route — baseUrl is the control gateway, which exposes
|
||||
// /upstream/<virtualModel>/props (it proxies the chosen candidate's props).
|
||||
// The normal fetch path below handles it without special-casing.
|
||||
|
||||
// Cache key is the full composite id to prevent cross-provider cache
|
||||
// poisoning for duplicate wire model names (D-2, design §5.3).
|
||||
const cacheKey = `${resolved.providerId}/${resolved.wireModelId}`;
|
||||
|
||||
// 1. Positive cache hit — no TTL check, model n_ctx is invariant.
|
||||
const pos = positiveCache.get(model);
|
||||
const pos = positiveCache.get(cacheKey);
|
||||
if (pos) return pos;
|
||||
|
||||
// 2. Negative cache hit within TTL — return null without refetching.
|
||||
@@ -58,30 +115,25 @@ export async function getModelContext(model: string): Promise<ModelContext | nul
|
||||
// attempt below; we don't delete them eagerly because the next successful
|
||||
// fetch will overwrite via the positive map and the negative entry
|
||||
// becomes irrelevant.
|
||||
const negTs = negativeCache.get(model);
|
||||
const negTs = negativeCache.get(cacheKey);
|
||||
if (negTs !== undefined && Date.now() - negTs < NEGATIVE_TTL_MS) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3. Module not initialized. Defensive — index.ts calls
|
||||
// configureModelContext at startup; if a test forgets, fail closed so
|
||||
// the chat still works (ctx_max stays null, UI degrades gracefully).
|
||||
if (!llamaSwapUrl) {
|
||||
negativeCache.set(model, Date.now());
|
||||
return null;
|
||||
}
|
||||
|
||||
// 4. Fetch with timeout. AbortController fires after FETCH_TIMEOUT_MS;
|
||||
// 3. Fetch with timeout. AbortController fires after FETCH_TIMEOUT_MS;
|
||||
// both the timeout path and a fetch reject end up in the catch below
|
||||
// and produce a negative cache entry.
|
||||
const url = `${llamaSwapUrl}/upstream/${encodeURIComponent(model)}/props`;
|
||||
//
|
||||
// Strip the provider prefix: fetch from
|
||||
// <provider.baseUrl>/upstream/<wireModelId>/props (design §5.3).
|
||||
const url = `${resolved.baseUrl.replace(/\/+$/, '')}/upstream/${encodeURIComponent(resolved.wireModelId)}/props`;
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
||||
try {
|
||||
const res = await fetch(url, { signal: controller.signal });
|
||||
clearTimeout(timer);
|
||||
if (!res.ok) {
|
||||
negativeCache.set(model, Date.now());
|
||||
negativeCache.set(cacheKey, Date.now());
|
||||
return null;
|
||||
}
|
||||
const body = (await res.json()) as {
|
||||
@@ -89,18 +141,18 @@ export async function getModelContext(model: string): Promise<ModelContext | nul
|
||||
};
|
||||
const n_ctx = body?.default_generation_settings?.n_ctx;
|
||||
if (typeof n_ctx !== 'number' || n_ctx <= 0) {
|
||||
negativeCache.set(model, Date.now());
|
||||
negativeCache.set(cacheKey, Date.now());
|
||||
return null;
|
||||
}
|
||||
const entry: ModelContext = { n_ctx };
|
||||
positiveCache.set(model, entry);
|
||||
positiveCache.set(cacheKey, entry);
|
||||
// Clear any stale negative entry so a future query sees the positive
|
||||
// hit cleanly (otherwise the negative TTL never expires from the map).
|
||||
negativeCache.delete(model);
|
||||
negativeCache.delete(cacheKey);
|
||||
return entry;
|
||||
} catch {
|
||||
clearTimeout(timer);
|
||||
negativeCache.set(model, Date.now());
|
||||
negativeCache.set(cacheKey, Date.now());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -110,7 +162,16 @@ export function invalidateModelContext(model?: string): void {
|
||||
positiveCache.clear();
|
||||
negativeCache.clear();
|
||||
} else {
|
||||
positiveCache.delete(model);
|
||||
negativeCache.delete(model);
|
||||
// Resolve to composite cache key. If the model is already composite
|
||||
// (contains '/'), it's used directly. Otherwise, resolve through the
|
||||
// provider registry to find the composite key. This keeps backward
|
||||
// compat with callers passing bare model names.
|
||||
let cacheKey = model;
|
||||
if (storedConfig && !model.includes('/')) {
|
||||
const resolved = resolveModelProvider(model, storedConfig);
|
||||
cacheKey = `${resolved.providerId}/${resolved.wireModelId}`;
|
||||
}
|
||||
positiveCache.delete(cacheKey);
|
||||
negativeCache.delete(cacheKey);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import { createHash } from 'node:crypto';
|
||||
import { readFile, stat } from 'node:fs/promises';
|
||||
import type { Agent, Project, Session } from '../types/api.js';
|
||||
import { getAgentsMtimes } from './agents.js';
|
||||
import { resolveRoute } from './inference/provider.js';
|
||||
import { resolveRoute, type InferenceRoute } from './inference/provider.js';
|
||||
import { loadMemoryForSession } from './memory/recall.js';
|
||||
import { formatMemoryBlock } from './memory/prompt.js';
|
||||
|
||||
@@ -101,7 +101,7 @@ export interface PrefixFingerprint {
|
||||
has_agent_system_prompt: boolean;
|
||||
has_session_override: boolean;
|
||||
has_project_override: boolean;
|
||||
route: 'swap' | 'sidecar' | 'deepseek';
|
||||
route: InferenceRoute;
|
||||
}
|
||||
|
||||
export interface PrefixDrift {
|
||||
@@ -129,7 +129,7 @@ interface ObservedInputs {
|
||||
has_agent_system_prompt: boolean;
|
||||
has_session_override: boolean;
|
||||
has_project_override: boolean;
|
||||
route: 'swap' | 'sidecar' | 'deepseek';
|
||||
route: InferenceRoute;
|
||||
}
|
||||
|
||||
interface ObserverEntry {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { loadConfig, type Config } from '../config.js';
|
||||
import { resolveModelEndpoint } from './inference/provider.js';
|
||||
|
||||
const TIMEOUT_MS = 10_000;
|
||||
|
||||
@@ -13,14 +14,19 @@ export async function taskModelCompletion(opts: {
|
||||
const maxTokens = opts.maxTokens ?? 30;
|
||||
const temperature = opts.temperature ?? 0.3;
|
||||
|
||||
const { url, model } = resolveEndpoint(config, opts.fallbackModel);
|
||||
// v2.x (W3): resolve the endpoint through the shared provider-aware
|
||||
// resolver instead of a local LLAMA_SWAP_URL fallback. This ensures
|
||||
// composite model ids (e.g. "sam-desktop/qwen3.6-35b") route to the
|
||||
// correct provider, and bare ids resolve through the default provider.
|
||||
const model = config.FAST_MODEL ?? opts.fallbackModel ?? config.DEFAULT_MODEL;
|
||||
const { url, model: resolvedModel, headers } = resolveModelEndpoint(config, model);
|
||||
|
||||
try {
|
||||
const res = await fetch(`${url}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { ...headers, 'X-Boo-Source': 'boochat' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
model: resolvedModel,
|
||||
messages: [
|
||||
{ role: 'system', content: opts.system },
|
||||
{ role: 'user', content: opts.user },
|
||||
@@ -55,14 +61,3 @@ export async function taskModelCompletion(opts: {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pick the base URL and model name for a task-model completion.
 *
 * When `config.TASK_MODEL_URL` is set (truthy) it wins, and the model name is
 * fixed to 'gemma-3-270m-it'. Otherwise the request goes to
 * `config.LLAMA_SWAP_URL` with the first defined of `config.FAST_MODEL`,
 * `fallbackModel`, `config.DEFAULT_MODEL`.
 *
 * @param config Server configuration.
 * @param fallbackModel Optional model used when `FAST_MODEL` is not set.
 * @returns The endpoint base URL and the model name to send.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  if (config.TASK_MODEL_URL) {
    return { url: config.TASK_MODEL_URL, model: 'gemma-3-270m-it' };
  }
  // `??` (not `||`) so only null/undefined fall through to the next choice.
  const model = config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL;
  return { url: config.LLAMA_SWAP_URL, model };
}
|
||||
|
||||
@@ -129,7 +129,6 @@ export interface Agent {
|
||||
// v1.14.0: per-agent step cap for the outer inference loop. null means
|
||||
// bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
|
||||
steps: number | null;
|
||||
llama_extra_args: string[] | null;
|
||||
// vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
|
||||
// Maps to DeepSeek's reasoning_effort API param.
|
||||
reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
|
||||
@@ -244,6 +243,17 @@ export interface ModelInfo {
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
// v2.x: provider-grouped model catalog (W2, D-4).
/** One provider's entry in the catalog, with the models listed under it. */
export interface ModelCatalogProvider {
  /** Provider identifier. */
  id: string;
  /** Provider label (presumably the display name — confirm with the UI). */
  label: string;
  /** Models listed under this provider. */
  models: ModelInfo[];
}
|
||||
|
||||
/** Model catalog payload: a list of providers, each carrying its own models. */
export interface ModelCatalogResponse {
  /** Catalog entries, one per provider. */
  providers: ModelCatalogProvider[];
}
|
||||
|
||||
export interface SidebarSession {
|
||||
id: string;
|
||||
project_id: string;
|
||||
|
||||
Reference in New Issue
Block a user