Files
boocode/apps/server/src/services/inference/llama-args-validator.ts
indifferentketchup 41366b46c0 feat: relicense AGPL-3.0 → MIT (v2.7.0)
Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5
package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep
  extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT;
  AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser
was dead code). Server suite 519 passing; build + coder typecheck clean.
2026-06-01 08:16:03 +00:00

221 lines
6.7 KiB
TypeScript

// Guards against agent-supplied llama-server CLI flags that would clash with
// values BooCode sets itself. Two concerns live here:
//
// 1. A hard denylist of flags that BooCode owns outright (model selection,
// the listening socket, credentials, the bundled web UI). Passing any of
// these is a configuration error and is rejected loudly.
//
// 2. A "shadowing" set of flags that are legal to pass but, because of
// llama.cpp's last-wins argument parsing, would override a first-class
// BooCode setting. These are silently removed from the auto-generated
// argv so the agent's explicit choice takes precedence without leaving a
// duplicate flag behind.
//
// All flag spellings below are the public llama-server option names (short and
// long aliases) documented in its --help output.
// --- Hard denylist -------------------------------------------------------
// The denylist is written as four named buckets so each concern can be read
// on its own. Within a bucket, every inner array lists all spellings (short
// and long aliases, plus any `--no-` twin) of one underlying option. The
// buckets are merged into a single flat lookup set when the module loads.

// Options that choose which model (or multimodal projector) is loaded.
const MODEL_SOURCE_FLAGS = [
  ['-m', '--model'],
  ['-mu', '--model-url'],
  ['-dr', '--docker-repo'],
  ['-hf', '-hfr', '--hf-repo'],
  ['-hff', '--hf-file'],
  ['-hfv', '-hfrv', '--hf-repo-v'],
  ['-hffv', '--hf-file-v'],
  ['-hft', '--hf-token'],
  ['-mm', '--mmproj'],
  ['-mmu', '--mmproj-url'],
];

// Options that control the listening socket and URL layout.
const LISTEN_FLAGS = [
  ['--host'],
  ['--port'],
  ['--path'],
  ['--api-prefix'],
  ['--reuse-port'],
];

// Options that carry credentials or TLS material.
const CREDENTIAL_FLAGS = [
  ['--api-key'],
  ['--api-key-file'],
  ['--ssl-key-file'],
  ['--ssl-cert-file'],
];

// Options for the bundled web UI and multi-model management.
const WEBUI_FLAGS = [
  ['--webui', '--no-webui'],
  ['--ui', '--no-ui'],
  ['--ui-config'],
  ['--ui-config-file'],
  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
  ['--models-dir'],
  ['--models-preset'],
  ['--models-max'],
  ['--models-autoload', '--no-models-autoload'],
];

// Flat lookup of every managed spelling, filled bucket by bucket in the order
// the buckets are declared above.
const MANAGED_FLAGS: ReadonlySet<string> = (() => {
  const lookup = new Set<string>();
  const buckets = [
    MODEL_SOURCE_FLAGS,
    LISTEN_FLAGS,
    CREDENTIAL_FLAGS,
    WEBUI_FLAGS,
  ];
  for (const bucket of buckets) {
    for (const aliases of bucket) {
      for (const spelling of aliases) {
        lookup.add(spelling);
      }
    }
  }
  return lookup;
})();
// --- Token parsing -------------------------------------------------------
const DIGIT = /^[0-9]$/;

/**
 * Extract the canonical flag name from a single argv token, or `null` when
 * the token is not a flag.
 *
 * A token is treated as a flag only when it begins with `-` and the character
 * after the leading dash is neither a digit nor a decimal point — that rule
 * keeps negative numeric values such as `-1` or `-0.5` from being mistaken for
 * options. A bare `-` or `--` is not a flag either. The returned name is the
 * portion before any `=`, so `--ctx-size=4096` yields `--ctx-size`.
 *
 * Long options are canonicalised: llama.cpp's argument parser rewrites `_` to
 * `-` in any argument that starts with `--`, so `--api_key` selects the same
 * option as `--api-key`. The same rewrite is applied to the returned name
 * (never to an inline value) so underscore spellings cannot slip past the
 * exact-match lookups built on top of this function. Short options are
 * returned unchanged.
 *
 * @param token A single argv element.
 * @returns The canonical flag name, or `null` for non-flag tokens.
 */
function parseFlag(token: string): string | null {
  if (!token.startsWith('-')) return null;
  if (token === '-' || token === '--') return null;

  // The token is at least two characters long here, so charAt(1) is a real
  // character rather than the empty string.
  const second = token.charAt(1);
  if (DIGIT.test(second) || second === '.') return null;

  const eq = token.indexOf('=');
  const name = eq === -1 ? token : token.slice(0, eq);

  // Only `--long` names get the underscore rewrite, mirroring llama.cpp.
  return name.startsWith('--') ? name.replace(/_/g, '-') : name;
}
// --- Public API ----------------------------------------------------------
/**
 * Check a sequence of extra llama-server args against the managed-flag
 * denylist and return them as a fresh `string[]`.
 *
 * Every entry is coerced with `String()` before inspection. Tokens that are
 * not flags (values, negative numbers, bare dashes) always pass; a flag token
 * passes unless its name is in the managed set.
 *
 * @param args Extra args to check; a missing or falsy value yields `[]`.
 * @returns The coerced args, in their original order.
 * @throws Error on the first token that names a managed flag.
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];

  const accepted: string[] = [];
  for (const raw of args) {
    const text = String(raw);
    const name = parseFlag(text);
    const isManaged = name !== null && MANAGED_FLAGS.has(name);
    if (isManaged) {
      throw new Error(
        `llama-server flag '${name}' is managed and cannot be passed as an extra arg`,
      );
    }
    accepted.push(text);
  }
  return accepted;
}
/**
 * Report whether `flag` is one of the BooCode-managed llama-server flags that
 * callers may not override.
 *
 * The check is an exact string lookup against the managed set, so pass a bare
 * flag name (e.g. `--port`), not a `--flag=value` token.
 */
export function isManagedFlag(flag: string): boolean {
  const managed = MANAGED_FLAGS.has(flag);
  return managed;
}
// --- Shadowing flags -----------------------------------------------------
// An agent is allowed to pass any flag listed here, but each one shadows a
// setting BooCode applies itself. The lists are split by category so a caller
// can keep one category while the others are stripped.

// Context window size.
const SHADOW_CONTEXT = [
  '-c',
  '--ctx-size',
];

// KV cache data types.
const SHADOW_CACHE = [
  '-ctk',
  '--cache-type-k',
  '-ctv',
  '--cache-type-v',
];

// Speculative decoding and draft settings.
const SHADOW_SPEC = [
  '--spec-default',
  '--spec-type',
  '--spec-ngram-size-n',
  '--spec-ngram-size',
  '--draft-min',
  '--draft-max',
  '--spec-draft-n-max',
  '--spec-draft-n-min',
  '--spec-draft-p-min',
  '--spec-draft-p-split',
  '--spec-ngram-mod-n-match',
  '--spec-ngram-mod-n-min',
  '--spec-ngram-mod-n-max',
];

// Chat template selection and Jinja toggles.
const SHADOW_TEMPLATE = [
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
];

// The subset of shadowing flags that are plain boolean switches. They take no
// value, so stripping one must leave the token that follows it alone.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set<string>([
  '--spec-default',
  '--jinja',
  '--no-jinja',
]);
/**
 * Per-category switches for `stripShadowingFlags`. A category is stripped
 * unless its switch is explicitly `false`; omitting a switch (or the whole
 * options object) means "strip".
 */
export interface StripOptions {
  /** Set to `false` to keep context-size flags (`-c`, `--ctx-size`). */
  stripContext?: boolean;
  /** Set to `false` to keep KV cache type flags (`-ctk`, `-ctv`, and long forms). */
  stripCache?: boolean;
  /** Set to `false` to keep speculative-decoding / draft flags. */
  stripSpec?: boolean;
  /** Set to `false` to keep chat-template and Jinja flags. */
  stripTemplate?: boolean;
}
/**
 * Return a copy of an argv sequence with shadowing flags, and the values that
 * belong to them, removed.
 *
 * All four categories are stripped unless the matching `strip*` option is
 * explicitly `false`. How much is removed depends on the flag's form:
 *
 * - `--flag=value` — only that one token.
 * - a boolean switch such as `--jinja` — only that one token.
 * - `-c 4096` style — the flag and the following token, provided the
 *   following token is not itself a flag.
 *
 * @param args Argv tokens; each is coerced with `String()`.
 * @param opts Per-category opt-outs.
 * @returns The surviving tokens in their original order.
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  // Pair each category's switch with its flag list, then collect the flags of
  // every category that has not been explicitly disabled.
  const categories: ReadonlyArray<
    readonly [boolean | undefined, readonly string[]]
  > = [
    [opts?.stripContext, SHADOW_CONTEXT],
    [opts?.stripCache, SHADOW_CACHE],
    [opts?.stripSpec, SHADOW_SPEC],
    [opts?.stripTemplate, SHADOW_TEMPLATE],
  ];
  const shadowed = new Set<string>();
  for (const [setting, flags] of categories) {
    if (setting === false) continue;
    for (const name of flags) shadowed.add(name);
  }

  const argv = Array.from(args, (arg) => String(arg));
  const survivors: string[] = [];

  // True right after dropping a flag that expects a separate value token.
  let awaitingValue = false;
  for (const current of argv) {
    const name = parseFlag(current);

    if (awaitingValue) {
      awaitingValue = false;
      // A non-flag token here is the dropped flag's value: drop it as well.
      // A flag token is not a value, so it falls through to normal handling.
      if (name === null) continue;
    }

    // Anything that is not a targeted shadow flag is kept verbatim.
    if (name === null || !shadowed.has(name)) {
      survivors.push(current);
      continue;
    }

    // Targeted flag: drop it. Boolean switches and the inline `=value` form
    // have no separate value token to wait for.
    awaitingValue =
      !VALUELESS_SHADOW_FLAGS.has(name) && !current.includes('=');
  }
  return survivors;
}