// Change note: KV cache quantization (`--cache-type-k q4_0`) and ngram
// speculative decoding (`--spec-type ngram-mod`) are high-value llama.cpp
// features that reduce VRAM usage and improve tokens/sec. Removing them from
// the shadowing lists allows agents to enable them via `llama_extra_args`.
// (Listing metadata: 210 lines, 6.6 KiB, TypeScript.)

// Guards against agent-supplied llama-server CLI flags that would clash with
// values BooCode sets itself. Two concerns live here:
//
//   1. A hard denylist of flags that BooCode owns outright (model selection,
//      the listening socket, credentials, the bundled web UI). Passing any of
//      these is a configuration error and is rejected loudly.
//
//   2. A "shadowing" set of flags that are legal to pass but, because of
//      llama.cpp's last-wins argument parsing, would override a first-class
//      BooCode setting. These are silently removed from the auto-generated
//      argv so the agent's explicit choice takes precedence without leaving a
//      duplicate flag behind.
//
// All flag spellings below are the public llama-server option names (short and
// long aliases) documented in its --help output.

// --- Hard denylist -------------------------------------------------------

// Authored as named buckets purely for readability; every alias is folded
// into one flat lookup set at module load. Each inner array enumerates the
// spellings (short + long aliases, or a switch and its `--no-` counterpart)
// that select the same underlying option.

/** Model-selection flags: local path, URL, Docker/Hugging Face sources, mmproj. */
const MODEL_SOURCE_FLAGS = [
  ['-m', '--model'],
  ['-mu', '--model-url'],
  ['-dr', '--docker-repo'],
  ['-hf', '-hfr', '--hf-repo'],
  ['-hff', '--hf-file'],
  ['-hfv', '-hfrv', '--hf-repo-v'],
  ['-hffv', '--hf-file-v'],
  ['-hft', '--hf-token'],
  ['-mm', '--mmproj'],
  ['-mmu', '--mmproj-url'],
];

/** Listening-socket flags: bind address, port and URL layout. */
const LISTEN_FLAGS = [
  ['--host'],
  ['--port'],
  ['--path'],
  ['--api-prefix'],
  ['--reuse-port'],
];

/** Credential flags: API keys and TLS key/certificate files. */
const CREDENTIAL_FLAGS = [
  ['--api-key'],
  ['--api-key-file'],
  ['--ssl-key-file'],
  ['--ssl-cert-file'],
];

/** Bundled web UI flags, plus the `--models-*` options grouped with them here. */
const WEBUI_FLAGS = [
  ['--webui', '--no-webui'],
  ['--ui', '--no-ui'],
  ['--ui-config'],
  ['--ui-config-file'],
  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
  ['--models-dir'],
  ['--models-preset'],
  ['--models-max'],
  ['--models-autoload', '--no-models-autoload'],
];

/**
 * Flat lookup set of every denylisted spelling, built once at module load by
 * flattening the buckets above. Membership is an exact string match.
 */
const MANAGED_FLAGS: ReadonlySet<string> = new Set(
  [
    ...MODEL_SOURCE_FLAGS,
    ...LISTEN_FLAGS,
    ...CREDENTIAL_FLAGS,
    ...WEBUI_FLAGS,
  ].flat(),
);

// --- Token parsing -------------------------------------------------------

/** Matches exactly one ASCII decimal digit. */
const DIGIT = /^[0-9]$/;

/**
 * Extract the flag name from a single argv token, or `null` when the token is
 * not a flag.
 *
 * A token is treated as a flag only when it begins with `-` and the character
 * after the leading dash is neither a digit nor a decimal point — that rule
 * keeps negative numeric values such as `-1` or `-0.5` from being mistaken for
 * options. The returned name is the portion before the first `=`, so
 * `--ctx-size=4096` yields `--ctx-size`.
 *
 * A bare `-` or `--` is not a flag either, and that check is applied to the
 * name *after* the `=value` part is removed, so `--=x` and `-=x` are also
 * rejected instead of yielding a dash-only "name".
 *
 * @param token - One argv token.
 * @returns The flag name without any inline value, or `null` for non-flags.
 */
function parseFlag(token: string): string | null {
  if (!token.startsWith('-')) return null;

  // Split off an inline `=value` first so the dash-only check sees the name.
  const eq = token.indexOf('=');
  const name = eq === -1 ? token : token.slice(0, eq);
  if (name === '-' || name === '--') return null;

  // `name` has at least two characters here, so charAt(1) is never empty.
  const second = name.charAt(1);
  if (DIGIT.test(second) || second === '.') return null;

  return name;
}

// --- Public API ----------------------------------------------------------

/**
 * Validate a sequence of extra llama-server args, rejecting any that name a
 * BooCode-managed flag.
 *
 * Only tokens that parse as flags are checked; value tokens are passed
 * through untouched. The inline `--flag=value` form is checked by its flag
 * name.
 *
 * @param args - Extra argv tokens. Each entry is coerced with `String()`. A
 *   missing (falsy) argument yields an empty array.
 * @returns The args materialised as a new `string[]`, in input order.
 * @throws Error on the first token whose flag name is in the managed set.
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  const result: string[] = [];
  if (!args) return result;

  for (const entry of args) {
    const token = String(entry);
    const flag = parseFlag(token);
    if (flag !== null && MANAGED_FLAGS.has(flag)) {
      throw new Error(
        `llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
      );
    }
    result.push(token);
  }

  return result;
}

/**
 * True when `flag` is a BooCode-managed flag that callers may not override.
 *
 * The lookup is an exact match against the managed set, so pass the bare flag
 * name (e.g. `--model`), not a `--flag=value` token.
 */
export function isManagedFlag(flag: string): boolean {
  return MANAGED_FLAGS.has(flag);
}

// --- Shadowing flags -----------------------------------------------------

// Flags below are legal for an agent to pass, but each shadows a setting
// BooCode applies itself. They are categorised so a caller can opt out of
// stripping any one category.

/** Context-size flags. */
const SHADOW_CONTEXT: readonly string[] = ['-c', '--ctx-size'];

// Intentionally empty: cache-type flags (e.g. `--cache-type-k`) are not
// stripped, so agents can opt into KV cache quantization. It is treated as a
// high-value feature, not a safety concern.
const SHADOW_CACHE: readonly string[] = [];

// Intentionally empty: ngram speculative decoding is a performance feature
// agents should be able to enable, so no spec flags are stripped.
const SHADOW_SPEC: readonly string[] = [];

/** Chat-template flags, including the `--jinja` / `--no-jinja` switches. */
const SHADOW_TEMPLATE: readonly string[] = [
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
];

// Shadowing flags that take no value — a boolean switch — so the stripper must
// not also drop the following token.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
  '--jinja',
  '--no-jinja',
]);

/**
 * Per-category switches for `stripShadowingFlags`. A category is stripped
 * unless its option is explicitly `false`; omitting an option (or passing
 * `true`) leaves stripping enabled.
 */
export interface StripOptions {
  /** Strip context-size flags (`-c`, `--ctx-size`). */
  stripContext?: boolean;
  /** Strip cache-type flags. That category is currently empty, so this has no effect. */
  stripCache?: boolean;
  /** Strip speculative-decoding flags. That category is currently empty, so this has no effect. */
  stripSpec?: boolean;
  /** Strip chat-template flags (`--chat-template*`, `--jinja`, `--no-jinja`). */
  stripTemplate?: boolean;
}

/**
 * Remove shadowing flags (and their values) from an argv sequence.
 *
 * Each category is stripped by default; pass the matching `strip*: false`
 * option to retain that category. The cache and spec categories are currently
 * empty lists, so they contribute no flags either way.
 *
 * When a stripped flag carries its value as a separate following token
 * (e.g. `-c 4096`), that token is removed too; the `--flag=value` and
 * boolean-switch forms consume only the single token. A following token counts
 * as a value only when it does not itself parse as a flag, so negative numbers
 * such as `-1` are treated as values.
 *
 * @param args - The argv tokens to filter; each entry is coerced with `String()`.
 * @param opts - Optional per-category switches.
 * @returns A new array holding the tokens that were kept, in input order.
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  const targets = new Set<string>();
  if (opts?.stripContext !== false) for (const f of SHADOW_CONTEXT) targets.add(f);
  if (opts?.stripCache !== false) for (const f of SHADOW_CACHE) targets.add(f);
  if (opts?.stripSpec !== false) for (const f of SHADOW_SPEC) targets.add(f);
  if (opts?.stripTemplate !== false) for (const f of SHADOW_TEMPLATE) targets.add(f);

  const kept: string[] = [];
  // True right after dropping a flag that expects a separate value token.
  let awaitingValue = false;

  for (const entry of args) {
    const token = String(entry);
    const flag = parseFlag(token);

    if (awaitingValue) {
      awaitingValue = false;
      // A non-flag token directly after a stripped flag is that flag's value.
      if (flag === null) continue;
      // Otherwise the stripped flag had no value; handle this token normally.
    }

    // Not a targeted shadow flag — keep it verbatim.
    if (flag === null || !targets.has(flag)) {
      kept.push(token);
      continue;
    }

    // Targeted: drop it. Boolean switches and the inline `=value` form carry
    // no separate value token, so only the remaining case looks ahead.
    awaitingValue = !VALUELESS_SHADOW_FLAGS.has(flag) && !token.includes('=');
  }

  return kept;
}