// boocode/apps/server/src/services/inference/llama-args-validator.ts

// Guards against agent-supplied llama-server CLI flags that would clash with
// values BooCode sets itself. Two concerns live here:
//
//   1. A hard denylist of flags that BooCode owns outright (model selection,
//      the listening socket, credentials, the bundled web UI). Passing any of
//      these is a configuration error and is rejected loudly.
//
//   2. A "shadowing" set of flags that are legal to pass but, because of
//      llama.cpp's last-wins argument parsing, would override a first-class
//      BooCode setting. These are silently removed from the auto-generated
//      argv so the agent's explicit choice takes precedence without leaving a
//      duplicate flag behind.
//
// All flag spellings below are the public llama-server option names (short and
// long aliases) documented in its --help output.

// --- Hard denylist -------------------------------------------------------

// The denylist is written as themed buckets so it stays readable. Inside a
// bucket, each inner array groups the spellings the file treats as one
// underlying option (short + long aliases, or a switch and its `--no-` twin).
// Every spelling is collected into the single MANAGED_FLAGS lookup set below
// when the module loads.

// Where the model (and its multimodal projector) is loaded from.
const MODEL_SOURCE_FLAGS = [
  ['-m', '--model'],
  ['-mu', '--model-url'],
  ['-dr', '--docker-repo'],
  ['-hf', '-hfr', '--hf-repo'],
  ['-hff', '--hf-file'],
  ['-hfv', '-hfrv', '--hf-repo-v'],
  ['-hffv', '--hf-file-v'],
  ['-hft', '--hf-token'],
  ['-mm', '--mmproj'],
  ['-mmu', '--mmproj-url'],
];

// The listening socket and URL layout.
const LISTEN_FLAGS = [
  ['--host'],
  ['--port'],
  ['--path'],
  ['--api-prefix'],
  ['--reuse-port'],
];

// API keys and TLS material.
const CREDENTIAL_FLAGS = [
  ['--api-key'],
  ['--api-key-file'],
  ['--ssl-key-file'],
  ['--ssl-cert-file'],
];

// The bundled web UI, plus the `--models-*` options grouped with it.
const WEBUI_FLAGS = [
  ['--webui', '--no-webui'],
  ['--ui', '--no-ui'],
  ['--ui-config'],
  ['--ui-config-file'],
  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
  ['--models-dir'],
  ['--models-preset'],
  ['--models-max'],
  ['--models-autoload', '--no-models-autoload'],
];

// Flat set of every managed spelling, filled bucket by bucket in the order
// the buckets are declared above.
const MANAGED_FLAGS: ReadonlySet<string> = (() => {
  const collected = new Set<string>();
  const buckets = [
    MODEL_SOURCE_FLAGS,
    LISTEN_FLAGS,
    CREDENTIAL_FLAGS,
    WEBUI_FLAGS,
  ];
  for (const bucket of buckets) {
    for (const aliases of bucket) {
      for (const spelling of aliases) collected.add(spelling);
    }
  }
  return collected;
})();

// --- Token parsing -------------------------------------------------------

// Matches a single ASCII digit; used to recognise negative numeric values.
const DIGIT = /^[0-9]$/;

/**
 * Extract the canonical flag name from a single argv token, or `null` when
 * the token is not a flag.
 *
 * A token is treated as a flag only when it begins with `-` and the character
 * after the leading dash is neither a digit nor a decimal point — that rule
 * keeps negative numeric values such as `-1` or `-0.5` from being mistaken for
 * options. A bare `-` or `--` is not a flag either.
 *
 * The returned name is the portion before any `=`, so `--ctx-size=4096` yields
 * `--ctx-size`. For long (`--`) options, underscores in the name are rewritten
 * to dashes: llama.cpp's argument parser applies the same rewrite before
 * looking an option up, so `--api_key` and `--api-key` select the same option
 * and must compare equal here. Short (`-x`) options are returned untouched.
 *
 * @param token A single argv entry.
 * @returns The normalised flag name, or `null` for non-flag tokens.
 */
function parseFlag(token: string): string | null {
  if (!token.startsWith('-')) return null;
  if (token === '-' || token === '--') return null;

  // Safe: the token starts with '-' and is not the bare '-', so length >= 2.
  const second = token[1]!;
  if (DIGIT.test(second) || second === '.') return null;

  const eq = token.indexOf('=');
  const name = eq === -1 ? token : token.slice(0, eq);

  // Only the name is normalised; anything after '=' is a value and is
  // already excluded by the slice above.
  return name.startsWith('--') ? name.split('_').join('-') : name;
}

// --- Public API ----------------------------------------------------------

/**
 * Check caller-supplied extra llama-server args against the managed-flag
 * denylist and return them as a fresh `string[]`.
 *
 * Every entry is coerced with `String()` and copied through in order. The
 * first entry whose flag name is in the managed set aborts the whole call.
 *
 * @param args Extra argv entries; when omitted (or otherwise falsy) the result
 *   is an empty array.
 * @returns A new array holding the stringified entries.
 * @throws Error naming the offending flag when an entry selects a managed flag.
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];

  return Array.from(args, (entry) => {
    const text = String(entry);
    const name = parseFlag(text);
    const allowed = name === null || !MANAGED_FLAGS.has(name);
    if (!allowed) {
      throw new Error(
        `llama-server flag '${name}' is managed and cannot be passed as an extra arg`,
      );
    }
    return text;
  });
}

/**
 * Report whether `flag` is one of the BooCode-managed flag spellings that
 * callers are not allowed to override.
 *
 * The lookup is an exact match against the managed set: the argument is used
 * as given, with no `=value` trimming.
 */
export function isManagedFlag(flag: string): boolean {
  const managed = MANAGED_FLAGS.has(flag);
  return managed;
}

// --- Shadowing flags -----------------------------------------------------

// An agent may legitimately pass any flag listed here, yet each one shadows a
// setting BooCode applies itself. The lists are kept per category so callers
// can opt a single category out of stripping.

// Context window size.
const SHADOW_CONTEXT: string[] = ['-c', '--ctx-size'];

// Deliberately empty: cache-type flags stay available for agents to opt into
// (lift analysis rated them high-value features rather than safety concerns).
const SHADOW_CACHE: string[] = [];

// Deliberately empty: ngram speculative decoding is a performance feature
// agents should be free to switch on.
const SHADOW_SPEC: string[] = [];

// Chat template selection and the jinja toggle.
const SHADOW_TEMPLATE: string[] = [
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
];

// Boolean switches among the shadowing flags. They take no value, so when one
// is stripped the token that follows it must be left in place.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set<string>()
  .add('--jinja')
  .add('--no-jinja');

/**
 * Per-category switches for `stripShadowingFlags`. A category is stripped
 * unless its switch is explicitly `false`; leaving a switch unset (or `true`)
 * strips it.
 */
export interface StripOptions {
  /** `false` keeps the context-size flags (`-c`, `--ctx-size`). */
  stripContext?: boolean;
  /** `false` keeps the cache category; its flag list is currently empty. */
  stripCache?: boolean;
  /** `false` keeps the speculative-decoding category; its flag list is currently empty. */
  stripSpec?: boolean;
  /** `false` keeps the chat-template and jinja flags. */
  stripTemplate?: boolean;
}

/**
 * Return a copy of `args` with the shadowing flags, and the values that
 * belong to them, taken out.
 *
 * All categories are removed unless the corresponding `strip*` option is
 * explicitly `false`. A removed flag also takes its value with it when that
 * value is the next token (e.g. `-c 4096`) and that token is not itself a
 * flag. Boolean switches and the inline `--flag=value` form only ever remove
 * the one token.
 *
 * @param args Argv entries; each is coerced with `String()`.
 * @param opts Optional per-category opt-outs.
 * @returns A new array of the surviving tokens, in their original order.
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  // Pair each category's switch with its flag list, then collect the flags of
  // every category that has not been explicitly switched off.
  const categories: Array<[boolean | undefined, readonly string[]]> = [
    [opts?.stripContext, SHADOW_CONTEXT],
    [opts?.stripCache, SHADOW_CACHE],
    [opts?.stripSpec, SHADOW_SPEC],
    [opts?.stripTemplate, SHADOW_TEMPLATE],
  ];
  const doomed = new Set<string>();
  for (const [enabled, names] of categories) {
    if (enabled === false) continue;
    for (const name of names) doomed.add(name);
  }

  const argv = Array.from(args, String);
  const survivors: string[] = [];
  let cursor = 0;

  while (cursor < argv.length) {
    const current = argv[cursor]!;
    cursor += 1; // from here on, `cursor` points at the token after `current`

    const name = parseFlag(current);
    if (name === null || !doomed.has(name)) {
      // Plain value or a flag we are not removing: pass it through untouched.
      survivors.push(current);
      continue;
    }

    // `current` is dropped. A boolean switch or an inline `=value` has no
    // separate value token, so nothing further needs skipping.
    if (VALUELESS_SHADOW_FLAGS.has(name) || current.includes('=')) continue;

    // Otherwise the next token is this flag's value unless it is a flag itself.
    const following = argv[cursor];
    if (following !== undefined && parseFlag(following) === null) {
      cursor += 1;
    }
  }

  return survivors;
}