// boocode/apps/server/src/services/inference/llama-args-validator.ts

// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/llama_server_args.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py

// Each group is the full set of aliases (short + long) for one hard-denied
// flag, taken from the llama-server README. Flags NOT in this list pass
// through and override auto-set values via llama.cpp's last-wins CLI parsing.
const DENYLIST_GROUPS: ReadonlyArray<ReadonlySet<string>> = [
  // Model identity
  new Set(['-m', '--model']),
  new Set(['-mu', '--model-url']),
  new Set(['-dr', '--docker-repo']),
  new Set(['-hf', '-hfr', '--hf-repo']),
  new Set(['-hff', '--hf-file']),
  new Set(['-hfv', '-hfrv', '--hf-repo-v']),
  new Set(['-hffv', '--hf-file-v']),
  new Set(['-hft', '--hf-token']),
  new Set(['-mm', '--mmproj']),
  new Set(['-mmu', '--mmproj-url']),
  // Networking
  new Set(['--host']),
  new Set(['--port']),
  new Set(['--path']),
  new Set(['--api-prefix']),
  new Set(['--reuse-port']),
  // Auth / TLS
  new Set(['--api-key']),
  new Set(['--api-key-file']),
  new Set(['--ssl-key-file']),
  new Set(['--ssl-cert-file']),
  // Single-model server / UI
  new Set(['--webui', '--no-webui']),
  new Set(['--ui', '--no-ui']),
  new Set(['--ui-config']),
  new Set(['--ui-config-file']),
  new Set(['--ui-mcp-proxy', '--no-ui-mcp-proxy']),
  new Set(['--models-dir']),
  new Set(['--models-preset']),
  new Set(['--models-max']),
  new Set(['--models-autoload', '--no-models-autoload']),
];

const DENYLIST: ReadonlySet<string> = new Set(
  DENYLIST_GROUPS.flatMap((g) => [...g]),
);

function flagName(token: string): string | null {
  if (!token.startsWith('-') || token === '-' || token === '--') return null;
  if (token.length >= 2 && (token[1]!.match(/\d/) || token[1] === '.')) return null;
  return token.split('=', 1)[0]!;
}

export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];
  const out: string[] = [];
  for (const raw of args) {
    const token = String(raw);
    const flag = flagName(token);
    if (flag !== null && DENYLIST.has(flag)) {
      throw new Error(
        `llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
      );
    }
    out.push(token);
  }
  return out;
}

export function isManagedFlag(flag: string): boolean {
  return DENYLIST.has(flag);
}

// Shadowing flag groups: pass-through flags that shadow first-class settings.
const CONTEXT_FLAGS = new Set(['-c', '--ctx-size']);
const CACHE_FLAGS = new Set(['-ctk', '--cache-type-k', '-ctv', '--cache-type-v']);
const SPEC_FLAGS = new Set([
  '--spec-default',
  '--spec-type',
  '--spec-ngram-size-n',
  '--spec-ngram-size',
  '--draft-min',
  '--draft-max',
  '--spec-draft-n-max',
  '--spec-draft-n-min',
  '--spec-draft-p-min',
  '--spec-draft-p-split',
  '--spec-ngram-mod-n-match',
  '--spec-ngram-mod-n-min',
  '--spec-ngram-mod-n-max',
]);
const TEMPLATE_FLAGS = new Set([
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
]);

const BOOLEAN_SHADOWING_FLAGS = new Set([
  '--spec-default', '--jinja', '--no-jinja',
]);

export interface StripOptions {
  stripContext?: boolean;
  stripCache?: boolean;
  stripSpec?: boolean;
  stripTemplate?: boolean;
}

export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  const shadowing = new Set<string>();
  if (opts?.stripContext !== false) for (const f of CONTEXT_FLAGS) shadowing.add(f);
  if (opts?.stripCache !== false) for (const f of CACHE_FLAGS) shadowing.add(f);
  if (opts?.stripSpec !== false) for (const f of SPEC_FLAGS) shadowing.add(f);
  if (opts?.stripTemplate !== false) for (const f of TEMPLATE_FLAGS) shadowing.add(f);

  const tokens = [...args].map(String);
  const out: string[] = [];
  let i = 0;
  const n = tokens.length;
  while (i < n) {
    const tok = tokens[i]!;
    const flag = flagName(tok);
    if (flag === null || !shadowing.has(flag)) {
      out.push(tok);
      i++;
      continue;
    }
    if (BOOLEAN_SHADOWING_FLAGS.has(flag) || tok.includes('=')) {
      i++;
    } else if (i + 1 < n && flagName(tokens[i + 1]!) === null) {
      i += 2;
    } else {
      i++;
    }
  }
  return out;
}