// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.

// Ported from studio/backend/core/inference/llama_server_args.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py

// Each group is the full set of aliases (short + long) for one hard-denied
// flag, taken from the llama-server README. Flags NOT in this list pass
// through and override auto-set values via llama.cpp's last-wins CLI parsing.
const DENYLIST_GROUPS: ReadonlyArray<ReadonlySet<string>> = [
  // Model identity
  new Set(['-m', '--model']),
  new Set(['-mu', '--model-url']),
  new Set(['-dr', '--docker-repo']),
  new Set(['-hf', '-hfr', '--hf-repo']),
  new Set(['-hff', '--hf-file']),
  new Set(['-hfv', '-hfrv', '--hf-repo-v']),
  new Set(['-hffv', '--hf-file-v']),
  new Set(['-hft', '--hf-token']),
  new Set(['-mm', '--mmproj']),
  new Set(['-mmu', '--mmproj-url']),
  // Networking
  new Set(['--host']),
  new Set(['--port']),
  new Set(['--path']),
  new Set(['--api-prefix']),
  new Set(['--reuse-port']),
  // Auth / TLS
  new Set(['--api-key']),
  new Set(['--api-key-file']),
  new Set(['--ssl-key-file']),
  new Set(['--ssl-cert-file']),
  // Single-model server / UI
  new Set(['--webui', '--no-webui']),
  new Set(['--ui', '--no-ui']),
  new Set(['--ui-config']),
  new Set(['--ui-config-file']),
  new Set(['--ui-mcp-proxy', '--no-ui-mcp-proxy']),
  new Set(['--models-dir']),
  new Set(['--models-preset']),
  new Set(['--models-max']),
  new Set(['--models-autoload', '--no-models-autoload']),
];

// Flattened view of every denied alias, for O(1) membership checks.
const DENYLIST: ReadonlySet<string> = new Set(
  DENYLIST_GROUPS.flatMap((group) => [...group]),
);

// Matches a token whose character right after the leading '-' is a digit or
// a dot (e.g. "-1", "-.5"); such tokens are treated as values, not flags.
const NUMERIC_LIKE = /^-[\d.]/;

/**
 * Extracts the flag name from a single CLI token.
 *
 * @param token - One command-line token.
 * @returns The text before the first `=` when the token looks like a flag,
 *   or `null` when it does not: tokens not starting with `-`, the bare
 *   tokens `-` and `--`, and tokens whose second character is a digit or `.`.
 */
function flagName(token: string): string | null {
  if (!token.startsWith('-') || token === '-' || token === '--') return null;
  if (NUMERIC_LIKE.test(token)) return null;
  const eq = token.indexOf('=');
  return eq === -1 ? token : token.slice(0, eq);
}

/**
 * Checks user-supplied extra arguments against the denylist.
 *
 * @param args - Extra argument tokens; each is coerced with `String()`.
 *   When omitted (or otherwise falsy) an empty array is returned.
 * @returns The tokens as strings, in their original order.
 * @throws Error if any token's flag name is on the denylist.
 */
export function validateExtraArgs(args?: Iterable<unknown>): string[] {
  if (!args) return [];
  const accepted: string[] = [];
  for (const raw of args) {
    const token = String(raw);
    const flag = flagName(token);
    if (flag !== null && DENYLIST.has(flag)) {
      throw new Error(
        `llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
      );
    }
    accepted.push(token);
  }
  return accepted;
}

/**
 * Reports whether `flag` is one of the hard-denied flag aliases.
 *
 * @param flag - Exact flag spelling to look up (no `=value` suffix is stripped).
 */
export function isManagedFlag(flag: string): boolean {
  return DENYLIST.has(flag);
}

// Shadowing flag groups: pass-through flags that shadow first-class settings.
const CONTEXT_FLAGS: ReadonlySet<string> = new Set(['-c', '--ctx-size']);
const CACHE_FLAGS: ReadonlySet<string> = new Set([
  '-ctk',
  '--cache-type-k',
  '-ctv',
  '--cache-type-v',
]);
const SPEC_FLAGS: ReadonlySet<string> = new Set([
  '--spec-default',
  '--spec-type',
  '--spec-ngram-size-n',
  '--spec-ngram-size',
  '--draft-min',
  '--draft-max',
  '--spec-draft-n-max',
  '--spec-draft-n-min',
  '--spec-draft-p-min',
  '--spec-draft-p-split',
  '--spec-ngram-mod-n-match',
  '--spec-ngram-mod-n-min',
  '--spec-ngram-mod-n-max',
]);
const TEMPLATE_FLAGS: ReadonlySet<string> = new Set([
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
]);
// Shadowing flags that are removed on their own: the token following them is
// never consumed as their value.
const BOOLEAN_SHADOWING_FLAGS: ReadonlySet<string> = new Set([
  '--spec-default',
  '--jinja',
  '--no-jinja',
]);

/**
 * Selects which shadowing flag groups `stripShadowingFlags` removes.
 * A group is stripped unless its option is explicitly `false`.
 */
export interface StripOptions {
  stripContext?: boolean;
  stripCache?: boolean;
  stripSpec?: boolean;
  stripTemplate?: boolean;
}

/**
 * Removes shadowing flags (and their values) from an argument list.
 *
 * For each token whose flag name is in an enabled group:
 * - boolean flags and `--flag=value` tokens are dropped on their own;
 * - otherwise the next token is dropped too, but only when it does not
 *   itself look like a flag.
 *
 * @param args - Argument tokens; each is coerced with `String()`.
 * @param opts - Per-group switches; every group is stripped by default.
 * @returns A new array with the remaining tokens in their original order.
 */
export function stripShadowingFlags(
  args: Iterable<unknown>,
  opts?: StripOptions,
): string[] {
  const groups: ReadonlyArray<
    readonly [boolean | undefined, ReadonlySet<string>]
  > = [
    [opts?.stripContext, CONTEXT_FLAGS],
    [opts?.stripCache, CACHE_FLAGS],
    [opts?.stripSpec, SPEC_FLAGS],
    [opts?.stripTemplate, TEMPLATE_FLAGS],
  ];
  const shadowing = new Set<string>();
  for (const [enabled, flags] of groups) {
    if (enabled === false) continue;
    for (const f of flags) shadowing.add(f);
  }

  const tokens = Array.from(args, (a) => String(a));
  const kept: string[] = [];
  // Set when the previous token was a stripped flag that owns this token.
  let skipValue = false;
  for (const [i, tok] of tokens.entries()) {
    if (skipValue) {
      skipValue = false;
      continue;
    }
    const flag = flagName(tok);
    if (flag === null || !shadowing.has(flag)) {
      kept.push(tok);
      continue;
    }
    // Self-contained forms: nothing after this token belongs to it.
    if (BOOLEAN_SHADOWING_FLAGS.has(flag) || tok.includes('=')) continue;
    // Separate-value form: consume the next token only if it is a value.
    const next = tokens[i + 1];
    skipValue = next !== undefined && flagName(next) === null;
  }
  return kept;
}