Batch 1 — tool-call-parser.ts: replaces xml-parser.ts with a port of
Unsloth's tool_call_parser.py. Adds balanced-brace JSON scanner,
single-param fast path, hasToolSignal/stripToolMarkup/parseToolCallsFromText
exports, and stream-finalization stripping at all three final-write sites
(error-handler, finalizeCompletion, executeToolPhase). Anthropic <invoke>
shape preserved. 75+12 tests.
Batch 2 — web/html-to-md.ts: parse5 tree-walking HTML-to-Markdown converter
ported from Unsloth's _html_to_md.py. Replaces web_fetch's regex stripHtml
with structured markdown output (headings, links, lists, tables, code blocks,
blockquotes, entity decoding). 29 tests.
Batch 3 — llama-args-validator.ts: port of llama_server_args.py deny-list
validator. Wired into AGENTS.md frontmatter parser — llama_extra_args field
validated at load time, rejects managed flags (model identity, networking,
auth/TLS, server UI). No runtime consumer yet (llama-swap boundary). 76 tests.
All three files carry SPDX-License-Identifier: AGPL-3.0-only headers.
LICENSE flipped to AGPL-3.0-only in prior commit (a938cf1).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
143 lines
4.3 KiB
TypeScript
143 lines
4.3 KiB
TypeScript
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/llama_server_args.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py

// Each group is the full set of aliases (short + long) for one hard-denied
// flag, taken from the llama-server README. Flags NOT in this list pass
// through and override auto-set values via llama.cpp's last-wins CLI parsing.
const DENYLIST_GROUPS: ReadonlyArray<ReadonlySet<string>> = [
  // Model identity
  new Set(['-m', '--model']),
  new Set(['-mu', '--model-url']),
  new Set(['-dr', '--docker-repo']),
  new Set(['-hf', '-hfr', '--hf-repo']),
  new Set(['-hff', '--hf-file']),
  new Set(['-hfv', '-hfrv', '--hf-repo-v']),
  new Set(['-hffv', '--hf-file-v']),
  new Set(['-hft', '--hf-token']),
  new Set(['-mm', '--mmproj']),
  new Set(['-mmu', '--mmproj-url']),
  // Networking
  new Set(['--host']),
  new Set(['--port']),
  new Set(['--path']),
  new Set(['--api-prefix']),
  new Set(['--reuse-port']),
  // Auth / TLS
  new Set(['--api-key']),
  new Set(['--api-key-file']),
  new Set(['--ssl-key-file']),
  new Set(['--ssl-cert-file']),
  // Single-model server / UI
  new Set(['--webui', '--no-webui']),
  new Set(['--ui', '--no-ui']),
  new Set(['--ui-config']),
  new Set(['--ui-config-file']),
  new Set(['--ui-mcp-proxy', '--no-ui-mcp-proxy']),
  new Set(['--models-dir']),
  new Set(['--models-preset']),
  new Set(['--models-max']),
  new Set(['--models-autoload', '--no-models-autoload']),
];

// Flattened view of every alias in DENYLIST_GROUPS, for O(1) membership checks.
const DENYLIST: ReadonlySet<string> = new Set(
  DENYLIST_GROUPS.flatMap((group) => [...group]),
);

/**
 * Extracts the flag name from a single CLI token.
 *
 * @param token - One argv-style token, e.g. `--ctx-size`, `--port=8080`, `-1`.
 * @returns The part of the token before the first `=`, or `null` when the
 *   token is not a flag: it does not start with `-`, it is exactly `-` or
 *   `--`, or its second character is a digit or `.` (a negative number such
 *   as `-1` or `-.5`).
 */
function flagName(token: string): string | null {
  if (!token.startsWith('-') || token === '-' || token === '--') return null;

  // Negative numeric values (`-1`, `-.5`) are values, not flags.
  const second = token.charAt(1);
  if (second === '.' || (second >= '0' && second <= '9')) return null;

  // `--flag=value` form: only the part before the first `=` is the name.
  const eq = token.indexOf('=');
  return eq === -1 ? token : token.slice(0, eq);
}

/**
 * Validates user-supplied llama-server extra args against the deny-list.
 *
 * Every token is checked independently: it is stringified, reduced to its
 * flag name (the part before any `=`), and rejected if that name is in
 * `DENYLIST`. Tokens that are not flags (plain values, negative numbers,
 * `-`, `--`) are passed through unchanged.
 *
 * @param args - Tokens to validate; `undefined`/`null` yields an empty array.
 * @returns A new array holding the stringified tokens in their original order.
 * @throws Error on the first token whose flag name is a managed flag.
 */
export function validateExtraArgs(args?: Iterable<string>): string[] {
  if (!args) return [];

  const accepted: string[] = [];
  for (const raw of args) {
    // Coerce defensively: callers may hand over values parsed from config.
    const token = String(raw);
    const flag = flagName(token);
    if (flag !== null && DENYLIST.has(flag)) {
      throw new Error(
        `llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
      );
    }
    accepted.push(token);
  }
  return accepted;
}

/**
 * Reports whether `flag` is one of the hard-denied (managed) llama-server flags.
 *
 * The lookup is an exact match against `DENYLIST`; the argument is not
 * normalized, so a `--flag=value` token does not match its bare flag name.
 *
 * @param flag - A bare flag name such as `--model` or `-m`.
 * @returns `true` when the flag is in the deny-list.
 */
export function isManagedFlag(flag: string): boolean {
  return DENYLIST.has(flag);
}

// Shadowing flag groups: pass-through flags that shadow first-class settings.

// Context-size flags.
const CONTEXT_FLAGS: ReadonlySet<string> = new Set(['-c', '--ctx-size']);

// KV-cache type flags (K and V, short + long forms).
const CACHE_FLAGS: ReadonlySet<string> = new Set([
  '-ctk',
  '--cache-type-k',
  '-ctv',
  '--cache-type-v',
]);

// Speculative-decoding / draft flags.
const SPEC_FLAGS: ReadonlySet<string> = new Set([
  '--spec-default',
  '--spec-type',
  '--spec-ngram-size-n',
  '--spec-ngram-size',
  '--draft-min',
  '--draft-max',
  '--spec-draft-n-max',
  '--spec-draft-n-min',
  '--spec-draft-p-min',
  '--spec-draft-p-split',
  '--spec-ngram-mod-n-match',
  '--spec-ngram-mod-n-min',
  '--spec-ngram-mod-n-max',
]);

// Chat-template flags.
const TEMPLATE_FLAGS: ReadonlySet<string> = new Set([
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
]);

// Shadowing flags that take no value: stripping them never consumes the
// following token.
const BOOLEAN_SHADOWING_FLAGS: ReadonlySet<string> = new Set([
  '--spec-default',
  '--jinja',
  '--no-jinja',
]);

/**
 * Selects which shadowing flag groups `stripShadowingFlags` removes.
 *
 * Each option defaults to "strip": a group is kept only when its option is
 * explicitly set to `false`.
 */
export interface StripOptions {
  /** Strip context-size flags (`-c`, `--ctx-size`). */
  stripContext?: boolean;
  /** Strip KV-cache type flags (`-ctk`, `-ctv` and their long forms). */
  stripCache?: boolean;
  /** Strip speculative-decoding / draft flags. */
  stripSpec?: boolean;
  /** Strip chat-template flags (including `--jinja` / `--no-jinja`). */
  stripTemplate?: boolean;
}

/**
 * Removes pass-through flags that shadow first-class settings, together with
 * their values.
 *
 * A shadowing flag is dropped in one of three shapes:
 * - boolean flag (`--jinja`) or inline value (`--ctx-size=4096`): one token;
 * - flag followed by a non-flag token (`-c 4096`, `-c -1`): both tokens;
 * - flag followed by another flag, or at the end of the list: one token.
 *
 * @param args - Tokens to filter; each is stringified before inspection.
 * @param opts - Which groups to strip. Every group is stripped unless its
 *   option is explicitly `false`.
 * @returns A new array with the remaining tokens in their original order.
 */
export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
): string[] {
  // Build the active set of flags to strip from the enabled groups.
  const shadowing = new Set<string>([
    ...(opts?.stripContext !== false ? CONTEXT_FLAGS : []),
    ...(opts?.stripCache !== false ? CACHE_FLAGS : []),
    ...(opts?.stripSpec !== false ? SPEC_FLAGS : []),
    ...(opts?.stripTemplate !== false ? TEMPLATE_FLAGS : []),
  ]);

  const tokens = [...args].map(String);
  const kept: string[] = [];

  let i = 0;
  while (i < tokens.length) {
    const tok = tokens[i]!;
    const flag = flagName(tok);

    if (flag === null || !shadowing.has(flag)) {
      kept.push(tok);
      i += 1;
      continue;
    }

    // A value flag written without `=` also owns the next token, provided
    // that token is a value (not another flag).
    const next = tokens[i + 1];
    const consumesNext =
      !BOOLEAN_SHADOWING_FLAGS.has(flag) &&
      !tok.includes('=') &&
      next !== undefined &&
      flagName(next) === null;

    i += consumesNext ? 2 : 1;
  }
  return kept;
}