feat: relicense AGPL-3.0 → MIT (v2.7.0)

Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5
package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep
  extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT;
  AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser
was dead code). Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 08:16:03 +00:00
parent 9c1ddcaa7c
commit a8bfde8f8d
18 changed files with 499 additions and 1566 deletions

View File

@@ -1,80 +1,139 @@
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/llama_server_args.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py
// Guards against agent-supplied llama-server CLI flags that would clash with
// values BooCode sets itself. Two concerns live here:
//
// 1. A hard denylist of flags that BooCode owns outright (model selection,
// the listening socket, credentials, the bundled web UI). Passing any of
// these is a configuration error and is rejected loudly.
//
// 2. A "shadowing" set of flags that are legal to pass but, because of
// llama.cpp's last-wins argument parsing, would override a first-class
// BooCode setting. These are silently removed from the auto-generated
// argv so the agent's explicit choice takes precedence without leaving a
// duplicate flag behind.
//
// All flag spellings below are the public llama-server option names (short and
// long aliases) documented in its --help output.
// Each group is the full set of aliases (short + long) for one hard-denied
// flag, taken from the llama-server README. Flags NOT in this list pass
// through and override auto-set values via llama.cpp's last-wins CLI parsing.
const DENYLIST_GROUPS: ReadonlyArray<ReadonlySet<string>> = [
// Model identity
new Set(['-m', '--model']),
new Set(['-mu', '--model-url']),
new Set(['-dr', '--docker-repo']),
new Set(['-hf', '-hfr', '--hf-repo']),
new Set(['-hff', '--hf-file']),
new Set(['-hfv', '-hfrv', '--hf-repo-v']),
new Set(['-hffv', '--hf-file-v']),
new Set(['-hft', '--hf-token']),
new Set(['-mm', '--mmproj']),
new Set(['-mmu', '--mmproj-url']),
// Networking
new Set(['--host']),
new Set(['--port']),
new Set(['--path']),
new Set(['--api-prefix']),
new Set(['--reuse-port']),
// Auth / TLS
new Set(['--api-key']),
new Set(['--api-key-file']),
new Set(['--ssl-key-file']),
new Set(['--ssl-cert-file']),
// Single-model server / UI
new Set(['--webui', '--no-webui']),
new Set(['--ui', '--no-ui']),
new Set(['--ui-config']),
new Set(['--ui-config-file']),
new Set(['--ui-mcp-proxy', '--no-ui-mcp-proxy']),
new Set(['--models-dir']),
new Set(['--models-preset']),
new Set(['--models-max']),
new Set(['--models-autoload', '--no-models-autoload']),
// --- Hard denylist -------------------------------------------------------
// Authored as named buckets purely for readability; every alias is folded
// into one flat lookup set at module load. Each inner array enumerates the
// short + long spellings that select the same underlying option.
const MODEL_SOURCE_FLAGS = [
['-m', '--model'],
['-mu', '--model-url'],
['-dr', '--docker-repo'],
['-hf', '-hfr', '--hf-repo'],
['-hff', '--hf-file'],
['-hfv', '-hfrv', '--hf-repo-v'],
['-hffv', '--hf-file-v'],
['-hft', '--hf-token'],
['-mm', '--mmproj'],
['-mmu', '--mmproj-url'],
];
const DENYLIST: ReadonlySet<string> = new Set(
DENYLIST_GROUPS.flatMap((g) => [...g]),
const LISTEN_FLAGS = [
['--host'],
['--port'],
['--path'],
['--api-prefix'],
['--reuse-port'],
];
const CREDENTIAL_FLAGS = [
['--api-key'],
['--api-key-file'],
['--ssl-key-file'],
['--ssl-cert-file'],
];
const WEBUI_FLAGS = [
['--webui', '--no-webui'],
['--ui', '--no-ui'],
['--ui-config'],
['--ui-config-file'],
['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
['--models-dir'],
['--models-preset'],
['--models-max'],
['--models-autoload', '--no-models-autoload'],
];
const MANAGED_FLAGS: ReadonlySet<string> = new Set(
[
...MODEL_SOURCE_FLAGS,
...LISTEN_FLAGS,
...CREDENTIAL_FLAGS,
...WEBUI_FLAGS,
].flat(),
);
function flagName(token: string): string | null {
if (!token.startsWith('-') || token === '-' || token === '--') return null;
if (token.length >= 2 && (token[1]!.match(/\d/) || token[1] === '.')) return null;
return token.split('=', 1)[0]!;
// --- Token parsing -------------------------------------------------------
const DIGIT = /^[0-9]$/;
/**
* Extract the flag name from a single argv token, or `null` when the token is
* not a flag.
*
* A token is treated as a flag only when it begins with `-` and the character
* after the leading dash is neither a digit nor a decimal point — that rule
* keeps negative numeric values such as `-1` or `-0.5` from being mistaken for
* options. A bare `-` or `--` is not a flag either. The returned name is the
* portion before any `=`, so `--ctx-size=4096` yields `--ctx-size`.
*/
function parseFlag(token: string): string | null {
if (!token.startsWith('-')) return null;
if (token === '-' || token === '--') return null;
const second = token[1]!;
if (DIGIT.test(second) || second === '.') return null;
const eq = token.indexOf('=');
return eq === -1 ? token : token.slice(0, eq);
}
// --- Public API ----------------------------------------------------------
/**
* Validate a sequence of extra llama-server args, rejecting any that name a
* BooCode-managed flag. Returns the args materialised as a string[] when they
* all pass.
*/
export function validateExtraArgs(args?: Iterable<string>): string[] {
if (!args) return [];
const out: string[] = [];
for (const raw of args) {
const token = String(raw);
const flag = flagName(token);
if (flag !== null && DENYLIST.has(flag)) {
const result: string[] = [];
if (!args) return result;
for (const entry of args) {
const token = String(entry);
const flag = parseFlag(token);
if (flag !== null && MANAGED_FLAGS.has(flag)) {
throw new Error(
`llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
);
}
out.push(token);
result.push(token);
}
return out;
return result;
}
/** True when `flag` is a BooCode-managed flag that callers may not override. */
export function isManagedFlag(flag: string): boolean {
return DENYLIST.has(flag);
return MANAGED_FLAGS.has(flag);
}
// Shadowing flag groups: pass-through flags that shadow first-class settings.
const CONTEXT_FLAGS = new Set(['-c', '--ctx-size']);
const CACHE_FLAGS = new Set(['-ctk', '--cache-type-k', '-ctv', '--cache-type-v']);
const SPEC_FLAGS = new Set([
// --- Shadowing flags -----------------------------------------------------
// Flags below are legal for an agent to pass, but each shadows a setting
// BooCode applies itself. They are categorised so a caller can opt out of
// stripping any one category.
const SHADOW_CONTEXT = ['-c', '--ctx-size'];
const SHADOW_CACHE = ['-ctk', '--cache-type-k', '-ctv', '--cache-type-v'];
const SHADOW_SPEC = [
'--spec-default',
'--spec-type',
'--spec-ngram-size-n',
@@ -88,17 +147,22 @@ const SPEC_FLAGS = new Set([
'--spec-ngram-mod-n-match',
'--spec-ngram-mod-n-min',
'--spec-ngram-mod-n-max',
]);
const TEMPLATE_FLAGS = new Set([
];
const SHADOW_TEMPLATE = [
'--chat-template',
'--chat-template-file',
'--chat-template-kwargs',
'--jinja',
'--no-jinja',
]);
];
const BOOLEAN_SHADOWING_FLAGS = new Set([
'--spec-default', '--jinja', '--no-jinja',
// Shadowing flags that take no value — a boolean switch — so the stripper must
// not also drop the following token.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
'--spec-default',
'--jinja',
'--no-jinja',
]);
export interface StripOptions {
@@ -108,35 +172,49 @@ export interface StripOptions {
stripTemplate?: boolean;
}
/**
* Remove shadowing flags (and their values) from an argv sequence.
*
* Each category is stripped by default; pass the matching `strip*: false`
* option to retain that category. When a stripped flag carries its value as a
* separate following token (e.g. `-c 4096`), that token is removed too; the
* `--flag=value` and boolean-switch forms consume only the single token.
*/
export function stripShadowingFlags(
args: Iterable<string>,
opts?: StripOptions,
): string[] {
const shadowing = new Set<string>();
if (opts?.stripContext !== false) for (const f of CONTEXT_FLAGS) shadowing.add(f);
if (opts?.stripCache !== false) for (const f of CACHE_FLAGS) shadowing.add(f);
if (opts?.stripSpec !== false) for (const f of SPEC_FLAGS) shadowing.add(f);
if (opts?.stripTemplate !== false) for (const f of TEMPLATE_FLAGS) shadowing.add(f);
const targets = new Set<string>();
if (opts?.stripContext !== false) for (const f of SHADOW_CONTEXT) targets.add(f);
if (opts?.stripCache !== false) for (const f of SHADOW_CACHE) targets.add(f);
if (opts?.stripSpec !== false) for (const f of SHADOW_SPEC) targets.add(f);
if (opts?.stripTemplate !== false) for (const f of SHADOW_TEMPLATE) targets.add(f);
const tokens = [...args].map(String);
const out: string[] = [];
let i = 0;
const n = tokens.length;
while (i < n) {
const tok = tokens[i]!;
const flag = flagName(tok);
if (flag === null || !shadowing.has(flag)) {
out.push(tok);
i++;
const tokens = Array.from(args, String);
const kept: string[] = [];
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i]!;
const flag = parseFlag(token);
// Not a targeted shadow flag — keep it verbatim.
if (flag === null || !targets.has(flag)) {
kept.push(token);
continue;
}
if (BOOLEAN_SHADOWING_FLAGS.has(flag) || tok.includes('=')) {
i++;
} else if (i + 1 < n && flagName(tokens[i + 1]!) === null) {
i += 2;
} else {
i++;
// Targeted: drop it. Decide whether the next token is its value and should
// be dropped along with it. Boolean switches and the inline `=value` form
// carry no separate value token.
const carriesInlineValue = token.includes('=');
const isBoolean = VALUELESS_SHADOW_FLAGS.has(flag);
const next = tokens[i + 1];
const nextIsValue = next !== undefined && parseFlag(next) === null;
if (!isBoolean && !carriesInlineValue && nextIsValue) {
i++; // also skip the value token
}
}
return out;
return kept;
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/tool_call_parser.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/tool_call_parser.py
// Streaming tool-call extraction for the qwen3.6 XML fallback path.
// `extractToolCallBlocks` is the incremental streaming scanner used by
// stream-phase.ts; `stripToolMarkup` removes tool-call wire markup from
// assistant prose (used by tool-phase.ts and error-handler.ts).
// ── Constants ────────────────────────────────────────────────────────────
@@ -10,34 +10,6 @@ export const XML_TOOL_CLOSE = '</tool_call>';
export const INVOKE_TOOL_OPEN = '<invoke';
export const INVOKE_TOOL_CLOSE = '</invoke>';
export const TOOL_XML_SIGNALS = [XML_TOOL_OPEN, '<function=', INVOKE_TOOL_OPEN] as const;
export const TOOL_ERROR_PREFIXES = [
'Error',
'Search failed',
'Execution error',
'Blocked:',
'Exit code',
'Failed to fetch',
'Failed to resolve',
'No query provided',
] as const;
export const DUPLICATE_CALL_NUDGE =
'You already made this exact call. Do not repeat the same tool ' +
'call. Try a different approach: fetch a URL from previous ' +
'results, use Python to process data you already have, or ' +
'provide your final answer now.';
export const TOOL_ERROR_NUDGE =
'\n\nThe tool call encountered an issue. Please try a different ' +
'approach or rephrase your request.';
export const BUDGET_EXHAUSTED_NUDGE =
'You have used all available tool calls. Based on everything you ' +
'have found so far, provide your final answer now. Do not call ' +
'any more tools.';
// ── Strip patterns ───────────────────────────────────────────────────────
const TOOL_CLOSED_PATS = [
@@ -53,7 +25,7 @@ const TOOL_ALL_PATS = [
/<invoke\s[^>]*>.*$/gs,
];
// ── Strip / signal ───────────────────────────────────────────────────────
// ── Strip ────────────────────────────────────────────────────────────────
export function stripToolMarkup(text: string, opts?: { final?: boolean }): string {
const pats = opts?.final ? TOOL_ALL_PATS : TOOL_CLOSED_PATS;
@@ -63,206 +35,6 @@ export function stripToolMarkup(text: string, opts?: { final?: boolean }): strin
return opts?.final ? text.trim() : text;
}
export function hasToolSignal(text: string): boolean {
return TOOL_XML_SIGNALS.some((s) => text.includes(s));
}
// ── parseToolCallsFromText (Unsloth port + Anthropic extension) ──────────
export interface OpenAiToolCall {
id: string;
type: 'function';
function: { name: string; arguments: string };
}
const TC_JSON_START_RE = /<tool_call>\s*\{/g;
const TC_FUNC_START_RE = /<function=(\w+)>\s*/g;
const TC_END_TAG_RE = /<\/tool_call>/;
const TC_FUNC_CLOSE_RE = /\s*<\/function>\s*$/;
const TC_PARAM_START_RE = /<parameter=(\w+)>\s*/g;
const TC_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;
const TC_INVOKE_START_RE = /<invoke\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
const TC_INVOKE_CLOSE_RE = /\s*<\/invoke>\s*$/;
const TC_INVOKE_PARAM_RE = /<parameter\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
const TC_INVOKE_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;
function scanBalancedBraces(content: string, start: number): number {
let depth = 0;
let i = start;
let inString = false;
while (i < content.length) {
const ch = content[i]!;
if (inString) {
if (ch === '\\' && i + 1 < content.length) {
i += 2;
continue;
}
if (ch === '"') inString = false;
} else if (ch === '"') {
inString = true;
} else if (ch === '{') {
depth++;
} else if (ch === '}') {
depth--;
if (depth === 0) return i;
}
i++;
}
return -1;
}
export function parseToolCallsFromText(
content: string,
opts?: { idOffset?: number },
): OpenAiToolCall[] {
const toolCalls: OpenAiToolCall[] = [];
const idOffset = opts?.idOffset ?? 0;
// Pattern 1: <tool_call>{json}</tool_call> -- balanced-brace JSON scanner.
// Skips braces inside JSON strings so nested objects parse correctly.
TC_JSON_START_RE.lastIndex = 0;
let m: RegExpExecArray | null;
while ((m = TC_JSON_START_RE.exec(content)) !== null) {
const braceStart = m.index + m[0].length - 1;
const braceEnd = scanBalancedBraces(content, braceStart);
if (braceEnd === -1) continue;
const jsonStr = content.slice(braceStart, braceEnd + 1);
try {
const obj = JSON.parse(jsonStr) as Record<string, unknown>;
const name = typeof obj.name === 'string' ? obj.name : '';
let args: string;
const rawArgs = obj.arguments ?? {};
if (typeof rawArgs === 'string') {
args = rawArgs;
} else {
args = JSON.stringify(rawArgs);
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name, arguments: args },
});
} catch {
// malformed JSON -- skip
}
}
// Pattern 2: <function=name><parameter=key>value -- closing tags optional.
// Body boundary uses </tool_call> or next <function= (not </function>,
// because code parameter values can contain that literal).
if (toolCalls.length === 0) {
TC_FUNC_START_RE.lastIndex = 0;
const funcStarts: Array<{ match: RegExpExecArray; name: string }> = [];
while ((m = TC_FUNC_START_RE.exec(content)) !== null) {
funcStarts.push({ match: m, name: m[1]! });
}
for (let idx = 0; idx < funcStarts.length; idx++) {
const { match: fm, name: funcName } = funcStarts[idx]!;
const bodyStart = fm.index + fm[0].length;
const nextFunc = idx + 1 < funcStarts.length
? funcStarts[idx + 1]!.match.index
: content.length;
const endTag = TC_END_TAG_RE.exec(content.slice(bodyStart));
let bodyEnd = endTag ? bodyStart + endTag.index : content.length;
bodyEnd = Math.min(bodyEnd, nextFunc);
let body = content.slice(bodyStart, bodyEnd);
body = body.replace(TC_FUNC_CLOSE_RE, '');
const args: Record<string, string> = {};
TC_PARAM_START_RE.lastIndex = 0;
const paramStarts: Array<{ match: RegExpExecArray; name: string }> = [];
let pm: RegExpExecArray | null;
while ((pm = TC_PARAM_START_RE.exec(body)) !== null) {
paramStarts.push({ match: pm, name: pm[1]! });
}
if (paramStarts.length === 1) {
// Single param: take everything to body end so embedded
// </parameter> in code strings is preserved.
const p = paramStarts[0]!;
let val = body.slice(p.match.index + p.match[0].length);
val = val.replace(TC_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
} else {
for (let pidx = 0; pidx < paramStarts.length; pidx++) {
const p = paramStarts[pidx]!;
const valStart = p.match.index + p.match[0].length;
const nextParam = pidx + 1 < paramStarts.length
? paramStarts[pidx + 1]!.match.index
: body.length;
let val = body.slice(valStart, nextParam);
val = val.replace(TC_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
}
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name: funcName, arguments: JSON.stringify(args) },
});
}
}
// Pattern 3: <invoke name="..."><parameter name="...">value -- Anthropic
// shape that qwen3.6 drifts to from Claude Code documentation residue.
// Closing tags optional; same single-param fast path as pattern 2.
if (toolCalls.length === 0) {
TC_INVOKE_START_RE.lastIndex = 0;
const invokeStarts: Array<{ match: RegExpExecArray; name: string }> = [];
while ((m = TC_INVOKE_START_RE.exec(content)) !== null) {
const name = (m[1] ?? m[2] ?? '').trim();
if (name) invokeStarts.push({ match: m, name });
}
for (let idx = 0; idx < invokeStarts.length; idx++) {
const { match: im, name: invokeName } = invokeStarts[idx]!;
const bodyStart = im.index + im[0].length;
const nextInvoke = idx + 1 < invokeStarts.length
? invokeStarts[idx + 1]!.match.index
: content.length;
const closeTag = content.slice(bodyStart).match(/<\/invoke>/);
let bodyEnd = closeTag ? bodyStart + (closeTag.index ?? 0) : content.length;
bodyEnd = Math.min(bodyEnd, nextInvoke);
let body = content.slice(bodyStart, bodyEnd);
body = body.replace(TC_INVOKE_CLOSE_RE, '');
const args: Record<string, string> = {};
TC_INVOKE_PARAM_RE.lastIndex = 0;
const paramStarts: Array<{ match: RegExpExecArray; name: string }> = [];
let pm: RegExpExecArray | null;
while ((pm = TC_INVOKE_PARAM_RE.exec(body)) !== null) {
const pname = (pm[1] ?? pm[2] ?? '').trim();
if (pname) paramStarts.push({ match: pm, name: pname });
}
if (paramStarts.length === 1) {
const p = paramStarts[0]!;
let val = body.slice(p.match.index + p.match[0].length);
val = val.replace(TC_INVOKE_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
} else {
for (let pidx = 0; pidx < paramStarts.length; pidx++) {
const p = paramStarts[pidx]!;
const valStart = p.match.index + p.match[0].length;
const nextParam = pidx + 1 < paramStarts.length
? paramStarts[pidx + 1]!.match.index
: body.length;
let val = body.slice(valStart, nextParam);
val = val.replace(TC_INVOKE_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
}
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name: invokeName, arguments: JSON.stringify(args) },
});
}
}
return toolCalls;
}
// ── BooCode streaming helpers ────────────────────────────────────────────
export interface ParsedCall {