feat: relicense AGPL-3.0 → MIT (v2.7.0)

Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5 package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT; AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser was dead code).

Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 08:16:03 +00:00
parent 9c1ddcaa7c
commit a8bfde8f8d
18 changed files with 499 additions and 1566 deletions
--- a/apps/server/src/services/inference/llama-args-validator.ts
+++ b/apps/server/src/services/inference/llama-args-validator.ts
@@ -1,80 +1,139 @@
-// SPDX-License-Identifier: AGPL-3.0-only
-// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-// Ported from studio/backend/core/inference/llama_server_args.py.
-// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py
+// Guards against agent-supplied llama-server CLI flags that would clash with
+// values BooCode sets itself. Two concerns live here:
+//
+//   1. A hard denylist of flags that BooCode owns outright (model selection,
+//      the listening socket, credentials, the bundled web UI). Passing any of
+//      these is a configuration error and is rejected loudly.
+//
+//   2. A "shadowing" set of flags that are legal to pass but, because of
+//      llama.cpp's last-wins argument parsing, would override a first-class
+//      BooCode setting. These are silently removed from the auto-generated
+//      argv so the agent's explicit choice takes precedence without leaving a
+//      duplicate flag behind.
+//
+// All flag spellings below are the public llama-server option names (short and
+// long aliases) documented in its --help output.

-// Each group is the full set of aliases (short + long) for one hard-denied
-// flag, taken from the llama-server README. Flags NOT in this list pass
-// through and override auto-set values via llama.cpp's last-wins CLI parsing.
-const DENYLIST_GROUPS: ReadonlyArray<ReadonlySet<string>> = [
-  // Model identity
-  new Set(['-m', '--model']),
-  new Set(['-mu', '--model-url']),
-  new Set(['-dr', '--docker-repo']),
-  new Set(['-hf', '-hfr', '--hf-repo']),
-  new Set(['-hff', '--hf-file']),
-  new Set(['-hfv', '-hfrv', '--hf-repo-v']),
-  new Set(['-hffv', '--hf-file-v']),
-  new Set(['-hft', '--hf-token']),
-  new Set(['-mm', '--mmproj']),
-  new Set(['-mmu', '--mmproj-url']),
-  // Networking
-  new Set(['--host']),
-  new Set(['--port']),
-  new Set(['--path']),
-  new Set(['--api-prefix']),
-  new Set(['--reuse-port']),
-  // Auth / TLS
-  new Set(['--api-key']),
-  new Set(['--api-key-file']),
-  new Set(['--ssl-key-file']),
-  new Set(['--ssl-cert-file']),
-  // Single-model server / UI
-  new Set(['--webui', '--no-webui']),
-  new Set(['--ui', '--no-ui']),
-  new Set(['--ui-config']),
-  new Set(['--ui-config-file']),
-  new Set(['--ui-mcp-proxy', '--no-ui-mcp-proxy']),
-  new Set(['--models-dir']),
-  new Set(['--models-preset']),
-  new Set(['--models-max']),
-  new Set(['--models-autoload', '--no-models-autoload']),
+// --- Hard denylist -------------------------------------------------------
+
+// Authored as named buckets purely for readability; every alias is folded
+// into one flat lookup set at module load. Each inner array enumerates the
+// spellings (short/long aliases, `--no-` negations) of one underlying option.
+const MODEL_SOURCE_FLAGS = [
+  ['-m', '--model'],
+  ['-mu', '--model-url'],
+  ['-dr', '--docker-repo'],
+  ['-hf', '-hfr', '--hf-repo'],
+  ['-hff', '--hf-file'],
+  ['-hfv', '-hfrv', '--hf-repo-v'],
+  ['-hffv', '--hf-file-v'],
+  ['-hft', '--hf-token'],
+  ['-mm', '--mmproj'],
+  ['-mmu', '--mmproj-url'],
 ];

-const DENYLIST: ReadonlySet<string> = new Set(
-  DENYLIST_GROUPS.flatMap((g) => [...g]),
+const LISTEN_FLAGS = [
+  ['--host'],
+  ['--port'],
+  ['--path'],
+  ['--api-prefix'],
+  ['--reuse-port'],
+];
+
+const CREDENTIAL_FLAGS = [
+  ['--api-key'],
+  ['--api-key-file'],
+  ['--ssl-key-file'],
+  ['--ssl-cert-file'],
+];
+
+const WEBUI_FLAGS = [
+  ['--webui', '--no-webui'],
+  ['--ui', '--no-ui'],
+  ['--ui-config'],
+  ['--ui-config-file'],
+  ['--ui-mcp-proxy', '--no-ui-mcp-proxy'],
+  ['--models-dir'],
+  ['--models-preset'],
+  ['--models-max'],
+  ['--models-autoload', '--no-models-autoload'],
+];
+
+const MANAGED_FLAGS: ReadonlySet<string> = new Set(
+  [
+    ...MODEL_SOURCE_FLAGS,
+    ...LISTEN_FLAGS,
+    ...CREDENTIAL_FLAGS,
+    ...WEBUI_FLAGS,
+  ].flat(),
 );

-function flagName(token: string): string | null {
-  if (!token.startsWith('-') || token === '-' || token === '--') return null;
-  if (token.length >= 2 && (token[1]!.match(/\d/) || token[1] === '.')) return null;
-  return token.split('=', 1)[0]!;
+// --- Token parsing -------------------------------------------------------
+
+const DIGIT = /^[0-9]$/;
+
+/**
+ * Extract the flag name from a single argv token, or `null` when the token is
+ * not a flag.
+ *
+ * A token is treated as a flag only when it begins with `-` and the character
+ * after the leading dash is neither a digit nor a decimal point — that rule
+ * keeps negative numeric values such as `-1` or `-0.5` from being mistaken for
+ * options. A bare `-` or `--` is not a flag either. The returned name is the
+ * portion before any `=`, so `--ctx-size=4096` yields `--ctx-size`.
+ */
+function parseFlag(token: string): string | null {
+  if (!token.startsWith('-')) return null;
+  if (token === '-' || token === '--') return null;
+
+  const second = token[1]!;
+  if (DIGIT.test(second) || second === '.') return null;
+
+  const eq = token.indexOf('=');
+  return eq === -1 ? token : token.slice(0, eq);
 }

+// --- Public API ----------------------------------------------------------
+
+/**
+ * Validate a sequence of extra llama-server args, throwing an `Error` on the
+ * first one that names a BooCode-managed flag. Returns the args materialised
+ * as a string[] when they all pass (empty when `args` is omitted).
+ */
 export function validateExtraArgs(args?: Iterable<string>): string[] {
-  if (!args) return [];
-  const out: string[] = [];
-  for (const raw of args) {
-    const token = String(raw);
-    const flag = flagName(token);
-    if (flag !== null && DENYLIST.has(flag)) {
+  const result: string[] = [];
+  if (!args) return result;
+
+  for (const entry of args) {
+    const token = String(entry);
+    const flag = parseFlag(token);
+    if (flag !== null && MANAGED_FLAGS.has(flag)) {
      throw new Error(
        `llama-server flag '${flag}' is managed and cannot be passed as an extra arg`,
      );
    }
-    out.push(token);
+    result.push(token);
  }
-  return out;
+
+  return result;
 }

+/** True when `flag` is a BooCode-managed flag that callers may not override. */
 export function isManagedFlag(flag: string): boolean {
-  return DENYLIST.has(flag);
+  return MANAGED_FLAGS.has(flag);
 }

-// Shadowing flag groups: pass-through flags that shadow first-class settings.
-const CONTEXT_FLAGS = new Set(['-c', '--ctx-size']);
-const CACHE_FLAGS = new Set(['-ctk', '--cache-type-k', '-ctv', '--cache-type-v']);
-const SPEC_FLAGS = new Set([
+// --- Shadowing flags -----------------------------------------------------
+
+// Flags below are legal for an agent to pass, but each shadows a setting
+// BooCode applies itself. They are categorised so a caller can opt out of
+// stripping any one category.
+
+const SHADOW_CONTEXT = ['-c', '--ctx-size'];
+
+const SHADOW_CACHE = ['-ctk', '--cache-type-k', '-ctv', '--cache-type-v'];
+
+const SHADOW_SPEC = [
  '--spec-default',
  '--spec-type',
  '--spec-ngram-size-n',
@@ -88,17 +147,22 @@ const SPEC_FLAGS = new Set([
  '--spec-ngram-mod-n-match',
  '--spec-ngram-mod-n-min',
  '--spec-ngram-mod-n-max',
-]);
-const TEMPLATE_FLAGS = new Set([
+];
+
+const SHADOW_TEMPLATE = [
  '--chat-template',
  '--chat-template-file',
  '--chat-template-kwargs',
  '--jinja',
  '--no-jinja',
-]);
+];

-const BOOLEAN_SHADOWING_FLAGS = new Set([
-  '--spec-default', '--jinja', '--no-jinja',
+// Shadowing flags that are boolean switches and take no value, so the
+// stripper must not also drop the following token.
+const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
+  '--spec-default',
+  '--jinja',
+  '--no-jinja',
 ]);

 export interface StripOptions {
@@ -108,35 +172,49 @@ export interface StripOptions {
  stripTemplate?: boolean;
 }

+/**
+ * Remove shadowing flags (and their values) from an argv sequence.
+ *
+ * Each category is stripped by default; pass the matching `strip*: false`
+ * option to retain that category. A stripped flag's following token is also
+ * removed when it is a separate value (e.g. `-c 4096`) rather than another
+ * flag; the `--flag=value` and boolean-switch forms consume only one token.
+ */
 export function stripShadowingFlags(
  args: Iterable<string>,
  opts?: StripOptions,
 ): string[] {
-  const shadowing = new Set<string>();
-  if (opts?.stripContext !== false) for (const f of CONTEXT_FLAGS) shadowing.add(f);
-  if (opts?.stripCache !== false) for (const f of CACHE_FLAGS) shadowing.add(f);
-  if (opts?.stripSpec !== false) for (const f of SPEC_FLAGS) shadowing.add(f);
-  if (opts?.stripTemplate !== false) for (const f of TEMPLATE_FLAGS) shadowing.add(f);
+  const targets = new Set<string>();
+  if (opts?.stripContext !== false) for (const f of SHADOW_CONTEXT) targets.add(f);
+  if (opts?.stripCache !== false) for (const f of SHADOW_CACHE) targets.add(f);
+  if (opts?.stripSpec !== false) for (const f of SHADOW_SPEC) targets.add(f);
+  if (opts?.stripTemplate !== false) for (const f of SHADOW_TEMPLATE) targets.add(f);

-  const tokens = [...args].map(String);
-  const out: string[] = [];
-  let i = 0;
-  const n = tokens.length;
-  while (i < n) {
-    const tok = tokens[i]!;
-    const flag = flagName(tok);
-    if (flag === null || !shadowing.has(flag)) {
-      out.push(tok);
-      i++;
+  const tokens = Array.from(args, String);
+  const kept: string[] = [];
+
+  for (let i = 0; i < tokens.length; i++) {
+    const token = tokens[i]!;
+    const flag = parseFlag(token);
+
+    // Not a targeted shadow flag — keep it verbatim.
+    if (flag === null || !targets.has(flag)) {
+      kept.push(token);
      continue;
    }
-    if (BOOLEAN_SHADOWING_FLAGS.has(flag) || tok.includes('=')) {
-      i++;
-    } else if (i + 1 < n && flagName(tokens[i + 1]!) === null) {
-      i += 2;
-    } else {
-      i++;
+
+    // Targeted: drop it. Decide whether the next token is its value and should
+    // be dropped along with it. Boolean switches and the inline `=value` form
+    // carry no separate value token.
+    const carriesInlineValue = token.includes('=');
+    const isBoolean = VALUELESS_SHADOW_FLAGS.has(flag);
+    const next = tokens[i + 1];
+    const nextIsValue = next !== undefined && parseFlag(next) === null;
+
+    if (!isBoolean && !carriesInlineValue && nextIsValue) {
+      i++; // also skip the value token
    }
  }
-  return out;
+
+  return kept;
 }