feat: relicense AGPL-3.0 → MIT (v2.7.0)

Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5
package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep
  extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT;
  AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser
was dead code). Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 08:16:03 +00:00
parent 9c1ddcaa7c
commit a8bfde8f8d
18 changed files with 499 additions and 1566 deletions

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/tool_call_parser.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/tool_call_parser.py
// Streaming tool-call extraction for the qwen3.6 XML fallback path.
// `extractToolCallBlocks` is the incremental streaming scanner used by
// stream-phase.ts; `stripToolMarkup` removes tool-call wire markup from
// assistant prose (used by tool-phase.ts and error-handler.ts).
// ── Constants ────────────────────────────────────────────────────────────
@@ -10,34 +10,6 @@ export const XML_TOOL_CLOSE = '</tool_call>';
// Opening token of an `<invoke ...>` element. The closing `>` is left off
// because attributes (e.g. `name="..."`) follow the tag name.
export const INVOKE_TOOL_OPEN = '<invoke';
// Closing tag of an `<invoke>` element.
export const INVOKE_TOOL_CLOSE = '</invoke>';
// Substrings that mark the start of tool-call markup: the XML tool-call
// opener, a `<function=` opener, and the `<invoke` opener. `hasToolSignal`
// reports whether any of them occurs in a piece of text.
export const TOOL_XML_SIGNALS = [XML_TOOL_OPEN, '<function=', INVOKE_TOOL_OPEN] as const;
// Fixed list of string prefixes.
// NOTE(review): presumably a tool result starting with one of these is
// treated as a failure — the consumer is not visible here; confirm at call sites.
export const TOOL_ERROR_PREFIXES = [
'Error',
'Search failed',
'Execution error',
'Blocked:',
'Exit code',
'Failed to fetch',
'Failed to resolve',
'No query provided',
] as const;
// Message telling the model not to repeat an identical tool call and to try
// something else or answer.
// NOTE(review): presumably sent when a duplicate call is detected — confirm.
export const DUPLICATE_CALL_NUDGE =
'You already made this exact call. Do not repeat the same tool ' +
'call. Try a different approach: fetch a URL from previous ' +
'results, use Python to process data you already have, or ' +
'provide your final answer now.';
// Message asking the model to try a different approach after a tool problem.
// It starts with a blank line ("\n\n"), so it reads as a suffix appended to
// other text — TODO confirm against the caller.
export const TOOL_ERROR_NUDGE =
'\n\nThe tool call encountered an issue. Please try a different ' +
'approach or rephrase your request.';
// Message telling the model that no tool calls remain and it must answer now.
// NOTE(review): presumably sent once a tool-call budget is used up — confirm.
export const BUDGET_EXHAUSTED_NUDGE =
'You have used all available tool calls. Based on everything you ' +
'have found so far, provide your final answer now. Do not call ' +
'any more tools.';
// ── Strip patterns ───────────────────────────────────────────────────────
const TOOL_CLOSED_PATS = [
@@ -53,7 +25,7 @@ const TOOL_ALL_PATS = [
/<invoke\s[^>]*>.*$/gs,
];
// ── Strip / signal ───────────────────────────────────────────────────────
// ── Strip ────────────────────────────────────────────────────────────────
export function stripToolMarkup(text: string, opts?: { final?: boolean }): string {
const pats = opts?.final ? TOOL_ALL_PATS : TOOL_CLOSED_PATS;
@@ -63,206 +35,6 @@ export function stripToolMarkup(text: string, opts?: { final?: boolean }): strin
return opts?.final ? text.trim() : text;
}
/**
 * Reports whether `text` contains any of the tool-call opener substrings
 * listed in `TOOL_XML_SIGNALS`.
 *
 * @param text - Text to inspect.
 * @returns `true` as soon as one signal substring is found, otherwise `false`.
 */
export function hasToolSignal(text: string): boolean {
  for (const signal of TOOL_XML_SIGNALS) {
    if (text.includes(signal)) {
      return true;
    }
  }
  return false;
}
// ── parseToolCallsFromText (Unsloth port + Anthropic extension) ──────────
/**
 * One tool call as returned by `parseToolCallsFromText`.
 */
export interface OpenAiToolCall {
// Call identifier; `parseToolCallsFromText` generates these as `call_<n>`.
id: string;
// Always the literal 'function'.
type: 'function';
// Name of the function to call and its arguments carried as a string.
// `parseToolCallsFromText` stores JSON text here, except when the source
// JSON already held `arguments` as a string, which is passed through as-is.
function: { name: string; arguments: string };
}
// Patterns used by parseToolCallsFromText. The ones with the `g` flag keep
// `lastIndex` state between `exec` calls, so the parser resets `lastIndex`
// to 0 before each scan.

// `<tool_call>` followed by optional whitespace and an opening `{`.
const TC_JSON_START_RE = /<tool_call>\s*\{/g;
// `<function=NAME>` plus trailing whitespace; group 1 is NAME (word characters only).
const TC_FUNC_START_RE = /<function=(\w+)>\s*/g;
// First `</tool_call>` closing tag (non-global, so it carries no scan state).
const TC_END_TAG_RE = /<\/tool_call>/;
// A `</function>` closing tag, with surrounding whitespace, at the very end of a string.
const TC_FUNC_CLOSE_RE = /\s*<\/function>\s*$/;
// `<parameter=KEY>` plus trailing whitespace; group 1 is KEY (word characters only).
const TC_PARAM_START_RE = /<parameter=(\w+)>\s*/g;
// A `</parameter>` closing tag, with surrounding whitespace, at the very end of a string.
const TC_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;
// `<invoke name="...">` or `<invoke name='...'>`; the name is in group 1
// (double-quoted) or group 2 (single-quoted).
const TC_INVOKE_START_RE = /<invoke\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
// An `</invoke>` closing tag, with surrounding whitespace, at the very end of a string.
const TC_INVOKE_CLOSE_RE = /\s*<\/invoke>\s*$/;
// `<parameter name="...">` or `<parameter name='...'>`; the name is in
// group 1 (double-quoted) or group 2 (single-quoted).
const TC_INVOKE_PARAM_RE = /<parameter\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
// Same pattern as TC_PARAM_CLOSE_RE, kept under its own name for the invoke path.
const TC_INVOKE_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;
/**
 * Finds the `}` that balances the braces opened from `start` onward.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string skips the character that follows it so an escaped quote does not end
 * the string.
 *
 * @param content - Text to scan.
 * @param start - Index at which scanning begins (normally the opening `{`).
 * @returns Index of the `}` that brings the nesting depth back to zero, or
 *   `-1` when the end of `content` is reached first.
 */
function scanBalancedBraces(content: string, start: number): number {
  let openCount = 0;
  let insideQuotes = false;

  for (let pos = start; pos < content.length; pos += 1) {
    const c = content[pos];

    if (insideQuotes) {
      if (c === '\\' && pos + 1 < content.length) {
        // Step over the escaped character; the loop increment supplies the
        // second half of the two-character skip.
        pos += 1;
      } else if (c === '"') {
        insideQuotes = false;
      }
      continue;
    }

    switch (c) {
      case '"':
        insideQuotes = true;
        break;
      case '{':
        openCount += 1;
        break;
      case '}':
        openCount -= 1;
        if (openCount === 0) {
          return pos;
        }
        break;
      default:
        break;
    }
  }

  return -1;
}
/**
 * Extracts tool calls that a model wrote as text markup inside `content`.
 *
 * Three wire shapes are tried in order; a later shape is attempted only when
 * the earlier ones produced no calls:
 *   1. `<tool_call>{json}</tool_call>`
 *   2. `<function=name><parameter=key>value`
 *   3. `<invoke name="..."><parameter name="...">value`
 *
 * @param content - Text to scan.
 * @param opts - Optional settings. `idOffset` (default 0) is added to the
 *   running call index when building each `call_<n>` id.
 * @returns The calls in the order they were found; an empty array when
 *   nothing matched. For shapes 2 and 3, `function.arguments` is the JSON
 *   text of an object whose values are the trimmed parameter strings.
 */
export function parseToolCallsFromText(
content: string,
opts?: { idOffset?: number },
): OpenAiToolCall[] {
const toolCalls: OpenAiToolCall[] = [];
const idOffset = opts?.idOffset ?? 0;
// Pattern 1: <tool_call>{json}</tool_call> -- balanced-brace JSON scanner.
// Skips braces inside JSON strings so nested objects parse correctly.
// The regex is global and module-level, so clear any leftover scan position.
TC_JSON_START_RE.lastIndex = 0;
let m: RegExpExecArray | null;
while ((m = TC_JSON_START_RE.exec(content)) !== null) {
// The match ends on the opening `{`, so its index is the last matched char.
const braceStart = m.index + m[0].length - 1;
const braceEnd = scanBalancedBraces(content, braceStart);
// No balancing `}` for this opener: skip it and try the next match.
if (braceEnd === -1) continue;
const jsonStr = content.slice(braceStart, braceEnd + 1);
try {
const obj = JSON.parse(jsonStr) as Record<string, unknown>;
// NOTE(review): a missing or non-string `name` becomes '' and the call is
// still emitted, whereas Pattern 3 drops entries with an empty name.
const name = typeof obj.name === 'string' ? obj.name : '';
let args: string;
// Missing or null `arguments` fall back to an empty object.
const rawArgs = obj.arguments ?? {};
if (typeof rawArgs === 'string') {
// Already a string: pass it through unchanged.
args = rawArgs;
} else {
args = JSON.stringify(rawArgs);
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name, arguments: args },
});
} catch {
// malformed JSON -- skip
}
}
// Pattern 2: <function=name><parameter=key>value -- closing tags optional.
// Body boundary uses </tool_call> or next <function= (not </function>,
// because code parameter values can contain that literal).
if (toolCalls.length === 0) {
TC_FUNC_START_RE.lastIndex = 0;
// Collect every opener first so each body can be bounded by the next one.
const funcStarts: Array<{ match: RegExpExecArray; name: string }> = [];
while ((m = TC_FUNC_START_RE.exec(content)) !== null) {
funcStarts.push({ match: m, name: m[1]! });
}
for (let idx = 0; idx < funcStarts.length; idx++) {
const { match: fm, name: funcName } = funcStarts[idx]!;
const bodyStart = fm.index + fm[0].length;
const nextFunc = idx + 1 < funcStarts.length
? funcStarts[idx + 1]!.match.index
: content.length;
// The body ends at the first </tool_call> after the opener or at the next
// <function= opener, whichever comes first; otherwise at end of content.
const endTag = TC_END_TAG_RE.exec(content.slice(bodyStart));
let bodyEnd = endTag ? bodyStart + endTag.index : content.length;
bodyEnd = Math.min(bodyEnd, nextFunc);
let body = content.slice(bodyStart, bodyEnd);
// Drop a trailing </function> only when it is the last thing in the body.
body = body.replace(TC_FUNC_CLOSE_RE, '');
const args: Record<string, string> = {};
TC_PARAM_START_RE.lastIndex = 0;
const paramStarts: Array<{ match: RegExpExecArray; name: string }> = [];
let pm: RegExpExecArray | null;
while ((pm = TC_PARAM_START_RE.exec(body)) !== null) {
paramStarts.push({ match: pm, name: pm[1]! });
}
if (paramStarts.length === 1) {
// Single param: take everything to body end so embedded
// </parameter> in code strings is preserved.
const p = paramStarts[0]!;
let val = body.slice(p.match.index + p.match[0].length);
val = val.replace(TC_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
} else {
// Zero or several params: each value runs up to the next param opener
// (or the body end), minus a trailing </parameter>.
for (let pidx = 0; pidx < paramStarts.length; pidx++) {
const p = paramStarts[pidx]!;
const valStart = p.match.index + p.match[0].length;
const nextParam = pidx + 1 < paramStarts.length
? paramStarts[pidx + 1]!.match.index
: body.length;
let val = body.slice(valStart, nextParam);
val = val.replace(TC_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
}
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name: funcName, arguments: JSON.stringify(args) },
});
}
}
// Pattern 3: <invoke name="..."><parameter name="...">value -- Anthropic
// shape that qwen3.6 drifts to from Claude Code documentation residue.
// Closing tags optional; same single-param fast path as pattern 2.
if (toolCalls.length === 0) {
TC_INVOKE_START_RE.lastIndex = 0;
const invokeStarts: Array<{ match: RegExpExecArray; name: string }> = [];
while ((m = TC_INVOKE_START_RE.exec(content)) !== null) {
// Group 1 holds a double-quoted name, group 2 a single-quoted one.
const name = (m[1] ?? m[2] ?? '').trim();
// Openers whose name is empty after trimming are ignored.
if (name) invokeStarts.push({ match: m, name });
}
for (let idx = 0; idx < invokeStarts.length; idx++) {
const { match: im, name: invokeName } = invokeStarts[idx]!;
const bodyStart = im.index + im[0].length;
const nextInvoke = idx + 1 < invokeStarts.length
? invokeStarts[idx + 1]!.match.index
: content.length;
// NOTE(review): this inline regex literal is evaluated on every iteration;
// Pattern 2 uses a hoisted constant (TC_END_TAG_RE) for the same job.
const closeTag = content.slice(bodyStart).match(/<\/invoke>/);
let bodyEnd = closeTag ? bodyStart + (closeTag.index ?? 0) : content.length;
bodyEnd = Math.min(bodyEnd, nextInvoke);
let body = content.slice(bodyStart, bodyEnd);
body = body.replace(TC_INVOKE_CLOSE_RE, '');
const args: Record<string, string> = {};
TC_INVOKE_PARAM_RE.lastIndex = 0;
const paramStarts: Array<{ match: RegExpExecArray; name: string }> = [];
let pm: RegExpExecArray | null;
while ((pm = TC_INVOKE_PARAM_RE.exec(body)) !== null) {
const pname = (pm[1] ?? pm[2] ?? '').trim();
// Parameters whose name is empty after trimming are ignored.
if (pname) paramStarts.push({ match: pm, name: pname });
}
if (paramStarts.length === 1) {
// Single param: the value runs to the end of the body.
const p = paramStarts[0]!;
let val = body.slice(p.match.index + p.match[0].length);
val = val.replace(TC_INVOKE_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
} else {
// Zero or several params: each value runs up to the next param opener.
for (let pidx = 0; pidx < paramStarts.length; pidx++) {
const p = paramStarts[pidx]!;
const valStart = p.match.index + p.match[0].length;
const nextParam = pidx + 1 < paramStarts.length
? paramStarts[pidx + 1]!.match.index
: body.length;
let val = body.slice(valStart, nextParam);
val = val.replace(TC_INVOKE_PARAM_CLOSE_RE, '');
args[p.name] = val.trim();
}
}
toolCalls.push({
id: `call_${idOffset + toolCalls.length}`,
type: 'function',
function: { name: invokeName, arguments: JSON.stringify(args) },
});
}
}
return toolCalls;
}
// ── BooCode streaming helpers ────────────────────────────────────────────
export interface ParsedCall {