Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5 package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT; AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser was dead code).
Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
199 lines
6.3 KiB
TypeScript
199 lines
6.3 KiB
TypeScript
// Streaming tool-call extraction for the qwen3.6 XML fallback path.
// `extractToolCallBlocks` is the incremental streaming scanner used by
// stream-phase.ts; `stripToolMarkup` removes tool-call wire markup from
// assistant prose (used by tool-phase.ts and error-handler.ts).

// ── Constants ────────────────────────────────────────────────────────────

/** Opening tag of the `<tool_call>…</tool_call>` wrapper format. */
export const XML_TOOL_OPEN = '<tool_call>';

/** Closing tag paired with `XML_TOOL_OPEN`. */
export const XML_TOOL_CLOSE = '</tool_call>';

/**
 * Opening prefix of the `<invoke …>…</invoke>` format. The trailing `>` is
 * deliberately absent so the prefix matches regardless of what follows it.
 */
export const INVOKE_TOOL_OPEN = '<invoke';

/** Closing tag paired with `INVOKE_TOOL_OPEN`. */
export const INVOKE_TOOL_CLOSE = '</invoke>';

// ── Strip patterns ───────────────────────────────────────────────────────

/**
 * Patterns for tool-call markup that has both an opening and a closing tag.
 * The `s` (dotAll) flag lets `.` span newlines; `g` removes every occurrence.
 */
const TOOL_CLOSED_PATS: readonly RegExp[] = [
  /<tool_call>.*?<\/tool_call>/gs,
  /<function=\w+>.*?<\/function>/gs,
  /<invoke\s[^>]*>.*?<\/invoke>/gs,
];

/**
 * The closed patterns followed by "opened but never closed" variants that
 * swallow everything from the opening tag to the end of the string.
 * Closed patterns run first so complete blocks are removed individually
 * before the open-ended patterns get a chance to match.
 */
const TOOL_ALL_PATS: readonly RegExp[] = [
  ...TOOL_CLOSED_PATS,
  /<tool_call>.*$/gs,
  /<function=\w+>.*$/gs,
  /<invoke\s[^>]*>.*$/gs,
];

// ── Strip ────────────────────────────────────────────────────────────────

/**
 * Removes tool-call wire markup from `text`.
 *
 * @param text - Text that may contain tool-call markup.
 * @param opts - When `opts.final` is truthy, unterminated markup (an opening
 *   tag with no matching close) is also removed through the end of the
 *   string, and the result is trimmed. Otherwise only complete blocks are
 *   removed and surrounding whitespace is left untouched.
 * @returns The text with the matching markup removed.
 */
export function stripToolMarkup(text: string, opts?: { final?: boolean }): string {
  const isFinal = Boolean(opts?.final);
  const patterns = isFinal ? TOOL_ALL_PATS : TOOL_CLOSED_PATS;

  // Work on a local copy instead of reassigning the parameter. Sharing the
  // module-level /g regexes is safe here: String.prototype.replace resets
  // `lastIndex` before it starts matching.
  let stripped = text;
  for (const pattern of patterns) {
    stripped = stripped.replace(pattern, '');
  }
  return isFinal ? stripped.trim() : stripped;
}

// ── BooCode streaming helpers ────────────────────────────────────────────

/** A tool call recovered from wire markup. */
export interface ParsedCall {
  /** Tool name as written in the markup. */
  name: string;
  /**
   * Argument values keyed by parameter name. The parsers in this file store
   * the JSON-decoded value when the raw text is valid JSON and the trimmed
   * raw string otherwise.
   */
  args: Record<string, unknown>;
}

/** Exact strings (compared after trimming) treated as unfilled placeholders. */
const PLACEHOLDER_LITERALS: ReadonlySet<string> = new Set([
  '...',
  'placeholder',
  '<path>',
  '<file>',
]);

/** Matches a value that consists solely of one `<…>` token, e.g. `<path>`. */
const ANGLE_BRACKET_SENTINEL_RE = /^<[^>]+>$/;

/**
 * Reports whether an argument value looks like an unfilled template
 * placeholder rather than a real value.
 *
 * @param value - Any argument value; non-strings are never placeholders.
 * @returns `true` when the trimmed string is empty, is one of the known
 *   placeholder literals, or is a single angle-bracket token.
 */
export function isPlaceholderArgValue(value: unknown): boolean {
  if (typeof value !== 'string') return false;

  const candidate = value.trim();
  return (
    candidate === '' ||
    PLACEHOLDER_LITERALS.has(candidate) ||
    // The regex has no `g`/`y` flag, so `.test` keeps no state between calls.
    ANGLE_BRACKET_SENTINEL_RE.test(candidate)
  );
}

/**
 * Reports whether any top-level argument value is a placeholder.
 * Only the object's own enumerable values are inspected; nested objects and
 * arrays are not traversed.
 */
function hasPlaceholderArgs(args: Record<string, unknown>): boolean {
  return Object.values(args).some((value) => isPlaceholderArgValue(value));
}

/**
 * Emits a debug-level log entry for a tool call rejected because one of its
 * arguments is a placeholder. The structured fields are passed first and the
 * message second.
 */
function logRejectedPlaceholder(parsed: ParsedCall): void {
  console.debug(
    { toolName: parsed.name, args: parsed.args },
    'rejected placeholder tool call at parse time',
  );
}

/** Captures the tool name from `<function=NAME>` (whitespace around `=` allowed). */
const QWEN_FUNCTION_RE = /<function\s*=\s*([^>\s]+)\s*>/;

/** Captures the key and body of each `<parameter=KEY>…</parameter>` element. */
const QWEN_PARAM_RE = /<parameter\s*=\s*([^>\s]+)\s*>([\s\S]*?)<\/parameter>/g;

/**
 * Parses one `<function=NAME>` / `<parameter=KEY>VALUE</parameter>` block.
 *
 * @param block - Markup containing the function element.
 * @returns The parsed call, or `null` when no function name can be found.
 *   Each parameter body is trimmed and JSON-decoded when it is valid JSON
 *   (so `10` becomes a number and `true` a boolean); otherwise the trimmed
 *   text is kept as a string. A repeated key keeps its last value.
 */
export function parseXmlToolCall(block: string): ParsedCall | null {
  const name = block.match(QWEN_FUNCTION_RE)?.[1]?.trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  // matchAll iterates over a clone of the regex, so the shared /g pattern's
  // `lastIndex` is never mutated.
  for (const match of block.matchAll(QWEN_PARAM_RE)) {
    const key = (match[1] ?? '').trim();
    if (!key) continue;

    const raw = (match[2] ?? '').trim();
    // NOTE(review): a key of `__proto__` goes through the inherited setter on
    // this plain object rather than creating an own property — confirm that
    // is acceptable for model-supplied keys.
    try {
      args[key] = JSON.parse(raw);
    } catch {
      // Not valid JSON: keep the raw text.
      args[key] = raw;
    }
  }
  return { name, args };
}

/**
 * Captures the tool name from `<invoke name="…">` or `<invoke name='…'>`.
 * Group 2 holds a double-quoted name, group 3 a single-quoted one.
 */
const INVOKE_NAME_RE = /<invoke\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>/;

/**
 * Captures each `<parameter name="…">…</parameter>` element. Groups 2/3 hold
 * the double-/single-quoted key; group 4 holds the body.
 */
const INVOKE_PARAM_RE =
  /<parameter\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>([\s\S]*?)<\/parameter>/g;

/**
 * Parses one `<invoke name="…">` block with `<parameter name="…">` children.
 *
 * @param block - Markup containing the invoke element.
 * @returns The parsed call, or `null` when the invoke tag is missing or its
 *   name is empty after trimming. Each parameter body is trimmed and
 *   JSON-decoded when it is valid JSON; otherwise the trimmed text is kept
 *   as a string. A repeated key keeps its last value.
 */
export function parseInvokeToolCall(block: string): ParsedCall | null {
  const nameMatch = block.match(INVOKE_NAME_RE);
  if (!nameMatch) return null;

  const name = (nameMatch[2] ?? nameMatch[3] ?? '').trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  // matchAll iterates over a clone of the regex, so the shared /g pattern's
  // `lastIndex` is never mutated.
  for (const match of block.matchAll(INVOKE_PARAM_RE)) {
    const key = (match[2] ?? match[3] ?? '').trim();
    if (!key) continue;

    const raw = (match[4] ?? '').trim();
    // NOTE(review): a key of `__proto__` goes through the inherited setter on
    // this plain object rather than creating an own property — confirm that
    // is acceptable for model-supplied keys.
    try {
      args[key] = JSON.parse(raw);
    } catch {
      // Not valid JSON: keep the raw text.
      args[key] = raw;
    }
  }
  return { name, args };
}

/** Every opener that can begin a tool-call block. */
const ALL_OPENERS = [XML_TOOL_OPEN, INVOKE_TOOL_OPEN] as const;

/**
 * Finds where a complete or partially received tool-call opener starts in `s`.
 *
 * @param s - Text to inspect.
 * @returns The index of the earliest complete opener if one is present.
 *   Otherwise, the index of the last `<` when the text from that `<` to the
 *   end of `s` is a strict prefix of some opener (e.g. `<tool_c`). Returns
 *   `-1` when neither applies.
 */
export function partialXmlOpenerStart(s: string): number {
  // Pass 1: a complete opener anywhere in the string wins; take the earliest.
  let earliest = -1;
  for (const opener of ALL_OPENERS) {
    const idx = s.indexOf(opener);
    if (idx !== -1 && (earliest === -1 || idx < earliest)) {
      earliest = idx;
    }
  }
  if (earliest !== -1) return earliest;

  // Pass 2: only the final `<` can begin an opener that is still arriving.
  const lastLt = s.lastIndexOf('<');
  if (lastLt === -1) return -1;

  const suffix = s.slice(lastLt);
  const isOpenerPrefix = ALL_OPENERS.some(
    (opener) => opener.startsWith(suffix) && suffix.length < opener.length,
  );
  return isOpenerPrefix ? lastLt : -1;
}

/** Result of one `extractToolCallBlocks` pass over a buffer. */
export interface ToolCallExtraction {
  /** Text outside accepted tool-call blocks, in buffer order. */
  flushed: string;
  /** Tool calls parsed from complete blocks, in buffer order. */
  calls: ParsedCall[];
  /**
   * Trailing text held back because it starts with a complete or partial
   * opener that has no close yet; empty when nothing is held back.
   */
  remaining: string;
}

/** Describes one tool-call wire format: its delimiters and its parser. */
interface OpenerSpec {
  /** Literal text that starts a block. */
  open: string;
  /** Literal text that ends a block. */
  close: string;
  /** Parses a full block (opener through closer); `null` if it cannot be parsed. */
  parse: (block: string) => ParsedCall | null;
}

/**
 * Wire formats recognised by `extractToolCallBlocks`. When two formats open
 * at the same index the earlier entry wins, because the scanner only replaces
 * its candidate on a strictly smaller opener index.
 */
const OPENER_SPECS: ReadonlyArray<OpenerSpec> = [
  { open: XML_TOOL_OPEN, close: XML_TOOL_CLOSE, parse: parseXmlToolCall },
  { open: INVOKE_TOOL_OPEN, close: INVOKE_TOOL_CLOSE, parse: parseInvokeToolCall },
];

/**
 * Scans `buffer` for complete tool-call blocks and splits it into text,
 * parsed calls, and a held-back tail.
 *
 * For every complete block (opener with a closer after it), in buffer order:
 * - text before the block is appended to `flushed`;
 * - if the block parses and has no placeholder arguments, the call is
 *   appended to `calls`;
 * - if the block parses but has a placeholder argument, the rejection is
 *   logged and the block's markup is appended to `flushed` verbatim;
 * - if the block does not parse, it is dropped (neither flushed nor returned).
 *
 * After the last complete block, the tail is split at the first complete or
 * partial opener: text before it goes to `flushed`, the rest to `remaining`.
 *
 * NOTE(review): a format whose opener has no closer yet is skipped when
 * choosing the next block, so a complete block of the other format later in
 * the buffer can be selected and the unclosed opener before it ends up in
 * `flushed`. Confirm this is intended.
 *
 * @param buffer - Accumulated text to scan.
 * @returns The flushed text, parsed calls, and the held-back remainder.
 */
export function extractToolCallBlocks(buffer: string): ToolCallExtraction {
  let flushed = '';
  const calls: ParsedCall[] = [];
  let pos = 0;

  while (pos < buffer.length) {
    // Pick the complete block with the earliest opener at or after `pos`.
    let next: { spec: OpenerSpec; openIdx: number; closeIdx: number } | null = null;
    for (const spec of OPENER_SPECS) {
      const openIdx = buffer.indexOf(spec.open, pos);
      if (openIdx === -1) continue;
      const closeIdx = buffer.indexOf(spec.close, openIdx);
      if (closeIdx === -1) continue;
      if (next === null || openIdx < next.openIdx) {
        next = { spec, openIdx, closeIdx };
      }
    }
    if (next === null) break;

    // Text between the cursor and the block is ordinary output.
    if (next.openIdx > pos) {
      flushed += buffer.slice(pos, next.openIdx);
    }

    const blockEnd = next.closeIdx + next.spec.close.length;
    const block = buffer.slice(next.openIdx, blockEnd);
    const parsed = next.spec.parse(block);
    if (parsed) {
      if (hasPlaceholderArgs(parsed.args)) {
        // Rejected call: log it and pass the markup through as text.
        logRejectedPlaceholder(parsed);
        flushed += block;
      } else {
        calls.push(parsed);
      }
    }
    // An unparseable block falls through here and is dropped.
    pos = blockEnd;
  }

  // Decide how much of the unconsumed tail must be held back.
  const tail = buffer.slice(pos);
  const partialIdx = partialXmlOpenerStart(tail);
  if (partialIdx === -1) {
    flushed += tail;
    return { flushed, calls, remaining: '' };
  }
  if (partialIdx > 0) {
    flushed += tail.slice(0, partialIdx);
  }
  return { flushed, calls, remaining: tail.slice(partialIdx) };
}