Files
boocode/apps/server/src/services/inference/tool-call-parser.ts
indifferentketchup a8bfde8f8d feat: relicense AGPL-3.0 → MIT (v2.7.0)
Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5
package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep
  extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT;
  AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser
was dead code). Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 08:16:03 +00:00

199 lines
6.3 KiB
TypeScript

// Streaming tool-call extraction for the qwen3.6 XML fallback path.
// `extractToolCallBlocks` is the incremental streaming scanner used by
// stream-phase.ts; `stripToolMarkup` removes tool-call wire markup from
// assistant prose (used by tool-phase.ts and error-handler.ts).
// ── Constants ────────────────────────────────────────────────────────────
export const XML_TOOL_OPEN = '<tool_call>';
export const XML_TOOL_CLOSE = '</tool_call>';
export const INVOKE_TOOL_OPEN = '<invoke';
export const INVOKE_TOOL_CLOSE = '</invoke>';
// ── Strip patterns ───────────────────────────────────────────────────────
const TOOL_CLOSED_PATS = [
/<tool_call>.*?<\/tool_call>/gs,
/<function=\w+>.*?<\/function>/gs,
/<invoke\s[^>]*>.*?<\/invoke>/gs,
];
const TOOL_ALL_PATS = [
...TOOL_CLOSED_PATS,
/<tool_call>.*$/gs,
/<function=\w+>.*$/gs,
/<invoke\s[^>]*>.*$/gs,
];
// ── Strip ────────────────────────────────────────────────────────────────
export function stripToolMarkup(text: string, opts?: { final?: boolean }): string {
const pats = opts?.final ? TOOL_ALL_PATS : TOOL_CLOSED_PATS;
for (const pat of pats) {
text = text.replace(pat, '');
}
return opts?.final ? text.trim() : text;
}
// ── BooCode streaming helpers ────────────────────────────────────────────
export interface ParsedCall {
name: string;
args: Record<string, unknown>;
}
const PLACEHOLDER_LITERALS = new Set(['...', 'placeholder', '<path>', '<file>']);
const ANGLE_BRACKET_SENTINEL_RE = /^<[^>]+>$/;
export function isPlaceholderArgValue(value: unknown): boolean {
if (typeof value !== 'string') return false;
const trimmed = value.trim();
if (trimmed === '') return true;
if (PLACEHOLDER_LITERALS.has(trimmed)) return true;
if (ANGLE_BRACKET_SENTINEL_RE.test(trimmed)) return true;
return false;
}
function hasPlaceholderArgs(args: Record<string, unknown>): boolean {
for (const value of Object.values(args)) {
if (isPlaceholderArgValue(value)) return true;
}
return false;
}
function logRejectedPlaceholder(parsed: ParsedCall): void {
console.debug(
{ toolName: parsed.name, args: parsed.args },
'rejected placeholder tool call at parse time',
);
}
const QWEN_FUNCTION_RE = /<function\s*=\s*([^>\s]+)\s*>/;
const QWEN_PARAM_RE = /<parameter\s*=\s*([^>\s]+)\s*>([\s\S]*?)<\/parameter>/g;
export function parseXmlToolCall(block: string): ParsedCall | null {
const nameMatch = block.match(QWEN_FUNCTION_RE);
if (!nameMatch || !nameMatch[1]) return null;
const name = nameMatch[1].trim();
if (!name) return null;
const args: Record<string, unknown> = {};
for (const m of block.matchAll(QWEN_PARAM_RE)) {
const key = (m[1] ?? '').trim();
if (!key) continue;
const raw = (m[2] ?? '').trim();
try {
args[key] = JSON.parse(raw);
} catch {
args[key] = raw;
}
}
return { name, args };
}
const INVOKE_NAME_RE =
/<invoke\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>/;
const INVOKE_PARAM_RE =
/<parameter\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>([\s\S]*?)<\/parameter>/g;
export function parseInvokeToolCall(block: string): ParsedCall | null {
const nameMatch = block.match(INVOKE_NAME_RE);
if (!nameMatch) return null;
const name = (nameMatch[2] ?? nameMatch[3] ?? '').trim();
if (!name) return null;
const args: Record<string, unknown> = {};
for (const m of block.matchAll(INVOKE_PARAM_RE)) {
const key = ((m[2] ?? m[3] ?? '') as string).trim();
if (!key) continue;
const raw = (m[4] ?? '').trim();
try {
args[key] = JSON.parse(raw);
} catch {
args[key] = raw;
}
}
return { name, args };
}
const ALL_OPENERS = [XML_TOOL_OPEN, INVOKE_TOOL_OPEN] as const;
export function partialXmlOpenerStart(s: string): number {
let earliest = -1;
for (const op of ALL_OPENERS) {
const idx = s.indexOf(op);
if (idx === -1) continue;
if (earliest === -1 || idx < earliest) earliest = idx;
}
if (earliest !== -1) return earliest;
const lastLt = s.lastIndexOf('<');
if (lastLt === -1) return -1;
const suffix = s.slice(lastLt);
for (const op of ALL_OPENERS) {
if (op.startsWith(suffix) && suffix.length < op.length) return lastLt;
}
return -1;
}
export interface ToolCallExtraction {
flushed: string;
calls: ParsedCall[];
remaining: string;
}
interface OpenerSpec {
open: string;
close: string;
parse: (block: string) => ParsedCall | null;
}
const OPENER_SPECS: ReadonlyArray<OpenerSpec> = [
{ open: XML_TOOL_OPEN, close: XML_TOOL_CLOSE, parse: parseXmlToolCall },
{ open: INVOKE_TOOL_OPEN, close: INVOKE_TOOL_CLOSE, parse: parseInvokeToolCall },
];
export function extractToolCallBlocks(buffer: string): ToolCallExtraction {
let flushed = '';
const calls: ParsedCall[] = [];
let pos = 0;
while (pos < buffer.length) {
let next: { spec: OpenerSpec; openIdx: number; closeIdx: number } | null = null;
for (const spec of OPENER_SPECS) {
const openIdx = buffer.indexOf(spec.open, pos);
if (openIdx === -1) continue;
const closeIdx = buffer.indexOf(spec.close, openIdx);
if (closeIdx === -1) continue;
if (next === null || openIdx < next.openIdx) {
next = { spec, openIdx, closeIdx };
}
}
if (next === null) break;
if (next.openIdx > pos) {
flushed += buffer.slice(pos, next.openIdx);
}
const blockEnd = next.closeIdx + next.spec.close.length;
const block = buffer.slice(next.openIdx, blockEnd);
const parsed = next.spec.parse(block);
if (parsed) {
if (hasPlaceholderArgs(parsed.args)) {
logRejectedPlaceholder(parsed);
flushed += block;
} else {
calls.push(parsed);
}
}
pos = blockEnd;
}
const tail = buffer.slice(pos);
const partialIdx = partialXmlOpenerStart(tail);
if (partialIdx === -1) {
flushed += tail;
return { flushed, calls, remaining: '' };
}
if (partialIdx > 0) {
flushed += tail.slice(0, partialIdx);
}
return { flushed, calls, remaining: tail.slice(partialIdx) };
}