// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/tool_call_parser.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/tool_call_parser.py
//
// NOTE(review): an earlier copy of this file had every `<tag>`-shaped token
// stripped (empty opener constants, regexes missing their opening tags,
// generic arguments removed). The literals below were restored from the
// surviving fragments and from how each capture group is consumed. Items
// marked NOTE(review) could not be recovered verbatim -- confirm them against
// the upstream sources before relying on exact matching behaviour.

// ── Constants ────────────────────────────────────────────────────────────

/** Literal opener/closer of a Qwen-style tool-call block. */
export const XML_TOOL_OPEN = '<tool_call>';
export const XML_TOOL_CLOSE = '</tool_call>';

/**
 * Literal opener/closer of an Anthropic-style invoke block.
 * NOTE(review): the opener is reconstructed; it is deliberately left without
 * `>` because the tag carries a `name=` attribute.
 */
export const INVOKE_TOOL_OPEN = '<invoke';
export const INVOKE_TOOL_CLOSE = '</invoke>';

// Cheap substring probes used by hasToolSignal().
// NOTE(review): list reconstructed -- confirm the exact members.
const TOOL_XML_SIGNALS: readonly string[] = ['<tool_call>', '<function=', '<invoke'];

// Fully closed tool blocks.
const TOOL_CLOSED_PATS: readonly RegExp[] = [
  /<tool_call>.*?<\/tool_call>/gs,
  /<function=\w+>.*?<\/function>/gs,
  /<invoke\b[^>]*>.*?<\/invoke>/gs,
];

// Closed blocks plus unterminated blocks running to the end of the text.
const TOOL_ALL_PATS: readonly RegExp[] = [
  ...TOOL_CLOSED_PATS,
  /<tool_call>.*$/gs,
  /<function=\w+>.*$/gs,
  /<invoke\b[^>]*>.*$/gs,
];

// ── Strip / signal ───────────────────────────────────────────────────────

/**
 * Removes tool-call markup from `text`.
 *
 * @param text - Text that may contain tool-call blocks.
 * @param opts - With `final` truthy, unterminated blocks (opener without a
 *   closer) are removed through the end of the text and the result is trimmed.
 *   Otherwise only fully closed blocks are removed and whitespace is kept.
 * @returns The text with the matching markup deleted.
 */
export function stripToolMarkup(text: string, opts?: { final?: boolean }): string {
  const isFinal = Boolean(opts?.final);
  const patterns = isFinal ? TOOL_ALL_PATS : TOOL_CLOSED_PATS;
  let stripped = text;
  for (const pattern of patterns) {
    stripped = stripped.replace(pattern, '');
  }
  return isFinal ? stripped.trim() : stripped;
}

/** Returns true when `text` contains any of the tool-markup signal substrings. */
export function hasToolSignal(text: string): boolean {
  return TOOL_XML_SIGNALS.some((signal) => text.includes(signal));
}

// ── parseToolCallsFromText (Unsloth port + Anthropic extension) ──────────

export interface OpenAiToolCall {
  id: string;
  type: 'function';
  function: { name: string; arguments: string };
}

const TC_JSON_START_RE = /<tool_call>\s*\{/g;
const TC_FUNC_START_RE = /<function=(\w+)>\s*/g;
const TC_END_TAG_RE = /<\/tool_call>/;
const TC_FUNC_CLOSE_RE = /\s*<\/function>\s*$/;
const TC_PARAM_START_RE = /<parameter=(\w+)>\s*/g;
const TC_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;
// NOTE(review): the two invoke openers are reconstructed. The parser reads the
// name from capture 1 or capture 2, modelled here as double-/single-quoted.
const TC_INVOKE_START_RE = /<invoke\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
const TC_INVOKE_END_TAG_RE = /<\/invoke>/;
const TC_INVOKE_CLOSE_RE = /\s*<\/invoke>\s*$/;
const TC_INVOKE_PARAM_RE = /<parameter\s+name\s*=\s*(?:"([^"]*)"|'([^']*)')\s*>/g;
const TC_INVOKE_PARAM_CLOSE_RE = /\s*<\/parameter>\s*$/;

/**
 * Finds the `}` that balances the `{` at `start`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string skips the character that follows it.
 *
 * @param content - Text to scan.
 * @param start - Index of the opening brace.
 * @returns Index of the matching closing brace, or -1 if the text ends first.
 */
function scanBalancedBraces(content: string, start: number): number {
  let depth = 0;
  let inString = false;
  let i = start;
  while (i < content.length) {
    const ch = content[i]!;
    if (inString) {
      if (ch === '\\' && i + 1 < content.length) {
        i += 2; // skip the escaped character
        continue;
      }
      if (ch === '"') inString = false;
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
    i++;
  }
  return -1;
}

/** Position and name of one opening tag found in a text. */
interface TagStart {
  /** Offset of the tag's `<`. */
  index: number;
  /** Offset just past everything the opening pattern consumed. */
  end: number;
  name: string;
}

/** Regex set describing one tag-based call dialect. */
interface TaggedCallSyntax {
  /** Global regex for the call opener; name in capture 1 or 2. */
  callStart: RegExp;
  /** Non-global regex for the tag that bounds a call body. */
  callEnd: RegExp;
  /** Non-global regex removing a trailing call closer from a body. */
  callClose: RegExp;
  /** Global regex for a parameter opener; name in capture 1 or 2. */
  paramStart: RegExp;
  /** Non-global regex removing a trailing parameter closer from a value. */
  paramClose: RegExp;
}

const FUNCTION_SYNTAX: TaggedCallSyntax = {
  callStart: TC_FUNC_START_RE,
  callEnd: TC_END_TAG_RE,
  callClose: TC_FUNC_CLOSE_RE,
  paramStart: TC_PARAM_START_RE,
  paramClose: TC_PARAM_CLOSE_RE,
};

const INVOKE_SYNTAX: TaggedCallSyntax = {
  callStart: TC_INVOKE_START_RE,
  callEnd: TC_INVOKE_END_TAG_RE,
  callClose: TC_INVOKE_CLOSE_RE,
  paramStart: TC_INVOKE_PARAM_RE,
  paramClose: TC_INVOKE_PARAM_CLOSE_RE,
};

/** Collects every opening tag matched by the global regex `re`, skipping empty names. */
function findTagStarts(re: RegExp, text: string): TagStart[] {
  const starts: TagStart[] = [];
  re.lastIndex = 0; // shared module-level regex: always scan from the beginning
  let m: RegExpExecArray | null;
  while ((m = re.exec(text)) !== null) {
    const name = (m[1] ?? m[2] ?? '').trim();
    if (name) starts.push({ index: m.index, end: m.index + m[0].length, name });
  }
  return starts;
}

/**
 * Parses tag-based calls whose closing tags are optional.
 *
 * A call body ends at the dialect's end tag or at the next call opener,
 * whichever comes first. Each parameter value runs to the next parameter
 * opener or to the end of the body, so a lone parameter keeps everything up
 * to the body end.
 */
function parseTaggedCalls(
  content: string,
  syntax: TaggedCallSyntax,
): Array<{ name: string; args: Record<string, string> }> {
  const callStarts = findTagStarts(syntax.callStart, content);
  return callStarts.map((call, idx) => {
    const nextCall = callStarts[idx + 1]?.index ?? content.length;
    const endTag = syntax.callEnd.exec(content.slice(call.end));
    const bodyEnd = Math.min(endTag ? call.end + endTag.index : content.length, nextCall);
    const body = content.slice(call.end, bodyEnd).replace(syntax.callClose, '');

    const paramStarts = findTagStarts(syntax.paramStart, body);
    const args: Record<string, string> = {};
    paramStarts.forEach((param, pidx) => {
      const valueEnd = paramStarts[pidx + 1]?.index ?? body.length;
      args[param.name] = body.slice(param.end, valueEnd).replace(syntax.paramClose, '').trim();
    });
    return { name: call.name, args };
  });
}

/**
 * Extracts tool calls from raw model text.
 *
 * Three shapes are tried in order; a later shape is attempted only when the
 * earlier ones produced no call:
 *   1. `<tool_call>{json}</tool_call>`
 *   2. `<function=name><parameter=key>value</parameter></function>`
 *   3. `<invoke name="..."><parameter name="...">value</parameter></invoke>`
 *
 * @param content - Raw text to scan.
 * @param opts - `idOffset` is added to the zero-based call position when
 *   building ids of the form `call_N` (default 0).
 * @returns The parsed calls; `function.arguments` is always a string.
 */
export function parseToolCallsFromText(
  content: string,
  opts?: { idOffset?: number },
): OpenAiToolCall[] {
  const idOffset = opts?.idOffset ?? 0;
  const toolCalls: OpenAiToolCall[] = [];
  const pushCall = (name: string, args: string): void => {
    toolCalls.push({
      id: `call_${idOffset + toolCalls.length}`,
      type: 'function',
      function: { name, arguments: args },
    });
  };

  // Pattern 1: <tool_call>{json}</tool_call> -- balanced-brace JSON scanner.
  // Skips braces inside JSON strings so nested objects parse correctly.
  TC_JSON_START_RE.lastIndex = 0;
  let m: RegExpExecArray | null;
  while ((m = TC_JSON_START_RE.exec(content)) !== null) {
    const braceStart = m.index + m[0].length - 1;
    const braceEnd = scanBalancedBraces(content, braceStart);
    if (braceEnd === -1) continue;

    let obj: Record<string, unknown>;
    try {
      obj = JSON.parse(content.slice(braceStart, braceEnd + 1)) as Record<string, unknown>;
    } catch {
      continue; // malformed JSON -- skip this opener, keep scanning after it
    }

    const name = typeof obj.name === 'string' ? obj.name : '';
    const rawArgs = obj.arguments ?? {};
    pushCall(name, typeof rawArgs === 'string' ? rawArgs : JSON.stringify(rawArgs));

    // Resume after the parsed object: an opener that appears inside one of its
    // string values is data and must not be reported as another call.
    TC_JSON_START_RE.lastIndex = braceEnd + 1;
  }

  // Pattern 2: <function=name><parameter=key>value</parameter></function>
  // -- closing tags optional.
  // Body boundary uses </tool_call> or next <function=, NOT </function>
  // (because code parameter values can contain that literal).
  if (toolCalls.length === 0) {
    for (const call of parseTaggedCalls(content, FUNCTION_SYNTAX)) {
      pushCall(call.name, JSON.stringify(call.args));
    }
  }

  // Pattern 3: <invoke name="..."><parameter name="...">value</parameter></invoke>
  // -- Anthropic shape that qwen3.6 drifts to from Claude Code documentation
  // residue. Closing tags optional; same value rules as pattern 2.
  if (toolCalls.length === 0) {
    for (const call of parseTaggedCalls(content, INVOKE_SYNTAX)) {
      pushCall(call.name, JSON.stringify(call.args));
    }
  }

  return toolCalls;
}

// ── BooCode streaming helpers ────────────────────────────────────────────

export interface ParsedCall {
  name: string;
  args: Record<string, unknown>;
}

// NOTE(review): two further entries were lost to tag stripping. They were
// `<...>`-shaped, so ANGLE_BRACKET_SENTINEL_RE below already covers them.
const PLACEHOLDER_LITERALS = new Set(['...', 'placeholder']);
const ANGLE_BRACKET_SENTINEL_RE = /^<[^>]+>$/;

/**
 * Returns true when `value` is a string that, once trimmed, is empty, is one
 * of the placeholder literals, or is a single `<...>` token. Non-strings are
 * never placeholders.
 */
export function isPlaceholderArgValue(value: unknown): boolean {
  if (typeof value !== 'string') return false;
  const trimmed = value.trim();
  return (
    trimmed === '' || PLACEHOLDER_LITERALS.has(trimmed) || ANGLE_BRACKET_SENTINEL_RE.test(trimmed)
  );
}

/** True when at least one argument value is a placeholder. */
function hasPlaceholderArgs(args: Record<string, unknown>): boolean {
  return Object.values(args).some(isPlaceholderArgValue);
}

/** Emits a debug record for a call rejected because of placeholder arguments. */
function logRejectedPlaceholder(parsed: ParsedCall): void {
  console.debug(
    { toolName: parsed.name, args: parsed.args },
    'rejected placeholder tool call at parse time',
  );
}

/** Parses a trimmed parameter body as JSON, falling back to the raw string. */
function coerceArgValue(raw: string): unknown {
  try {
    return JSON.parse(raw);
  } catch {
    return raw;
  }
}

const QWEN_FUNCTION_RE = /<function=([^>\s]+)\s*>/;
const QWEN_PARAM_RE = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/g;

/**
 * Parses one `<function=name>` block with `<parameter=key>value</parameter>`
 * children.
 *
 * @param block - Text of a single tool-call block.
 * @returns The call, or null when no function name is present. Parameter
 *   values are JSON-decoded when possible and kept as strings otherwise.
 */
export function parseXmlToolCall(block: string): ParsedCall | null {
  const name = (QWEN_FUNCTION_RE.exec(block)?.[1] ?? '').trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  for (const m of block.matchAll(QWEN_PARAM_RE)) {
    const key = (m[1] ?? '').trim();
    if (!key) continue;
    args[key] = coerceArgValue((m[2] ?? '').trim());
  }
  return { name, args };
}

// NOTE(review): reconstructed. The parsers read the name from capture 2 or 3
// and the parameter body from capture 4, modelled here as
// (1) quote char, (2) quoted name, (3) unquoted name, (4) body.
const INVOKE_NAME_RE = /<invoke\s+name\s*=\s*(?:(["'])([^"']*)\1|([^\s>"']+))\s*>/;
const INVOKE_PARAM_RE =
  /<parameter\s+name\s*=\s*(?:(["'])([^"']*)\1|([^\s>"']+))\s*>([\s\S]*?)<\/parameter>/g;

/**
 * Parses one `<invoke name="...">` block with `<parameter name="...">` children.
 *
 * @param block - Text of a single invoke block.
 * @returns The call, or null when no non-empty name is present. Parameter
 *   values are JSON-decoded when possible and kept as strings otherwise.
 */
export function parseInvokeToolCall(block: string): ParsedCall | null {
  const nameMatch = INVOKE_NAME_RE.exec(block);
  if (!nameMatch) return null;
  const name = (nameMatch[2] ?? nameMatch[3] ?? '').trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  for (const m of block.matchAll(INVOKE_PARAM_RE)) {
    const key = (m[2] ?? m[3] ?? '').trim();
    if (!key) continue;
    args[key] = coerceArgValue((m[4] ?? '').trim());
  }
  return { name, args };
}

const ALL_OPENERS = [XML_TOOL_OPEN, INVOKE_TOOL_OPEN] as const;

/**
 * Locates where tool markup starts, or might be starting, in `s`.
 *
 * @returns The index of the earliest complete opener; otherwise the index of
 *   the last `<` when the text from there to the end is a proper prefix of an
 *   opener; otherwise -1.
 */
export function partialXmlOpenerStart(s: string): number {
  const complete = ALL_OPENERS.map((op) => s.indexOf(op)).filter((idx) => idx !== -1);
  if (complete.length > 0) return Math.min(...complete);

  const lastLt = s.lastIndexOf('<');
  if (lastLt === -1) return -1;
  const suffix = s.slice(lastLt);
  const isProperPrefix = ALL_OPENERS.some(
    (op) => op.startsWith(suffix) && suffix.length < op.length,
  );
  return isProperPrefix ? lastLt : -1;
}

export interface ToolCallExtraction {
  /** Text that is not part of an accepted or still-pending tool block. */
  flushed: string;
  /** Calls parsed from complete blocks, in buffer order. */
  calls: ParsedCall[];
  /** Trailing text held back because it starts with a (partial) opener. */
  remaining: string;
}

interface OpenerSpec {
  open: string;
  close: string;
  parse: (block: string) => ParsedCall | null;
}

const OPENER_SPECS: ReadonlyArray<OpenerSpec> = [
  { open: XML_TOOL_OPEN, close: XML_TOOL_CLOSE, parse: parseXmlToolCall },
  { open: INVOKE_TOOL_OPEN, close: INVOKE_TOOL_CLOSE, parse: parseInvokeToolCall },
];

/**
 * Splits a streaming buffer into plain text, parsed calls, and an unfinished
 * tail.
 *
 * Complete blocks (opener with a later closer) are consumed earliest-first.
 * A parsed block whose arguments contain a placeholder is logged and passed
 * through verbatim in `flushed`; a block that cannot be parsed at all is
 * dropped from the output. After the last complete block, any text from a
 * complete or partial opener onward is returned as `remaining`.
 *
 * @param buffer - Text accumulated so far.
 * @returns The flushed text, accepted calls, and the held-back tail.
 */
export function extractToolCallBlocks(buffer: string): ToolCallExtraction {
  const calls: ParsedCall[] = [];
  let flushed = '';
  let pos = 0;

  while (pos < buffer.length) {
    // Pick the earliest opener that also has a closer after it.
    let next: { spec: OpenerSpec; openIdx: number; closeIdx: number } | null = null;
    for (const spec of OPENER_SPECS) {
      const openIdx = buffer.indexOf(spec.open, pos);
      if (openIdx === -1) continue;
      const closeIdx = buffer.indexOf(spec.close, openIdx);
      if (closeIdx === -1) continue;
      if (next === null || openIdx < next.openIdx) {
        next = { spec, openIdx, closeIdx };
      }
    }
    if (next === null) break;

    if (next.openIdx > pos) {
      flushed += buffer.slice(pos, next.openIdx);
    }

    const blockEnd = next.closeIdx + next.spec.close.length;
    const block = buffer.slice(next.openIdx, blockEnd);
    const parsed = next.spec.parse(block);
    if (parsed) {
      if (hasPlaceholderArgs(parsed.args)) {
        logRejectedPlaceholder(parsed);
        flushed += block; // rejected call: pass the markup through as text
      } else {
        calls.push(parsed);
      }
    }
    pos = blockEnd;
  }

  const tail = buffer.slice(pos);
  const partialIdx = partialXmlOpenerStart(tail);
  if (partialIdx === -1) {
    return { flushed: flushed + tail, calls, remaining: '' };
  }
  if (partialIdx > 0) {
    flushed += tail.slice(0, partialIdx);
  }
  return { flushed, calls, remaining: tail.slice(partialIdx) };
}