v1.10.5: inference XML tool-call fallback parser
Some local models (qwen3-coder via llama-swap) emit tool calls as inline XML inside delta.content rather than structured delta.tool_calls. streamCompletion now buffers delta.content, extracts complete <tool_call>...</tool_call> blocks via parseXmlToolCall, and pushes synthetic entries (id prefix xml_call_) into the existing toolCallsBuffer. Native JSON path unchanged — both coexist. Partial openers are held back so a tool tag never leaks to the chat mid-tag. Unclosed XML at end-of-stream is flushed as plain content (no silent drops).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -310,6 +310,70 @@ interface StreamOptions {
|
|||||||
temperature?: number;
|
temperature?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
||||||
|
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
||||||
|
// the structured delta.tool_calls field. The XML shape is:
|
||||||
|
// <tool_call>
|
||||||
|
// <function=NAME>
|
||||||
|
// <parameter=KEY>
|
||||||
|
// VALUE
|
||||||
|
// </parameter>
|
||||||
|
// ...more parameters...
|
||||||
|
// </function>
|
||||||
|
// </tool_call>
|
||||||
|
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
||||||
|
// streamCompletion buffers delta.content, extracts complete blocks, parses
|
||||||
|
// them via parseXmlToolCall, and pushes synthetic entries into the existing
|
||||||
|
// toolCallsBuffer alongside any native JSON-format tool calls.
|
||||||
|
// Literal opening tag of an inline XML tool-call block. partialXmlOpenerStart
// also holds back any strict prefix of this string found at the end of a chunk.
const XML_TOOL_OPEN = '<tool_call>';
|
||||||
|
// Literal closing tag that completes an inline XML tool-call block.
const XML_TOOL_CLOSE = '</tool_call>';
|
||||||
|
|
||||||
|
// Parse one complete <tool_call>…</tool_call> block into a tool name and an
// argument map. Returns null when the block has no usable <function=NAME> tag.
//
// Parameter values:
//   - Only the single line break that wraps VALUE (right after the
//     <parameter=KEY> tag and right before </parameter>) is removed. All other
//     whitespace is kept, so string arguments such as file contents or edit
//     snippets keep their indentation and trailing newlines.
//   - Each value is tried as JSON first so numbers, booleans, null, arrays and
//     objects come through typed; anything that is not valid JSON stays a
//     string.
//   - Numbers that JSON.parse cannot represent exactly (non-finite results, or
//     integers beyond Number.MAX_SAFE_INTEGER such as long numeric IDs) are
//     kept as their original text instead of being silently rounded.
//   - A later parameter with the same key overwrites an earlier one.
// Known limit: a value that itself contains the literal text `</parameter>`
// is cut at that point, because the body match is non-greedy.
function parseXmlToolCall(
  block: string,
): { name: string; args: Record<string, unknown> } | null {
  const nameMatch = /<function=([^>]+)>/.exec(block);
  const name = nameMatch?.[1]?.trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  // Non-greedy body so each <parameter=…>…</parameter> pair is matched
  // independently even when several appear in the same block.
  const paramRe = /<parameter=([^>]+)>([\s\S]*?)<\/parameter>/g;
  for (const m of block.matchAll(paramRe)) {
    const key = (m[1] ?? '').trim();
    if (!key) continue;

    // Strip exactly one leading and one trailing line break: that is the
    // framing around VALUE, not part of the value itself.
    const text = (m[2] ?? '').replace(/^\r?\n/, '').replace(/\r?\n$/, '');

    let value: unknown = text;
    try {
      // JSON.parse ignores surrounding whitespace, so typed values still
      // parse even though `text` is no longer fully trimmed.
      const parsed: unknown = JSON.parse(text);
      const lossyNumber =
        typeof parsed === 'number' &&
        (!Number.isFinite(parsed) ||
          (Number.isInteger(parsed) && !Number.isSafeInteger(parsed)));
      value = lossyNumber ? text.trim() : parsed;
    } catch {
      // Not JSON: keep the whitespace-preserving string.
    }
    args[key] = value;
  }
  return { name, args };
}
|
||||||
|
|
||||||
|
// Find where the caller must stop flushing `s` to the client because an
// unfinished <tool_call> opener starts there. Returns -1 when all of `s` is
// safe to flush.
//   - If `s` contains a whole `<tool_call>` opener, its index is returned; the
//     caller holds back everything from there until the closer arrives.
//   - Otherwise, if the text from the last `<` to the end of `s` is a strict
//     prefix of `<tool_call>` (e.g. `<tool_c`), the index of that `<` is
//     returned; the caller holds back just that tail.
// The first case relies on the caller having already removed every complete
// <tool_call>…</tool_call> pair, so any opener still present is unclosed.
function partialXmlOpenerStart(s: string): number {
  const openerAt = s.indexOf(XML_TOOL_OPEN);
  if (openerAt >= 0) return openerAt;

  const tailStart = s.lastIndexOf('<');
  if (tailStart < 0) return -1;

  const tail = s.slice(tailStart);
  const isStrictPrefix =
    tail.length < XML_TOOL_OPEN.length && XML_TOOL_OPEN.startsWith(tail);
  return isStrictPrefix ? tailStart : -1;
}
|
||||||
|
|
||||||
async function streamCompletion(
|
async function streamCompletion(
|
||||||
ctx: InferenceContext,
|
ctx: InferenceContext,
|
||||||
model: string,
|
model: string,
|
||||||
@@ -344,6 +408,10 @@ async function streamCompletion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let content = '';
|
let content = '';
|
||||||
|
// v1.10.5: holds delta.content bytes that may contain a partial XML tool
|
||||||
|
// call. Anything not part of a (possibly forming) <tool_call>…</tool_call>
|
||||||
|
// pair is flushed to content + onDelta as soon as we know it's safe.
|
||||||
|
let pendingBuffer = '';
|
||||||
let finishReason: string | null = null;
|
let finishReason: string | null = null;
|
||||||
let promptTokens: number | null = null;
|
let promptTokens: number | null = null;
|
||||||
let completionTokens: number | null = null;
|
let completionTokens: number | null = null;
|
||||||
@@ -377,8 +445,50 @@ async function streamCompletion(
|
|||||||
if (!choice) continue;
|
if (!choice) continue;
|
||||||
const delta = choice.delta ?? {};
|
const delta = choice.delta ?? {};
|
||||||
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
||||||
content += delta.content;
|
// v1.10.5 XML fallback. Append, then extract any complete tool_call
|
||||||
onDelta(delta.content);
|
// blocks before deciding what's safe to flush as visible content.
|
||||||
|
pendingBuffer += delta.content;
|
||||||
|
while (true) {
|
||||||
|
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
||||||
|
if (startIdx === -1) break;
|
||||||
|
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
||||||
|
if (closeIdx === -1) break;
|
||||||
|
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
||||||
|
const block = pendingBuffer.slice(startIdx, blockEnd);
|
||||||
|
// Any text before the opener is plain content — flush it now.
|
||||||
|
if (startIdx > 0) {
|
||||||
|
const before = pendingBuffer.slice(0, startIdx);
|
||||||
|
content += before;
|
||||||
|
onDelta(before);
|
||||||
|
}
|
||||||
|
const parsedCall = parseXmlToolCall(block);
|
||||||
|
if (parsedCall) {
|
||||||
|
const synthIdx = toolCallsBuffer.size;
|
||||||
|
toolCallsBuffer.set(synthIdx, {
|
||||||
|
id: `xml_call_${synthIdx}`,
|
||||||
|
name: parsedCall.name,
|
||||||
|
argsText: JSON.stringify(parsedCall.args),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// If parsing failed we still drop the block — emitting unparseable
|
||||||
|
// XML to the chat would look worse than silently swallowing it.
|
||||||
|
pendingBuffer = pendingBuffer.slice(blockEnd);
|
||||||
|
}
|
||||||
|
// After all complete blocks are out, hold back any (partial or full)
|
||||||
|
// unclosed opener; flush the rest.
|
||||||
|
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
||||||
|
if (partialIdx >= 0) {
|
||||||
|
if (partialIdx > 0) {
|
||||||
|
const flush = pendingBuffer.slice(0, partialIdx);
|
||||||
|
content += flush;
|
||||||
|
onDelta(flush);
|
||||||
|
}
|
||||||
|
pendingBuffer = pendingBuffer.slice(partialIdx);
|
||||||
|
} else if (pendingBuffer.length > 0) {
|
||||||
|
content += pendingBuffer;
|
||||||
|
onDelta(pendingBuffer);
|
||||||
|
pendingBuffer = '';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (Array.isArray(delta.tool_calls)) {
|
if (Array.isArray(delta.tool_calls)) {
|
||||||
for (const tc of delta.tool_calls) {
|
for (const tc of delta.tool_calls) {
|
||||||
@@ -393,6 +503,15 @@ async function streamCompletion(
|
|||||||
if (choice.finish_reason) finishReason = choice.finish_reason;
|
if (choice.finish_reason) finishReason = choice.finish_reason;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// v1.10.5: if the stream ended mid-XML (e.g. model truncated, no closer
|
||||||
|
// ever arrived), flush whatever was buffered as plain content so it isn't
|
||||||
|
// silently dropped. Better to show a stray `<tool_call>` than vanish text.
|
||||||
|
if (pendingBuffer.length > 0) {
|
||||||
|
content += pendingBuffer;
|
||||||
|
onDelta(pendingBuffer);
|
||||||
|
pendingBuffer = '';
|
||||||
|
}
|
||||||
|
|
||||||
const toolCalls: ToolCall[] = [];
|
const toolCalls: ToolCall[] = [];
|
||||||
for (const [, t] of [...toolCallsBuffer.entries()].sort(([a], [b]) => a - b)) {
|
for (const [, t] of [...toolCallsBuffer.entries()].sort(([a], [b]) => a - b)) {
|
||||||
let args: Record<string, unknown> = {};
|
let args: Record<string, unknown> = {};
|
||||||
|
|||||||
Reference in New Issue
Block a user