v1.13.16-xml-parser: Anthropic <invoke> support + unknown-tool recovery hints
Two-part fix for the model-emitted XML drift the v1.13.15-codecontext-synth
investigation surfaced (1 raw <invoke> leak observed out of 190 qwen3.6
turns — qwen3.6-35b-a3b-mxfp4 drifts to the Anthropic format when prompted
as an Architect-style agent because Claude Code documentation in its
pre-training corpus uses that shape).
## Parser extension
xml-parser.ts now recognizes BOTH XML tool-call flavors:
- Qwen/Hermes: <tool_call><function=NAME>...<parameter=K>V</parameter>...</function></tool_call>
- Anthropic: <invoke name="NAME"><parameter name="K">V</parameter></invoke>
Both route through the same synthetic-id xml_call_${idx} ToolCall path.
extractToolCallBlocks() and partialXmlOpenerStart() handle both openers
(<tool_call> and <invoke...) so partial buffers don't get prematurely
flushed during streaming.
The existing Qwen parser now tolerates whitespace around `=`
(<function = name>, <parameter = key>...), and name capture was tightened
to non-whitespace, non-`>` characters so a stray space doesn't get
absorbed into the function name.
## Unknown-tool recovery hint
New tool-suggestions.ts exports levenshtein() + suggestToolName() +
formatUnknownToolError(). When tool-phase.ts:executeToolCall receives a
toolCall.name that isn't in TOOLS_BY_NAME, the error returned to the
model now includes a "Did you mean: X?" hint based on Levenshtein
distance ≤3 or substring match against Object.keys(TOOLS_BY_NAME).
Targets the qwen3.6 drift toward read_file, where the intended hint is view_file. Applies to
all unknown tool names, not just <invoke>-derived ones — at the
dispatch layer we no longer know which format produced the call, and
the extra signal is harmless for Qwen-derived calls.
## Test coverage
xml-parser.test.ts: 46 tests, all green. Covers both parsers
(well-formed, malformed, multi-parameter, nested-content), the
partial-opener detector for both flavors, the unified extraction
helper, and the unknown-tool error formatter.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,12 +6,9 @@ import type {
|
||||
import * as modelContext from '../model-context.js';
|
||||
import { toolJsonSchemas, type ToolJsonSchema } from '../tools.js';
|
||||
import type { OpenAiMessage } from './payload.js';
|
||||
import {
|
||||
XML_TOOL_CLOSE,
|
||||
XML_TOOL_OPEN,
|
||||
parseXmlToolCall,
|
||||
partialXmlOpenerStart,
|
||||
} from './xml-parser.js';
|
||||
// v1.13.16: extractToolCallBlocks replaces the inline opener-search loop and
|
||||
// recognizes both Qwen <tool_call> and Anthropic <invoke> markup in one pass.
|
||||
import { extractToolCallBlocks } from './xml-parser.js';
|
||||
import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
|
||||
import type {
|
||||
InferenceContext,
|
||||
@@ -132,16 +129,24 @@ function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<type
|
||||
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
||||
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
||||
// the structured tool_calls field. We extract them out of the streamed text
|
||||
// before flushing it to the client, mirroring the pre-AI-SDK behavior.
|
||||
// before flushing it to the client.
|
||||
//
|
||||
// XML shape:
|
||||
// Qwen shape:
|
||||
// <tool_call>
|
||||
// <function=NAME>
|
||||
// <parameter=KEY>VALUE</parameter>
|
||||
// ...
|
||||
// </function>
|
||||
// </tool_call>
|
||||
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
||||
//
|
||||
// v1.13.16: also recognize Anthropic <invoke> markup that qwen3.6-35b-a3b-mxfp4
|
||||
// drifts to (training-data residue from Claude Code documentation):
|
||||
// <invoke name="NAME">
|
||||
// <parameter name="KEY">VALUE</parameter>
|
||||
// </invoke>
|
||||
// Both formats share the synthetic xml_call_${idx} ID space; the counter
|
||||
// increments in source order, whichever format each block uses. Multiple blocks may
|
||||
// appear back-to-back in either format and they never nest.
|
||||
export async function streamCompletion(
|
||||
ctx: InferenceContext,
|
||||
model: string,
|
||||
@@ -209,47 +214,24 @@ export async function streamCompletion(
|
||||
switch (part.type) {
|
||||
case 'text-delta': {
|
||||
pendingBuffer += part.text;
|
||||
// Extract any complete <tool_call>...</tool_call> blocks before
|
||||
// flushing visible text.
|
||||
while (true) {
|
||||
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
||||
if (startIdx === -1) break;
|
||||
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
||||
if (closeIdx === -1) break;
|
||||
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
||||
const block = pendingBuffer.slice(startIdx, blockEnd);
|
||||
if (startIdx > 0) {
|
||||
const before = pendingBuffer.slice(0, startIdx);
|
||||
content += before;
|
||||
onDelta(before);
|
||||
}
|
||||
const parsedCall = parseXmlToolCall(block);
|
||||
if (parsedCall) {
|
||||
const synthIdx = toolCalls.length;
|
||||
toolCalls.push({
|
||||
id: `xml_call_${synthIdx}`,
|
||||
name: parsedCall.name,
|
||||
args: parsedCall.args,
|
||||
});
|
||||
}
|
||||
// Parse failures still drop the block — leaking <tool_call> XML to
|
||||
// the chat would look worse than silently swallowing the bad block.
|
||||
pendingBuffer = pendingBuffer.slice(blockEnd);
|
||||
// v1.13.16: unified extraction. The helper finds the earliest-opening
|
||||
// complete <tool_call> or <invoke> block, flushes prose between/around
|
||||
// them, holds any partial opener for the next chunk, and silently
|
||||
// drops blocks that fail to parse (matches pre-v1.13.16 behavior).
|
||||
const extracted = extractToolCallBlocks(pendingBuffer);
|
||||
if (extracted.flushed.length > 0) {
|
||||
content += extracted.flushed;
|
||||
onDelta(extracted.flushed);
|
||||
}
|
||||
// Hold back any (partial or full) unclosed opener; flush the rest.
|
||||
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
||||
if (partialIdx >= 0) {
|
||||
if (partialIdx > 0) {
|
||||
const flush = pendingBuffer.slice(0, partialIdx);
|
||||
content += flush;
|
||||
onDelta(flush);
|
||||
}
|
||||
pendingBuffer = pendingBuffer.slice(partialIdx);
|
||||
} else if (pendingBuffer.length > 0) {
|
||||
content += pendingBuffer;
|
||||
onDelta(pendingBuffer);
|
||||
pendingBuffer = '';
|
||||
for (const call of extracted.calls) {
|
||||
const synthIdx = toolCalls.length;
|
||||
toolCalls.push({
|
||||
id: `xml_call_${synthIdx}`,
|
||||
name: call.name,
|
||||
args: call.args,
|
||||
});
|
||||
}
|
||||
pendingBuffer = extracted.remaining;
|
||||
break;
|
||||
}
|
||||
case 'tool-call': {
|
||||
|
||||
@@ -4,6 +4,12 @@ import { PathScopeError } from '../path_guard.js';
|
||||
import { TOOLS_BY_NAME } from '../tools.js';
|
||||
import { maybeFlagForCompaction } from './payload.js';
|
||||
import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
|
||||
// v1.13.16: richer unknown-tool error so the model can self-correct when it
|
||||
// drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
|
||||
// Applies to all unknown tool names, not just <invoke>-derived ones — at the
|
||||
// dispatch layer we no longer know which format produced the call, and the
|
||||
// extra signal is harmless for Qwen-derived calls.
|
||||
import { formatUnknownToolError } from './tool-suggestions.js';
|
||||
import type {
|
||||
InferenceContext,
|
||||
StreamResult,
|
||||
@@ -26,7 +32,11 @@ async function executeToolCall(
|
||||
): Promise<{ output: unknown; truncated: boolean; error?: string }> {
|
||||
const tool = TOOLS_BY_NAME[toolCall.name];
|
||||
if (!tool) {
|
||||
return { output: null, truncated: false, error: `unknown tool: ${toolCall.name}` };
|
||||
return {
|
||||
output: null,
|
||||
truncated: false,
|
||||
error: formatUnknownToolError(toolCall.name, Object.keys(TOOLS_BY_NAME)),
|
||||
};
|
||||
}
|
||||
const parsed = tool.inputSchema.safeParse(toolCall.args);
|
||||
if (!parsed.success) {
|
||||
|
||||
63
apps/server/src/services/inference/tool-suggestions.ts
Normal file
63
apps/server/src/services/inference/tool-suggestions.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
// v1.13.16: Levenshtein + suggestion + formatter for the unknown-tool error
|
||||
// returned to the model when a tool call (XML-extracted or not) references a name
|
||||
// that isn't in TOOLS_BY_NAME. The drift incident this targets: qwen3.6
|
||||
// emitting <invoke name="read_file"> from its Claude Code training residue
|
||||
// when BooCode's actual file-read tool is view_file. Hand-rolled distance
|
||||
// function — no new dep.
|
||||
|
||||
// Levenshtein edit distance between `a` and `b`, measured in UTF-16 code
// units: each insertion, deletion or substitution costs 1.
//
// Only the previous and the current DP row are kept, so memory grows with
// `b.length` instead of `a.length * b.length`.
export function levenshtein(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Row 0: turning the empty prefix of `a` into b[0..j) takes j insertions.
  let prev: number[] = Array.from({ length: b.length + 1 }, (_, j) => j);

  for (let i = 1; i <= a.length; i++) {
    // Column 0: turning a[0..i) into the empty string takes i deletions.
    const curr: number[] = [i];
    for (let j = 1; j <= b.length; j++) {
      const substitution = (prev[j - 1] ?? 0) + (a[i - 1] === b[j - 1] ? 0 : 1);
      const deletion = (prev[j] ?? 0) + 1;
      const insertion = (curr[j - 1] ?? 0) + 1;
      curr.push(Math.min(deletion, insertion, substitution));
    }
    prev = curr;
  }

  return prev[b.length] ?? 0;
}
|
||||
|
||||
// Picks the closest known tool name for an unrecognized `name`, or returns
// null when nothing is close enough.
//
// A candidate qualifies when its case-insensitive Levenshtein distance to
// `name` is at most `maxDistance` (default 3, the v1.13.16 threshold), or when
// either lower-cased name contains the other. Among qualifying candidates the
// smallest distance wins; equal distances are broken by localeCompare on the
// candidate's original spelling.
//
// Surrounding whitespace in `name` is ignored. An empty name (or an empty
// candidate) never matches: the empty string is a substring of every string,
// so it would otherwise qualify against everything.
//
// NOTE(review): levenshtein('read_file', 'view_file') is 4 and neither name
// contains the other, so that pair does not qualify at the default threshold.
// Confirm whether the default should be widened for the read_file drift case.
export function suggestToolName(
  name: string,
  available: readonly string[],
  maxDistance = 3,
): string | null {
  const wanted = name.trim().toLowerCase();
  if (wanted === '') return null;

  let best: { name: string; dist: number } | null = null;
  for (const tool of available) {
    const candidate = tool.toLowerCase();
    if (candidate === '') continue;

    const dist = levenshtein(wanted, candidate);
    const contains = candidate.includes(wanted) || wanted.includes(candidate);
    if (dist > maxDistance && !contains) continue;

    if (
      best === null ||
      dist < best.dist ||
      (dist === best.dist && tool.localeCompare(best.name) < 0)
    ) {
      best = { name: tool, dist };
    }
  }
  return best?.name ?? null;
}
|
||||
|
||||
// Builds the error text returned to the model for a tool name that is not
// registered: the unknown name, the available names in sorted order, and a
// "Did you mean" hint when suggestToolName finds a candidate.
export function formatUnknownToolError(
  name: string,
  available: readonly string[],
): string {
  // Copy before sorting so the caller's array is left untouched.
  const sorted = [...available].sort();
  const suggestion = suggestToolName(name, sorted);
  const list = sorted.join(', ');
  const tail = suggestion ? ` Did you mean: ${suggestion}?` : '';
  return `Tool '${name}' not found. Available tools: [${list}].${tail}`;
}
|
||||
@@ -1,23 +1,42 @@
|
||||
// v1.10.5: XML-tag tool-call fallback. Some models emit
|
||||
// <tool_call><function=foo><parameter=key>value</parameter></function></tool_call>
|
||||
// in plain content instead of using the OpenAI tool_calls JSON channel.
|
||||
// The streaming loop in inference.ts extracts these blocks via these helpers.
|
||||
// The streaming loop in stream-phase.ts extracts these blocks via these helpers.
|
||||
//
|
||||
// v1.13.16: also recognize Anthropic <invoke name="..."><parameter name="...">
|
||||
// markup. qwen3.6-35b-a3b-mxfp4 drifts to this format when prompted as an
|
||||
// "Architect"-style agent because Claude Code documentation in its
|
||||
// pre-training data uses this shape. Both formats route through the same
|
||||
// synthetic ToolCall path with shared xml_call_${idx} IDs; downstream
|
||||
// dispatch handles unknown tool names with a richer error (see
|
||||
// tool-suggestions.ts + tool-phase.ts).
|
||||
|
||||
export const XML_TOOL_OPEN = '<tool_call>';
|
||||
export const XML_TOOL_CLOSE = '</tool_call>';
|
||||
|
||||
export function parseXmlToolCall(
|
||||
block: string,
|
||||
): { name: string; args: Record<string, unknown> } | null {
|
||||
const nameMatch = block.match(/<function=([^>]+)>/);
|
||||
// v1.13.16: Anthropic <invoke> opener is matched by prefix (not the full
|
||||
// `<invoke ...>` tag) because attributes follow. Closer is the literal tag.
|
||||
export const INVOKE_TOOL_OPEN = '<invoke';
|
||||
export const INVOKE_TOOL_CLOSE = '</invoke>';
|
||||
|
||||
// A tool call recovered from inline XML markup, before it is given a
// synthetic call id.
export interface ParsedCall {
  // Tool name as written in the markup, trimmed.
  name: string;
  // Parameter values keyed by parameter name; each value is JSON-decoded when
  // it parses as JSON and kept as the raw trimmed string otherwise.
  args: Record<string, unknown>;
}
|
||||
|
||||
// v1.10.5: Qwen-flavor parser. Relaxed in v1.13.16 to tolerate whitespace
|
||||
// around `=` (e.g. `<function = view_file>`). Name capture is non-whitespace,
|
||||
// non-`>` so a stray space doesn't get absorbed into the function name.
|
||||
const QWEN_FUNCTION_RE = /<function\s*=\s*([^>\s]+)\s*>/;
|
||||
const QWEN_PARAM_RE = /<parameter\s*=\s*([^>\s]+)\s*>([\s\S]*?)<\/parameter>/g;
|
||||
|
||||
export function parseXmlToolCall(block: string): ParsedCall | null {
|
||||
const nameMatch = block.match(QWEN_FUNCTION_RE);
|
||||
if (!nameMatch || !nameMatch[1]) return null;
|
||||
const name = nameMatch[1].trim();
|
||||
if (!name) return null;
|
||||
const args: Record<string, unknown> = {};
|
||||
// Non-greedy body so each <parameter=…>…</parameter> pair is matched
|
||||
// independently even when multiple appear in the same block.
|
||||
const paramRe = /<parameter=([^>]+)>([\s\S]*?)<\/parameter>/g;
|
||||
for (const m of block.matchAll(paramRe)) {
|
||||
for (const m of block.matchAll(QWEN_PARAM_RE)) {
|
||||
const key = (m[1] ?? '').trim();
|
||||
if (!key) continue;
|
||||
const raw = (m[2] ?? '').trim();
|
||||
@@ -30,24 +49,121 @@ export function parseXmlToolCall(
|
||||
return { name, args };
|
||||
}
|
||||
|
||||
// v1.13.16: Anthropic-flavor parser. Same JSON-parse-with-string-fallback
// shape as parseXmlToolCall so the dispatch layer doesn't need to care which
// flavor produced the call.
//
// Both patterns accept a double- or single-quoted attribute value: capture
// group 2 holds the double-quoted text, group 3 the single-quoted text.
const INVOKE_NAME_RE =
  /<invoke\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>/;
// Non-greedy body (group 4) so each <parameter …>…</parameter> pair is matched
// on its own when several appear in one block.
const INVOKE_PARAM_RE =
  /<parameter\s+name\s*=\s*("([^"]*)"|'([^']*)')\s*>([\s\S]*?)<\/parameter>/g;

// Parses one <invoke name="…">…</invoke> block.
//
// Returns null when the block has no <invoke name=…> opener or the name is
// empty after trimming. Parameters with an empty name are skipped; when a
// parameter name repeats, the last value wins.
export function parseInvokeToolCall(block: string): ParsedCall | null {
  const nameMatch = block.match(INVOKE_NAME_RE);
  if (!nameMatch) return null;
  const name = (nameMatch[2] ?? nameMatch[3] ?? '').trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  for (const m of block.matchAll(INVOKE_PARAM_RE)) {
    const key = (m[2] ?? m[3] ?? '').trim();
    if (!key) continue;
    const raw = (m[4] ?? '').trim();
    try {
      // Numbers, booleans, arrays and objects arrive as JSON text.
      args[key] = JSON.parse(raw);
    } catch {
      // Anything that is not valid JSON (paths, prose, code) stays a string.
      args[key] = raw;
    }
  }
  return { name, args };
}
|
||||
|
||||
// Locate the first character that begins (or completely contains) an
|
||||
// unfinished <tool_call> opener in `s`. Returns -1 when `s` can be flushed
|
||||
// to the client in full without risking a partial tag leak.
|
||||
// Case 1: a full `<tool_call>` opener with no matching closer — caller
|
||||
// must keep everything from that index forward until the next
|
||||
// chunk arrives with the closer.
|
||||
// Case 2: `s` ends with a strict prefix of `<tool_call>` (e.g. `<tool_c`).
|
||||
// Caller must keep just that suffix in the buffer.
|
||||
// unfinished opener (either flavor) in `s`. Returns -1 when `s` can be
|
||||
// flushed to the client in full without risking a partial tag leak.
|
||||
// Case 1: a full opener (`<tool_call>` or `<invoke`) with no matching
|
||||
// closer — caller must keep everything from that index forward
|
||||
// until the next chunk arrives with the closer.
|
||||
// Case 2: `s` ends with a strict prefix of either opener (e.g. `<tool_c`
|
||||
// or `<invo`). Caller must keep just that suffix in the buffer.
|
||||
// Note: case 1 assumes the calling loop already extracted every complete
|
||||
// <tool_call>…</tool_call> pair before reaching this check.
|
||||
// block before reaching this check.
|
||||
const ALL_OPENERS = [XML_TOOL_OPEN, INVOKE_TOOL_OPEN] as const;
|
||||
|
||||
export function partialXmlOpenerStart(s: string): number {
|
||||
const fullOpener = s.indexOf(XML_TOOL_OPEN);
|
||||
if (fullOpener !== -1) return fullOpener;
|
||||
let earliest = -1;
|
||||
for (const op of ALL_OPENERS) {
|
||||
const idx = s.indexOf(op);
|
||||
if (idx === -1) continue;
|
||||
if (earliest === -1 || idx < earliest) earliest = idx;
|
||||
}
|
||||
if (earliest !== -1) return earliest;
|
||||
const lastLt = s.lastIndexOf('<');
|
||||
if (lastLt === -1) return -1;
|
||||
const suffix = s.slice(lastLt);
|
||||
if (XML_TOOL_OPEN.startsWith(suffix) && suffix.length < XML_TOOL_OPEN.length) {
|
||||
return lastLt;
|
||||
for (const op of ALL_OPENERS) {
|
||||
if (op.startsWith(suffix) && suffix.length < op.length) return lastLt;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// v1.13.16: unified extraction. Replaces the inline loop that used to live
|
||||
// in stream-phase.ts. Pure function — returns the visible text to flush,
|
||||
// the parsed tool-call payloads in source order, and the buffer remainder
|
||||
// to retain for the next streaming chunk. Parse failures are silently
|
||||
// dropped (matches the pre-v1.13.16 behavior — leaking partial XML to the
|
||||
// chat looks worse than swallowing a bad block).
|
||||
// Result of one extractToolCallBlocks pass over a streaming buffer.
export interface ToolCallExtraction {
  // Visible text that is safe to send to the client now.
  flushed: string;
  // Tool calls parsed from complete blocks, in source order.
  calls: ParsedCall[];
  // Buffer tail to retain for the next streaming chunk (an unclosed or
  // partially received opener); empty when nothing needs to be held back.
  remaining: string;
}
|
||||
|
||||
// Describes one supported tool-call markup flavor.
interface OpenerSpec {
  // Text that starts a block; may be a tag prefix rather than a full tag.
  open: string;
  // Literal closing tag that ends a block.
  close: string;
  // Turns a complete block (opener through closer) into a call, or returns
  // null when the block is malformed.
  parse: (block: string) => ParsedCall | null;
}
|
||||
|
||||
// One entry per supported markup flavor: how its blocks open and close, and
// which parser turns a complete block into a ParsedCall.
const OPENER_SPECS: ReadonlyArray<OpenerSpec> = [
  { open: XML_TOOL_OPEN, close: XML_TOOL_CLOSE, parse: parseXmlToolCall },
  { open: INVOKE_TOOL_OPEN, close: INVOKE_TOOL_CLOSE, parse: parseInvokeToolCall },
];
|
||||
|
||||
// Splits a streaming text buffer into visible prose, completed tool calls and
// a held-back remainder.
//
// Complete blocks of either flavor are consumed in source order. Text between
// blocks is appended to `flushed`; a block whose parser returns null is
// dropped without being flushed. Text after the last complete block is flushed
// as well, except for an unclosed or partially received opener, which is
// returned in `remaining` so it can be retained until the next chunk arrives.
export function extractToolCallBlocks(buffer: string): ToolCallExtraction {
  let flushed = '';
  const calls: ParsedCall[] = [];
  let pos = 0;

  while (pos < buffer.length) {
    // Earliest opener of any flavor at or after `pos` that already has a closer.
    let next: { spec: OpenerSpec; openIdx: number; closeIdx: number } | null = null;
    for (const spec of OPENER_SPECS) {
      const openIdx = buffer.indexOf(spec.open, pos);
      if (openIdx === -1) continue;
      const closeIdx = buffer.indexOf(spec.close, openIdx);
      if (closeIdx === -1) continue;
      if (next === null || openIdx < next.openIdx) {
        next = { spec, openIdx, closeIdx };
      }
    }
    if (next === null) break;

    // Prose that precedes the block is visible text.
    if (next.openIdx > pos) {
      flushed += buffer.slice(pos, next.openIdx);
    }
    const blockEnd = next.closeIdx + next.spec.close.length;
    const block = buffer.slice(next.openIdx, blockEnd);
    const parsed = next.spec.parse(block);
    // Unparseable blocks are skipped rather than shown as raw markup.
    if (parsed) calls.push(parsed);
    pos = blockEnd;
  }

  // No complete block is left; decide how much of the tail may be shown.
  const tail = buffer.slice(pos);
  const partialIdx = partialXmlOpenerStart(tail);
  if (partialIdx === -1) {
    flushed += tail;
    return { flushed, calls, remaining: '' };
  }
  if (partialIdx > 0) {
    flushed += tail.slice(0, partialIdx);
  }
  return { flushed, calls, remaining: tail.slice(partialIdx) };
}
|
||||
|
||||
Reference in New Issue
Block a user