v1.13.16-xml-parser: Anthropic <invoke> support + unknown-tool recovery hints
Two-part fix for the model-emitted XML drift the v1.13.15-codecontext-synth
investigation surfaced (1 raw <invoke> leak observed out of 190 qwen3.6
turns — qwen3.6-35b-a3b-mxfp4 drifts to the Anthropic format when prompted
as an Architect-style agent because Claude Code documentation in its
pre-training corpus uses that shape).
## Parser extension
xml-parser.ts now recognizes BOTH XML tool-call flavors:
- Qwen/Hermes: <tool_call><function=NAME>...<parameter=K>V</parameter>...</function></tool_call>
- Anthropic: <invoke name="NAME"><parameter name="K">V</parameter></invoke>
Both route through the same synthetic-id xml_call_${idx} ToolCall path.
extractToolCallBlocks() and partialXmlOpenerStart() handle both openers
(<tool_call> and <invoke...) so partial buffers don't get prematurely
flushed during streaming.
The existing Qwen parser was tightened to tolerate whitespace around `=`
(<function = name>, <parameter = key>...) so a stray space doesn't get
absorbed into the function name. Name capture is non-whitespace,
non-`>`.
## Unknown-tool recovery hint
New tool-suggestions.ts exports levenshtein() + suggestToolName() +
formatUnknownToolError(). When tool-phase.ts:executeToolCall receives a
toolCall.name that isn't in TOOLS_BY_NAME, the error returned to the
model now includes a "Did you mean: X?" hint based on Levenshtein
distance ≤3 or substring match against Object.keys(TOOLS_BY_NAME).
Targets the qwen3.6 drift to read_file → suggest view_file. Applies to
all unknown tool names, not just <invoke>-derived ones — at the
dispatch layer we no longer know which format produced the call, and
the extra signal is harmless for Qwen-derived calls.
## Test coverage
xml-parser.test.ts: 46 tests, all green. Covers both parsers
(well-formed, malformed, multi-parameter, nested-content), the
partial-opener detector for both flavors, the unified extraction
helper, and the unknown-tool error formatter.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,12 +6,9 @@ import type {
|
||||
import * as modelContext from '../model-context.js';
|
||||
import { toolJsonSchemas, type ToolJsonSchema } from '../tools.js';
|
||||
import type { OpenAiMessage } from './payload.js';
|
||||
import {
|
||||
XML_TOOL_CLOSE,
|
||||
XML_TOOL_OPEN,
|
||||
parseXmlToolCall,
|
||||
partialXmlOpenerStart,
|
||||
} from './xml-parser.js';
|
||||
// v1.13.16: extractToolCallBlocks replaces the inline opener-search loop and
|
||||
// recognizes both Qwen <tool_call> and Anthropic <invoke> markup in one pass.
|
||||
import { extractToolCallBlocks } from './xml-parser.js';
|
||||
import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
|
||||
import type {
|
||||
InferenceContext,
|
||||
@@ -132,16 +129,24 @@ function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<type
|
||||
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
||||
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
||||
// the structured tool_calls field. We extract them out of the streamed text
|
||||
// before flushing it to the client, mirroring the pre-AI-SDK behavior.
|
||||
// before flushing it to the client.
|
||||
//
|
||||
// XML shape:
|
||||
// Qwen shape:
|
||||
// <tool_call>
|
||||
// <function=NAME>
|
||||
// <parameter=KEY>VALUE</parameter>
|
||||
// ...
|
||||
// </function>
|
||||
// </tool_call>
|
||||
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
||||
//
|
||||
// v1.13.16: also recognize Anthropic <invoke> markup that qwen3.6-35b-a3b-mxfp4
|
||||
// drifts to (training-data residue from Claude Code documentation):
|
||||
// <invoke name="NAME">
|
||||
// <parameter name="KEY">VALUE</parameter>
|
||||
// </invoke>
|
||||
// Both formats share the synthetic xml_call_${idx} ID space; the counter
|
||||
// increments across whichever opener appears first. Multiple blocks may
|
||||
// appear back-to-back in either format and they never nest.
|
||||
export async function streamCompletion(
|
||||
ctx: InferenceContext,
|
||||
model: string,
|
||||
@@ -209,47 +214,24 @@ export async function streamCompletion(
|
||||
switch (part.type) {
|
||||
case 'text-delta': {
|
||||
pendingBuffer += part.text;
|
||||
// Extract any complete <tool_call>...</tool_call> blocks before
|
||||
// flushing visible text.
|
||||
while (true) {
|
||||
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
||||
if (startIdx === -1) break;
|
||||
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
||||
if (closeIdx === -1) break;
|
||||
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
||||
const block = pendingBuffer.slice(startIdx, blockEnd);
|
||||
if (startIdx > 0) {
|
||||
const before = pendingBuffer.slice(0, startIdx);
|
||||
content += before;
|
||||
onDelta(before);
|
||||
}
|
||||
const parsedCall = parseXmlToolCall(block);
|
||||
if (parsedCall) {
|
||||
const synthIdx = toolCalls.length;
|
||||
toolCalls.push({
|
||||
id: `xml_call_${synthIdx}`,
|
||||
name: parsedCall.name,
|
||||
args: parsedCall.args,
|
||||
});
|
||||
}
|
||||
// Parse failures still drop the block — leaking <tool_call> XML to
|
||||
// the chat would look worse than silently swallowing the bad block.
|
||||
pendingBuffer = pendingBuffer.slice(blockEnd);
|
||||
// v1.13.16: unified extraction. The helper finds the earliest-opening
|
||||
// complete <tool_call> or <invoke> block, flushes prose between/around
|
||||
// them, holds any partial opener for the next chunk, and silently
|
||||
// drops blocks that fail to parse (matches pre-v1.13.16 behavior).
|
||||
const extracted = extractToolCallBlocks(pendingBuffer);
|
||||
if (extracted.flushed.length > 0) {
|
||||
content += extracted.flushed;
|
||||
onDelta(extracted.flushed);
|
||||
}
|
||||
// Hold back any (partial or full) unclosed opener; flush the rest.
|
||||
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
||||
if (partialIdx >= 0) {
|
||||
if (partialIdx > 0) {
|
||||
const flush = pendingBuffer.slice(0, partialIdx);
|
||||
content += flush;
|
||||
onDelta(flush);
|
||||
}
|
||||
pendingBuffer = pendingBuffer.slice(partialIdx);
|
||||
} else if (pendingBuffer.length > 0) {
|
||||
content += pendingBuffer;
|
||||
onDelta(pendingBuffer);
|
||||
pendingBuffer = '';
|
||||
for (const call of extracted.calls) {
|
||||
const synthIdx = toolCalls.length;
|
||||
toolCalls.push({
|
||||
id: `xml_call_${synthIdx}`,
|
||||
name: call.name,
|
||||
args: call.args,
|
||||
});
|
||||
}
|
||||
pendingBuffer = extracted.remaining;
|
||||
break;
|
||||
}
|
||||
case 'tool-call': {
|
||||
|
||||
Reference in New Issue
Block a user