v1.13.16-xml-parser: Anthropic <invoke> support + unknown-tool recovery hints

Two-part fix for the model-emitted XML drift that the v1.13.15-codecontext-synth investigation surfaced (1 raw <invoke> leak observed in 190 qwen3.6 turns). qwen3.6-35b-a3b-mxfp4 drifts to the Anthropic format when prompted as an Architect-style agent because Claude Code documentation in its pre-training corpus uses that shape.

## Parser extension

xml-parser.ts now recognizes BOTH XML tool-call flavors:

- Qwen/Hermes: <tool_call><function=NAME>...<parameter=K>V</parameter>...</function></tool_call>
- Anthropic: <invoke name="NAME"><parameter name="K">V</parameter></invoke>

Both route through the same synthetic-id xml_call_${idx} ToolCall path. extractToolCallBlocks() and partialXmlOpenerStart() handle both openers (<tool_call> and <invoke...) so partial buffers don't get prematurely flushed during streaming.

The existing Qwen parser was tightened to tolerate whitespace around `=` (<function = name>, <parameter = key>...) so a stray space doesn't get absorbed into the function name. Name capture is non-whitespace, non-`>`.

## Unknown-tool recovery hint

New tool-suggestions.ts exports levenshtein() + suggestToolName() + formatUnknownToolError(). When tool-phase.ts:executeToolCall receives a toolCall.name that isn't in TOOLS_BY_NAME, the error returned to the model now includes a "Did you mean: X?" hint based on Levenshtein distance ≤3 or substring match against Object.keys(TOOLS_BY_NAME). This targets the qwen3.6 drift to read_file, for which the hint suggests view_file. The hint applies to all unknown tool names, not just <invoke>-derived ones — at the dispatch layer we no longer know which format produced the call, and the extra signal is harmless for Qwen-derived calls.

## Test coverage

xml-parser.test.ts: 46 tests, all green. Covers both parsers (well-formed, malformed, multi-parameter, nested-content), the partial-opener detector for both flavors, the unified extraction helper, and the unknown-tool error formatter.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 20:59:25 +00:00
parent 61308cf17c
commit 2e1a81de72
7 changed files with 604 additions and 71 deletions
--- a/apps/server/src/services/inference/stream-phase.ts
+++ b/apps/server/src/services/inference/stream-phase.ts
@@ -6,12 +6,9 @@ import type {
 import * as modelContext from '../model-context.js';
 import { toolJsonSchemas, type ToolJsonSchema } from '../tools.js';
 import type { OpenAiMessage } from './payload.js';
-import {
-  XML_TOOL_CLOSE,
-  XML_TOOL_OPEN,
-  parseXmlToolCall,
-  partialXmlOpenerStart,
-} from './xml-parser.js';
+// v1.13.16: extractToolCallBlocks replaces the inline opener-search loop and
+// recognizes both Qwen <tool_call> and Anthropic <invoke> markup in one pass.
+import { extractToolCallBlocks } from './xml-parser.js';
 import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
 import type {
  InferenceContext,
@@ -132,16 +129,24 @@ function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<type
 // v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
 // llama-swap) emit tool calls as inline XML inside delta.content rather than
 // the structured tool_calls field. We extract them out of the streamed text
-// before flushing it to the client, mirroring the pre-AI-SDK behavior.
+// before flushing it to the client.
 //
-// XML shape:
+// Qwen shape:
 //   <tool_call>
 //   <function=NAME>
 //   <parameter=KEY>VALUE</parameter>
 //   ...
 //   </function>
 //   </tool_call>
-// Multiple <tool_call> blocks may appear back-to-back; they never nest.
+//
+// v1.13.16: also recognize Anthropic <invoke> markup that qwen3.6-35b-a3b-mxfp4
+// drifts to (training-data residue from Claude Code documentation):
+//   <invoke name="NAME">
+//   <parameter name="KEY">VALUE</parameter>
+//   </invoke>
+// Both formats share the synthetic xml_call_${idx} ID space: idx is the
+// toolCalls length at push time, so IDs follow extraction order regardless
+// of format. Blocks may appear back-to-back in either format; they never nest.
 export async function streamCompletion(
  ctx: InferenceContext,
  model: string,
@@ -209,47 +214,24 @@ export async function streamCompletion(
    switch (part.type) {
      case 'text-delta': {
        pendingBuffer += part.text;
-        // Extract any complete <tool_call>...</tool_call> blocks before
-        // flushing visible text.
-        while (true) {
-          const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
-          if (startIdx === -1) break;
-          const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
-          if (closeIdx === -1) break;
-          const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
-          const block = pendingBuffer.slice(startIdx, blockEnd);
-          if (startIdx > 0) {
-            const before = pendingBuffer.slice(0, startIdx);
-            content += before;
-            onDelta(before);
-          }
-          const parsedCall = parseXmlToolCall(block);
-          if (parsedCall) {
-            const synthIdx = toolCalls.length;
-            toolCalls.push({
-              id: `xml_call_${synthIdx}`,
-              name: parsedCall.name,
-              args: parsedCall.args,
-            });
-          }
-          // Parse failures still drop the block — leaking <tool_call> XML to
-          // the chat would look worse than silently swallowing the bad block.
-          pendingBuffer = pendingBuffer.slice(blockEnd);
+        // v1.13.16: unified extraction. The helper finds the earliest-opening
+        // complete <tool_call> or <invoke> block, flushes prose between/around
+        // them, holds any partial opener for the next chunk, and silently
+        // drops blocks that fail to parse (matches pre-v1.13.16 behavior).
+        const extracted = extractToolCallBlocks(pendingBuffer);
+        if (extracted.flushed.length > 0) {
+          content += extracted.flushed;
+          onDelta(extracted.flushed);
        }
-        // Hold back any (partial or full) unclosed opener; flush the rest.
-        const partialIdx = partialXmlOpenerStart(pendingBuffer);
-        if (partialIdx >= 0) {
-          if (partialIdx > 0) {
-            const flush = pendingBuffer.slice(0, partialIdx);
-            content += flush;
-            onDelta(flush);
-          }
-          pendingBuffer = pendingBuffer.slice(partialIdx);
-        } else if (pendingBuffer.length > 0) {
-          content += pendingBuffer;
-          onDelta(pendingBuffer);
-          pendingBuffer = '';
+        for (const call of extracted.calls) {
+          const synthIdx = toolCalls.length;
+          toolCalls.push({
+            id: `xml_call_${synthIdx}`,
+            name: call.name,
+            args: call.args,
+          });
        }
+        pendingBuffer = extracted.remaining;
        break;
      }
      case 'tool-call': {