// boocode/apps/server/src/services/__tests__/xml-parser.test.ts

// v1.13.16: covers the Qwen/Hermes <tool_call> parser, the new Anthropic
// <invoke> parser, the partial-opener detector for both flavors, the unified
// extraction helper, the tool-name suggestion helpers (levenshtein,
// suggestToolName), and the unknown-tool error formatter that downstream
// dispatch uses to give the model a recovery hint when it drifts to a
// Claude Code tool name like read_file instead of BooCode's view_file.

import { describe, expect, it } from 'vitest';
import {
  parseXmlToolCall,
  parseInvokeToolCall,
  partialXmlOpenerStart,
  extractToolCallBlocks,
  XML_TOOL_OPEN,
  XML_TOOL_CLOSE,
  INVOKE_TOOL_OPEN,
  INVOKE_TOOL_CLOSE,
} from '../inference/xml-parser.js';
import {
  levenshtein,
  suggestToolName,
  formatUnknownToolError,
} from '../inference/tool-suggestions.js';

describe('parseXmlToolCall (Qwen/Hermes <tool_call>)', () => {
  // Several cases below are expected to parse to this same view_file call,
  // so the expectation is declared once and reused.
  const viewFileCall = { name: 'view_file', args: { path: '/tmp/foo' } };

  it('parses a well-formed single-parameter call', () => {
    const parsed = parseXmlToolCall(
      '<tool_call><function=view_file><parameter=path>/tmp/foo</parameter></function></tool_call>',
    );
    expect(parsed).toEqual(viewFileCall);
  });

  it('parses multi-parameter call', () => {
    const parsed = parseXmlToolCall(
      '<tool_call><function=grep><parameter=pattern>foo</parameter><parameter=path>src/</parameter></function></tool_call>',
    );
    expect(parsed).toEqual({ name: 'grep', args: { pattern: 'foo', path: 'src/' } });
  });

  it('JSON-parses numeric parameter values', () => {
    // The parameter text "42" is expected back as the number 42, not a string.
    const parsed = parseXmlToolCall(
      '<tool_call><function=foo><parameter=count>42</parameter></function></tool_call>',
    );
    expect(parsed).toEqual({ name: 'foo', args: { count: 42 } });
  });

  it('tolerates whitespace around = in function (v1.13.16 tightening)', () => {
    // Note the spaces in "<function = view_file>".
    const parsed = parseXmlToolCall(
      '<tool_call><function = view_file><parameter=path>/tmp/foo</parameter></function></tool_call>',
    );
    expect(parsed).toEqual(viewFileCall);
  });

  it('tolerates whitespace around = in parameter (v1.13.16 tightening)', () => {
    // Note the spaces in "<parameter = path>".
    const parsed = parseXmlToolCall(
      '<tool_call><function=view_file><parameter = path>/tmp/foo</parameter></function></tool_call>',
    );
    expect(parsed).toEqual(viewFileCall);
  });

  it('returns null when function name is missing', () => {
    // No <function=...> wrapper at all: only a parameter inside <tool_call>.
    const parsed = parseXmlToolCall('<tool_call><parameter=path>/tmp/foo</parameter></tool_call>');
    expect(parsed).toBeNull();
  });
});

describe('parseInvokeToolCall (Anthropic <invoke>) — v1.13.16', () => {
  // Expected parse result for the recurring view_file fixture.
  const viewFileCall = { name: 'view_file', args: { path: '/tmp/foo' } };

  // Parses `markup` and asserts deep equality with `expected`.
  const expectParsed = (markup: string, expected: unknown): void => {
    expect(parseInvokeToolCall(markup)).toEqual(expected);
  };

  // Spec case 1
  it('parses a well-formed single-parameter call (spec case 1)', () => {
    expectParsed(
      '<invoke name="view_file"><parameter name="path">/tmp/foo</parameter></invoke>',
      viewFileCall,
    );
  });

  // Spec case 2
  it('parses a multi-parameter call (spec case 2)', () => {
    expectParsed(
      '<invoke name="grep"><parameter name="pattern">foo</parameter><parameter name="path">src/</parameter></invoke>',
      { name: 'grep', args: { pattern: 'foo', path: 'src/' } },
    );
  });

  // Spec case 3
  it('tolerates newlines and spaces in attributes (spec case 3)', () => {
    // The line breaks and indentation inside this literal are part of the
    // fixture; do not reformat it.
    const sprawling = `<invoke
      name="view_file"
    >
      <parameter
        name="path"
      >/tmp/foo</parameter>
    </invoke>`;
    expectParsed(sprawling, viewFileCall);
  });

  // Spec case 4 (parser portion — the not-found enrichment is tested below)
  it('parses a call whose name is not a registered BooCode tool (spec case 4)', () => {
    expectParsed(
      '<invoke name="read_file"><parameter name="path">/tmp/foo</parameter></invoke>',
      { name: 'read_file', args: { path: '/tmp/foo' } },
    );
  });

  it('supports single-quoted attribute values', () => {
    expectParsed(
      "<invoke name='view_file'><parameter name='path'>/tmp/foo</parameter></invoke>",
      viewFileCall,
    );
  });

  it('JSON-parses numeric parameter values', () => {
    // The parameter text "42" is expected back as the number 42.
    expectParsed(
      '<invoke name="foo"><parameter name="count">42</parameter></invoke>',
      { name: 'foo', args: { count: 42 } },
    );
  });

  it('tolerates spaces around = inside name attribute', () => {
    expectParsed(
      '<invoke name = "view_file"><parameter name = "path">/tmp/foo</parameter></invoke>',
      viewFileCall,
    );
  });

  it('returns null when name attribute is missing', () => {
    const parsed = parseInvokeToolCall('<invoke><parameter name="path">/tmp/foo</parameter></invoke>');
    expect(parsed).toBeNull();
  });

  it('returns null when name attribute is empty', () => {
    const parsed = parseInvokeToolCall('<invoke name=""><parameter name="path">/tmp/foo</parameter></invoke>');
    expect(parsed).toBeNull();
  });

  it('exports the expected delimiters', () => {
    // [exported constant, literal it must equal], checked in this order.
    const delimiters: ReadonlyArray<readonly [string, string]> = [
      [INVOKE_TOOL_OPEN, '<invoke'],
      [INVOKE_TOOL_CLOSE, '</invoke>'],
      [XML_TOOL_OPEN, '<tool_call>'],
      [XML_TOOL_CLOSE, '</tool_call>'],
    ];
    for (const [actual, literal] of delimiters) {
      expect(actual).toBe(literal);
    }
  });
});

describe('partialXmlOpenerStart (v1.13.16 — both flavors)', () => {
  // Table of scenarios: a test title plus the [buffer, expected index] pairs
  // asserted under it. -1 is the expected result when the buffer holds no
  // opener and no opener prefix.
  const scenarios: ReadonlyArray<{
    title: string;
    probes: ReadonlyArray<readonly [string, number]>;
  }> = [
    { title: 'returns -1 when the buffer is empty', probes: [['', -1]] },
    {
      title: 'returns -1 when the buffer has no openers',
      probes: [['plain prose, no markup', -1]],
    },
    {
      title: 'returns the index of a complete <tool_call> opener (existing)',
      probes: [['prose <tool_call>more', 6]],
    },
    {
      title: 'returns the index of a complete <invoke opener (v1.13.16)',
      probes: [['prose <invoke name=', 6]],
    },
    {
      title: 'holds a partial <tool_ prefix at end of buffer',
      probes: [['text <tool_', 5]],
    },
    {
      title: 'holds a partial <invo prefix at end of buffer (v1.13.16)',
      probes: [['text <invo', 5]],
    },
    { title: 'holds a bare < at end of buffer', probes: [['text <', 5]] },
    {
      title: 'returns -1 when < is followed by non-opener text',
      probes: [['text <unknown>', -1]],
    },
    {
      // Both orderings: whichever opener comes first sits at index 4.
      title: 'returns the earliest opener when both flavors are present',
      probes: [
        ['xxx <tool_call>YYY <invoke>', 4],
        ['xxx <invoke>YYY <tool_call>', 4],
      ],
    },
  ];

  // Register one test per scenario, asserting its probes in listed order.
  for (const { title, probes } of scenarios) {
    it(title, () => {
      for (const [buffer, expectedIndex] of probes) {
        expect(partialXmlOpenerStart(buffer)).toBe(expectedIndex);
      }
    });
  }
});

describe('extractToolCallBlocks (v1.13.16 — unified extraction)', () => {
  // The call most fixtures in this suite are expected to yield.
  const viewTmpFoo = { name: 'view_file', args: { path: '/tmp/foo' } };
  // An <invoke> block that is still missing its closing tag (spec case 5).
  const unclosedInvoke = '<invoke name="view_file"><parameter name="path">/tmp/foo</parameter>';

  // Spec case 1 (extraction-level)
  it('extracts a single <invoke> block (spec case 1)', () => {
    const { calls, flushed, remaining } = extractToolCallBlocks(
      '<invoke name="view_file"><parameter name="path">/tmp/foo</parameter></invoke>',
    );
    expect(calls).toEqual([viewTmpFoo]);
    expect(flushed).toBe('');
    expect(remaining).toBe('');
  });

  // Spec case 5: opener arrives in one chunk, closer in the next.
  it('holds the partial <invoke> chunk when the closer has not arrived (spec case 5, first chunk)', () => {
    const { calls, flushed, remaining } = extractToolCallBlocks(unclosedInvoke);
    expect(calls).toEqual([]);
    expect(flushed).toBe('');
    // Nothing is emitted yet; the whole chunk is carried over.
    expect(remaining).toBe(unclosedInvoke);
  });

  it('extracts the block once the closer arrives in a later chunk (spec case 5, completion)', () => {
    // Feed the carried-over text back in with the closer appended.
    const carried = extractToolCallBlocks(unclosedInvoke).remaining;
    const { calls, flushed, remaining } = extractToolCallBlocks(carried + '</invoke>');
    expect(calls).toEqual([viewTmpFoo]);
    expect(flushed).toBe('');
    expect(remaining).toBe('');
  });

  // Spec case 6: prose interleaving
  it('flushes prose around a recognized block but not the markup itself (spec case 6)', () => {
    const { calls, flushed, remaining } = extractToolCallBlocks(
      'I will read the file.\n<invoke name="view_file"><parameter name="path">/tmp/foo</parameter></invoke>\nThanks.',
    );
    expect(calls).toEqual([viewTmpFoo]);
    expect(flushed).toBe('I will read the file.\n\nThanks.');
    expect(remaining).toBe('');
  });

  // Spec case 7 regression
  it('extracts a <tool_call> Qwen block alongside the new code path (spec case 7 regression)', () => {
    const { calls, flushed, remaining } = extractToolCallBlocks(
      '<tool_call><function=view_file><parameter=path>/tmp/foo</parameter></function></tool_call>',
    );
    expect(calls).toEqual([viewTmpFoo]);
    expect(flushed).toBe('');
    expect(remaining).toBe('');
  });

  it('extracts mixed-format blocks in source order (hand-back: shared counter)', () => {
    // One <invoke> block, some prose, then one <tool_call> block.
    const pieces = [
      '<invoke name="view_file"><parameter name="path">/a</parameter></invoke>',
      ' middle ',
      '<tool_call><function=grep><parameter=pattern>foo</parameter></function></tool_call>',
    ];
    const { calls, flushed, remaining } = extractToolCallBlocks(pieces.join(''));
    expect(calls).toEqual([
      { name: 'view_file', args: { path: '/a' } },
      { name: 'grep', args: { pattern: 'foo' } },
    ]);
    expect(flushed).toBe(' middle ');
    expect(remaining).toBe('');
  });

  it('drops a malformed <invoke> block silently (matches existing <tool_call> behavior)', () => {
    // The <invoke> here has no name attribute.
    const { calls, flushed, remaining } = extractToolCallBlocks(
      'prose <invoke><parameter name="path">/a</parameter></invoke> trailing',
    );
    expect(calls).toEqual([]);
    expect(flushed).toBe('prose  trailing');
    expect(remaining).toBe('');
  });

  it('holds a tail with a fresh partial opener after extracting earlier complete blocks', () => {
    const { calls, flushed, remaining } = extractToolCallBlocks(
      '<invoke name="view_file"><parameter name="path">/a</parameter></invoke> next: <tool_',
    );
    expect(calls).toEqual([{ name: 'view_file', args: { path: '/a' } }]);
    expect(flushed).toBe(' next: ');
    expect(remaining).toBe('<tool_');
  });

  it('passes plain prose straight through when no markup is present', () => {
    // Contains a lone "<" that is not the start of any opener.
    const prose = 'just some text with a < character but no opener';
    const { calls, flushed, remaining } = extractToolCallBlocks(prose);
    expect(calls).toEqual([]);
    expect(flushed).toBe(prose);
    expect(remaining).toBe('');
  });
});

describe('levenshtein', () => {
  it('returns 0 for identical strings', () => {
    const distance = levenshtein('view_file', 'view_file');
    expect(distance).toBe(0);
  });

  it('returns the length when one string is empty', () => {
    // 'view_file' is 9 characters long; checked with the empty string on
    // either side.
    const word = 'view_file';
    expect(levenshtein('', word)).toBe(9);
    expect(levenshtein(word, '')).toBe(9);
  });

  it('computes a small distance for a single-character substitution', () => {
    const distance = levenshtein('cat', 'bat');
    expect(distance).toBe(1);
  });

  it('computes a known case: read_file → view_file is 4', () => {
    // Same length; the four leading characters differ pairwise
    // (r→v, e→i, a→e, d→w), giving 4 substitutions.
    const distance = levenshtein('read_file', 'view_file');
    expect(distance).toBe(4);
  });
});

describe('suggestToolName (v1.13.16)', () => {
  // Candidate tool names handed to suggestToolName in every case below.
  const tools = [
    'view_file',
    'list_dir',
    'grep',
    'find_files',
    'view_truncated_output',
    'ask_user_input',
    'web_search',
  ];

  it('suggests the closest match when distance is small', () => {
    const suggestion = suggestToolName('view_files', tools);
    expect(suggestion).toBe('view_file');
  });

  it('suggests via substring match when distance alone would miss', () => {
    // 'file' appears inside more than one tool name; the one closest by
    // distance is the expected pick.
    const suggestion = suggestToolName('file', tools);
    expect(suggestion).toBe('view_file');
  });

  it('returns null when nothing is close', () => {
    const suggestion = suggestToolName('xxxx_yyyy_zzzz', tools);
    expect(suggestion).toBeNull();
  });

  it('is case-insensitive in the distance check', () => {
    const suggestion = suggestToolName('VIEW_FILE', tools);
    expect(suggestion).toBe('view_file');
  });
});

describe('formatUnknownToolError (v1.13.16)', () => {
  // Tool names passed as the "available" list in every case below.
  const tools = ['view_file', 'list_dir', 'grep', 'find_files'];

  it('includes the wrong name and the available tools list', () => {
    const message = formatUnknownToolError('read_file', tools);
    // Fragments the message must contain, checked in this order.
    const requiredFragments = [
      "Tool 'read_file' not found",
      'Available tools:',
      'view_file',
      'find_files',
    ];
    for (const fragment of requiredFragments) {
      expect(message).toContain(fragment);
    }
  });

  it('includes a suggestion when the drifted name is within threshold', () => {
    // view_files differs from view_file by one extra character (distance 1).
    const message = formatUnknownToolError('view_files', tools);
    expect(message).toContain('Did you mean: view_file?');
  });

  it('omits the suggestion clause when no tool is close enough', () => {
    const message = formatUnknownToolError('zzzzzzz', tools);
    expect(message).toContain("Tool 'zzzzzzz' not found");
    expect(message).toContain('Available tools:');
    expect(message).not.toContain('Did you mean');
  });

  // In the drift incident from the recon (chat 30d8…1be7167, msg 7ff558f4)
  // the model emitted <invoke name="read_file">. lev(read_file, view_file) is
  // 4, which is above the spec's threshold (<=3), so view_file is not
  // suggested; the model still receives the available-tools list. This test
  // pins that outcome so that loosening the threshold later has to be a
  // deliberate decision.
  it('does not suggest view_file for the read_file drift case (distance is 4, over threshold)', () => {
    const message = formatUnknownToolError('read_file', tools);
    expect(message).not.toContain('Did you mean');
  });
});