// boocode/apps/server/src/services/__tests__/web_tools.test.ts

import { afterEach, describe, expect, it, vi } from 'vitest';
import { executeWebSearch } from '../web_search.js';
import { executeWebFetch } from '../web_fetch.js';
import { isPublicUrl } from '../url_guard.js';

// Base URL handed to executeWebSearch as its SearXNG endpoint. Every search
// test below mocks or injects fetch, so this host is never actually contacted.
const TEST_SEARXNG = 'http://searxng.test:8888';

/**
 * Builds a canned `Response` for fetch stubs.
 *
 * @param body - Sent verbatim when it is already a string; any other value is
 *   serialized with `JSON.stringify`.
 * @param init - Optional overrides: `status` (falls back to 200) plus
 *   `contentType` / `contentLength`, which become the `content-type` and
 *   `content-length` headers only when supplied.
 * @returns A fresh `Response` carrying the payload, status and headers.
 */
function mockResponse(
  body: unknown,
  init: { status?: number; contentType?: string; contentLength?: number } = {},
): Response {
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  const headers: Record<string, string> = {
    // Truthiness check on purpose: an empty contentType adds no header.
    ...(init.contentType ? { 'content-type': init.contentType } : {}),
    // Only `undefined` suppresses the header, so a length of 0 is still sent.
    ...(init.contentLength !== undefined
      ? { 'content-length': String(init.contentLength) }
      : {}),
  };
  return new Response(payload, { status: init.status ?? 200, headers });
}

// Undo every spy once a test finishes so a mocked globalThis.fetch cannot
// leak into the next case.
afterEach(function restoreSpies(): void {
  vi.restoreAllMocks();
});

// ============================================================================
// url_guard — SSRF protection
// ============================================================================

describe('isPublicUrl', () => {
  // Asserts the guard refuses `url` and hands back the verdict so a test can
  // additionally inspect the reported reason.
  const refused = (url: string) => {
    const verdict = isPublicUrl(url);
    expect(verdict.ok).toBe(false);
    return verdict;
  };

  // Asserts the guard lets `url` through.
  const permitted = (url: string): void => {
    expect(isPublicUrl(url).ok).toBe(true);
  };

  it('blocks http://localhost', () => {
    refused('http://localhost');
  });

  it('blocks http://127.0.0.1:3000', () => {
    expect(refused('http://127.0.0.1:3000').reason).toMatch(/loopback/);
  });

  it('blocks RFC1918 192.168.x.x', () => {
    refused('http://192.168.1.1');
  });

  it('blocks RFC1918 10.x.x.x', () => {
    refused('http://10.0.0.5');
  });

  it('blocks RFC1918 172.16-31.x.x', () => {
    // An address in the middle of the private block.
    refused('http://172.20.0.1');
    // Edges of the block: 172.15.x and 172.32.x sit just outside it and are
    // public, while 172.31.255.255 is the last private address.
    permitted('http://172.15.0.1');
    refused('http://172.31.255.255');
    permitted('http://172.32.0.1');
  });

  it('blocks Tailscale CGNAT 100.64.0.0/10', () => {
    expect(refused('http://100.114.205.53').reason).toMatch(/cgnat/);
  });

  it('allows 100.x outside CGNAT range', () => {
    // 100.63.x lies directly below the 100.64.0.0/10 block.
    permitted('http://100.63.0.1');
    // 100.128.x lies directly above it.
    permitted('http://100.128.0.1');
  });

  it('blocks ftp:// (non-http protocol)', () => {
    expect(refused('ftp://example.com').reason).toMatch(/unsupported_protocol/);
  });

  it('blocks file:///etc/passwd', () => {
    refused('file:///etc/passwd');
  });

  it('blocks anything.local (mDNS suffix)', () => {
    expect(refused('http://anything.local').reason).toMatch(/private_suffix/);
  });

  it('blocks anything.internal', () => {
    refused('http://service.internal');
  });

  it('blocks 169.254.x.x link-local (covers AWS/GCP IMDS)', () => {
    refused('http://169.254.169.254');
  });

  it('allows https://example.com', () => {
    permitted('https://example.com');
  });

  it('rejects malformed URLs', () => {
    // Exact match here (not a regex): the reason is the bare code.
    expect(refused('not a url').reason).toBe('invalid_url');
  });
});

// ============================================================================
// web_search
// ============================================================================

describe('executeWebSearch', () => {
  it('returns top N results, mapped to {title,url,snippet}', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse(
        {
          results: [
            { title: 'A', url: 'https://a.example/', content: 'snippet a' },
            { title: 'B', url: 'https://b.example/', content: 'snippet b' },
            { title: 'C', url: 'https://c.example/', content: 'snippet c' },
          ],
        },
        { contentType: 'application/json' },
      ),
    );
    const out = await executeWebSearch({ query: 'foo', max_results: 2 }, TEST_SEARXNG);
    // Three results offered, max_results: 2 requested — only the first two
    // come back.
    expect(out.results).toHaveLength(2);
    // The upstream `content` field is surfaced as `snippet`.
    expect(out.results[0]).toEqual({ title: 'A', url: 'https://a.example/', snippet: 'snippet a' });
    // Exactly one request, to /search with the query and format=json, carrying
    // an AbortSignal. 'foo' contains no reserved characters, so this pins the
    // URL shape only — it does not exercise query encoding.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_SEARXNG}/search?q=foo&format=json`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('caps max_results at 10 even if a larger value is requested', async () => {
    // 20 upstream results and an oversized request: the cap is the only thing
    // that can bring the count down to 10.
    const many = Array.from({ length: 20 }, (_, i) => ({
      title: `t${i}`,
      url: `https://${i}.example/`,
      content: `c${i}`,
    }));
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse({ results: many }, { contentType: 'application/json' }),
    );
    const out = await executeWebSearch({ query: 'x', max_results: 999 }, TEST_SEARXNG);
    expect(out.results).toHaveLength(10);
  });

  it('throws on non-200 from SearXNG (executeToolCall surfaces the error to the LLM)', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response('boom', { status: 503 }),
    );
    await expect(
      executeWebSearch({ query: 'x' }, TEST_SEARXNG),
    ).rejects.toThrow(/SearXNG returned 503/);
  });

  it('returns empty results cleanly when SearXNG has no matches', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse({ results: [] }, { contentType: 'application/json' }),
    );
    const out = await executeWebSearch({ query: 'xyz' }, TEST_SEARXNG);
    expect(out.results).toEqual([]);
    expect(out.total).toBe(0);
  });

  it('drops result entries with missing url (defensive)', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse(
        { results: [{ title: 'no url', content: 'orphan' }, { url: 'https://ok/', title: 't', content: 's' }] },
        { contentType: 'application/json' },
      ),
    );
    const out = await executeWebSearch({ query: 'x' }, TEST_SEARXNG);
    // Only the entry that carries a url survives.
    expect(out.results).toHaveLength(1);
    expect(out.results[0]!.url).toBe('https://ok/');
  });

  it('uses the injected fetcher when one is passed (v1.11.8 review)', async () => {
    // Direct injection vs vi.spyOn(globalThis, 'fetch'): the injected path
    // lets tests run without monkey-patching globals. Asserts the stub is the
    // thing actually called.
    //
    // The global spy is given a rejecting implementation rather than left as
    // a bare spy: a bare vi.spyOn calls through to the real fetch, so a
    // regression that ignored the injected fetcher would attempt a real
    // network request instead of failing fast and offline.
    const globalSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockRejectedValue(new Error('global fetch must not be called'));
    const stub = vi.fn().mockResolvedValue(
      mockResponse(
        { results: [{ title: 'injected', url: 'https://inj/', content: 's' }] },
        { contentType: 'application/json' },
      ),
    );
    const out = await executeWebSearch(
      { query: 'q' },
      TEST_SEARXNG,
      stub as unknown as typeof fetch,
    );
    expect(stub).toHaveBeenCalledOnce();
    expect(globalSpy).not.toHaveBeenCalled();
    expect(out.results[0]!.url).toBe('https://inj/');
  });
});

// ============================================================================
// web_fetch
// ============================================================================

describe('executeWebFetch — URL-guard short-circuit', () => {
  // Runs executeWebFetch against `url` with a stub fetcher and asserts two
  // things: the call reports blocked_by_url_guard, and the fetcher was never
  // invoked. Injecting the stub keeps the test offline — without it, a guard
  // regression would send a real request to the blocked target — and the
  // call-count check is what actually proves the "short-circuit".
  async function expectBlockedBeforeFetch(url: string): Promise<void> {
    const fetchStub = vi.fn().mockRejectedValue(new Error('fetch must not be reached'));
    const result = await executeWebFetch({ url }, fetchStub as unknown as typeof fetch);
    expect('error' in result && result.error).toBe('blocked_by_url_guard');
    expect(fetchStub).not.toHaveBeenCalled();
  }

  it('returns blocked_by_url_guard for ftp://', async () => {
    await expectBlockedBeforeFetch('ftp://example.com');
  });

  it('returns blocked_by_url_guard for file:///', async () => {
    await expectBlockedBeforeFetch('file:///etc/passwd');
  });

  it('returns blocked_by_url_guard for Tailscale CGNAT', async () => {
    await expectBlockedBeforeFetch('http://100.114.205.53/admin');
  });
});

describe('executeWebFetch — content-type handling', () => {
  // Fetches `url` through a stub fetcher that always answers with `body`
  // served under the given content type.
  const fetchServed = (url: string, body: string, contentType: string) => {
    const stub = vi.fn().mockResolvedValue(mockResponse(body, { contentType }));
    return executeWebFetch({ url }, stub as unknown as typeof fetch);
  };

  it('strips HTML tags and returns plain text + title', async () => {
    const html = `<html><head><title>  Hello World  </title></head>
      <body><script>alert('xss')</script><h1>Heading</h1><p>Body text</p></body></html>`;
    const result = await fetchServed('https://example.com/page', html, 'text/html; charset=utf-8');
    expect('content' in result).toBe(true);
    if (!('content' in result)) return;
    // The padded <title> text comes back trimmed.
    expect(result.title).toBe('Hello World');
    // The script BODY must be gone too, not merely the <script> tags.
    expect(result.content).not.toContain('alert(');
    expect(result.content).toContain('Heading');
    expect(result.content).toContain('Body text');
  });

  it('returns JSON content as-is (no stripping)', async () => {
    const json = '{"foo": "bar"}';
    const result = await fetchServed('https://example.com/api', json, 'application/json');
    expect('content' in result && result.content).toBe(json);
  });

  it('returns plain text as-is', async () => {
    const txt = 'just\nplain\ntext';
    const result = await fetchServed('https://example.com/file.txt', txt, 'text/plain');
    expect('content' in result && result.content).toBe(txt);
  });

  it('returns unsupported_content_type for binary content', async () => {
    const result = await fetchServed(
      'https://example.com/blob',
      'binary garbage',
      'application/octet-stream',
    );
    expect('error' in result && result.error).toBe('unsupported_content_type');
  });
});

describe('executeWebFetch — size + truncation', () => {
  // Calls executeWebFetch with `args`, using a stub fetcher that always
  // resolves to `response`.
  const fetchResponse = (
    args: Parameters<typeof executeWebFetch>[0],
    response: Response,
  ) => executeWebFetch(args, vi.fn().mockResolvedValue(response) as unknown as typeof fetch);

  it('rejects responses whose Content-Length exceeds 5MB', async () => {
    // The body itself is tiny; only the declared Content-Length (6 MiB) is
    // over the limit.
    const result = await fetchResponse(
      { url: 'https://example.com/huge' },
      mockResponse('small body', { contentType: 'text/plain', contentLength: 6 * 1024 * 1024 }),
    );
    expect('error' in result && result.error).toBe('response_too_large');
  });

  it('rejects multi-byte content that exceeds 5MB in bytes but fits in chars (v1.11.8 review)', async () => {
    // 1.5M copies of U+1F600. Each one is a surrogate pair (string length 2)
    // and 4 bytes in UTF-8, so the string is 3,000,000 UTF-16 units long but
    // 6,000,000 bytes on the wire — above the 5 MiB (5,242,880 byte) cap.
    // No Content-Length header is sent, so the pre-flight check passes and the
    // streaming path is the one that rejects. Since v1.11.10 that surfaces as
    // body_too_large (previously response_too_large from the
    // post-consumption check).
    const heavy = '😀'.repeat(1_500_000);
    const result = await fetchResponse(
      { url: 'https://example.com/multibyte' },
      mockResponse(heavy, { contentType: 'text/plain' }),
    );
    expect('error' in result).toBe(true);
    if (!('error' in result)) return;
    expect(result.error).toBe('body_too_large');
    expect(result.reason).toMatch(/exceeded/);
  });

  it('truncates output to max_chars and appends a marker', async () => {
    const big = 'A'.repeat(50_000);
    const result = await fetchResponse(
      { url: 'https://example.com/big', max_chars: 200 },
      mockResponse(big, { contentType: 'text/plain' }),
    );
    expect('content' in result).toBe(true);
    if (!('content' in result)) return;
    expect(result.truncated).toBe(true);
    expect(result.content).toContain('[truncated');
    // The output opens with the first 200 characters; the marker follows.
    expect(result.content.startsWith('A'.repeat(200))).toBe(true);
  });

  it('does NOT mark short content as truncated', async () => {
    const result = await fetchResponse(
      { url: 'https://example.com/tiny' },
      mockResponse('short', { contentType: 'text/plain' }),
    );
    expect('content' in result && result.truncated).toBe(false);
  });
});

// ============================================================================
// v1.11.9: manual redirect handling — re-run URL guard on each hop
// ============================================================================

/**
 * Builds an empty-bodied redirect `Response` for the redirect tests.
 *
 * @param loc - Value for the `location` header; pass `null` to leave the
 *   header out entirely.
 * @param status - HTTP status code to answer with; 302 unless overridden.
 * @returns The redirect `Response`.
 */
function redirect(loc: string | null, status = 302): Response {
  const headers: Record<string, string> = loc === null ? {} : { location: loc };
  return new Response('', { status, headers });
}

describe('executeWebFetch — redirect handling', () => {
  // Starts a fetch at `url` through the given mock fetcher. The cast lives
  // here once instead of at every call site.
  const fetchFrom = (url: string, fetcher: unknown) =>
    executeWebFetch({ url }, fetcher as typeof fetch);

  it('blocks a redirect target that resolves to a private IP (AWS IMDS)', async () => {
    // A public origin answers 302 pointing at 169.254.169.254 (link-local).
    // Before v1.11.9, `redirect: 'follow'` would have followed it silently;
    // the manual loop is expected to run isPublicUrl on the resolved target
    // and refuse.
    const fakeFetch = vi.fn<typeof fetch>();
    fakeFetch.mockResolvedValueOnce(redirect('http://169.254.169.254/latest/meta-data/'));

    const result = await fetchFrom('https://example.com/redirect', fakeFetch);

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('blocked_by_url_guard');
      // The reason must identify this as a REDIRECT hop rather than the
      // initial URL, so the two failure modes are distinguishable in logs.
      expect(result.reason).toMatch(/redirect target/);
    }
    // Critical: no second request — the private target is never fetched.
    expect(fakeFetch).toHaveBeenCalledTimes(1);
  });

  it('follows a public-to-public redirect and returns the final body', async () => {
    const fakeFetch = vi.fn<typeof fetch>();
    fakeFetch.mockResolvedValueOnce(redirect('https://example.org/final'));
    fakeFetch.mockResolvedValueOnce(mockResponse('ok body', { contentType: 'text/plain' }));

    const result = await fetchFrom('https://example.com/start', fakeFetch);

    expect('content' in result).toBe(true);
    if ('content' in result) {
      expect(result.content).toBe('ok body');
      // The reported url is the final hop, i.e. where the body came from.
      expect(result.url).toBe('https://example.org/final');
    }
    expect(fakeFetch).toHaveBeenCalledTimes(2);
  });

  it('bails after MAX_REDIRECTS hops with a Too many redirects error', async () => {
    // Six queued redirects — per the original note, one more than the loop
    // allows. Every Location names a different public host, so the URL guard
    // never objects and only the redirect-count limit can end the chain.
    const fakeFetch = vi.fn<typeof fetch>();
    for (const host of ['a', 'b', 'c', 'd', 'e', 'f']) {
      fakeFetch.mockResolvedValueOnce(redirect(`https://${host}.example/`));
    }

    const result = await fetchFrom('https://start.example/', fakeFetch);

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('too_many_redirects');
      expect(result.reason).toMatch(/Too many redirects/);
    }
  });

  it('errors when a 30x response omits the Location header', async () => {
    const fakeFetch = vi.fn<typeof fetch>();
    fakeFetch.mockResolvedValueOnce(redirect(null, 302));

    const result = await fetchFrom('https://example.com/', fakeFetch);

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('redirect_missing_location');
      expect(result.reason).toMatch(/no Location/);
    }
  });

  it('resolves a relative Location against the current URL', async () => {
    // The request goes to https://example.com/path and the server answers
    // with the relative `Location: /foo`. Resolved against the request URL
    // (RFC 9110), the next hop is https://example.com/foo — so the second
    // fetch must receive that absolute URL.
    const fakeFetch = vi.fn<typeof fetch>();
    fakeFetch.mockResolvedValueOnce(redirect('/foo'));
    fakeFetch.mockResolvedValueOnce(mockResponse('final', { contentType: 'text/plain' }));

    const result = await fetchFrom('https://example.com/path', fakeFetch);

    expect('content' in result && result.content).toBe('final');
    expect(fakeFetch).toHaveBeenCalledTimes(2);
    expect(fakeFetch.mock.calls[1]![0]).toBe('https://example.com/foo');
  });
});

// ============================================================================
// v1.11.10: streaming body cap — abort the response stream at MAX_BYTES
// ============================================================================

// Byte cap the streaming tests expect executeWebFetch to enforce: 5 MiB
// (5_242_880 bytes). Deliberately restated here instead of imported, so that
// changing the real cap shows up as a test failure — the limit is treated as
// part of the public contract.
const MAX_BYTES_TEST = 5 * 2 ** 20;

/**
 * Builds a 200 `Response` whose body is a real `ReadableStream` fed from
 * `chunks`.
 *
 * Chunks are handed out from `pull()` — one per call — instead of being
 * enqueued up front in `start()`. An eager `start()` would enqueue everything
 * and close the stream before the consumer reads, so a later
 * `reader.cancel()` would find it already closed and the `cancel` callback
 * would never run.
 *
 * @param chunks - Byte chunks to emit, in order; the stream closes once they
 *   are exhausted.
 * @param init - Optional `contentType` / `contentLength` headers (a `null` or
 *   missing `contentLength` sends no header) and a `cancelFlag` whose
 *   `cancelled` field is set to `true` when the stream is cancelled, letting
 *   a test observe that cancellation reached the underlying source.
 * @returns The streaming `Response`.
 */
function streamedResponse(
  chunks: Uint8Array[],
  init: { contentType?: string; contentLength?: number | null; cancelFlag?: { cancelled: boolean } } = {},
): Response {
  let cursor = 0;
  const body = new ReadableStream({
    pull(controller) {
      if (cursor < chunks.length) {
        controller.enqueue(chunks[cursor]!);
        cursor += 1;
      } else {
        controller.close();
      }
    },
    cancel() {
      if (init.cancelFlag) init.cancelFlag.cancelled = true;
    },
  });

  const headers: Record<string, string> = {};
  if (init.contentType) headers['content-type'] = init.contentType;
  // `!= null` skips both undefined and null, while 0 still yields a header.
  if (init.contentLength != null) headers['content-length'] = String(init.contentLength);

  return new Response(body, { status: 200, headers });
}

describe('executeWebFetch — streaming body cap (v1.11.10)', () => {
  // `count` references to a single 1 MiB buffer filled with `byte`.
  const mebibyteChunks = (count: number, byte: number): Uint8Array[] =>
    new Array<Uint8Array>(count).fill(new Uint8Array(1024 * 1024).fill(byte));

  // Fetches `url` through a stub fetcher that always resolves to `response`.
  const fetchStream = (url: string, response: Response) =>
    executeWebFetch({ url }, vi.fn().mockResolvedValue(response) as unknown as typeof fetch);

  it('aborts the stream when a server lies about Content-Length and emits over the cap', async () => {
    // The declared Content-Length (100) is a lie, and that is the point: an
    // honest header would be rejected by the pre-flight check, whereas here
    // pre-flight passes and only the streaming reader can catch the
    // oversized body.
    //
    // 10 × 1 MiB chunks are offered although the reader is expected to stop
    // after roughly six (once it is past 5 MiB). The surplus keeps the stream
    // readable at the moment reader.cancel() runs; with no surplus, the
    // source could close the stream first and the cancel() callback would
    // not fire.
    const cancelFlag = { cancelled: false };
    const response = streamedResponse(mebibyteChunks(10, 65 /* 'A' */), {
      contentType: 'text/plain',
      contentLength: 100,
      cancelFlag,
    });

    const result = await fetchStream('https://example.com/lying-server', response);

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('body_too_large');
      expect(result.reason).toMatch(/exceeded/);
    }
    // Critical: cancellation reached the underlying source, so the stream
    // was really released rather than abandoned while still producing.
    expect(cancelFlag.cancelled).toBe(true);
  });

  it('catches an oversized stream when Content-Length is omitted entirely', async () => {
    // Responses without Content-Length (chunked transfer-encoding, dynamic
    // output) give the pre-flight check nothing to gate on, leaving the
    // streaming reader as the only defense. 10 chunks are offered for the
    // same headroom reason as in the lying-Content-Length test.
    const response = streamedResponse(mebibyteChunks(10, 66 /* 'B' */), {
      contentType: 'text/plain',
    });

    const result = await fetchStream('https://example.com/no-length', response);

    expect('error' in result && result.error).toBe('body_too_large');
  });

  it('passes a multi-chunk body that totals just under the cap', async () => {
    // MAX_BYTES_TEST - 1 bytes, delivered in 256 KiB chunks plus a shorter
    // final one. Per the original note the reader's check is a strict
    // `total > maxBytes`, so one byte under the cap must be accepted without
    // sitting exactly on the boundary.
    const targetTotal = MAX_BYTES_TEST - 1;
    const chunkSize = 256 * 1024;
    const chunks: Uint8Array[] = [];
    for (let offset = 0; offset < targetTotal; offset += chunkSize) {
      const size = Math.min(chunkSize, targetTotal - offset);
      chunks.push(new Uint8Array(size).fill(67)); // 'C'
    }

    const result = await fetchStream(
      'https://example.com/right-at-cap',
      streamedResponse(chunks, { contentType: 'text/plain' }),
    );

    // Success means a content-shaped result rather than an error. Any
    // clamping of the final string to the character limit is separate
    // truncation logic with its own test; this case only checks that the
    // byte-capped read did not trip on a body just under its limit.
    expect('content' in result).toBe(true);
    if (!('content' in result)) return;
    expect(result.content.length).toBeGreaterThan(0);
    // The payload is all ASCII 'C', so the first 200 characters — ahead of
    // any truncation marker — must be 'C's. That shows real bytes came
    // through the stream rather than an empty buffer.
    expect(result.content.slice(0, 200)).toBe('C'.repeat(200));
  });
});