import { afterEach, describe, expect, it, vi } from 'vitest';
import { executeWebSearch } from '../web_search.js';
import { executeWebFetch } from '../web_fetch.js';
import { isPublicUrl } from '../url_guard.js';

const TEST_SEARXNG = 'http://searxng.test:8888';

/**
 * Builds a `Response` for use as a mocked `fetch` result.
 *
 * @param body - Sent verbatim when it is a string; anything else is
 *   serialised with `JSON.stringify`.
 * @param init - Optional status (defaults to 200) plus `content-type` /
 *   `content-length` header values. A header is only set when its option
 *   is supplied.
 * @returns A `Response` carrying the body, status and headers above.
 */
function mockResponse(
  body: unknown,
  init: { status?: number; contentType?: string; contentLength?: number } = {},
): Response {
  const status = init.status ?? 200;
  const headers: Record<string, string> = {};
  if (init.contentType) headers['content-type'] = init.contentType;
  if (init.contentLength !== undefined) headers['content-length'] = String(init.contentLength);
  const stringBody = typeof body === 'string' ? body : JSON.stringify(body);
  return new Response(stringBody, { status, headers });
}

// Undo every vi.spyOn (notably the global `fetch` spies) between tests.
afterEach(() => {
  vi.restoreAllMocks();
});

// ============================================================================
// url_guard — SSRF protection
// ============================================================================

describe('isPublicUrl', () => {
  it('blocks http://localhost', () => {
    expect(isPublicUrl('http://localhost').ok).toBe(false);
  });

  it('blocks http://127.0.0.1:3000', () => {
    const r = isPublicUrl('http://127.0.0.1:3000');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/loopback/);
  });

  it('blocks RFC1918 192.168.x.x', () => {
    expect(isPublicUrl('http://192.168.1.1').ok).toBe(false);
  });

  it('blocks RFC1918 10.x.x.x', () => {
    expect(isPublicUrl('http://10.0.0.5').ok).toBe(false);
  });

  it('blocks RFC1918 172.16-31.x.x', () => {
    expect(isPublicUrl('http://172.20.0.1').ok).toBe(false);
    // Boundary: 172.15 is public; 172.16 is private; 172.31 is private; 172.32 is public.
    expect(isPublicUrl('http://172.15.0.1').ok).toBe(true);
    expect(isPublicUrl('http://172.16.0.1').ok).toBe(false);
    expect(isPublicUrl('http://172.31.255.255').ok).toBe(false);
    expect(isPublicUrl('http://172.32.0.1').ok).toBe(true);
  });

  it('blocks Tailscale CGNAT 100.64.0.0/10', () => {
    const r = isPublicUrl('http://100.114.205.53');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/cgnat/);
  });

  it('allows 100.x outside CGNAT range', () => {
    // 100.63 is public (one below CGNAT lower bound).
    expect(isPublicUrl('http://100.63.0.1').ok).toBe(true);
    // 100.128 is public (one above CGNAT upper bound).
    expect(isPublicUrl('http://100.128.0.1').ok).toBe(true);
  });

  it('blocks ftp:// (non-http protocol)', () => {
    const r = isPublicUrl('ftp://example.com');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/unsupported_protocol/);
  });

  it('blocks file:///etc/passwd', () => {
    expect(isPublicUrl('file:///etc/passwd').ok).toBe(false);
  });

  it('blocks anything.local (mDNS suffix)', () => {
    const r = isPublicUrl('http://anything.local');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/private_suffix/);
  });

  it('blocks anything.internal', () => {
    expect(isPublicUrl('http://service.internal').ok).toBe(false);
  });

  it('blocks 169.254.x.x link-local (covers AWS/GCP IMDS)', () => {
    expect(isPublicUrl('http://169.254.169.254').ok).toBe(false);
  });

  it('allows https://example.com', () => {
    expect(isPublicUrl('https://example.com').ok).toBe(true);
  });

  it('rejects malformed URLs', () => {
    const r = isPublicUrl('not a url');
    expect(r.ok).toBe(false);
    expect(r.reason).toBe('invalid_url');
  });
});

// ============================================================================
// web_search
// ============================================================================

describe('executeWebSearch', () => {
  it('returns top N results, mapped to {title,url,snippet}', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse(
        {
          results: [
            { title: 'A', url: 'https://a.example/', content: 'snippet a' },
            { title: 'B', url: 'https://b.example/', content: 'snippet b' },
            { title: 'C', url: 'https://c.example/', content: 'snippet c' },
          ],
        },
        { contentType: 'application/json' },
      ),
    );

    const out = await executeWebSearch({ query: 'foo', max_results: 2 }, TEST_SEARXNG);

    expect(out.results).toHaveLength(2);
    expect(out.results[0]).toEqual({ title: 'A', url: 'https://a.example/', snippet: 'snippet a' });
    // URL-encodes the query and hits /search?...&format=json.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_SEARXNG}/search?q=foo&format=json`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('caps max_results at 10 even if a larger value is requested', async () => {
    const many = Array.from({ length: 20 }, (_, i) => ({
      title: `t${i}`,
      url: `https://${i}.example/`,
      content: `c${i}`,
    }));
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse({ results: many }, { contentType: 'application/json' }),
    );

    const out = await executeWebSearch({ query: 'x', max_results: 999 }, TEST_SEARXNG);

    expect(out.results).toHaveLength(10);
  });

  it('throws on non-200 from SearXNG (executeToolCall surfaces the error to the LLM)', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response('boom', { status: 503 }),
    );

    await expect(
      executeWebSearch({ query: 'x' }, TEST_SEARXNG),
    ).rejects.toThrow(/SearXNG returned 503/);
  });

  it('returns empty results cleanly when SearXNG has no matches', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse({ results: [] }, { contentType: 'application/json' }),
    );

    const out = await executeWebSearch({ query: 'xyz' }, TEST_SEARXNG);

    expect(out.results).toEqual([]);
    expect(out.total).toBe(0);
  });

  it('drops result entries with missing url (defensive)', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      mockResponse(
        { results: [{ title: 'no url', content: 'orphan' }, { url: 'https://ok/', title: 't', content: 's' }] },
        { contentType: 'application/json' },
      ),
    );

    const out = await executeWebSearch({ query: 'x' }, TEST_SEARXNG);

    expect(out.results).toHaveLength(1);
    expect(out.results[0]!.url).toBe('https://ok/');
  });

  it('uses the injected fetcher when one is passed (v1.11.8 review)', async () => {
    // Direct injection vs vi.spyOn(globalThis, 'fetch'): the injected
    // path lets tests run without monkey-patching globals, and the
    // production code path defaults to global fetch when no fetcher is
    // supplied. Asserts the stub is the thing actually called.
    const globalSpy = vi.spyOn(globalThis, 'fetch');
    const stub = vi.fn().mockResolvedValue(
      mockResponse(
        { results: [{ title: 'injected', url: 'https://inj/', content: 's' }] },
        { contentType: 'application/json' },
      ),
    );

    const out = await executeWebSearch(
      { query: 'q' },
      TEST_SEARXNG,
      stub as unknown as typeof fetch,
    );

    expect(stub).toHaveBeenCalledOnce();
    expect(globalSpy).not.toHaveBeenCalled();
    expect(out.results[0]!.url).toBe('https://inj/');
  });
});

// ============================================================================
// web_fetch
// ============================================================================

describe('executeWebFetch — URL-guard short-circuit', () => {
  it('returns blocked_by_url_guard for ftp://', async () => {
    const result = await executeWebFetch({ url: 'ftp://example.com' });
    expect('error' in result && result.error).toBe('blocked_by_url_guard');
  });

  it('returns blocked_by_url_guard for file:///', async () => {
    const result = await executeWebFetch({ url: 'file:///etc/passwd' });
    expect('error' in result && result.error).toBe('blocked_by_url_guard');
  });

  it('returns blocked_by_url_guard for Tailscale CGNAT', async () => {
    const result = await executeWebFetch({ url: 'http://100.114.205.53/admin' });
    expect('error' in result && result.error).toBe('blocked_by_url_guard');
  });
});

describe('executeWebFetch — content-type handling', () => {
  it('strips HTML tags and returns plain text + title', async () => {
    // NOTE(review): the markup of this fixture was lost in extraction and
    // has been rebuilt from what the assertions below need (a <title>, a
    // <script> whose body contains `alert(`, a heading and a paragraph) —
    // confirm against the original fixture.
    const html = `<!doctype html>
<html>
  <head>
    <title>Hello World</title>
    <script>alert('xss')</script>
  </head>
  <body>
    <h1>Heading</h1>
    <p>Body text</p>
  </body>
</html>`;
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse(html, { contentType: 'text/html; charset=utf-8' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/page' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result).toBe(true);
    if ('content' in result) {
      expect(result.title).toBe('Hello World');
      // Script CONTENT must not leak through — the regex stripper deletes
      // the whole block, not just the tags.
      expect(result.content).not.toContain('alert(');
      expect(result.content).toContain('Heading');
      expect(result.content).toContain('Body text');
    }
  });

  it('returns JSON content as-is (no stripping)', async () => {
    const json = '{"foo": "bar"}';
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse(json, { contentType: 'application/json' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/api' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result && result.content).toBe(json);
  });

  it('returns plain text as-is', async () => {
    const txt = 'just\nplain\ntext';
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse(txt, { contentType: 'text/plain' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/file.txt' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result && result.content).toBe(txt);
  });

  it('returns unsupported_content_type for binary content', async () => {
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse('binary garbage', { contentType: 'application/octet-stream' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/blob' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result && result.error).toBe('unsupported_content_type');
  });
});

describe('executeWebFetch — size + truncation', () => {
  it('rejects responses whose Content-Length exceeds 5MB', async () => {
    // The body itself is tiny; only the declared Content-Length is over
    // the limit, so this exercises the header-based pre-flight check.
    const fakeFetch = vi.fn().mockResolvedValue(
      new Response('small body', {
        status: 200,
        headers: {
          'content-type': 'text/plain',
          'content-length': String(6 * 1024 * 1024),
        },
      }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/huge' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result && result.error).toBe('response_too_large');
  });

  it('rejects multi-byte content that exceeds 5MB in bytes but fits in chars (v1.11.8 review)', async () => {
    // 1.5M U+1F600 emojis: each is length 2 in UTF-16 (surrogate pair) and
    // 4 bytes in UTF-8. body.length = 3,000,000 chars (~2.86 MiB by
    // UTF-16 count) but Buffer.byteLength = 6,000,000 bytes (>5 MiB).
    // v1.11.10: streaming reader catches this as body_too_large (was
    // response_too_large in the post-consumption check). No
    // Content-Length header, so the pre-flight check passes and the
    // streaming path is the one that rejects.
    const heavy = '😀'.repeat(1_500_000);
    const fakeFetch = vi.fn().mockResolvedValue(
      new Response(heavy, { status: 200, headers: { 'content-type': 'text/plain' } }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/multibyte' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('body_too_large');
      expect(result.reason).toMatch(/exceeded/);
    }
  });

  it('truncates output to max_chars and appends a marker', async () => {
    const big = 'A'.repeat(50_000);
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse(big, { contentType: 'text/plain' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/big', max_chars: 200 },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result).toBe(true);
    if ('content' in result) {
      expect(result.truncated).toBe(true);
      expect(result.content).toContain('[truncated');
      // First 200 chars + the marker line.
      expect(result.content.startsWith('A'.repeat(200))).toBe(true);
    }
  });

  it('does NOT mark short content as truncated', async () => {
    const fakeFetch = vi.fn().mockResolvedValue(
      mockResponse('short', { contentType: 'text/plain' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/tiny' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result && result.truncated).toBe(false);
  });
});

// ============================================================================
// v1.11.9: manual redirect handling — re-run URL guard on each hop
// ============================================================================

/**
 * Builds an empty-bodied redirect `Response`.
 *
 * @param loc - Value for the `location` header; pass `null` to omit the
 *   header entirely.
 * @param status - HTTP status code; 302 by default, tests pass other
 *   codes when they need to.
 * @returns The redirect `Response`.
 */
function redirect(loc: string | null, status = 302): Response {
  const headers: Record<string, string> = {};
  if (loc !== null) headers['location'] = loc;
  return new Response('', { status, headers });
}

describe('executeWebFetch — redirect handling', () => {
  it('blocks a redirect target that resolves to a private IP (AWS IMDS)', async () => {
    // Public-IP origin 302s into 169.254.169.254 (link-local). Pre-v1.11.9
    // `redirect: 'follow'` would silently follow this; the new manual
    // loop re-runs isPublicUrl on the resolved target and blocks.
    const fakeFetch = vi
      .fn()
      .mockResolvedValueOnce(redirect('http://169.254.169.254/latest/meta-data/'));

    const result = await executeWebFetch(
      { url: 'https://example.com/redirect' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('blocked_by_url_guard');
      // Reason should make it clear this was a REDIRECT hop, not the
      // initial URL — so logs can distinguish the two failure modes.
      expect(result.reason).toMatch(/redirect target/);
    }
    // Critical: the second fetch (the private target) must NOT happen.
    expect(fakeFetch).toHaveBeenCalledTimes(1);
  });

  it('follows a public-to-public redirect and returns the final body', async () => {
    const fakeFetch = vi
      .fn()
      .mockResolvedValueOnce(redirect('https://example.org/final'))
      .mockResolvedValueOnce(mockResponse('ok body', { contentType: 'text/plain' }));

    const result = await executeWebFetch(
      { url: 'https://example.com/start' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result).toBe(true);
    if ('content' in result) {
      expect(result.content).toBe('ok body');
      // Final URL is reported back so the model knows where the body came from.
      expect(result.url).toBe('https://example.org/final');
    }
    expect(fakeFetch).toHaveBeenCalledTimes(2);
  });

  it('bails after MAX_REDIRECTS hops with a Too many redirects error', async () => {
    // Chain 6 redirects — one more than the loop allows. Each Location
    // points at a distinct public host so the URL guard stays happy and
    // we exercise the redirectCount > MAX_REDIRECTS branch specifically.
    const fakeFetch = vi
      .fn()
      .mockResolvedValueOnce(redirect('https://a.example/'))
      .mockResolvedValueOnce(redirect('https://b.example/'))
      .mockResolvedValueOnce(redirect('https://c.example/'))
      .mockResolvedValueOnce(redirect('https://d.example/'))
      .mockResolvedValueOnce(redirect('https://e.example/'))
      .mockResolvedValueOnce(redirect('https://f.example/'));

    const result = await executeWebFetch(
      { url: 'https://start.example/' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('too_many_redirects');
      expect(result.reason).toMatch(/Too many redirects/);
    }
  });

  it('errors when a 30x response omits the Location header', async () => {
    const fakeFetch = vi
      .fn()
      .mockResolvedValueOnce(redirect(null, 302));

    const result = await executeWebFetch(
      { url: 'https://example.com/' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('redirect_missing_location');
      expect(result.reason).toMatch(/no Location/);
    }
  });

  it('resolves a relative Location against the current URL', async () => {
    // Server sends `Location: /foo` (relative) on a request to
    // https://example.com/path. RFC 9110 says resolve against the
    // request URL, so the next hop is https://example.com/foo. Assert
    // the second fetch was called with the absolute resolved URL.
    const fakeFetch = vi
      .fn()
      .mockResolvedValueOnce(redirect('/foo'))
      .mockResolvedValueOnce(mockResponse('final', { contentType: 'text/plain' }));

    const result = await executeWebFetch(
      { url: 'https://example.com/path' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('content' in result && result.content).toBe('final');
    expect(fakeFetch).toHaveBeenCalledTimes(2);
    expect(fakeFetch.mock.calls[1]![0]).toBe('https://example.com/foo');
  });
});

// ============================================================================
// v1.11.10: streaming body cap — abort the response stream at MAX_BYTES
// ============================================================================

// MAX_BYTES is 5 * 1024 * 1024 = 5_242_880. Repeating this here (rather
// than importing) so a change to the cap surfaces as a test failure —
// the limit is part of the public contract.
const MAX_BYTES_TEST = 5 * 1024 * 1024;

// Build a Response whose body is a real ReadableStream. Uses pull() (not
// start()) so chunks are produced lazily — without backpressure, an
// unbounded start() enqueues everything and calls controller.close()
// before the consumer reads, which means a subsequent reader.cancel()
// finds the stream already closed and the cancel callback never fires.
// `cancelFlag` lets the test observe whether reader.cancel() reached the
// underlying source mid-stream.
/**
 * Wraps `chunks` in a pull-driven `ReadableStream` and returns it as the
 * body of a 200 `Response`.
 *
 * @param chunks - Byte chunks handed out one per `pull()` call; the stream
 *   closes once they are exhausted.
 * @param init - Optional `content-type` / `content-length` header values
 *   (a `null` or omitted length sends no header) and a `cancelFlag` object
 *   whose `cancelled` field is set to `true` when the stream's `cancel()`
 *   callback runs.
 * @returns The streaming `Response`.
 */
function streamedResponse(
  chunks: Uint8Array[],
  init: { contentType?: string; contentLength?: number | null; cancelFlag?: { cancelled: boolean } } = {},
): Response {
  let idx = 0;
  const stream = new ReadableStream<Uint8Array>({
    pull(controller) {
      if (idx >= chunks.length) {
        controller.close();
        return;
      }
      controller.enqueue(chunks[idx]!);
      idx += 1;
    },
    cancel() {
      if (init.cancelFlag) init.cancelFlag.cancelled = true;
    },
  });
  const headers: Record<string, string> = {};
  if (init.contentType) headers['content-type'] = init.contentType;
  if (init.contentLength !== undefined && init.contentLength !== null) {
    headers['content-length'] = String(init.contentLength);
  }
  return new Response(stream, { status: 200, headers });
}

describe('executeWebFetch — streaming body cap (v1.11.10)', () => {
  it('aborts the stream when a server lies about Content-Length and emits over the cap', async () => {
    // Honest header would have failed the pre-flight check. The lie is
    // the point: pre-flight passes (100 < 5MB) and the streaming reader
    // has to be the thing that catches the oversized body.
    //
    // Chunk count is deliberately higher than what the reader will
    // consume (10 × 1MB available, but the reader will cancel after ~6
    // chunks land it over 5MB). That headroom keeps the stream in
    // 'readable' state at the moment reader.cancel() runs — otherwise
    // a pull-then-close race could make the source close the stream
    // before cancel reaches it, and the cancel() callback wouldn't fire.
    const oneMB = new Uint8Array(1024 * 1024).fill(65); // 'A'
    const tenMBInChunks = Array.from({ length: 10 }, () => oneMB);
    const cancelFlag = { cancelled: false };
    const fakeFetch = vi.fn().mockResolvedValue(
      streamedResponse(tenMBInChunks, {
        contentType: 'text/plain',
        contentLength: 100,
        cancelFlag,
      }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/lying-server' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result).toBe(true);
    if ('error' in result) {
      expect(result.error).toBe('body_too_large');
      expect(result.reason).toMatch(/exceeded/);
    }
    // Critical: reader.cancel() actually fired so the underlying
    // connection / stream got released. Otherwise the abort would be
    // notional and the server could keep streaming.
    expect(cancelFlag.cancelled).toBe(true);
  });

  it('catches an oversized stream when Content-Length is omitted entirely', async () => {
    // Many real servers (chunked transfer-encoding, dynamic responses)
    // never send Content-Length. The pre-flight check has nothing to
    // gate on; the streaming reader is the only line of defense.
    // 10 chunks vs the ~6 the reader will consume — same headroom
    // rationale as the lying-Content-Length test above.
    const oneMB = new Uint8Array(1024 * 1024).fill(66); // 'B'
    const tenMBInChunks = Array.from({ length: 10 }, () => oneMB);
    const fakeFetch = vi.fn().mockResolvedValue(
      streamedResponse(tenMBInChunks, { contentType: 'text/plain' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/no-length' },
      fakeFetch as unknown as typeof fetch,
    );

    expect('error' in result && result.error).toBe('body_too_large');
  });

  it('passes a multi-chunk body that totals just under the cap', async () => {
    // Boundary case: MAX_BYTES - 1 bytes split across N chunks. The
    // streaming reader's `total > maxBytes` check is strict-greater so
    // exactly MAX_BYTES would still succeed; MAX_BYTES + 1 would fail.
    // - 1 leaves clear headroom without coinciding with the boundary.
    const targetTotal = MAX_BYTES_TEST - 1;
    const chunkSize = 256 * 1024; // 256 KiB chunks
    const chunks: Uint8Array[] = [];
    let remaining = targetTotal;
    while (remaining > 0) {
      const size = Math.min(chunkSize, remaining);
      chunks.push(new Uint8Array(size).fill(67)); // 'C'
      remaining -= size;
    }
    const fakeFetch = vi.fn().mockResolvedValue(
      streamedResponse(chunks, { contentType: 'text/plain' }),
    );

    const result = await executeWebFetch(
      { url: 'https://example.com/right-at-cap' },
      fakeFetch as unknown as typeof fetch,
    );

    // The streaming reader succeeded — we got a content shape, not an
    // error. (Downstream truncate() will clamp the final string to
    // MAX_CHARS_CAP=32000 and set truncated:true; that's the existing
    // truncation logic and is exercised by its own test. The point of
    // THIS test is that readBodyCapped didn't trip on a body that
    // sits just under its byte limit.)
    expect('content' in result).toBe(true);
    if ('content' in result) {
      expect(result.content.length).toBeGreaterThan(0);
      // All ASCII 'C's, so the leading 200 chars before any truncation
      // marker should be all C — proves we read real bytes through the
      // streaming reader rather than getting an empty buffer.
      expect(result.content.slice(0, 200)).toBe('C'.repeat(200));
    }
  });
});