591 lines
24 KiB
TypeScript
591 lines
24 KiB
TypeScript
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||
import { executeWebSearch } from '../web_search.js';
|
||
import { executeWebFetch } from '../web_fetch.js';
|
||
import { isPublicUrl } from '../url_guard.js';
|
||
|
||
// Base URL handed to executeWebSearch as the SearXNG endpoint. Every search
// test in this file stubs fetch, so this host is never meant to be contacted.
const TEST_SEARXNG = 'http://searxng.test:8888';
|
||
|
||
/**
 * Builds a `Response` for fetch stubs.
 *
 * @param body - Sent verbatim when it is a string; anything else is JSON-serialized.
 * @param init - Optional status (defaults to 200) plus Content-Type / Content-Length
 *   header values. A header is only emitted when its option is supplied.
 * @returns A fresh `Response` carrying the body, status and headers.
 */
function mockResponse(
  body: unknown,
  init: { status?: number; contentType?: string; contentLength?: number } = {},
): Response {
  const { status, contentType, contentLength } = init;
  const headers: Record<string, string> = {
    // Empty-string content types are treated the same as "not provided".
    ...(contentType ? { 'content-type': contentType } : {}),
    // A length of 0 is a legitimate value, so only `undefined` suppresses the header.
    ...(contentLength !== undefined ? { 'content-length': String(contentLength) } : {}),
  };
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  return new Response(payload, { status: status ?? 200, headers });
}
|
||
|
||
// Undo every vi.spyOn (notably the globalThis.fetch spies) after each test so
// a stub installed by one test cannot leak into the next.
afterEach(function restoreSpies() {
  vi.restoreAllMocks();
});
|
||
|
||
// ============================================================================
|
||
// url_guard — SSRF protection
|
||
// ============================================================================
|
||
|
||
/**
 * Contract tests for the SSRF guard `isPublicUrl`.
 *
 * Each case feeds a URL string to the guard and checks the `ok` flag of the
 * verdict; where the test cares about *why* a URL was refused it also matches
 * the `reason` string (loopback, cgnat, unsupported_protocol, private_suffix,
 * invalid_url).
 */
describe('isPublicUrl', () => {
  it('blocks http://localhost', () => {
    expect(isPublicUrl('http://localhost').ok).toBe(false);
  });

  it('blocks http://127.0.0.1:3000', () => {
    const r = isPublicUrl('http://127.0.0.1:3000');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/loopback/);
  });

  it('blocks RFC1918 192.168.x.x', () => {
    expect(isPublicUrl('http://192.168.1.1').ok).toBe(false);
  });

  it('blocks RFC1918 10.x.x.x', () => {
    expect(isPublicUrl('http://10.0.0.5').ok).toBe(false);
  });

  it('blocks RFC1918 172.16-31.x.x', () => {
    expect(isPublicUrl('http://172.20.0.1').ok).toBe(false);
    // Boundary: 172.15 is public; 172.16 is private; 172.31 is private; 172.32 is public.
    // All four points are asserted so an off-by-one on either edge of the
    // /12 range is caught.
    expect(isPublicUrl('http://172.15.0.1').ok).toBe(true);
    expect(isPublicUrl('http://172.16.0.1').ok).toBe(false);
    expect(isPublicUrl('http://172.31.255.255').ok).toBe(false);
    expect(isPublicUrl('http://172.32.0.1').ok).toBe(true);
  });

  it('blocks Tailscale CGNAT 100.64.0.0/10', () => {
    const r = isPublicUrl('http://100.114.205.53');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/cgnat/);
    // Inner edges of the /10 (100.64.x through 100.127.x) — the mirror image
    // of the just-outside addresses checked in the next test.
    expect(isPublicUrl('http://100.64.0.1').ok).toBe(false);
    expect(isPublicUrl('http://100.127.255.254').ok).toBe(false);
  });

  it('allows 100.x outside CGNAT range', () => {
    // 100.63 is public (one below CGNAT lower bound).
    expect(isPublicUrl('http://100.63.0.1').ok).toBe(true);
    // 100.128 is public (one above CGNAT upper bound).
    expect(isPublicUrl('http://100.128.0.1').ok).toBe(true);
  });

  it('blocks ftp:// (non-http protocol)', () => {
    const r = isPublicUrl('ftp://example.com');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/unsupported_protocol/);
  });

  it('blocks file:///etc/passwd', () => {
    expect(isPublicUrl('file:///etc/passwd').ok).toBe(false);
  });

  it('blocks anything.local (mDNS suffix)', () => {
    const r = isPublicUrl('http://anything.local');
    expect(r.ok).toBe(false);
    expect(r.reason).toMatch(/private_suffix/);
  });

  it('blocks anything.internal', () => {
    expect(isPublicUrl('http://service.internal').ok).toBe(false);
  });

  it('blocks 169.254.x.x link-local (covers AWS/GCP IMDS)', () => {
    expect(isPublicUrl('http://169.254.169.254').ok).toBe(false);
  });

  it('allows https://example.com', () => {
    expect(isPublicUrl('https://example.com').ok).toBe(true);
  });

  it('rejects malformed URLs', () => {
    const r = isPublicUrl('not a url');
    expect(r.ok).toBe(false);
    expect(r.reason).toBe('invalid_url');
  });
});
|
||
|
||
// ============================================================================
|
||
// web_search
|
||
// ============================================================================
|
||
|
||
/**
 * Tests for `executeWebSearch`. The SearXNG endpoint is never contacted:
 * each case either spies on `globalThis.fetch` or injects a stub fetcher as
 * the third argument.
 */
describe('executeWebSearch', () => {
  // Wraps raw SearXNG result entries in a JSON mock response.
  const searxngReply = (results: unknown[]): Response =>
    mockResponse({ results }, { contentType: 'application/json' });

  it('returns top N results, mapped to {title,url,snippet}', async () => {
    const rawHits = ['a', 'b', 'c'].map((id) => ({
      title: id.toUpperCase(),
      url: `https://${id}.example/`,
      content: `snippet ${id}`,
    }));
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(searxngReply(rawHits));

    const { results } = await executeWebSearch({ query: 'foo', max_results: 2 }, TEST_SEARXNG);

    // Three hits were served but only two were requested.
    expect(results).toHaveLength(2);
    // SearXNG's `content` field surfaces as `snippet`.
    expect(results[0]).toEqual({ title: 'A', url: 'https://a.example/', snippet: 'snippet a' });
    // Exactly one request, to /search with the query and format=json, carrying an abort signal.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_SEARXNG}/search?q=foo&format=json`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('caps max_results at 10 even if a larger value is requested', async () => {
    const twentyHits: Array<{ title: string; url: string; content: string }> = [];
    for (let i = 0; i < 20; i += 1) {
      twentyHits.push({ title: `t${i}`, url: `https://${i}.example/`, content: `c${i}` });
    }
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(searxngReply(twentyHits));

    const response = await executeWebSearch({ query: 'x', max_results: 999 }, TEST_SEARXNG);

    expect(response.results).toHaveLength(10);
  });

  it('throws on non-200 from SearXNG (executeToolCall surfaces the error to the LLM)', async () => {
    const unavailable = new Response('boom', { status: 503 });
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(unavailable);

    const pending = executeWebSearch({ query: 'x' }, TEST_SEARXNG);

    await expect(pending).rejects.toThrow(/SearXNG returned 503/);
  });

  it('returns empty results cleanly when SearXNG has no matches', async () => {
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(searxngReply([]));

    const response = await executeWebSearch({ query: 'xyz' }, TEST_SEARXNG);

    expect(response.results).toEqual([]);
    expect(response.total).toBe(0);
  });

  it('drops result entries with missing url (defensive)', async () => {
    const orphan = { title: 'no url', content: 'orphan' };
    const complete = { url: 'https://ok/', title: 't', content: 's' };
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(searxngReply([orphan, complete]));

    const { results } = await executeWebSearch({ query: 'x' }, TEST_SEARXNG);

    expect(results).toHaveLength(1);
    expect(results[0]!.url).toBe('https://ok/');
  });

  it('uses the injected fetcher when one is passed (v1.11.8 review)', async () => {
    // An explicitly supplied fetcher must be the one that gets called; the
    // global fetch is spied on only to prove it stays untouched. Injection
    // lets tests avoid monkey-patching globals, while production falls back
    // to global fetch when no fetcher is given.
    const globalFetchSpy = vi.spyOn(globalThis, 'fetch');
    const injected = vi
      .fn()
      .mockResolvedValue(searxngReply([{ title: 'injected', url: 'https://inj/', content: 's' }]));

    const { results } = await executeWebSearch(
      { query: 'q' },
      TEST_SEARXNG,
      injected as unknown as typeof fetch,
    );

    expect(injected).toHaveBeenCalledOnce();
    expect(globalFetchSpy).not.toHaveBeenCalled();
    expect(results[0]!.url).toBe('https://inj/');
  });
});
|
||
|
||
// ============================================================================
|
||
// web_fetch
|
||
// ============================================================================
|
||
|
||
/**
 * URL-guard short-circuit: a URL refused by the guard must yield
 * `blocked_by_url_guard` *without any request being issued*.
 *
 * A stub fetcher is injected (second argument of `executeWebFetch`) so the
 * "no request" half of the contract is asserted directly, and so a guard
 * regression fails the test instead of sending real traffic through the
 * default fetcher.
 */
describe('executeWebFetch — URL-guard short-circuit', () => {
  // Runs executeWebFetch on `url` with a stub fetcher, then checks both the
  // error code and that the stub was never invoked.
  async function expectBlockedBeforeFetch(url: string): Promise<void> {
    const neverFetch = vi.fn();
    const result = await executeWebFetch({ url }, neverFetch as unknown as typeof fetch);
    expect('error' in result && result.error).toBe('blocked_by_url_guard');
    expect(neverFetch).not.toHaveBeenCalled();
  }

  it('returns blocked_by_url_guard for ftp://', async () => {
    await expectBlockedBeforeFetch('ftp://example.com');
  });

  it('returns blocked_by_url_guard for file:///', async () => {
    await expectBlockedBeforeFetch('file:///etc/passwd');
  });

  it('returns blocked_by_url_guard for Tailscale CGNAT', async () => {
    await expectBlockedBeforeFetch('http://100.114.205.53/admin');
  });
});
|
||
|
||
/**
 * Content-type dispatch in `executeWebFetch`: HTML is reduced to text plus a
 * title, JSON and plain text come back untouched, and binary types are
 * refused with `unsupported_content_type`.
 */
describe('executeWebFetch — content-type handling', () => {
  // Fetches `url` through a stub that always answers with `body` served as `contentType`.
  const fetchAs = (url: string, body: string, contentType: string) => {
    const fetchStub = vi.fn().mockResolvedValue(mockResponse(body, { contentType }));
    return executeWebFetch({ url }, fetchStub as unknown as typeof fetch);
  };

  it('strips HTML tags and returns plain text + title', async () => {
    const page = [
      '<html><head><title> Hello World </title></head>',
      "<body><script>alert('xss')</script><h1>Heading</h1><p>Body text</p></body></html>",
    ].join('\n');

    const outcome = await fetchAs('https://example.com/page', page, 'text/html; charset=utf-8');

    expect('content' in outcome).toBe(true);
    if (!('content' in outcome)) return;
    // The padded <title> is expected back trimmed.
    expect(outcome.title).toBe('Hello World');
    // The script's CONTENT must be gone too — removing only the tags and
    // leaving the JavaScript in the text would be a leak.
    expect(outcome.content).not.toContain('alert(');
    expect(outcome.content).toContain('Heading');
    expect(outcome.content).toContain('Body text');
  });

  it('returns JSON content as-is (no stripping)', async () => {
    const payload = '{"foo": "bar"}';

    const outcome = await fetchAs('https://example.com/api', payload, 'application/json');

    expect('content' in outcome && outcome.content).toBe(payload);
  });

  it('returns plain text as-is', async () => {
    const text = 'just\nplain\ntext';

    const outcome = await fetchAs('https://example.com/file.txt', text, 'text/plain');

    expect('content' in outcome && outcome.content).toBe(text);
  });

  it('returns unsupported_content_type for binary content', async () => {
    const outcome = await fetchAs(
      'https://example.com/blob',
      'binary garbage',
      'application/octet-stream',
    );

    expect('error' in outcome && outcome.error).toBe('unsupported_content_type');
  });
});
|
||
|
||
/**
 * Size limits and output truncation in `executeWebFetch`: the declared
 * Content-Length check, the byte-based (not char-based) body cap, and the
 * `max_chars` truncation marker.
 */
describe('executeWebFetch — size + truncation', () => {
  it('rejects responses whose Content-Length exceeds 5MB', async () => {
    // The body itself is tiny; only the declared length (6 MiB) is over the limit.
    const declaredBytes = 6 * 1024 * 1024;
    const oversizedByHeader = new Response('small body', {
      status: 200,
      headers: {
        'content-type': 'text/plain',
        'content-length': String(declaredBytes),
      },
    });
    const fetchStub = vi.fn().mockResolvedValue(oversizedByHeader);

    const outcome = await executeWebFetch(
      { url: 'https://example.com/huge' },
      fetchStub as unknown as typeof fetch,
    );

    expect('error' in outcome && outcome.error).toBe('response_too_large');
  });

  it('rejects multi-byte content that exceeds 5MB in bytes but fits in chars (v1.11.8 review)', async () => {
    // 1.5M copies of U+1F600: each one is 2 UTF-16 code units (a surrogate
    // pair) but 4 UTF-8 bytes. So the string length is 3,000,000 (~2.86 MiB
    // if you count chars) while the encoded body is 6,000,000 bytes (>5 MiB).
    // Since v1.11.10 the streaming reader reports this as body_too_large (it
    // was response_too_large when the check ran after consumption). No
    // Content-Length header is set, so the pre-flight check passes and the
    // streaming path is the one that rejects.
    const emojiBody = '😀'.repeat(1_500_000);
    const fetchStub = vi
      .fn()
      .mockResolvedValue(
        new Response(emojiBody, { status: 200, headers: { 'content-type': 'text/plain' } }),
      );

    const outcome = await executeWebFetch(
      { url: 'https://example.com/multibyte' },
      fetchStub as unknown as typeof fetch,
    );

    expect('error' in outcome).toBe(true);
    if (!('error' in outcome)) return;
    expect(outcome.error).toBe('body_too_large');
    expect(outcome.reason).toMatch(/exceeded/);
  });

  it('truncates output to max_chars and appends a marker', async () => {
    const limit = 200;
    const longBody = 'A'.repeat(50_000);
    const fetchStub = vi.fn().mockResolvedValue(mockResponse(longBody, { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/big', max_chars: limit },
      fetchStub as unknown as typeof fetch,
    );

    expect('content' in outcome).toBe(true);
    if (!('content' in outcome)) return;
    expect(outcome.truncated).toBe(true);
    expect(outcome.content).toContain('[truncated');
    // The kept prefix is the first 200 chars; the marker comes after it.
    expect(outcome.content.startsWith('A'.repeat(limit))).toBe(true);
  });

  it('does NOT mark short content as truncated', async () => {
    const fetchStub = vi.fn().mockResolvedValue(mockResponse('short', { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/tiny' },
      fetchStub as unknown as typeof fetch,
    );

    expect('content' in outcome && outcome.truncated).toBe(false);
  });
});
|
||
|
||
// ============================================================================
|
||
// v1.11.9: manual redirect handling — re-run URL guard on each hop
|
||
// ============================================================================
|
||
|
||
/**
 * Builds an empty-bodied redirect `Response` for the redirect tests.
 *
 * @param loc - Value for the Location header, or `null` to leave the header out.
 * @param status - HTTP status to report; 302 unless a test needs another code.
 * @returns The redirect response.
 */
function redirect(loc: string | null, status = 302): Response {
  const headers: Record<string, string> = loc === null ? {} : { location: loc };
  return new Response('', { status, headers });
}
|
||
|
||
/**
 * Manual redirect handling in `executeWebFetch`: every hop's target is
 * re-checked by the URL guard, the hop count is bounded, a missing Location
 * is an error, and relative Locations are resolved against the current URL.
 */
describe('executeWebFetch — redirect handling', () => {
  it('blocks a redirect target that resolves to a private IP (AWS IMDS)', async () => {
    // A public origin answers 302 → 169.254.169.254 (link-local). Before
    // v1.11.9, `redirect: 'follow'` would have gone there silently; the
    // manual loop now runs isPublicUrl on the resolved target and refuses.
    const hops = vi
      .fn<typeof fetch>()
      .mockResolvedValueOnce(redirect('http://169.254.169.254/latest/meta-data/'));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/redirect' },
      hops as unknown as typeof fetch,
    );

    expect('error' in outcome).toBe(true);
    if ('error' in outcome) {
      expect(outcome.error).toBe('blocked_by_url_guard');
      // The reason must identify a REDIRECT hop (as opposed to the initial
      // URL) so the two failure modes are distinguishable in logs.
      expect(outcome.reason).toMatch(/redirect target/);
    }
    // Critical: no second request — the private target is never fetched.
    expect(hops).toHaveBeenCalledTimes(1);
  });

  it('follows a public-to-public redirect and returns the final body', async () => {
    const hops = vi.fn<typeof fetch>();
    hops.mockResolvedValueOnce(redirect('https://example.org/final'));
    hops.mockResolvedValueOnce(mockResponse('ok body', { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/start' },
      hops as unknown as typeof fetch,
    );

    expect('content' in outcome).toBe(true);
    if ('content' in outcome) {
      expect(outcome.content).toBe('ok body');
      // The reported URL is the final one, so the model knows where the body came from.
      expect(outcome.url).toBe('https://example.org/final');
    }
    expect(hops).toHaveBeenCalledTimes(2);
  });

  it('bails after MAX_REDIRECTS hops with a Too many redirects error', async () => {
    // Six chained redirects — one more than the loop allows. Every Location
    // names a different public host, so the URL guard keeps passing and the
    // redirectCount > MAX_REDIRECTS branch is the one exercised.
    const hops = vi.fn<typeof fetch>();
    for (const host of ['a', 'b', 'c', 'd', 'e', 'f']) {
      hops.mockResolvedValueOnce(redirect(`https://${host}.example/`));
    }

    const outcome = await executeWebFetch(
      { url: 'https://start.example/' },
      hops as unknown as typeof fetch,
    );

    expect('error' in outcome).toBe(true);
    if ('error' in outcome) {
      expect(outcome.error).toBe('too_many_redirects');
      expect(outcome.reason).toMatch(/Too many redirects/);
    }
  });

  it('errors when a 30x response omits the Location header', async () => {
    const hops = vi.fn<typeof fetch>();
    hops.mockResolvedValueOnce(redirect(null, 302));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/' },
      hops as unknown as typeof fetch,
    );

    expect('error' in outcome).toBe(true);
    if ('error' in outcome) {
      expect(outcome.error).toBe('redirect_missing_location');
      expect(outcome.reason).toMatch(/no Location/);
    }
  });

  it('resolves a relative Location against the current URL', async () => {
    // The server replies `Location: /foo` to a request for
    // https://example.com/path. RFC 9110 resolves that against the request
    // URL, making the next hop https://example.com/foo — so the second
    // fetch call must receive that absolute URL.
    const hops = vi.fn<typeof fetch>();
    hops.mockResolvedValueOnce(redirect('/foo'));
    hops.mockResolvedValueOnce(mockResponse('final', { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/path' },
      hops as unknown as typeof fetch,
    );

    expect('content' in outcome && outcome.content).toBe('final');
    expect(hops).toHaveBeenCalledTimes(2);
    const secondCallArgs = hops.mock.calls[1]!;
    expect(secondCallArgs[0]).toBe('https://example.com/foo');
  });
});
|
||
|
||
// ============================================================================
|
||
// v1.11.10: streaming body cap — abort the response stream at MAX_BYTES
|
||
// ============================================================================
|
||
|
||
// Test-local copy of the body cap: 5 MiB = 5_242_880 bytes. It is spelled out
// here instead of being imported on purpose — the limit is part of the public
// contract, so changing the cap should surface as a failing test.
const MAX_BYTES_TEST = 5 * 1024 ** 2;
|
||
|
||
/**
 * Builds a 200 `Response` whose body is a real `ReadableStream` that hands
 * out `chunks` one at a time.
 *
 * Chunks are produced from pull() rather than start() so they are generated
 * lazily. An unbounded start() would enqueue everything and close the stream
 * before the consumer reads; a later reader.cancel() would then find the
 * stream already closed and the cancel callback would never run.
 *
 * @param chunks - Byte chunks to emit, in order; the stream closes once they run out.
 * @param init - Optional Content-Type, optional Content-Length (omitted when
 *   `undefined` or `null`), and an optional `cancelFlag` whose `cancelled`
 *   field is set to `true` when the stream's cancel callback fires, letting a
 *   test observe that cancellation reached the underlying source.
 * @returns The streaming response.
 */
function streamedResponse(
  chunks: Uint8Array[],
  init: { contentType?: string; contentLength?: number | null; cancelFlag?: { cancelled: boolean } } = {},
): Response {
  let cursor = 0;
  const stream = new ReadableStream({
    pull(controller) {
      if (cursor < chunks.length) {
        // One chunk per pull keeps production demand-driven.
        controller.enqueue(chunks[cursor]!);
        cursor += 1;
      } else {
        controller.close();
      }
    },
    cancel() {
      const flag = init.cancelFlag;
      if (flag) flag.cancelled = true;
    },
  });

  const headers: Record<string, string> = {};
  if (init.contentType) {
    headers['content-type'] = init.contentType;
  }
  // `!= null` rules out both undefined and null in one check.
  if (init.contentLength != null) {
    headers['content-length'] = String(init.contentLength);
  }
  return new Response(stream, { status: 200, headers });
}
|
||
|
||
/**
 * Streaming body cap (v1.11.10): the response stream must be abandoned once
 * more than MAX_BYTES have arrived, whatever Content-Length claimed, while a
 * body just under the cap must still be accepted.
 */
describe('executeWebFetch — streaming body cap (v1.11.10)', () => {
  it('aborts the stream when a server lies about Content-Length and emits over the cap', async () => {
    // An honest header would be stopped by the pre-flight check. The lie is
    // the point: pre-flight passes (100 < 5MB), leaving the streaming reader
    // as the only thing that can catch the oversized body.
    //
    // More chunks are offered than the reader will take (10 × 1MB on offer;
    // the reader cancels once ~6 chunks put it over 5MB). That headroom
    // keeps the stream 'readable' when reader.cancel() runs — otherwise a
    // pull-then-close race could close the stream first and the cancel()
    // callback would not fire.
    const megabyteOfA = new Uint8Array(1024 * 1024).fill(65); // 'A'
    const tenChunks = new Array<Uint8Array>(10).fill(megabyteOfA);
    const cancelFlag = { cancelled: false };
    const lyingResponse = streamedResponse(tenChunks, {
      contentType: 'text/plain',
      contentLength: 100,
      cancelFlag,
    });
    const fetchStub = vi.fn().mockResolvedValue(lyingResponse);

    const outcome = await executeWebFetch(
      { url: 'https://example.com/lying-server' },
      fetchStub as unknown as typeof fetch,
    );

    expect('error' in outcome).toBe(true);
    if ('error' in outcome) {
      expect(outcome.error).toBe('body_too_large');
      expect(outcome.reason).toMatch(/exceeded/);
    }
    // Critical: reader.cancel() really fired, releasing the underlying
    // connection / stream. Without it the abort would be notional and the
    // server could keep streaming.
    expect(cancelFlag.cancelled).toBe(true);
  });

  it('catches an oversized stream when Content-Length is omitted entirely', async () => {
    // Plenty of real servers (chunked transfer-encoding, dynamic responses)
    // send no Content-Length at all, so the pre-flight check has nothing to
    // gate on and the streaming reader is the only line of defense.
    // 10 chunks offered vs the ~6 the reader consumes — same headroom
    // rationale as the lying-Content-Length test.
    const megabyteOfB = new Uint8Array(1024 * 1024).fill(66); // 'B'
    const tenChunks = new Array<Uint8Array>(10).fill(megabyteOfB);
    const fetchStub = vi
      .fn()
      .mockResolvedValue(streamedResponse(tenChunks, { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/no-length' },
      fetchStub as unknown as typeof fetch,
    );

    expect('error' in outcome && outcome.error).toBe('body_too_large');
  });

  it('passes a multi-chunk body that totals just under the cap', async () => {
    // Boundary case: MAX_BYTES - 1 bytes spread over several chunks. The
    // reader's `total > maxBytes` test is strict-greater, so exactly
    // MAX_BYTES would still pass and MAX_BYTES + 1 would fail; using - 1
    // leaves clear headroom without sitting on the boundary itself.
    const targetTotal = MAX_BYTES_TEST - 1;
    const chunkSize = 256 * 1024; // 256 KiB chunks
    const chunks: Uint8Array[] = [];
    for (let produced = 0; produced < targetTotal; produced += chunkSize) {
      const size = Math.min(chunkSize, targetTotal - produced);
      chunks.push(new Uint8Array(size).fill(67)); // 'C'
    }
    const fetchStub = vi
      .fn()
      .mockResolvedValue(streamedResponse(chunks, { contentType: 'text/plain' }));

    const outcome = await executeWebFetch(
      { url: 'https://example.com/right-at-cap' },
      fetchStub as unknown as typeof fetch,
    );

    // Success here means a content-shaped result, not an error. (Downstream
    // truncate() will clamp the final string to MAX_CHARS_CAP=32000 and set
    // truncated:true; that logic has its own test. What matters in THIS test
    // is that readBodyCapped did not trip on a body sitting just under its
    // byte limit.)
    expect('content' in outcome).toBe(true);
    if (!('content' in outcome)) return;
    expect(outcome.content.length).toBeGreaterThan(0);
    // The body is all ASCII 'C', so the first 200 chars — ahead of any
    // truncation marker — must be 'C' as well. That proves real bytes came
    // through the streaming reader instead of an empty buffer.
    expect(outcome.content.slice(0, 200)).toBe('C'.repeat(200));
  });
});
|