Compare commits
1 Commits
ab01e04d77
...
v1.11.1-co
| Author | SHA1 | Date | |
|---|---|---|---|
| 3e1e17ecf6 |
@@ -295,9 +295,10 @@ describe('executeWebFetch — size + truncation', () => {
|
|||||||
// 1.5M U+1F600 emojis: each is length 2 in UTF-16 (surrogate pair) and
|
// 1.5M U+1F600 emojis: each is length 2 in UTF-16 (surrogate pair) and
|
||||||
// 4 bytes in UTF-8. body.length = 3,000,000 chars (~2.86 MiB by
|
// 4 bytes in UTF-8. body.length = 3,000,000 chars (~2.86 MiB by
|
||||||
// UTF-16 count) but Buffer.byteLength = 6,000,000 bytes (>5 MiB).
|
// UTF-16 count) but Buffer.byteLength = 6,000,000 bytes (>5 MiB).
|
||||||
// Pre-fix the char-count comparison let this through; the byte-count
|
// v1.11.10: streaming reader catches this as body_too_large (was
|
||||||
// check now rejects. No Content-Length header so the pre-flight
|
// response_too_large in the post-consumption check). No
|
||||||
// guard doesn't fire — we're testing the POST-consumption check.
|
// Content-Length header so the pre-flight pass and the streaming
|
||||||
|
// path is the one that rejects.
|
||||||
const heavy = '😀'.repeat(1_500_000);
|
const heavy = '😀'.repeat(1_500_000);
|
||||||
const fakeFetch = vi.fn().mockResolvedValue(
|
const fakeFetch = vi.fn().mockResolvedValue(
|
||||||
new Response(heavy, { status: 200, headers: { 'content-type': 'text/plain' } }),
|
new Response(heavy, { status: 200, headers: { 'content-type': 'text/plain' } }),
|
||||||
@@ -308,9 +309,8 @@ describe('executeWebFetch — size + truncation', () => {
|
|||||||
);
|
);
|
||||||
expect('error' in result).toBe(true);
|
expect('error' in result).toBe(true);
|
||||||
if ('error' in result) {
|
if ('error' in result) {
|
||||||
expect(result.error).toBe('response_too_large');
|
expect(result.error).toBe('body_too_large');
|
||||||
// Error reason should reference bytes, not character count.
|
expect(result.reason).toMatch(/exceeded/);
|
||||||
expect(result.reason).toMatch(/bytes/);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -453,3 +453,138 @@ describe('executeWebFetch — redirect handling', () => {
|
|||||||
expect(fakeFetch.mock.calls[1]![0]).toBe('https://example.com/foo');
|
expect(fakeFetch.mock.calls[1]![0]).toBe('https://example.com/foo');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// v1.11.10: streaming body cap — abort the response stream at MAX_BYTES
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// MAX_BYTES is 5 * 1024 * 1024 = 5_242_880. Repeating this here (rather
|
||||||
|
// than importing) so a change to the cap surfaces as a test failure —
|
||||||
|
// the limit is part of the public contract.
|
||||||
|
const MAX_BYTES_TEST = 5 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Build a Response whose body is a real ReadableStream. Uses pull() (not
|
||||||
|
// start()) so chunks are produced lazily — without backpressure, an
|
||||||
|
// unbounded start() enqueues everything and calls controller.close()
|
||||||
|
// before the consumer reads, which means a subsequent reader.cancel()
|
||||||
|
// finds the stream already closed and the cancel callback never fires.
|
||||||
|
// `cancelFlag` lets the test observe whether reader.cancel() reached the
|
||||||
|
// underlying source mid-stream.
|
||||||
|
function streamedResponse(
|
||||||
|
chunks: Uint8Array[],
|
||||||
|
init: { contentType?: string; contentLength?: number | null; cancelFlag?: { cancelled: boolean } } = {},
|
||||||
|
): Response {
|
||||||
|
let idx = 0;
|
||||||
|
const stream = new ReadableStream({
|
||||||
|
pull(controller) {
|
||||||
|
if (idx >= chunks.length) {
|
||||||
|
controller.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
controller.enqueue(chunks[idx]!);
|
||||||
|
idx += 1;
|
||||||
|
},
|
||||||
|
cancel() {
|
||||||
|
if (init.cancelFlag) init.cancelFlag.cancelled = true;
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
if (init.contentType) headers['content-type'] = init.contentType;
|
||||||
|
if (init.contentLength !== undefined && init.contentLength !== null) {
|
||||||
|
headers['content-length'] = String(init.contentLength);
|
||||||
|
}
|
||||||
|
return new Response(stream, { status: 200, headers });
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('executeWebFetch — streaming body cap (v1.11.10)', () => {
|
||||||
|
it('aborts the stream when a server lies about Content-Length and emits over the cap', async () => {
|
||||||
|
// Honest header would have failed the pre-flight check. The lie is
|
||||||
|
// the point: pre-flight passes (100 < 5MB) and the streaming reader
|
||||||
|
// has to be the thing that catches the oversized body.
|
||||||
|
//
|
||||||
|
// Chunk count is deliberately higher than what the reader will
|
||||||
|
// consume (10 × 1MB available, but the reader will cancel after ~6
|
||||||
|
// chunks land it over 5MB). That headroom keeps the stream in
|
||||||
|
// 'readable' state at the moment reader.cancel() runs — otherwise
|
||||||
|
// a pull-then-close race could make the source close the stream
|
||||||
|
// before cancel reaches it, and the cancel() callback wouldn't fire.
|
||||||
|
const oneMB = new Uint8Array(1024 * 1024).fill(65); // 'A'
|
||||||
|
const tenMBInChunks = Array.from({ length: 10 }, () => oneMB);
|
||||||
|
const cancelFlag = { cancelled: false };
|
||||||
|
const fakeFetch = vi.fn().mockResolvedValue(
|
||||||
|
streamedResponse(tenMBInChunks, {
|
||||||
|
contentType: 'text/plain',
|
||||||
|
contentLength: 100,
|
||||||
|
cancelFlag,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/lying-server' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('error' in result).toBe(true);
|
||||||
|
if ('error' in result) {
|
||||||
|
expect(result.error).toBe('body_too_large');
|
||||||
|
expect(result.reason).toMatch(/exceeded/);
|
||||||
|
}
|
||||||
|
// Critical: reader.cancel() actually fired so the underlying
|
||||||
|
// connection / stream got released. Otherwise the abort would be
|
||||||
|
// notional and the server could keep streaming.
|
||||||
|
expect(cancelFlag.cancelled).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('catches an oversized stream when Content-Length is omitted entirely', async () => {
|
||||||
|
// Many real servers (chunked transfer-encoding, dynamic responses)
|
||||||
|
// never send Content-Length. The pre-flight check has nothing to
|
||||||
|
// gate on; the streaming reader is the only line of defense.
|
||||||
|
// 10 chunks vs the ~6 the reader will consume — same headroom
|
||||||
|
// rationale as the lying-Content-Length test above.
|
||||||
|
const oneMB = new Uint8Array(1024 * 1024).fill(66); // 'B'
|
||||||
|
const tenMBInChunks = Array.from({ length: 10 }, () => oneMB);
|
||||||
|
const fakeFetch = vi.fn().mockResolvedValue(
|
||||||
|
streamedResponse(tenMBInChunks, { contentType: 'text/plain' }),
|
||||||
|
);
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/no-length' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('error' in result && result.error).toBe('body_too_large');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('passes a multi-chunk body that totals just under the cap', async () => {
|
||||||
|
// Boundary case: MAX_BYTES - 1 bytes split across N chunks. The
|
||||||
|
// streaming reader's `total > maxBytes` check is strict-greater so
|
||||||
|
// exactly MAX_BYTES would still succeed; MAX_BYTES + 1 would fail.
|
||||||
|
// - 1 leaves clear headroom without coinciding with the boundary.
|
||||||
|
const targetTotal = MAX_BYTES_TEST - 1;
|
||||||
|
const chunkSize = 256 * 1024; // 256 KiB chunks
|
||||||
|
const chunks: Uint8Array[] = [];
|
||||||
|
let remaining = targetTotal;
|
||||||
|
while (remaining > 0) {
|
||||||
|
const size = Math.min(chunkSize, remaining);
|
||||||
|
chunks.push(new Uint8Array(size).fill(67)); // 'C'
|
||||||
|
remaining -= size;
|
||||||
|
}
|
||||||
|
const fakeFetch = vi.fn().mockResolvedValue(
|
||||||
|
streamedResponse(chunks, { contentType: 'text/plain' }),
|
||||||
|
);
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/right-at-cap' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
// The streaming reader succeeded — we got a content shape, not an
|
||||||
|
// error. (Downstream truncate() will clamp the final string to
|
||||||
|
// MAX_CHARS_CAP=32000 and set truncated:true; that's the existing
|
||||||
|
// truncation logic and is exercised by its own test. The point of
|
||||||
|
// THIS test is that readBodyCapped didn't trip on a body that
|
||||||
|
// sits just under its byte limit.)
|
||||||
|
expect('content' in result).toBe(true);
|
||||||
|
if ('content' in result) {
|
||||||
|
expect(result.content.length).toBeGreaterThan(0);
|
||||||
|
// All ASCII 'C's, so the leading 200 chars before any truncation
|
||||||
|
// marker should be all C — proves we read real bytes through the
|
||||||
|
// streaming reader rather than getting an empty buffer.
|
||||||
|
expect(result.content.slice(0, 200)).toBe('C'.repeat(200));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -62,6 +62,39 @@ function stripHtml(html: string): { text: string; title: string | undefined } {
|
|||||||
return { text, title };
|
return { text, title };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// v1.11.10: streaming body reader. Aborts the response stream the instant
|
||||||
|
// cumulative bytes cross maxBytes, so a server that lies about
|
||||||
|
// Content-Length (or omits it entirely) can't make us buffer gigabytes
|
||||||
|
// before the post-read check fires. reader.cancel() releases the
|
||||||
|
// underlying connection on the spot.
|
||||||
|
async function readBodyCapped(
|
||||||
|
res: Response,
|
||||||
|
maxBytes: number,
|
||||||
|
): Promise<{ ok: true; body: string } | { ok: false; bytesRead: number }> {
|
||||||
|
if (!res.body) return { ok: true, body: '' };
|
||||||
|
const reader = res.body.getReader();
|
||||||
|
const chunks: Uint8Array[] = [];
|
||||||
|
let total = 0;
|
||||||
|
try {
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read();
|
||||||
|
if (done) break;
|
||||||
|
total += value.byteLength;
|
||||||
|
if (total > maxBytes) {
|
||||||
|
// Best-effort cancel — surfaces on the server side as a closed
|
||||||
|
// connection and (in our tests) fires the ReadableStream's
|
||||||
|
// cancel() callback so we can assert the abort happened.
|
||||||
|
await reader.cancel();
|
||||||
|
return { ok: false, bytesRead: total };
|
||||||
|
}
|
||||||
|
chunks.push(value);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
try { reader.releaseLock(); } catch { /* already released by cancel() */ }
|
||||||
|
}
|
||||||
|
return { ok: true, body: Buffer.concat(chunks).toString('utf8') };
|
||||||
|
}
|
||||||
|
|
||||||
function truncate(text: string, max: number): { content: string; truncated: boolean } {
|
function truncate(text: string, max: number): { content: string; truncated: boolean } {
|
||||||
if (text.length <= max) return { content: text, truncated: false };
|
if (text.length <= max) return { content: text, truncated: false };
|
||||||
const omitted = text.length - max;
|
const omitted = text.length - max;
|
||||||
@@ -159,19 +192,20 @@ export async function executeWebFetch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
const contentType = (res.headers.get('content-type') ?? '').toLowerCase();
|
const contentType = (res.headers.get('content-type') ?? '').toLowerCase();
|
||||||
// Read body. We rely on the 5MB cap by checking length after consumption
|
// v1.11.10: stream the body with a hard byte cap. Previously we read
|
||||||
// — most malicious or accidental large responses also exceed it via the
|
// res.text() in one shot and then byte-length-checked — a server that
|
||||||
// Content-Length pre-flight above. A truly hostile server that lies
|
// lies about Content-Length (or omits it) could make us buffer
|
||||||
// about length AND streams gigabytes would defeat that; the per-hop
|
// gigabytes before the post-check fired. readBodyCapped aborts the
|
||||||
// 15s timeout is the secondary fence.
|
// stream the instant total bytes cross MAX_BYTES. The Content-Length
|
||||||
const body = await res.text();
|
// pre-flight above stays as a cheap early reject for honest servers.
|
||||||
// v1.11.8 review: byte-count, not char-count. A 5MB cap on body.length
|
const read = await readBodyCapped(res, MAX_BYTES);
|
||||||
// (UTF-16 code units) lets a multi-byte payload (emoji, CJK) pass when
|
if (!read.ok) {
|
||||||
// its wire size already exceeded MAX_BYTES.
|
return {
|
||||||
const bodyBytes = Buffer.byteLength(body, 'utf8');
|
error: 'body_too_large',
|
||||||
if (bodyBytes > MAX_BYTES) {
|
reason: `Response body exceeded ${MAX_BYTES} bytes (read ${read.bytesRead} before abort)`,
|
||||||
return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` };
|
};
|
||||||
}
|
}
|
||||||
|
const body = read.body;
|
||||||
|
|
||||||
let textRaw: string;
|
let textRaw: string;
|
||||||
let title: string | undefined;
|
let title: string | undefined;
|
||||||
|
|||||||
Reference in New Issue
Block a user