v1.11.10: stream-cap response body at 5MB, abort on overflow

This commit is contained in:
2026-05-21 02:27:31 +00:00
parent ab01e04d77
commit 3e1e17ecf6
2 changed files with 187 additions and 18 deletions

View File

@@ -62,6 +62,39 @@ function stripHtml(html: string): { text: string; title: string | undefined } {
return { text, title };
}
/**
 * Reads a response body as UTF-8 text while enforcing a hard byte cap.
 *
 * v1.11.10: streaming body reader. The body is consumed chunk by chunk and
 * the stream is cancelled the instant the cumulative byte count crosses
 * `maxBytes`, so a server that lies about Content-Length (or omits it
 * entirely) can't make us buffer gigabytes before a post-read check fires.
 *
 * @param res - Response whose body stream is consumed. A missing body is
 *   treated as an empty string.
 * @param maxBytes - Largest accepted body size in bytes. A body of exactly
 *   `maxBytes` bytes is accepted; one more byte triggers the abort.
 * @returns `{ ok: true, body }` with the decoded text when the whole body
 *   fits, or `{ ok: false, bytesRead }` when the cap was exceeded, where
 *   `bytesRead` counts every byte received up to and including the chunk
 *   that crossed the cap.
 * @throws Whatever `reader.read()` rejects with (e.g. a network error).
 *   Failures of the cancel call itself are swallowed.
 */
async function readBodyCapped(
  res: Response,
  maxBytes: number,
): Promise<{ ok: true; body: string } | { ok: false; bytesRead: number }> {
  if (!res.body) return { ok: true, body: '' };
  const reader = res.body.getReader();
  const chunks: Uint8Array[] = [];
  let total = 0;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      total += value.byteLength;
      if (total > maxBytes) {
        // Best-effort cancel: it surfaces on the server side as a closed
        // connection and (in our tests) fires the ReadableStream's cancel()
        // callback so the abort can be asserted. A rejection here must not
        // mask the overflow result — we have already decided to give up on
        // this body, so the caller still gets the structured failure.
        try {
          await reader.cancel();
        } catch {
          /* cancel is advisory; the overflow is what we report */
        }
        return { ok: false, bytesRead: total };
      }
      chunks.push(value);
    }
  } finally {
    try {
      reader.releaseLock();
    } catch {
      /* already released by cancel() */
    }
  }
  // Decode once over the concatenated bytes so multi-byte UTF-8 sequences
  // split across chunk boundaries are decoded correctly.
  return { ok: true, body: Buffer.concat(chunks).toString('utf8') };
}
function truncate(text: string, max: number): { content: string; truncated: boolean } {
if (text.length <= max) return { content: text, truncated: false };
const omitted = text.length - max;
@@ -159,19 +192,20 @@ export async function executeWebFetch(
}
}
const contentType = (res.headers.get('content-type') ?? '').toLowerCase();
// Read body. We rely on the 5MB cap by checking length after consumption
// — most malicious or accidental large responses also exceed it via the
// Content-Length pre-flight above. A truly hostile server that lies
// about length AND streams gigabytes would defeat that; the per-hop
// 15s timeout is the secondary fence.
const body = await res.text();
// v1.11.8 review: byte-count, not char-count. A 5MB cap on body.length
// (UTF-16 code units) lets a multi-byte payload (emoji, CJK) pass when
// its wire size already exceeded MAX_BYTES.
const bodyBytes = Buffer.byteLength(body, 'utf8');
if (bodyBytes > MAX_BYTES) {
return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` };
// v1.11.10: stream the body with a hard byte cap. Previously we read
// res.text() in one shot and then byte-length-checked — a server that
// lies about Content-Length (or omits it) could make us buffer
// gigabytes before the post-check fired. readBodyCapped aborts the
// stream the instant total bytes cross MAX_BYTES. The Content-Length
// pre-flight above stays as a cheap early reject for honest servers.
const read = await readBodyCapped(res, MAX_BYTES);
if (!read.ok) {
return {
error: 'body_too_large',
reason: `Response body exceeded ${MAX_BYTES} bytes (read ${read.bytesRead} before abort)`,
};
}
const body = read.body;
let textRaw: string;
let title: string | undefined;