v1.11.9: manual redirect handling — re-run URL guard on each hop
This commit is contained in:
@@ -343,3 +343,113 @@ describe('executeWebFetch — size + truncation', () => {
|
|||||||
expect('content' in result && result.truncated).toBe(false);
|
expect('content' in result && result.truncated).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// v1.11.9: manual redirect handling — re-run URL guard on each hop
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Helper: build a 30x redirect Response. status 302 by default; tests
|
||||||
|
// pass other codes (or omit the Location header) when they need to.
|
||||||
|
function redirect(loc: string | null, status = 302): Response {
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
if (loc !== null) headers['location'] = loc;
|
||||||
|
return new Response('', { status, headers });
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('executeWebFetch — redirect handling', () => {
|
||||||
|
it('blocks a redirect target that resolves to a private IP (AWS IMDS)', async () => {
|
||||||
|
// Public-IP origin 302s into 169.254.169.254 (link-local). Pre-v1.11.9
|
||||||
|
// `redirect: 'follow'` would silently follow this; the new manual
|
||||||
|
// loop re-runs isPublicUrl on the resolved target and blocks.
|
||||||
|
const fakeFetch = vi
|
||||||
|
.fn<typeof fetch>()
|
||||||
|
.mockResolvedValueOnce(redirect('http://169.254.169.254/latest/meta-data/'));
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/redirect' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('error' in result).toBe(true);
|
||||||
|
if ('error' in result) {
|
||||||
|
expect(result.error).toBe('blocked_by_url_guard');
|
||||||
|
// Reason should make it clear this was a REDIRECT hop, not the
|
||||||
|
// initial URL — so logs can distinguish the two failure modes.
|
||||||
|
expect(result.reason).toMatch(/redirect target/);
|
||||||
|
}
|
||||||
|
// Critical: the second fetch (the private target) must NOT happen.
|
||||||
|
expect(fakeFetch).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('follows a public-to-public redirect and returns the final body', async () => {
|
||||||
|
const fakeFetch = vi
|
||||||
|
.fn<typeof fetch>()
|
||||||
|
.mockResolvedValueOnce(redirect('https://example.org/final'))
|
||||||
|
.mockResolvedValueOnce(mockResponse('ok body', { contentType: 'text/plain' }));
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/start' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('content' in result).toBe(true);
|
||||||
|
if ('content' in result) {
|
||||||
|
expect(result.content).toBe('ok body');
|
||||||
|
// Final URL is reported back so the model knows where the body came from.
|
||||||
|
expect(result.url).toBe('https://example.org/final');
|
||||||
|
}
|
||||||
|
expect(fakeFetch).toHaveBeenCalledTimes(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('bails after MAX_REDIRECTS hops with a Too many redirects error', async () => {
|
||||||
|
// Chain 6 redirects — one more than the loop allows. Each Location
|
||||||
|
// points at a distinct public host so the URL guard stays happy and
|
||||||
|
// we exercise the redirectCount > MAX_REDIRECTS branch specifically.
|
||||||
|
const fakeFetch = vi
|
||||||
|
.fn<typeof fetch>()
|
||||||
|
.mockResolvedValueOnce(redirect('https://a.example/'))
|
||||||
|
.mockResolvedValueOnce(redirect('https://b.example/'))
|
||||||
|
.mockResolvedValueOnce(redirect('https://c.example/'))
|
||||||
|
.mockResolvedValueOnce(redirect('https://d.example/'))
|
||||||
|
.mockResolvedValueOnce(redirect('https://e.example/'))
|
||||||
|
.mockResolvedValueOnce(redirect('https://f.example/'));
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://start.example/' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('error' in result).toBe(true);
|
||||||
|
if ('error' in result) {
|
||||||
|
expect(result.error).toBe('too_many_redirects');
|
||||||
|
expect(result.reason).toMatch(/Too many redirects/);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('errors when a 30x response omits the Location header', async () => {
|
||||||
|
const fakeFetch = vi
|
||||||
|
.fn<typeof fetch>()
|
||||||
|
.mockResolvedValueOnce(redirect(null, 302));
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('error' in result).toBe(true);
|
||||||
|
if ('error' in result) {
|
||||||
|
expect(result.error).toBe('redirect_missing_location');
|
||||||
|
expect(result.reason).toMatch(/no Location/);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('resolves a relative Location against the current URL', async () => {
|
||||||
|
// Server sends `Location: /foo` (relative) on a request to
|
||||||
|
// https://example.com/path. RFC 9110 says resolve against the
|
||||||
|
// request URL, so the next hop is https://example.com/foo. Assert
|
||||||
|
// the second fetch was called with the absolute resolved URL.
|
||||||
|
const fakeFetch = vi
|
||||||
|
.fn<typeof fetch>()
|
||||||
|
.mockResolvedValueOnce(redirect('/foo'))
|
||||||
|
.mockResolvedValueOnce(mockResponse('final', { contentType: 'text/plain' }));
|
||||||
|
const result = await executeWebFetch(
|
||||||
|
{ url: 'https://example.com/path' },
|
||||||
|
fakeFetch as unknown as typeof fetch,
|
||||||
|
);
|
||||||
|
expect('content' in result && result.content).toBe('final');
|
||||||
|
expect(fakeFetch).toHaveBeenCalledTimes(2);
|
||||||
|
expect(fakeFetch.mock.calls[1]![0]).toBe('https://example.com/foo');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -22,6 +22,9 @@ const DEFAULT_MAX_CHARS = 8_000;
|
|||||||
const MAX_CHARS_CAP = 32_000;
|
const MAX_CHARS_CAP = 32_000;
|
||||||
const FETCH_TIMEOUT_MS = 15_000;
|
const FETCH_TIMEOUT_MS = 15_000;
|
||||||
const MAX_BYTES = 5 * 1024 * 1024;
|
const MAX_BYTES = 5 * 1024 * 1024;
|
||||||
|
// v1.11.9: cap redirect chains. Each hop re-runs isPublicUrl on the
|
||||||
|
// resolved target so a public-IP origin can't 302 us into a private IP.
|
||||||
|
const MAX_REDIRECTS = 5;
|
||||||
|
|
||||||
// Output shape. Each variant uses a discriminator the LLM can branch on.
|
// Output shape. Each variant uses a discriminator the LLM can branch on.
|
||||||
export type WebFetchOutput =
|
export type WebFetchOutput =
|
||||||
@@ -74,23 +77,76 @@ export async function executeWebFetch(
|
|||||||
input: WebFetchInputT,
|
input: WebFetchInputT,
|
||||||
fetcher: typeof fetch = fetch,
|
fetcher: typeof fetch = fetch,
|
||||||
): Promise<WebFetchOutput> {
|
): Promise<WebFetchOutput> {
|
||||||
const guard = isPublicUrl(input.url);
|
|
||||||
if (!guard.ok) {
|
|
||||||
return { error: 'blocked_by_url_guard', reason: guard.reason ?? 'unknown' };
|
|
||||||
}
|
|
||||||
|
|
||||||
const maxChars = Math.min(input.max_chars ?? DEFAULT_MAX_CHARS, MAX_CHARS_CAP);
|
const maxChars = Math.min(input.max_chars ?? DEFAULT_MAX_CHARS, MAX_CHARS_CAP);
|
||||||
|
|
||||||
const controller = new AbortController();
|
// v1.11.9: manual redirect handling. `redirect: 'follow'` in fetch
|
||||||
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
// doesn't expose intermediate hops — a public-IP origin that 302s us
|
||||||
|
// to 169.254.169.254 would silently bypass isPublicUrl. We follow each
|
||||||
|
// hop ourselves, re-running the URL guard on the resolved target so a
|
||||||
|
// mid-chain hostile redirect gets blocked.
|
||||||
|
//
|
||||||
|
// Timeout semantics changed from v1.11.8: AbortSignal.timeout fires
|
||||||
|
// per fetch hop (vs. one 15s budget shared across the whole call). In
|
||||||
|
// the worst case a 5-hop chain can take ~5×15s before erroring — still
|
||||||
|
// bounded; trades a longer cap for simpler code.
|
||||||
|
let currentUrl = input.url;
|
||||||
|
let res: Response | undefined;
|
||||||
|
let redirectCount = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const guard = isPublicUrl(currentUrl);
|
||||||
|
if (!guard.ok) {
|
||||||
|
return {
|
||||||
|
error: 'blocked_by_url_guard',
|
||||||
|
reason: redirectCount === 0
|
||||||
|
? (guard.reason ?? 'unknown')
|
||||||
|
: `redirect target ${currentUrl} blocked: ${guard.reason ?? 'unknown'}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetcher(input.url, {
|
res = await fetcher(currentUrl, {
|
||||||
signal: controller.signal,
|
method: 'GET',
|
||||||
// TODO(v1.11.9): redirect: 'manual' + re-run isPublicUrl on Location header.
|
redirect: 'manual',
|
||||||
// Current 'follow' allows redirect-to-private-IP bypass of URL guard.
|
signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
|
||||||
redirect: 'follow',
|
headers: {
|
||||||
headers: { 'User-Agent': 'BooCode/1.11.8', Accept: 'text/html,text/plain,application/json,*/*' },
|
'User-Agent': 'BooCode/1.11.9',
|
||||||
|
Accept: 'text/html,text/plain,application/json,*/*',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
// AbortSignal.timeout fires a DOMException with name 'TimeoutError';
|
||||||
|
// older runtimes / polyfills may surface 'AbortError'. Treat both.
|
||||||
|
if (err instanceof Error && (err.name === 'TimeoutError' || err.name === 'AbortError')) {
|
||||||
|
return { error: 'timeout', reason: `aborted after ${FETCH_TIMEOUT_MS}ms` };
|
||||||
|
}
|
||||||
|
return { error: 'fetch_failed', reason: msg };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res.status >= 300 && res.status < 400) {
|
||||||
|
const loc = res.headers.get('location');
|
||||||
|
if (!loc) {
|
||||||
|
return {
|
||||||
|
error: 'redirect_missing_location',
|
||||||
|
reason: `${res.status} redirect with no Location header`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
redirectCount += 1;
|
||||||
|
if (redirectCount > MAX_REDIRECTS) {
|
||||||
|
return {
|
||||||
|
error: 'too_many_redirects',
|
||||||
|
reason: `Too many redirects (exceeded ${MAX_REDIRECTS} hops)`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// Resolve relative Location against the URL we just hit (RFC 9110).
|
||||||
|
// The next loop iteration re-runs isPublicUrl on the new currentUrl.
|
||||||
|
currentUrl = new URL(loc, currentUrl).toString();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
return { error: 'upstream_status', reason: `HTTP ${res.status}` };
|
return { error: 'upstream_status', reason: `HTTP ${res.status}` };
|
||||||
}
|
}
|
||||||
@@ -106,13 +162,12 @@ export async function executeWebFetch(
|
|||||||
// Read body. We rely on the 5MB cap by checking length after consumption
|
// Read body. We rely on the 5MB cap by checking length after consumption
|
||||||
// — most malicious or accidental large responses also exceed it via the
|
// — most malicious or accidental large responses also exceed it via the
|
||||||
// Content-Length pre-flight above. A truly hostile server that lies
|
// Content-Length pre-flight above. A truly hostile server that lies
|
||||||
// about length AND streams gigabytes would defeat that; for v1.11.8
|
// about length AND streams gigabytes would defeat that; the per-hop
|
||||||
// the 15s timeout is the secondary fence.
|
// 15s timeout is the secondary fence.
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
// v1.11.8 review: byte-count, not char-count. A 5MB cap on
|
// v1.11.8 review: byte-count, not char-count. A 5MB cap on body.length
|
||||||
// body.length (UTF-16 code units) lets a multi-byte payload (emoji,
|
// (UTF-16 code units) lets a multi-byte payload (emoji, CJK) pass when
|
||||||
// CJK) pass when its wire size already exceeded MAX_BYTES. Compute
|
// its wire size already exceeded MAX_BYTES.
|
||||||
// once and reuse for the error message.
|
|
||||||
const bodyBytes = Buffer.byteLength(body, 'utf8');
|
const bodyBytes = Buffer.byteLength(body, 'utf8');
|
||||||
if (bodyBytes > MAX_BYTES) {
|
if (bodyBytes > MAX_BYTES) {
|
||||||
return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` };
|
return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` };
|
||||||
@@ -141,22 +196,16 @@ export async function executeWebFetch(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const truncated = truncate(textRaw, maxChars);
|
const truncated = truncate(textRaw, maxChars);
|
||||||
|
// Report the FINAL URL (post-redirects) so the LLM knows where the body
|
||||||
|
// came from — useful for citations and for the model to reason about
|
||||||
|
// domain trust.
|
||||||
return {
|
return {
|
||||||
url: input.url,
|
url: currentUrl,
|
||||||
title,
|
title,
|
||||||
content: truncated.content,
|
content: truncated.content,
|
||||||
content_type: contentType,
|
content_type: contentType,
|
||||||
truncated: truncated.truncated,
|
truncated: truncated.truncated,
|
||||||
};
|
};
|
||||||
} catch (err) {
|
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
|
||||||
if (err instanceof Error && err.name === 'AbortError') {
|
|
||||||
return { error: 'timeout', reason: `aborted after ${FETCH_TIMEOUT_MS}ms` };
|
|
||||||
}
|
|
||||||
return { error: 'fetch_failed', reason: msg };
|
|
||||||
} finally {
|
|
||||||
clearTimeout(timer);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export const webFetch: ToolDef<WebFetchInputT> = {
|
export const webFetch: ToolDef<WebFetchInputT> = {
|
||||||
|
|||||||
Reference in New Issue
Block a user