v1.11.8: address review — inject fetcher, byte-count limit, redirect TODO

This commit is contained in:
2026-05-20 21:40:11 +00:00
parent 2fdbb05477
commit 4e67a265ac
3 changed files with 58 additions and 3 deletions

View File

@@ -170,6 +170,28 @@ describe('executeWebSearch', () => {
expect(out.results).toHaveLength(1);
expect(out.results[0]!.url).toBe('https://ok/');
});
it('uses the injected fetcher when one is passed (v1.11.8 review)', async () => {
  // Injecting the fetcher lets the test run without monkey-patching
  // globals; callers that omit the argument fall back to global fetch.
  // The global spy is stubbed to reject so that a regression (the code
  // ignoring the injected fetcher) fails fast and deterministically
  // instead of passing through to a real network request.
  const globalSpy = vi
    .spyOn(globalThis, 'fetch')
    .mockRejectedValue(new Error('global fetch must not be called'));
  try {
    const stub = vi.fn().mockResolvedValue(
      mockResponse(
        { results: [{ title: 'injected', url: 'https://inj/', content: 's' }] },
        { contentType: 'application/json' },
      ),
    );
    const out = await executeWebSearch(
      { query: 'q' },
      TEST_SEARXNG,
      stub as unknown as typeof fetch,
    );
    // The stub must be the thing actually called, exactly once, and the
    // parsed result must come from the stubbed payload.
    expect(stub).toHaveBeenCalledOnce();
    expect(globalSpy).not.toHaveBeenCalled();
    expect(out.results[0]!.url).toBe('https://inj/');
  } finally {
    // Restore even when an assertion throws, so the spy on the global
    // cannot leak into later tests in this file.
    globalSpy.mockRestore();
  }
});
});
// ============================================================================
@@ -269,6 +291,29 @@ describe('executeWebFetch — size + truncation', () => {
expect('error' in result && result.error).toBe('response_too_large');
});
it('rejects multi-byte content that exceeds 5MB in bytes but fits in chars (v1.11.8 review)', async () => {
  // Payload: U+1F600 repeated 1.5M times. Each emoji is a surrogate pair
  // (2 UTF-16 code units) and 4 bytes in UTF-8, so the string's .length
  // is 3,000,000 while its UTF-8 byte length is 6,000,000 -- above the
  // 5MB cap named in the test title. A limit based on .length would let
  // it through; a byte-based limit must reject it. No Content-Length
  // header is supplied, so the rejection has to come from the check made
  // after the body has been read.
  const emojiCount = 1_500_000;
  const payload = '😀'.repeat(emojiCount);
  const response = new Response(payload, {
    status: 200,
    headers: { 'content-type': 'text/plain' },
  });
  const fetchStub = vi.fn().mockResolvedValue(response);

  const outcome = await executeWebFetch(
    { url: 'https://example.com/multibyte' },
    fetchStub as unknown as typeof fetch,
  );

  const isErrorResult = 'error' in outcome;
  expect(isErrorResult).toBe(true);
  // Re-check with `in` so TypeScript narrows the union before field access.
  if ('error' in outcome) {
    expect(outcome.error).toBe('response_too_large');
    // The reason text should be expressed in bytes, not character count.
    expect(outcome.reason).toMatch(/bytes/);
  }
});
it('truncates output to max_chars and appends a marker', async () => {
const big = 'A'.repeat(50_000);
const fakeFetch = vi.fn().mockResolvedValue(

View File

@@ -86,6 +86,8 @@ export async function executeWebFetch(
try {
const res = await fetcher(input.url, {
signal: controller.signal,
// TODO(v1.11.9): redirect: 'manual' + re-run isPublicUrl on Location header.
// Current 'follow' allows redirect-to-private-IP bypass of URL guard.
redirect: 'follow',
headers: { 'User-Agent': 'BooCode/1.11.8', Accept: 'text/html,text/plain,application/json,*/*' },
});
@@ -107,8 +109,13 @@ export async function executeWebFetch(
// about length AND streams gigabytes would defeat that; for v1.11.8
// the 15s timeout is the secondary fence.
const body = await res.text();
if (body.length > MAX_BYTES) {
return { error: 'response_too_large', reason: `body ${body.length} > ${MAX_BYTES}` };
// v1.11.8 review: byte-count, not char-count. body.length counts UTF-16
// code units, so a 5MB cap on it lets a multi-byte payload (emoji, CJK)
// pass even though its UTF-8 encoded size already exceeds MAX_BYTES.
// Compute the byte length once and reuse it for the error message.
const bodyBytes = Buffer.byteLength(body, 'utf8');
if (bodyBytes > MAX_BYTES) {
return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` };
}
let textRaw: string;

View File

@@ -35,16 +35,19 @@ export interface WebSearchOutput {
// with a mocked fetch. Throws on network / non-200 — the executeToolCall
// wrapper in inference.ts turns the thrown message into the LLM-visible
// error string.
// v1.11.8 review: fetcher injection. Mirrors executeWebFetch's signature
// so tests can pass a vi.fn() stub without monkey-patching globalThis.
export async function executeWebSearch(
input: WebSearchInputT,
searxngUrl: string,
fetcher: typeof fetch = fetch,
): Promise<WebSearchOutput> {
const cap = Math.min(Math.max(1, input.max_results ?? DEFAULT_RESULTS), MAX_RESULTS_CAP);
const url = `${searxngUrl}/search?q=${encodeURIComponent(input.query)}&format=json`;
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
try {
const res = await fetch(url, {
const res = await fetcher(url, {
signal: controller.signal,
headers: { 'User-Agent': 'BooCode/1.11.8' },
});