diff --git a/apps/server/src/services/__tests__/web_tools.test.ts b/apps/server/src/services/__tests__/web_tools.test.ts index 2b00ae1..1b0583f 100644 --- a/apps/server/src/services/__tests__/web_tools.test.ts +++ b/apps/server/src/services/__tests__/web_tools.test.ts @@ -170,6 +170,28 @@ describe('executeWebSearch', () => { expect(out.results).toHaveLength(1); expect(out.results[0]!.url).toBe('https://ok/'); }); + + it('uses the injected fetcher when one is passed (v1.11.8 review)', async () => { + // Direct injection vs vi.spyOn(globalThis, 'fetch'): the injected + // path lets tests run without monkey-patching globals, and the + // production code path defaults to global fetch when no fetcher is + // supplied. Asserts the stub is the thing actually called. + const globalSpy = vi.spyOn(globalThis, 'fetch'); + const stub = vi.fn().mockResolvedValue( + mockResponse( + { results: [{ title: 'injected', url: 'https://inj/', content: 's' }] }, + { contentType: 'application/json' }, + ), + ); + const out = await executeWebSearch( + { query: 'q' }, + TEST_SEARXNG, + stub as unknown as typeof fetch, + ); + expect(stub).toHaveBeenCalledOnce(); + expect(globalSpy).not.toHaveBeenCalled(); + expect(out.results[0]!.url).toBe('https://inj/'); + }); }); // ============================================================================ @@ -269,6 +291,29 @@ describe('executeWebFetch — size + truncation', () => { expect('error' in result && result.error).toBe('response_too_large'); }); + it('rejects multi-byte content that exceeds 5MB in bytes but fits in chars (v1.11.8 review)', async () => { + // 1.5M U+1F600 emojis: each is length 2 in UTF-16 (surrogate pair) and + // 4 bytes in UTF-8. body.length = 3,000,000 chars (~2.86 MiB by + // UTF-16 count) but Buffer.byteLength = 6,000,000 bytes (>5 MiB). + // Pre-fix the char-count comparison let this through; the byte-count + // check now rejects. 
No Content-Length header so the pre-flight + // guard doesn't fire — we're testing the POST-consumption check. + const heavy = '😀'.repeat(1_500_000); + const fakeFetch = vi.fn().mockResolvedValue( + new Response(heavy, { status: 200, headers: { 'content-type': 'text/plain' } }), + ); + const result = await executeWebFetch( + { url: 'https://example.com/multibyte' }, + fakeFetch as unknown as typeof fetch, + ); + expect('error' in result).toBe(true); + if ('error' in result) { + expect(result.error).toBe('response_too_large'); + // Error reason should reference bytes, not character count. + expect(result.reason).toMatch(/bytes/); + } + }); + it('truncates output to max_chars and appends a marker', async () => { const big = 'A'.repeat(50_000); const fakeFetch = vi.fn().mockResolvedValue( diff --git a/apps/server/src/services/web_fetch.ts b/apps/server/src/services/web_fetch.ts index 9dac95e..09f8bc8 100644 --- a/apps/server/src/services/web_fetch.ts +++ b/apps/server/src/services/web_fetch.ts @@ -86,6 +86,8 @@ export async function executeWebFetch( try { const res = await fetcher(input.url, { signal: controller.signal, + // TODO(v1.11.9): redirect: 'manual' + re-run isPublicUrl on Location header. + // Current 'follow' allows redirect-to-private-IP bypass of URL guard. redirect: 'follow', headers: { 'User-Agent': 'BooCode/1.11.8', Accept: 'text/html,text/plain,application/json,*/*' }, }); @@ -107,8 +109,13 @@ export async function executeWebFetch( // about length AND streams gigabytes would defeat that; for v1.11.8 // the 15s timeout is the secondary fence. const body = await res.text(); - if (body.length > MAX_BYTES) { - return { error: 'response_too_large', reason: `body ${body.length} > ${MAX_BYTES}` }; + // v1.11.8 review: byte-count, not char-count. A 5MB cap on + // body.length (UTF-16 code units) lets a multi-byte payload (emoji, + // CJK) pass when its wire size already exceeded MAX_BYTES. Compute + // once and reuse for the error message. 
+ const bodyBytes = Buffer.byteLength(body, 'utf8'); + if (bodyBytes > MAX_BYTES) { + return { error: 'response_too_large', reason: `body ${bodyBytes} bytes > ${MAX_BYTES}` }; } let textRaw: string; diff --git a/apps/server/src/services/web_search.ts b/apps/server/src/services/web_search.ts index 4660d65..7ce25fc 100644 --- a/apps/server/src/services/web_search.ts +++ b/apps/server/src/services/web_search.ts @@ -35,16 +35,19 @@ export interface WebSearchOutput { // with a mocked fetch. Throws on network / non-200 — the executeToolCall // wrapper in inference.ts turns the thrown message into the LLM-visible // error string. +// v1.11.8 review: fetcher injection. Mirrors executeWebFetch's signature +// so tests can pass a vi.fn() stub without monkey-patching globalThis. export async function executeWebSearch( input: WebSearchInputT, searxngUrl: string, + fetcher: typeof fetch = fetch, ): Promise<WebSearchOutput> { const cap = Math.min(Math.max(1, input.max_results ?? DEFAULT_RESULTS), MAX_RESULTS_CAP); const url = `${searxngUrl}/search?q=${encodeURIComponent(input.query)}&format=json`; const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); try { - const res = await fetch(url, { + const res = await fetcher(url, { signal: controller.signal, headers: { 'User-Agent': 'BooCode/1.11.8' }, });