Adds two new tools registered through the existing ALL_TOOLS registry:
- web_search hits SearXNG's JSON API (Fathom, internal Tailscale URL,
no auth) and returns top results
- web_fetch retrieves a URL's text content, gated by isPublicUrl
(url_guard.ts) which blocks loopback / RFC1918 / Tailscale CGNAT /
link-local / .local / .internal / non-http schemes
Both tools are opt-in via the existing session.web_search_enabled flag
(plumbed in v1.9, activated here). Default off. UI labels updated to
"Enable web search and fetch" / "Web search and fetch" since fetch joins
the same store. Counts against the v1.8.2 per-turn budget; covered by
the v1.11.6 doom-loop guard.
Native Node 20 fetch — no new prod dep. HTML stripping via regex (script
and style content elided wholesale). 5MB body cap, 15s fetch timeout,
8000-char default output, 32000-char cap.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
79 lines
3.1 KiB
TypeScript
79 lines
3.1 KiB
TypeScript
// v1.11.8: SSRF guard for web_fetch (and any other tool that follows a
// model-supplied URL). Sibling of path_guard.ts (workspace scope) and
// secret_guard.ts (filename deny) — same _guard.ts naming pattern. The
// spec suggested apps/server/src/services/safety/urlGuard.ts but BooCode
// has no `safety/` subdirectory and the existing guards live one level up.
//
// Block list, in order of evaluation:
//   - protocol other than http: / https:
//   - hostname is a known private name (localhost, *.localhost, 0.0.0.0)
//   - IPv6 literal that is loopback (::1), unspecified (::), unique-local
//     (fc00::/7), link-local (fe80::/10), or an IPv4-mapped address
//     (::ffff:a.b.c.d) whose embedded IPv4 falls in a blocked range
//   - hostname ends with .local or .internal (mDNS / private TLD)
//   - IPv4 in any RFC1918 / loopback / CGNAT / link-local range
//
// Trailing root dots ("localhost.", "printer.local.") are stripped before
// any comparison so the fully-qualified spelling cannot slip past the
// name and suffix checks.
//
// This is a purely syntactic check on the URL text: no DNS resolution is
// performed here, so a public-looking name whose record points at a
// private address is NOT caught by this function.

/** Outcome of a URL guard check. */
export interface UrlGuardResult {
  /** True when the URL passed every check. */
  ok: boolean;
  /** Rejection cause in `tag` or `tag: detail` form; set only when `ok` is false. */
  reason?: string;
}

/**
 * Classifies an IPv4 address by its first two octets.
 *
 * @returns the rejection tag for a non-public range, or `undefined` when the
 *   address is not in any blocked range.
 */
function blockedIpv4Range(o1: number, o2: number): string | undefined {
  // Loopback 127.0.0.0/8
  if (o1 === 127) return 'loopback';
  // RFC1918 10.0.0.0/8
  if (o1 === 10) return 'rfc1918';
  // RFC1918 172.16.0.0/12
  if (o1 === 172 && o2 >= 16 && o2 <= 31) return 'rfc1918';
  // RFC1918 192.168.0.0/16
  if (o1 === 192 && o2 === 168) return 'rfc1918';
  // CGNAT / Tailscale 100.64.0.0/10
  if (o1 === 100 && o2 >= 64 && o2 <= 127) return 'cgnat';
  // Link-local 169.254.0.0/16 (covers AWS/GCP metadata IMDS)
  if (o1 === 169 && o2 === 254) return 'link_local';
  // Source net 0.0.0.0/8 (rare but possible)
  if (o1 === 0) return 'zero_net';
  return undefined;
}

/**
 * Classifies an IPv6 literal (without brackets, lowercase, in the compressed
 * form the URL parser serializes).
 *
 * @returns the rejection tag for a non-public address, or `undefined` when
 *   the literal is not in any blocked range.
 */
function blockedIpv6Literal(addr: string): string | undefined {
  if (addr === '::1') return 'loopback_v6';
  if (addr === '::') return 'unspecified_v6';

  // IPv4-mapped (::ffff:a.b.c.d). The URL parser re-serializes the dotted
  // tail as two hex groups, e.g. ::ffff:127.0.0.1 -> ::ffff:7f00:1, so the
  // first group carries the two leading IPv4 octets.
  const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(addr);
  if (mapped) {
    const hi = Number.parseInt(mapped[1] ?? '0', 16);
    const inner = blockedIpv4Range(hi >> 8, hi & 0xff);
    return inner === undefined ? undefined : `${inner}_mapped_v6`;
  }

  // Unique-local fc00::/7 (first group fc00–fdff).
  if (/^f[cd][0-9a-f]{2}:/.test(addr)) return 'unique_local_v6';
  // Link-local fe80::/10 (first group fe80–febf).
  if (/^fe[89ab][0-9a-f]:/.test(addr)) return 'link_local_v6';
  return undefined;
}

/**
 * Decides whether a URL is safe to fetch from the server, i.e. it is an
 * http(s) URL whose host is not a loopback, private-network, link-local,
 * CGNAT, or otherwise internal target.
 *
 * Never throws: malformed input yields `{ ok: false, reason: 'invalid_url' }`.
 *
 * @param input - URL text to vet.
 * @returns `{ ok: true }` when no rule matched, otherwise `{ ok: false }`
 *   with a `reason` naming the rule that rejected it.
 */
export function isPublicUrl(input: string): UrlGuardResult {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    return { ok: false, reason: 'invalid_url' };
  }

  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    return { ok: false, reason: `unsupported_protocol: ${u.protocol}` };
  }

  // Drop trailing root dots so "localhost." / "foo.local." are compared in
  // the same form as their undotted spelling.
  const host = u.hostname.toLowerCase().replace(/\.+$/, '');
  if (host.length === 0) {
    return { ok: false, reason: 'empty_host' };
  }

  // Bare-name targets. *.localhost is reserved for loopback as well.
  if (host === 'localhost' || host.endsWith('.localhost') || host === '0.0.0.0') {
    return { ok: false, reason: `private_host: ${host}` };
  }

  // IPv6 literals. URL.hostname keeps the surrounding [] on an IPv6 host;
  // a ':' can only appear in a hostname that is such a literal, so this
  // also covers an unbracketed spelling should one ever be seen.
  if (host.includes(':')) {
    const v6Tag = blockedIpv6Literal(host.replace(/^\[|\]$/g, ''));
    if (v6Tag !== undefined) {
      return { ok: false, reason: `${v6Tag}: ${host}` };
    }
  }

  // mDNS / private TLDs
  if (host.endsWith('.local') || host.endsWith('.internal')) {
    return { ok: false, reason: `private_suffix: ${host}` };
  }

  // IPv4 numeric ranges. Matches host that's all-numeric octets only — DNS
  // names that happen to start with digits (e.g. 1password.com) won't match.
  // Hex / octal / integer spellings are already normalized to dotted-quad
  // by the URL parser before we get here.
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (ipv4) {
    const v4Tag = blockedIpv4Range(Number(ipv4[1]), Number(ipv4[2]));
    if (v4Tag !== undefined) {
      return { ok: false, reason: `${v4Tag}: ${host}` };
    }
  }

  return { ok: true };
}
|