// v1.11.8: SSRF guard for web_fetch (and any other tool that follows a
// model-supplied URL). Sibling of path_guard.ts (workspace scope) and
// secret_guard.ts (filename deny) — same _guard.ts naming pattern. The
// spec suggested apps/server/src/services/safety/urlGuard.ts but BooCode
// has no `safety/` subdirectory and the existing guards live one level up.
//
// Block list, in order of evaluation:
//   - protocol other than http: / https:
//   - hostname is a known private name (localhost, 0.0.0.0)
//   - IPv6 literal that is loopback (::1), unspecified (::), IPv4-mapped
//     onto a blocked IPv4 range, unique-local (fc00::/7) or link-local
//     (fe80::/10)
//   - hostname ends with .local, .internal or .localhost
//   - IPv4 in any RFC1918 / loopback / CGNAT / link-local / 0.0.0.0/8 range
//
// Trailing root dots ("localhost.", "printer.local.") are stripped before
// the name checks so the fully-qualified spelling cannot bypass them.
//
// This is a purely lexical check on the URL text: it performs no DNS
// resolution, so a public-looking name that resolves to a private address
// is not detected here. The IPv6 coverage is also not exhaustive (e.g.
// NAT64 / 6to4 embeddings are not decoded).

/** Outcome of a URL safety check. */
export interface UrlGuardResult {
  /** True when the URL passed every check. */
  ok: boolean;
  /** Set when `ok` is false: a category, usually followed by `: <detail>`. */
  reason?: string;
}

/**
 * Classifies the first two octets of an IPv4 address against the blocked
 * ranges. Returns the reason category, or `undefined` if the address is not
 * in any blocked range.
 */
function classifyPrivateIpv4(o1: number, o2: number): string | undefined {
  // Loopback 127.0.0.0/8
  if (o1 === 127) return 'loopback';
  // RFC1918 10.0.0.0/8
  if (o1 === 10) return 'rfc1918';
  // RFC1918 172.16.0.0/12
  if (o1 === 172 && o2 >= 16 && o2 <= 31) return 'rfc1918';
  // RFC1918 192.168.0.0/16
  if (o1 === 192 && o2 === 168) return 'rfc1918';
  // CGNAT / Tailscale 100.64.0.0/10
  if (o1 === 100 && o2 >= 64 && o2 <= 127) return 'cgnat';
  // Link-local 169.254.0.0/16 (covers AWS/GCP metadata IMDS)
  if (o1 === 169 && o2 === 254) return 'link_local';
  // Source net 0.0.0.0/8 (rare but possible)
  if (o1 === 0) return 'zero_net';
  return undefined;
}

/**
 * Classifies a bracket-less, lowercase IPv6 literal in the compressed form
 * the URL parser emits. Returns the reason category, or `undefined` if the
 * address is not in a blocked range.
 */
function classifyPrivateIpv6(addr: string): string | undefined {
  if (addr === '::1') return 'loopback_v6';
  if (addr === '::') return 'unspecified_v6';

  // IPv4-mapped (::ffff:a.b.c.d). The URL serializer always renders the
  // embedded IPv4 as two hex groups, e.g. ::ffff:7f00:1 for 127.0.0.1.
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(addr);
  if (mapped) {
    const hi = parseInt(mapped[1] ?? '', 16);
    const label = classifyPrivateIpv4(hi >> 8, hi & 0xff);
    return label === undefined ? undefined : `mapped_${label}`;
  }

  // First 16-bit group; NaN (treated as 0 by the bitwise ops below) when
  // the address starts with "::".
  const first = parseInt(addr.split(':')[0] ?? '', 16);
  // Unique local fc00::/7
  if ((first & 0xfe00) === 0xfc00) return 'unique_local_v6';
  // Link-local fe80::/10
  if ((first & 0xffc0) === 0xfe80) return 'link_local_v6';
  return undefined;
}

/**
 * Decides whether a model-supplied URL is safe to fetch, i.e. does not
 * obviously target the local machine or a private network.
 *
 * @param input - The raw URL string to check.
 * @returns `{ ok: true }` when no rule matched; otherwise `{ ok: false }`
 *   with a `reason` naming the rule that rejected the URL. Never throws:
 *   an unparseable URL yields `reason: 'invalid_url'`.
 */
export function isPublicUrl(input: string): UrlGuardResult {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    return { ok: false, reason: 'invalid_url' };
  }

  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    return { ok: false, reason: `unsupported_protocol: ${u.protocol}` };
  }

  // Strip trailing root dots so "localhost." / "foo.local." are treated the
  // same as their dot-less spelling.
  const host = u.hostname.toLowerCase().replace(/\.+$/, '');
  if (host.length === 0) {
    return { ok: false, reason: 'empty_host' };
  }

  // Bare-name targets
  if (host === 'localhost' || host === '0.0.0.0') {
    return { ok: false, reason: `private_host: ${host}` };
  }

  // IPv6 literals. The URL parser keeps the surrounding [] in `hostname`;
  // the bracket-less form is accepted too for safety.
  const inner = host.startsWith('[') && host.endsWith(']') ? host.slice(1, -1) : host;
  if (inner.includes(':')) {
    const v6Label = classifyPrivateIpv6(inner);
    if (v6Label !== undefined) {
      return { ok: false, reason: `${v6Label}: ${host}` };
    }
    return { ok: true };
  }

  // mDNS / private TLDs, plus *.localhost which is reserved for loopback.
  if (host.endsWith('.local') || host.endsWith('.internal') || host.endsWith('.localhost')) {
    return { ok: false, reason: `private_suffix: ${host}` };
  }

  // IPv4 numeric ranges. Matches host that's all-numeric octets only — DNS
  // names that happen to start with digits (e.g. 1password.com) won't match.
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (ipv4) {
    const v4Label = classifyPrivateIpv4(Number(ipv4[1]), Number(ipv4[2]));
    if (v4Label !== undefined) {
      return { ok: false, reason: `${v4Label}: ${host}` };
    }
  }

  return { ok: true };
}