// v1.11.7: secret-file guard. Filters paths that commonly contain secrets
// (env files, key/cert files, credential stores) out of tool results, and
// hard-refuses single-path reads of the same. Composes with path_guard.ts:
// pathGuard() proves the path is inside the project root; isSecretPath()
// then proves it's not a known-sensitive filename. Patterns ported from
// continuedev/continue/core/indexing/ignore.ts plus a small BooCode
// additions block (see below).

// Verbatim from continuedev/continue/core/indexing/ignore.ts
// DEFAULT_SECURITY_IGNORE_FILETYPES export. 39 patterns as listed here.
const CONTINUE_FILETYPES: ReadonlyArray<string> = [
  // Environment and configuration files with secrets
  '*.env',
  '*.env.*',
  '.env*',
  'config.json',
  'config.yaml',
  'config.yml',
  'settings.json',
  'appsettings.json',
  'appsettings.*.json',
  // Certificate and key files
  '*.key',
  '*.pem',
  '*.p12',
  '*.pfx',
  '*.crt',
  '*.cer',
  '*.jks',
  '*.keystore',
  '*.truststore',
  // Database files that may contain sensitive data
  '*.db',
  '*.sqlite',
  '*.sqlite3',
  '*.mdb',
  '*.accdb',
  // Credential and secret files
  '*.secret',
  '*.secrets',
  'auth.json',
  '*.token',
  // Backup files that might contain sensitive data
  '*.bak',
  '*.backup',
  '*.old',
  '*.orig',
  // Docker secrets
  'docker-compose.override.yml',
  'docker-compose.override.yaml',
  // SSH and GPG
  'id_rsa',
  'id_dsa',
  'id_ecdsa',
  'id_ed25519',
  '*.ppk',
  '*.gpg',
];

// Verbatim from continuedev/continue/core/indexing/ignore.ts
// DEFAULT_SECURITY_IGNORE_DIRS export. Trailing "/" semantics: match
// against any path segment that equals the dir name (so files INSIDE the
// dir get blocked even if their leaf name is innocuous, e.g.
// `home/user/.aws/credentials` blocks via the `.aws` segment). Entries
// with an inner "/" (`tmp/secrets/`) match a run of consecutive segments.
const CONTINUE_DIRS: ReadonlyArray<string> = [
  // Environment and configuration directories
  '.env/',
  'env/',
  // Cloud provider credential directories
  '.aws/',
  '.gcp/',
  '.azure/',
  '.kube/',
  '.docker/',
  // Secret directories
  'secrets/',
  '.secrets/',
  'private/',
  '.private/',
  'certs/',
  'certificates/',
  'keys/',
  '.ssh/',
  '.gnupg/',
  '.gpg/',
  // Temporary directories that might contain sensitive data
  'tmp/secrets/',
  'temp/secrets/',
  '.tmp/',
];

// BooCode additions. continue.dev's list omits some classics — closing the
// gaps below. Each entry has a one-line justification so future audits know
// why it's here and not in the upstream port.
const BOOCODE_ADDITIONS: ReadonlyArray<string> = [
  // SSH public keys leak hostnames + usernames. continue.dev's `id_rsa`
  // is a literal that doesn't match `id_rsa.pub`; broadening to a glob.
  'id_rsa*',
  'id_dsa*',
  'id_ecdsa*',
  'id_ed25519*',
  // Wide-net credential pattern. `*credentials*` (not `credentials*`)
  // because the leak shape varies: credentials.json, aws_credentials,
  // gcp-credentials.yml, etc. Trade-off: also catches files named
  // "Credentials.tsx" → those go through view_file's hard-refuse path,
  // which is the right outcome (the LLM gets a clear "blocked" signal
  // and can ask the user to whitelist if it was a false-positive).
  '*credentials*',
  // .netrc holds plaintext FTP/HTTP credentials. Standard tooling target.
  '.netrc',
  // KeePass database. Encrypted at rest but contents are 1:1 secret
  // material; never want to feed even ciphertext to a model.
  '*.kdbx',
];

// Full pattern list: upstream file patterns, upstream directory patterns,
// then the BooCode additions. Despite the name (kept for compatibility with
// the upstream export), this also contains the `dir/` entries.
export const DEFAULT_SECURITY_IGNORE_FILETYPES: ReadonlyArray<string> = [
  ...CONTINUE_FILETYPES,
  ...CONTINUE_DIRS,
  ...BOOCODE_ADDITIONS,
];

// === glob compilation ======================================================

// Tiny glob-to-regex. No new prod dep — the patterns we ship are simple
// (literal | name* | *.ext | dir/ | dir/sub/). Only `*` and `?` are
// supported, which is everything this list uses. If patterns ever grow to
// need `**`, `[]`, `{a,b}`, or negation, swap in picomatch.
interface CompiledPattern {
  // Anchored, case-insensitive regex for `width` segments joined by "/".
  regex: RegExp;
  // 'basename' = test against the trailing path component(s) only.
  // 'segment'  = test against ANY run of path components (used for `dir/`
  //              patterns so `home/user/.aws/credentials` blocks via the
  //              `.aws` seg).
  mode: 'basename' | 'segment';
  // Number of "/"-separated components in the pattern (1 for plain names).
  width: number;
}

// Compiles one glob pattern. A trailing "/" selects 'segment' mode; anything
// else is 'basename' mode. Each component is regex-escaped independently and
// the components are re-joined with "/", so the matcher can compare the
// regex against the same number of consecutive path segments.
function compile(pattern: string): CompiledPattern {
  const isDir = pattern.endsWith('/');
  // Empty pieces come from the trailing (or a doubled) "/"; drop them.
  const parts = pattern.split('/').filter((part) => part.length > 0);
  const regexBody = parts
    .map((part) =>
      part
        // Escape regex specials except * and ?.
        .replace(/[.+^${}()|[\]\\]/g, '\\$&')
        // Wildcards never cross a path separator.
        .replace(/\*/g, '[^/]*')
        .replace(/\?/g, '[^/]'),
    )
    .join('/');
  return {
    regex: new RegExp(`^${regexBody}$`, 'i'),
    mode: isDir ? 'segment' : 'basename',
    width: parts.length,
  };
}

// Tests one compiled pattern against an already-split path. 'basename'
// patterns are compared with the last `width` segments only; 'segment'
// patterns are compared with every window of `width` consecutive segments.
// A zero-width pattern (empty or "/") never matches.
function matchesPattern(
  compiled: CompiledPattern,
  segments: ReadonlyArray<string>,
): boolean {
  if (compiled.width === 0) return false;
  const lastStart = segments.length - compiled.width;
  if (lastStart < 0) return false;
  const firstStart = compiled.mode === 'basename' ? lastStart : 0;
  for (let start = firstStart; start <= lastStart; start += 1) {
    const candidate = segments.slice(start, start + compiled.width).join('/');
    if (compiled.regex.test(candidate)) return true;
  }
  return false;
}

// Compiled once at module load; the regexes carry no `g` flag, so reusing
// them across calls is stateless.
const COMPILED: ReadonlyArray<CompiledPattern> =
  DEFAULT_SECURITY_IGNORE_FILETYPES.map(compile);

// === public API ============================================================

// Returns true when `relPath` matches a known-secret pattern. Case-insensitive
// (regex 'i' flag). Always normalize path separators to `/` so Windows-origin
// paths match the same patterns. Empty or root-only paths return false.
export function isSecretPath(relPath: string): boolean {
  if (!relPath) return false;
  const segments = relPath
    .replace(/\\/g, '/')
    .split('/')
    .filter((seg) => seg.length > 0);
  if (segments.length === 0) return false;
  return COMPILED.some((compiled) => matchesPattern(compiled, segments));
}

// Error thrown by view_file (or any single-path read) when the resolved
// path matches a secret pattern. Caught by inference.ts executeToolCall
// alongside PathScopeError; the message reaches the LLM verbatim so it
// knows the file was deliberately blocked rather than missing/broken.
export class SecretBlockedError extends Error {
  // The path that was refused, exactly as passed to the constructor.
  readonly path: string;

  constructor(relPath: string) {
    super(
      `Refused: ${relPath} matches a secret-file pattern and was blocked by pathGuard.`,
    );
    this.name = 'SecretBlockedError';
    this.path = relPath;
  }
}

// Helper for listing tools (list_dir / grep / find_files). Filters entries
// by their `.path` (or computed path), returns the filtered list plus a
// note string when anything was hidden. Callers attach the note to a
// `pathguard_note` field on their output shape so the LLM sees it.
//
// Generic over the entry type so each tool can pass its own row shape and
// a `pathOf` extractor. The caller-supplied path is what gets tested —
// usually the project-relative path the tool already computes for output.
//
// Returns `kept` (entries in input order that are not secret paths),
// `hidden` (how many were dropped) and `note` (undefined when nothing was
// hidden). The input array is not mutated.
export function filterSecretEntries<T>(
  entries: ReadonlyArray<T>,
  pathOf: (entry: T) => string,
): { kept: T[]; hidden: number; note: string | undefined } {
  const kept: T[] = [];
  let hidden = 0;
  for (const entry of entries) {
    if (isSecretPath(pathOf(entry))) {
      hidden += 1;
      continue;
    }
    kept.push(entry);
  }
  const note =
    hidden > 0
      ? `[pathGuard: ${hidden} ${hidden === 1 ? 'entry' : 'entries'} hidden by secret-file filter]`
      : undefined;
  return { kept, hidden, note };
}