v1.11.7: secret-file deny list for codebase tools
Ports continue.dev's DEFAULT_SECURITY_IGNORE_FILETYPES + ignored-dir lists into apps/server/src/services/secret_guard.ts plus a small BooCode additions block (id_rsa*, *credentials*, .netrc, *.kdbx). Tiny glob-to-regex matcher; no new prod dep. view_file hard-refuses via SecretBlockedError. list_dir / grep / find_files filter their results and surface a pathguard_note string field with the hidden count — never list the offending paths back. Named secret_guard.ts (not safety/pathGuard.ts) to avoid collision with the existing path_guard.ts which already exports a pathGuard() function. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
226
apps/server/src/services/secret_guard.ts
Normal file
226
apps/server/src/services/secret_guard.ts
Normal file
@@ -0,0 +1,226 @@
|
||||
// v1.11.7: secret-file guard. Filters paths that commonly contain secrets
|
||||
// (env files, key/cert files, credential stores) out of tool results, and
|
||||
// hard-refuses single-path reads of the same. Composes with path_guard.ts:
|
||||
// pathGuard() proves the path is inside the project root; isSecretPath()
|
||||
// then proves it's not a known-sensitive filename. Patterns ported from
|
||||
// continuedev/continue/core/indexing/ignore.ts plus a small BooCode
|
||||
// additions block (see below).
|
||||
|
||||
// Verbatim from continuedev/continue/core/indexing/ignore.ts
|
||||
// DEFAULT_SECURITY_IGNORE_FILETYPES export. 39 patterns as ported here
// (TODO: re-verify the count against upstream).
|
||||
// File-name globs ported from continue.dev. Entries without a trailing `/`
// are matched against the last path component only.
const CONTINUE_FILETYPES: readonly string[] = [
  // --- env files and common config files that tend to embed secrets ---
  '*.env',
  '*.env.*',
  '.env*',
  'config.json',
  'config.yaml',
  'config.yml',
  'settings.json',
  'appsettings.json',
  'appsettings.*.json',

  // --- certificates, private keys, key stores ---
  '*.key',
  '*.pem',
  '*.p12',
  '*.pfx',
  '*.crt',
  '*.cer',
  '*.jks',
  '*.keystore',
  '*.truststore',

  // --- database files (may hold sensitive rows) ---
  '*.db',
  '*.sqlite',
  '*.sqlite3',
  '*.mdb',
  '*.accdb',

  // --- explicit credential / secret / token files ---
  '*.secret',
  '*.secrets',
  'auth.json',
  '*.token',

  // --- backup copies, which may duplicate any of the above ---
  '*.bak',
  '*.backup',
  '*.old',
  '*.orig',

  // --- local docker-compose overrides ---
  'docker-compose.override.yml',
  'docker-compose.override.yaml',

  // --- SSH private keys, PuTTY keys, GPG material ---
  'id_rsa',
  'id_dsa',
  'id_ecdsa',
  'id_ed25519',
  '*.ppk',
  '*.gpg',
];
|
||||
|
||||
// Verbatim from continuedev/continue/core/indexing/ignore.ts
|
||||
// DEFAULT_SECURITY_IGNORE_DIRS export. Trailing "/" semantics: match
|
||||
// against any path segment that equals the dir name (so files INSIDE the
|
||||
// dir get blocked even if their leaf name is innocuous, e.g.
|
||||
// `home/user/.aws/credentials` blocks via the `.aws` segment).
|
||||
// Directory patterns ported from continue.dev. The trailing `/` marks an
// entry as a directory name rather than a file-name glob.
const CONTINUE_DIRS: readonly string[] = [
  // --- env / config directories ---
  '.env/',
  'env/',

  // --- cloud-provider and container credential directories ---
  '.aws/',
  '.gcp/',
  '.azure/',
  '.kube/',
  '.docker/',

  // --- directories conventionally used for secrets, certs and keys ---
  'secrets/',
  '.secrets/',
  'private/',
  '.private/',
  'certs/',
  'certificates/',
  'keys/',
  '.ssh/',
  '.gnupg/',
  '.gpg/',

  // --- temp directories that might hold sensitive data ---
  'tmp/secrets/',
  'temp/secrets/',
  '.tmp/',
];
|
||||
|
||||
// BooCode additions. continue.dev's list omits some classics — closing the
|
||||
// gaps below. Each entry has a one-line justification so future audits know
|
||||
// why it's here and not in the upstream port.
|
||||
// Local extensions to the upstream lists; every entry carries its rationale.
const BOOCODE_ADDITIONS: readonly string[] = [
  // Upstream lists the bare key names (`id_rsa`, ...) as literals, which
  // do not match companions such as `id_rsa.pub`. Public keys can leak
  // hostnames and usernames, so widen each literal to a prefix glob.
  'id_rsa*',
  'id_dsa*',
  'id_ecdsa*',
  'id_ed25519*',

  // Deliberately broad (`*credentials*`, not `credentials*`) because the
  // name varies: credentials.json, aws_credentials, gcp-credentials.yml.
  // The cost is false positives such as "Credentials.tsx"; those hit
  // view_file's hard-refuse path, which gives the LLM an explicit
  // "blocked" signal so it can ask the user to whitelist the file.
  '*credentials*',

  // Plaintext FTP/HTTP credentials; a standard target for tooling.
  '.netrc',

  // KeePass database: encrypted at rest, but the contents are pure secret
  // material and not even the ciphertext should be fed to a model.
  '*.kdbx',
];
|
||||
|
||||
// Full deny list in a fixed order: upstream file-name globs, then upstream
// directory patterns, then the BooCode additions. Despite the name it
// carries directory patterns (trailing `/`) as well as file-name globs.
export const DEFAULT_SECURITY_IGNORE_FILETYPES: ReadonlyArray<string> =
  CONTINUE_FILETYPES.concat(CONTINUE_DIRS, BOOCODE_ADDITIONS);
|
||||
|
||||
// === glob compilation ======================================================
|
||||
// Tiny glob-to-regex. No new prod dep — the patterns we ship are simple
|
||||
// (literal | name* | *.ext | dir/). Covers ~95% of glob spec, which is
|
||||
// 100% of what this list uses. If patterns ever grow to need `**`, `[]`,
|
||||
// `{a,b}`, or negation, swap in picomatch.
|
||||
|
||||
/** One deny-list pattern, compiled to an anchored case-insensitive regex. */
interface CompiledPattern {
  /** Anchored (`^...$`) matcher for a single path component. */
  regex: RegExp;
  /**
   * 'basename' = test against the trailing path component only.
   * 'segment'  = test against ANY path component (used for `dir/` patterns
   *              so `home/user/.aws/credentials` blocks via the `.aws` seg).
   */
  mode: 'basename' | 'segment';
}

/**
 * Compiles one deny-list glob into a {@link CompiledPattern}.
 *
 * Supported syntax: literal characters, `*` (any run of characters within a
 * path component, possibly empty) and `?` (exactly one such character). A
 * trailing `/` marks the pattern as a directory name and selects 'segment'
 * mode; anything else is a 'basename' pattern.
 *
 * @param pattern - Glob from the deny list, e.g. `*.pem`, `id_rsa*`, `.aws/`.
 * @returns The anchored, case-insensitive regex plus its matching mode.
 */
function compile(pattern: string): CompiledPattern {
  const isDir = pattern.endsWith('/');
  const body = isDir ? pattern.slice(0, -1) : pattern;

  // Neutralise every regex metacharacter except the two glob wildcards,
  // which are translated in the next step.
  // NOTE(review): `/` is left untouched. The matcher in this file tests one
  // path component at a time, so a body that itself contains `/` (e.g.
  // `tmp/secrets/`) looks unable to match on its own - confirm that relying
  // on the plain `secrets/` entry for those paths is intended.
  const escaped = body.replace(/[.+^${}()|[\]\\]/g, '\\$&');

  // Wildcards become negated character classes rather than `.`: in a JS
  // regex `.` does not match line terminators, so a file name containing
  // "\n" or "\r" would slip past every wildcard pattern. `[^/]` matches
  // those characters and keeps a wildcard inside one path component.
  const regexBody = escaped.replace(/\*/g, '[^/]*').replace(/\?/g, '[^/]');

  return {
    regex: new RegExp(`^${regexBody}$`, 'i'),
    mode: isDir ? 'segment' : 'basename',
  };
}
|
||||
|
||||
// Compile the deny list once at module load; entries keep the list's order.
const COMPILED: readonly CompiledPattern[] = DEFAULT_SECURITY_IGNORE_FILETYPES.map(
  (pattern) => compile(pattern),
);
|
||||
|
||||
// === public API ============================================================
|
||||
|
||||
// Returns true when `relPath` matches a known-secret pattern. Case-insensitive
|
||||
// (regex 'i' flag). Always normalize path separators to `/` so Windows-origin
|
||||
// paths match the same patterns. Empty or root-only paths return false.
|
||||
/**
 * Reports whether a path matches any compiled secret pattern.
 *
 * @param relPath - Path to test; `\` separators are treated like `/`.
 * @returns `true` on the first matching pattern; `false` for an empty path,
 *   a path made only of separators, or when nothing matches.
 */
export function isSecretPath(relPath: string): boolean {
  if (!relPath) return false;

  // Unify separators, then drop the empty components produced by leading,
  // trailing or doubled slashes.
  const parts = relPath
    .replace(/\\/g, '/')
    .split('/')
    .filter((part) => part.length > 0);
  if (parts.length === 0) return false;

  const leaf = parts[parts.length - 1]!;

  // 'basename' patterns look only at the final component; 'segment'
  // patterns look at every component, the final one included.
  return COMPILED.some(({ regex, mode }) =>
    mode === 'basename' ? regex.test(leaf) : parts.some((part) => regex.test(part)),
  );
}
|
||||
|
||||
// Error thrown by view_file (or any single-path read) when the resolved
|
||||
// path matches a secret pattern. Caught by inference.ts executeToolCall
|
||||
// alongside PathScopeError; the message reaches the LLM verbatim so it
|
||||
// knows the file was deliberately blocked rather than missing/broken.
|
||||
/**
 * Raised when a single-path read targets a path that matches a secret
 * pattern. `name` is set to 'SecretBlockedError' and `path` carries the
 * offending path exactly as it was passed in.
 */
export class SecretBlockedError extends Error {
  /** The path that was refused. */
  readonly path: string;

  /**
   * @param relPath - The refused path; it is embedded verbatim in the message.
   */
  constructor(relPath: string) {
    const message = `Refused: ${relPath} matches a secret-file pattern and was blocked by pathGuard.`;
    super(message);
    this.path = relPath;
    this.name = 'SecretBlockedError';
  }
}
|
||||
|
||||
// Helper for listing tools (list_dir / grep / find_files). Filters entries
|
||||
// by their `.path` (or computed path), returns the filtered list plus a
|
||||
// note string when anything was hidden. Callers attach the note to a
|
||||
// `pathguard_note` field on their output shape so the LLM sees it.
|
||||
//
|
||||
// Generic over the entry type so each tool can pass its own row shape and
|
||||
// a `pathOf` extractor. The caller-supplied path is what gets tested —
|
||||
// usually the project-relative path the tool already computes for output.
|
||||
/**
 * Splits tool results into the entries that may be shown and a count of
 * those withheld because their path matches a secret pattern.
 *
 * @param entries - Rows produced by a listing tool; not mutated.
 * @param pathOf - Extracts the path to test from a row; called once per row.
 * @returns `kept` (surviving rows in their original order), `hidden` (number
 *   of rows removed) and `note` (a summary string when `hidden > 0`,
 *   otherwise `undefined`). The note never includes the hidden paths.
 */
export function filterSecretEntries<T>(
  entries: ReadonlyArray<T>,
  pathOf: (entry: T) => string,
): { kept: T[]; hidden: number; note: string | undefined } {
  const kept: T[] = [];
  for (const entry of entries) {
    if (!isSecretPath(pathOf(entry))) kept.push(entry);
  }

  // Every row is either kept or hidden, so the difference is the hidden count.
  const hidden = entries.length - kept.length;
  if (hidden === 0) {
    return { kept, hidden, note: undefined };
  }

  return {
    kept,
    hidden,
    note: `[pathGuard: ${hidden} ${hidden === 1 ? 'entry' : 'entries'} hidden by secret-file filter]`,
  };
}
|
||||
Reference in New Issue
Block a user