From 863452ae0729a0f67a0afd3cd36cff9a7a129e78 Mon Sep 17 00:00:00 2001
From: indifferentketchup
Date: Wed, 20 May 2026 20:55:50 +0000
Subject: [PATCH] v1.11.7: secret-file deny list for codebase tools
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports continue.dev's DEFAULT_SECURITY_IGNORE_FILETYPES + ignored-dir lists
into apps/server/src/services/secret_guard.ts plus a small BooCode
additions block (id_rsa*, *credentials*, .netrc, *.kdbx). Tiny glob-to-
regex matcher; no new prod dep.

view_file hard-refuses via SecretBlockedError. list_dir / grep /
find_files filter their results and surface a pathguard_note string field
with the hidden count — never list the offending paths back.

Named secret_guard.ts (not safety/pathGuard.ts) to avoid collision with
the existing path_guard.ts which already exports a pathGuard() function.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../services/__tests__/secret_guard.test.ts | 198 +++++++++++++++
 apps/server/src/services/secret_guard.ts | 226 ++++++++++++++++++
 apps/server/src/services/tools.ts | 54 ++++-
 3 files changed, 467 insertions(+), 11 deletions(-)
 create mode 100644 apps/server/src/services/__tests__/secret_guard.test.ts
 create mode 100644 apps/server/src/services/secret_guard.ts

diff --git a/apps/server/src/services/__tests__/secret_guard.test.ts b/apps/server/src/services/__tests__/secret_guard.test.ts
new file mode 100644
index 0000000..a2e446c
--- /dev/null
+++ b/apps/server/src/services/__tests__/secret_guard.test.ts
@@ -0,0 +1,198 @@
+import { describe, it, expect } from 'vitest';
+import {
+  isSecretPath,
+  filterSecretEntries,
+  SecretBlockedError,
+  DEFAULT_SECURITY_IGNORE_FILETYPES,
+} from '../secret_guard.js';
+
+// ---- env / config patterns -------------------------------------------------
+
+describe('isSecretPath — env / config files', () => {
+  it('matches .env (literal via .env*)', () => {
+    expect(isSecretPath('.env')).toBe(true);
+  });
+
+  it('matches .env.local (via .env*)', () => {
+    expect(isSecretPath('.env.local')).toBe(true);
+  });
+
+  it('matches .env.production.local (via .env*)', () => {
+    expect(isSecretPath('.env.production.local')).toBe(true);
+  });
+
+  it('matches .envrc (via .env*, common direnv config holding secrets)', () => {
+    expect(isSecretPath('.envrc')).toBe(true);
+  });
+
+  it('matches nested .env (apps/server/.env via basename test)', () => {
+    expect(isSecretPath('apps/server/.env')).toBe(true);
+  });
+
+  it('case-insensitive: .ENV matches .env*', () => {
+    expect(isSecretPath('.ENV')).toBe(true);
+  });
+});
+
+// ---- SSH / cert / key patterns --------------------------------------------
+
+describe('isSecretPath — SSH / certs / keys', () => {
+  it('matches id_rsa (continue.dev literal)', () => {
+    expect(isSecretPath('id_rsa')).toBe(true);
+  });
+
+  it('matches id_rsa.pub (BooCode addition id_rsa*)', () => {
+    // continue.dev's literal id_rsa wouldn't match this; BooCode broadens
+    // because .pub files leak hostnames/usernames and authorized_keys hints.
+    expect(isSecretPath('id_rsa.pub')).toBe(true);
+  });
+
+  it('matches cert.pem (*.pem)', () => {
+    expect(isSecretPath('cert.pem')).toBe(true);
+  });
+
+  it('matches private.key (*.key)', () => {
+    expect(isSecretPath('private.key')).toBe(true);
+  });
+});
+
+// ---- credential patterns ---------------------------------------------------
+
+describe('isSecretPath — credential files (BooCode additions)', () => {
+  it('matches credentials.json (BooCode *credentials*)', () => {
+    expect(isSecretPath('credentials.json')).toBe(true);
+  });
+
+  it('matches aws_credentials (BooCode *credentials* — substring match)', () => {
+    // continue.dev has no `credentials*` pattern. BooCode adds `*credentials*`
+    // to catch the common `aws_credentials`, `gcp-credentials.yml`, etc.
+    expect(isSecretPath('aws_credentials')).toBe(true);
+  });
+
+  it('matches .netrc (BooCode addition)', () => {
+    expect(isSecretPath('.netrc')).toBe(true);
+  });
+
+  it('matches keystore.kdbx (BooCode addition *.kdbx)', () => {
+    expect(isSecretPath('keystore.kdbx')).toBe(true);
+  });
+});
+
+// ---- directory patterns ----------------------------------------------------
+
+describe('isSecretPath — directory segments (trailing-slash patterns)', () => {
+  it('matches files under .aws/ via segment test', () => {
+    expect(isSecretPath('home/user/.aws/credentials')).toBe(true);
+  });
+
+  it('matches files under .ssh/', () => {
+    expect(isSecretPath('home/user/.ssh/known_hosts')).toBe(true);
+  });
+
+  it('matches files inside any path segment named secrets/', () => {
+    expect(isSecretPath('apps/server/secrets/api.key')).toBe(true);
+  });
+});
+
+// ---- negatives -------------------------------------------------------------
+
+describe('isSecretPath — negatives', () => {
+  it('package.json is allowed', () => {
+    expect(isSecretPath('package.json')).toBe(false);
+  });
+
+  it('README.md is allowed', () => {
+    expect(isSecretPath('README.md')).toBe(false);
+  });
+
+  it('Login.tsx is allowed (substring "login" doesn\'t trigger anything)', () => {
+    expect(isSecretPath('src/components/Login.tsx')).toBe(false);
+  });
+
+  it('empty string returns false (defensive)', () => {
+    expect(isSecretPath('')).toBe(false);
+  });
+
+  it('a directory NAMED "credentials" alone does NOT trigger — only file basenames do', () => {
+    // Worth pinning: BooCode's `*credentials*` is a basename pattern (no
+    // trailing `/`), so it tests the leaf filename only. A directory
+    // literally called "credentials" containing innocuous files (e.g.
+    // Login.tsx) is fine. This is a deliberate trade-off vs. continue.dev's
+    // dir-pattern approach — adding `credentials/` as a dir pattern would
+    // block legitimate code like `src/auth/credentials/Login.tsx`.
+    expect(isSecretPath('src/auth/credentials/Login.tsx')).toBe(false);
+    // ...but a file INSIDE that dir whose name includes "credentials" still
+    // blocks via the basename match:
+    expect(isSecretPath('src/auth/credentials/credentials.ts')).toBe(true);
+  });
+});
+
+// ---- filterSecretEntries (listing-tools helper) ----------------------------
+
+describe('filterSecretEntries', () => {
+  it('removes secret entries and reports the count via note string', () => {
+    const entries = [
+      { path: 'src/index.ts' },
+      { path: '.env' },
+      { path: 'README.md' },
+      { path: 'id_rsa' },
+      { path: 'apps/server/package.json' },
+    ];
+    const result = filterSecretEntries(entries, (e) => e.path);
+    expect(result.kept.map((e) => e.path)).toEqual([
+      'src/index.ts',
+      'README.md',
+      'apps/server/package.json',
+    ]);
+    expect(result.hidden).toBe(2);
+    expect(result.note).toBe('[pathGuard: 2 entries hidden by secret-file filter]');
+  });
+
+  it('returns undefined note when nothing was filtered', () => {
+    const result = filterSecretEntries(
+      [{ path: 'a.ts' }, { path: 'b.ts' }],
+      (e) => e.path,
+    );
+    expect(result.kept).toHaveLength(2);
+    expect(result.hidden).toBe(0);
+    expect(result.note).toBeUndefined();
+  });
+
+  it('uses singular "entry" for a 1-hit filter (cosmetic but worth pinning)', () => {
+    const result = filterSecretEntries(
+      [{ path: 'index.ts' }, { path: '.env' }],
+      (e) => e.path,
+    );
+    expect(result.note).toBe('[pathGuard: 1 entry hidden by secret-file filter]');
+  });
+});
+
+// ---- SecretBlockedError ----------------------------------------------------
+
+describe('SecretBlockedError', () => {
+  it('carries the offending path on .path and in the message', () => {
+    const err = new SecretBlockedError('apps/server/.env');
+    expect(err.name).toBe('SecretBlockedError');
+    expect(err.path).toBe('apps/server/.env');
+    expect(err.message).toContain('apps/server/.env');
+    expect(err.message).toContain('pathGuard');
+  });
+});
+
+// ---- contract sanity check -------------------------------------------------
+
+describe('DEFAULT_SECURITY_IGNORE_FILETYPES', () => {
+  it('exports at least 40 patterns (continue.dev base) and is non-empty', () => {
+    expect(DEFAULT_SECURITY_IGNORE_FILETYPES.length).toBeGreaterThanOrEqual(40);
+  });
+
+  it('includes all the headline continue.dev entries we tested above', () => {
+    // Spot-check that the list still carries the patterns whose behavior
+    // the tests depend on. Catches an accidental list edit that would
+    // silently degrade coverage.
+    const set = new Set(DEFAULT_SECURITY_IGNORE_FILETYPES);
+    for (const pat of ['*.env', '.env*', '*.pem', '*.key', 'id_rsa', '.aws/', '.ssh/']) {
+      expect(set.has(pat), `missing pattern: ${pat}`).toBe(true);
+    }
+  });
+});
diff --git a/apps/server/src/services/secret_guard.ts b/apps/server/src/services/secret_guard.ts
new file mode 100644
index 0000000..7176eae
--- /dev/null
+++ b/apps/server/src/services/secret_guard.ts
@@ -0,0 +1,226 @@
+// v1.11.7: secret-file guard. Filters paths that commonly contain secrets
+// (env files, key/cert files, credential stores) out of tool results, and
+// hard-refuses single-path reads of the same. Composes with path_guard.ts:
+// pathGuard() proves the path is inside the project root; isSecretPath()
+// then proves it's not a known-sensitive filename. Patterns ported from
+// continuedev/continue/core/indexing/ignore.ts plus a small BooCode
+// additions block (see below).
+
+// Verbatim from continuedev/continue/core/indexing/ignore.ts
+// DEFAULT_SECURITY_IGNORE_FILETYPES export. 39 patterns.
+const CONTINUE_FILETYPES: ReadonlyArray<string> = [
+  // Environment and configuration files with secrets
+  '*.env',
+  '*.env.*',
+  '.env*',
+  'config.json',
+  'config.yaml',
+  'config.yml',
+  'settings.json',
+  'appsettings.json',
+  'appsettings.*.json',
+
+  // Certificate and key files
+  '*.key',
+  '*.pem',
+  '*.p12',
+  '*.pfx',
+  '*.crt',
+  '*.cer',
+  '*.jks',
+  '*.keystore',
+  '*.truststore',
+
+  // Database files that may contain sensitive data
+  '*.db',
+  '*.sqlite',
+  '*.sqlite3',
+  '*.mdb',
+  '*.accdb',
+
+  // Credential and secret files
+  '*.secret',
+  '*.secrets',
+  'auth.json',
+  '*.token',
+
+  // Backup files that might contain sensitive data
+  '*.bak',
+  '*.backup',
+  '*.old',
+  '*.orig',
+
+  // Docker secrets
+  'docker-compose.override.yml',
+  'docker-compose.override.yaml',
+
+  // SSH and GPG
+  'id_rsa',
+  'id_dsa',
+  'id_ecdsa',
+  'id_ed25519',
+  '*.ppk',
+  '*.gpg',
+];
+
+// Verbatim from continuedev/continue/core/indexing/ignore.ts
+// DEFAULT_SECURITY_IGNORE_DIRS export. Trailing "/" semantics: match
+// against any path segment that equals the dir name (so files INSIDE the
+// dir get blocked even if their leaf name is innocuous, e.g.
+// `home/user/.aws/credentials` blocks via the `.aws` segment).
+const CONTINUE_DIRS: ReadonlyArray<string> = [
+  // Environment and configuration directories
+  '.env/',
+  'env/',
+
+  // Cloud provider credential directories
+  '.aws/',
+  '.gcp/',
+  '.azure/',
+  '.kube/',
+  '.docker/',
+
+  // Secret directories
+  'secrets/',
+  '.secrets/',
+  'private/',
+  '.private/',
+  'certs/',
+  'certificates/',
+  'keys/',
+  '.ssh/',
+  '.gnupg/',
+  '.gpg/',
+
+  // Temporary directories that might contain sensitive data
+  'tmp/secrets/',
+  'temp/secrets/',
+  '.tmp/',
+];
+
+// BooCode additions. continue.dev's list omits some classics — closing the
+// gaps below. Each entry has a one-line justification so future audits know
+// why it's here and not in the upstream port.
+const BOOCODE_ADDITIONS: ReadonlyArray<string> = [
+  // SSH public keys leak hostnames + usernames. continue.dev's `id_rsa`
+  // is a literal that doesn't match `id_rsa.pub`; broadening to a glob.
+  'id_rsa*',
+  'id_dsa*',
+  'id_ecdsa*',
+  'id_ed25519*',
+  // Wide-net credential pattern. `*credentials*` (not `credentials*`)
+  // because the leak shape varies: credentials.json, aws_credentials,
+  // gcp-credentials.yml, etc. Trade-off: also catches files named
+  // "Credentials.tsx" → those go through view_file's hard-refuse path,
+  // which is the right outcome (the LLM gets a clear "blocked" signal
+  // and can ask the user to whitelist if it was a false-positive).
+  '*credentials*',
+  // .netrc holds plaintext FTP/HTTP credentials. Standard tooling target.
+  '.netrc',
+  // KeePass database. Encrypted at rest but contents are 1:1 secret
+  // material; never want to feed even ciphertext to a model.
+  '*.kdbx',
+];
+
+export const DEFAULT_SECURITY_IGNORE_FILETYPES: ReadonlyArray<string> = [
+  ...CONTINUE_FILETYPES,
+  ...CONTINUE_DIRS,
+  ...BOOCODE_ADDITIONS,
+];
+
+// === glob compilation ======================================================
+// Tiny glob-to-regex. No new prod dep — the patterns we ship are simple
+// (literal | name* | *.ext | dir/ | dir/sub/). Covers ~95% of glob spec,
+// which is 100% of what this list uses. If patterns ever grow to need `**`,
+// `[]`, `{a,b}`, or negation, swap in picomatch.
+
+interface CompiledPattern {
+  regex: RegExp;
+  // 'basename' = test against the trailing path component only.
+  // 'segment' = test against the whole normalized path; the regex is anchored
+  // on `/` boundaries, so `dir/` hits any component (or run) named `dir`.
+  mode: 'basename' | 'segment';
+}
+
+function compile(pattern: string): CompiledPattern {
+  const isDir = pattern.endsWith('/');
+  const body = isDir ? pattern.slice(0, -1) : pattern;
+  // Escape regex specials except * and ?. `/` needs no escaping; wildcards
+  // are kept from crossing a `/` so a multi-component dir pattern such as
+  // `tmp/secrets/` only ever matches whole, consecutive path components.
+  const escaped = body.replace(/[.+^${}()|[\]\\]/g, '\\$&');
+  const regexBody = escaped.replace(/\*/g, '[^/]*').replace(/\?/g, '[^/]');
+  return {
+    regex: new RegExp(isDir ? `(?:^|/)${regexBody}(?:/|$)` : `^${regexBody}$`, 'i'),
+    mode: isDir ? 'segment' : 'basename',
+  };
+}
+
+const COMPILED: ReadonlyArray<CompiledPattern> = DEFAULT_SECURITY_IGNORE_FILETYPES.map(compile);
+
+// === public API ============================================================
+
+// Returns true when `relPath` matches a known-secret pattern. Case-insensitive
+// (regex 'i' flag). Always normalize path separators to `/` so Windows-origin
+// paths match the same patterns. Empty or root-only paths return false.
+export function isSecretPath(relPath: string): boolean {
+  if (!relPath) return false;
+  const normalized = relPath.replace(/\\/g, '/');
+  const segments = normalized.split('/').filter((s) => s.length > 0);
+  if (segments.length === 0) return false;
+  const base = segments[segments.length - 1]!;
+
+  // Rejoin the non-empty components so dir patterns (anchored on `/`
+  // boundaries by compile) never see doubled or leading separators.
+  const joined = segments.join('/');
+  for (const compiled of COMPILED) {
+    // 'basename' patterns see the leaf only; 'segment' patterns see the
+    // whole path, so `tmp/secrets/` can span two components.
+    const subject = compiled.mode === 'basename' ? base : joined;
+    if (compiled.regex.test(subject)) return true;
+  }
+  return false;
+}
+
+// Error thrown by view_file (or any single-path read) when the resolved
+// path matches a secret pattern. Caught by inference.ts executeToolCall
+// alongside PathScopeError; the message reaches the LLM verbatim so it
+// knows the file was deliberately blocked rather than missing/broken.
+export class SecretBlockedError extends Error {
+  readonly path: string;
+  constructor(relPath: string) {
+    super(
+      `Refused: ${relPath} matches a secret-file pattern and was blocked by pathGuard.`,
+    );
+    this.name = 'SecretBlockedError';
+    this.path = relPath;
+  }
+}
+
+// Helper for listing tools (list_dir / grep / find_files).
Filters entries
+// by their `.path` (or computed path), returns the filtered list plus a
+// note string when anything was hidden. Callers attach the note to a
+// `pathguard_note` field on their output shape so the LLM sees it.
+//
+// Generic over the entry type so each tool can pass its own row shape and
+// a `pathOf` extractor. The caller-supplied path is what gets tested —
+// usually the project-relative path the tool already computes for output.
+export function filterSecretEntries<T>(
+  entries: ReadonlyArray<T>,
+  pathOf: (entry: T) => string,
+): { kept: T[]; hidden: number; note: string | undefined } {
+  const kept: T[] = [];
+  let hidden = 0;
+  for (const e of entries) {
+    if (isSecretPath(pathOf(e))) {
+      hidden += 1;
+      continue;
+    }
+    kept.push(e);
+  }
+  const note =
+    hidden > 0
+      ? `[pathGuard: ${hidden} ${hidden === 1 ? 'entry' : 'entries'} hidden by secret-file filter]`
+      : undefined;
+  return { kept, hidden, note };
+}
diff --git a/apps/server/src/services/tools.ts b/apps/server/src/services/tools.ts
index c01b619..7f8bc4d 100644
--- a/apps/server/src/services/tools.ts
+++ b/apps/server/src/services/tools.ts
@@ -2,6 +2,7 @@ import { readFile, readdir, stat } from 'node:fs/promises';
 import { resolve, basename, relative } from 'node:path';
 import { z } from 'zod';
 import { pathGuard, PathScopeError } from './path_guard.js';
+import { isSecretPath, SecretBlockedError, filterSecretEntries } from './secret_guard.js';
 import { grep as fileOpsGrep, findFiles as fileOpsFindFiles } from './file_ops.js';
 import { getGitMeta } from './git_meta.js';
 import { findSkills, getSkillBody, getSkillResource } from './skills.js';
@@ -63,6 +64,15 @@ export const viewFile: ToolDef = {
   },
   async execute(input, projectRoot) {
     const real = await pathGuard(projectRoot, input.path);
+    // v1.11.7: secret-file deny check. Test the project-relative path
+    // (matches the form continue.dev's patterns expect: basenames + dir
+    // segments). Throw a typed error so executeToolCall in inference.ts
+    // surfaces a clear "blocked" message to the LLM instead of silently
+    // returning content the user wanted hidden.
+    const relPath = relative(projectRoot, real) || basename(real);
+    if (isSecretPath(relPath)) {
+      throw new SecretBlockedError(relPath);
+    }
     const s = await stat(real);
     if (!s.isFile()) {
       throw new PathScopeError(`not a file: ${input.path}`);
@@ -152,11 +162,21 @@ export const listDir: ToolDef = {
         };
       })
     );
+    // v1.11.7: filter entries whose project-relative path matches a secret
+    // pattern. Each entry is tested using the project-rel dir + its name
+    // so the pattern's path/segment semantics work for nested dirs like
+    // `.aws/`. The count is surfaced via `pathguard_note` — we never list
+    // the hidden paths (defeats the purpose).
+    const relDir = relative(projectRoot, real) || '.';
+    const secretFilter = filterSecretEntries(out, (e) =>
+      relDir === '.' ? e.name : `${relDir}/${e.name}`,
+    );
     return {
-      path: relative(projectRoot, real) || '.',
-      entries: out,
-      total,
+      path: relDir,
+      entries: secretFilter.kept,
+      total: secretFilter.kept.length,
       truncated: total > MAX_DIR_ENTRIES,
+      ...(secretFilter.note ? { pathguard_note: secretFilter.note } : {}),
     };
   },
 };
@@ -208,14 +228,21 @@ export const grep: ToolDef = {
       case_sensitive: input.case_sensitive,
       hidden: input.hidden,
     });
+    const reshaped = result.matches.map((m) => ({
+      path: m.path,
+      line: m.line,
+      content: m.text,
+    }));
+    // v1.11.7: drop matches whose source file is a known-secret pattern.
+    // file_ops.grep returns project-relative paths, so they feed straight into
+    // isSecretPath. NOTE(review): every match inside a secret file counts in
+    // the hidden tally, so the tally reveals whether a regex hit it — confirm.
+    const secretFilter = filterSecretEntries(reshaped, (m) => m.path);
     return {
-      matches: result.matches.map((m) => ({
-        path: m.path,
-        line: m.line,
-        content: m.text,
-      })),
-      total: result.matches.length,
+      matches: secretFilter.kept,
+      total: secretFilter.kept.length,
       truncated: result.truncated,
+      ...(secretFilter.note ? { pathguard_note: secretFilter.note } : {}),
     };
   },
 };
@@ -260,10 +287,15 @@ export const findFiles: ToolDef = {
       path: input.path,
       max_results: limit,
     });
+    // v1.11.7: drop paths matching secret patterns. file_ops' `total` is the
+    // pre-truncation count; we report the visible post-filter count so that
+    // `total` agrees with `paths`. The hidden count travels in pathguard_note.
+    const secretFilter = filterSecretEntries(result.files, (p) => p);
     return {
-      paths: result.files,
-      total: result.total,
+      paths: secretFilter.kept,
+      total: secretFilter.kept.length,
       truncated: result.truncated,
+      ...(secretFilter.note ? { pathguard_note: secretFilter.note } : {}),
     };
   },
 };