Files
boocode/apps/server/src/services/tools.ts
indifferentketchup 863452ae07 v1.11.7: secret-file deny list for codebase tools
Ports continue.dev's DEFAULT_SECURITY_IGNORE_FILETYPES + ignored-dir lists
into apps/server/src/services/secret_guard.ts plus a small BooCode
additions block (id_rsa*, *credentials*, .netrc, *.kdbx). Tiny glob-to-
regex matcher; no new prod dep.

view_file hard-refuses via SecretBlockedError. list_dir / grep /
find_files filter their results and surface a pathguard_note string
field with the hidden count — never list the offending paths back.

Named secret_guard.ts (not safety/pathGuard.ts) to avoid collision with
the existing path_guard.ts which already exports a pathGuard() function.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 20:55:50 +00:00

554 lines
20 KiB
TypeScript

import { readFile, readdir, stat } from 'node:fs/promises';
import { resolve, basename, relative } from 'node:path';
import { z } from 'zod';
import { pathGuard, PathScopeError } from './path_guard.js';
import { isSecretPath, SecretBlockedError, filterSecretEntries } from './secret_guard.js';
import { grep as fileOpsGrep, findFiles as fileOpsFindFiles } from './file_ops.js';
import { getGitMeta } from './git_meta.js';
import { findSkills, getSkillBody, getSkillResource } from './skills.js';
/** Largest file, in bytes, that view_file will read (5 MiB). */
const MAX_FILE_BYTES = 5 * 2 ** 20;
/** Number of lines view_file returns when the caller gives no explicit range. */
const DEFAULT_VIEW_LINES = 200;
/** grep: hard ceiling on returned matches, then the count used when max_results is omitted. */
const MAX_GREP_RESULTS = 200;
const DEFAULT_GREP_RESULTS = 100;
/** find_files: hard ceiling on returned paths, then the count used when max_results is omitted. */
const MAX_FIND_RESULTS = 200;
const DEFAULT_FIND_RESULTS = 100;
/** list_dir: maximum number of directory entries returned by a single call. */
const MAX_DIR_ENTRIES = 500;
/**
 * Function-style tool description as handed to the model: a fixed
 * `type: 'function'` tag wrapping the tool's name, its human-readable
 * description, and a JSON-Schema object describing the accepted arguments.
 */
export interface ToolJsonSchema {
  /** Discriminator; every tool in this module is a function tool. */
  type: 'function';
  function: {
    /** Tool name the model uses when emitting a call. */
    name: string;
    /** Short explanation of what the tool does. */
    description: string;
    /** JSON Schema for the tool's arguments (kept loosely typed). */
    parameters: Record<string, unknown>;
  };
}
/**
 * A single tool: its identity, the zod schema used to validate its input,
 * the JSON schema advertised to the model, and the function that runs it.
 *
 * @typeParam TInput - Shape of the validated input passed to `execute`.
 */
export interface ToolDef<TInput> {
  /** Tool name; matches `jsonSchema.function.name` for every tool in this file. */
  name: string;
  /** Human-readable description of the tool. */
  description: string;
  /** zod schema describing `TInput`. */
  inputSchema: z.ZodType<TInput>;
  /** Schema object exposed through `toolJsonSchemas()`. */
  jsonSchema: ToolJsonSchema;
  /**
   * Run the tool.
   *
   * @param input - Tool arguments.
   * @param projectRoot - Root directory the tool operates under.
   * @returns Tool-specific result object.
   */
  execute(input: TInput, projectRoot: string): Promise<unknown>;
}
/**
 * Input schema for view_file: a non-empty path plus an optional
 * positive-integer line range (`start_line` / `end_line`).
 */
const ViewFileInput = z.object({
  path: z.string().min(1),
  start_line: z.number().int().positive().optional(),
  end_line: z.number().int().positive().optional(),
});

/** Static type inferred from {@link ViewFileInput}. */
type ViewFileInputT = z.infer<typeof ViewFileInput>;
/**
 * `view_file` tool: returns a line slice of a text file under the project.
 *
 * The path is resolved through `pathGuard`, refused with
 * `SecretBlockedError` when it matches the secret deny list, and refused
 * with a plain `Error` when the file exceeds `MAX_FILE_BYTES`. Without an
 * explicit range the first `DEFAULT_VIEW_LINES` lines are returned.
 *
 * Result shape: `{ path, content, total_lines, returned_lines: [start, end],
 * truncated }`, where `returned_lines` is 1-indexed and inclusive and always
 * lies inside `1..total_lines`.
 */
export const viewFile: ToolDef<ViewFileInputT> = {
  name: 'view_file',
  description:
    "Read a file under the project. Returns first 200 lines by default, or a slice via start_line/end_line (1-indexed, inclusive). Files larger than 5MB are refused. Output is truncated if longer than the slice; the response indicates truncation.",
  inputSchema: ViewFileInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'view_file',
      description:
        "Read a file under the project. Returns first 200 lines by default, or a slice via start_line/end_line (1-indexed, inclusive). Files larger than 5MB are refused.",
      parameters: {
        type: 'object',
        properties: {
          path: { type: 'string', description: 'absolute or project-relative path' },
          start_line: { type: 'integer', description: 'first line (1-indexed)' },
          end_line: { type: 'integer', description: 'last line (1-indexed, inclusive)' },
        },
        required: ['path'],
        additionalProperties: false,
      },
    },
  },
  async execute(input, projectRoot) {
    const real = await pathGuard(projectRoot, input.path);
    // v1.11.7: secret-file deny check. Test the project-relative path
    // (matches the form continue.dev's patterns expect: basenames + dir
    // segments). Throw a typed error so executeToolCall in inference.ts
    // surfaces a clear "blocked" message to the LLM instead of silently
    // returning content the user wanted hidden.
    const relPath = relative(projectRoot, real) || basename(real);
    if (isSecretPath(relPath)) {
      throw new SecretBlockedError(relPath);
    }
    const s = await stat(real);
    if (!s.isFile()) {
      throw new PathScopeError(`not a file: ${input.path}`);
    }
    if (s.size > MAX_FILE_BYTES) {
      throw new Error(`file too large (${s.size} bytes, max ${MAX_FILE_BYTES})`);
    }
    const raw = await readFile(real, 'utf8');
    const lines = raw.split('\n');
    // split() always yields at least one element, so total >= 1 and the
    // clamps below always produce a valid range.
    const total = lines.length;
    // Clamp start into the file first. Previously a start_line past EOF
    // pushed `end` beyond `total`, so returned_lines named lines that do
    // not exist.
    const start = Math.min(Math.max(input.start_line ?? 1, 1), total);
    // No end_line: take a default-sized window beginning at start.
    const requestedEnd = input.end_line ?? start + DEFAULT_VIEW_LINES - 1;
    // Keep end inside the file and never before start.
    const end = Math.max(start, Math.min(requestedEnd, total));
    const content = lines.slice(start - 1, end).join('\n');
    return {
      path: relPath,
      content,
      total_lines: total,
      returned_lines: [start, end],
      // True whenever anything before or after the slice was left out.
      truncated: total > end || start > 1,
    };
  },
};
/**
 * Input schema for list_dir: a non-empty directory path and an optional
 * flag that includes dot-prefixed entries.
 */
const ListDirInput = z.object({
  path: z.string().min(1),
  show_hidden: z.boolean().optional(),
});

/** Static type inferred from {@link ListDirInput}. */
type ListDirInputT = z.infer<typeof ListDirInput>;
/**
 * `list_dir` tool: lists up to `MAX_DIR_ENTRIES` entries of a directory
 * under the project.
 *
 * Dot-prefixed entries are dropped unless `show_hidden` is set. Entries
 * whose project-relative path matches the secret deny list are removed
 * before the cap is applied and before any per-file `stat`, so they
 * neither consume cap slots nor get touched on disk.
 *
 * Result shape: `{ path, entries, total, truncated, pathguard_note? }`.
 * Throws `PathScopeError` when the resolved path is not a directory.
 */
export const listDir: ToolDef<ListDirInputT> = {
  name: 'list_dir',
  description: 'List entries in a directory (up to 500). Hidden files excluded unless show_hidden=true.',
  inputSchema: ListDirInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'list_dir',
      description:
        'List entries in a directory (up to 500). Hidden files (dot-prefixed) excluded unless show_hidden=true.',
      parameters: {
        type: 'object',
        properties: {
          path: { type: 'string' },
          show_hidden: { type: 'boolean' },
        },
        required: ['path'],
        additionalProperties: false,
      },
    },
  },
  async execute(input, projectRoot) {
    const real = await pathGuard(projectRoot, input.path);
    const s = await stat(real);
    if (!s.isDirectory()) {
      throw new PathScopeError(`not a directory: ${input.path}`);
    }
    const entries = await readdir(real, { withFileTypes: true });
    const visible = input.show_hidden
      ? entries
      : entries.filter((e) => !e.name.startsWith('.'));
    // v1.11.7: filter entries whose project-relative path matches a secret
    // pattern. Each entry is tested using the project-rel dir + its name
    // so the pattern's path/segment semantics work for nested dirs like
    // `.aws/`. The count is surfaced via `pathguard_note` — we never list
    // the hidden paths (defeats the purpose).
    const relDir = relative(projectRoot, real) || '.';
    const secretFilter = filterSecretEntries(visible, (e) =>
      relDir === '.' ? e.name : `${relDir}/${e.name}`,
    );
    const allowed = secretFilter.kept;
    // Cap after secret filtering so the page holds only listable entries.
    const page = allowed.slice(0, MAX_DIR_ENTRIES);
    const out = await Promise.all(
      page.map(async (e) => {
        let size: number | undefined;
        if (e.isFile()) {
          try {
            size = (await stat(resolve(real, e.name))).size;
          } catch {
            /* best effort: an entry that vanished or is unreadable is listed without a size */
          }
        }
        return {
          name: e.name,
          type: e.isDirectory() ? ('dir' as const) : ('file' as const),
          ...(size != null ? { size } : {}),
        };
      }),
    );
    return {
      path: relDir,
      entries: out,
      total: out.length,
      // More listable entries exist than fit in one response.
      truncated: allowed.length > MAX_DIR_ENTRIES,
      ...(secretFilter.note ? { pathguard_note: secretFilter.note } : {}),
    };
  },
};
/**
 * Input schema for grep: a non-empty pattern plus optional search path,
 * case-sensitivity flag, positive-integer result limit and hidden-file flag.
 */
const GrepInput = z.object({
  pattern: z.string().min(1),
  path: z.string().optional(),
  case_sensitive: z.boolean().optional(),
  max_results: z.number().int().positive().optional(),
  hidden: z.boolean().optional(),
});

/** Static type inferred from {@link GrepInput}. */
type GrepInputT = z.infer<typeof GrepInput>;
/**
 * `grep` tool: content search delegated to `file_ops.grep`.
 *
 * The requested result count is clamped to `1..MAX_GREP_RESULTS`
 * (defaulting to `DEFAULT_GREP_RESULTS`). Matches located in files on the
 * secret deny list are removed from the response.
 *
 * Result shape: `{ matches: [{ path, line, content }], total, truncated,
 * pathguard_note? }`.
 */
export const grep: ToolDef<GrepInputT> = {
  name: 'grep',
  description:
    'Search file contents with ripgrep. Default path is project root. Max 100 results (200 cap).',
  inputSchema: GrepInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'grep',
      description:
        'Search file contents with ripgrep. Returns up to 100 matches (cap 200). Set hidden=true to include dot-prefixed files.',
      parameters: {
        type: 'object',
        properties: {
          pattern: { type: 'string' },
          path: { type: 'string' },
          case_sensitive: { type: 'boolean' },
          max_results: { type: 'integer' },
          hidden: { type: 'boolean' },
        },
        required: ['pattern'],
        additionalProperties: false,
      },
    },
  },
  async execute(input, projectRoot) {
    // Clamp the caller's limit into [1, MAX_GREP_RESULTS].
    const requested = input.max_results ?? DEFAULT_GREP_RESULTS;
    const limit = Math.min(Math.max(requested, 1), MAX_GREP_RESULTS);
    const result = await fileOpsGrep(projectRoot, input.pattern, {
      path: input.path,
      max_matches: limit,
      case_sensitive: input.case_sensitive,
      hidden: input.hidden,
    });
    // file_ops reports {path, line, text}; the LLM-facing format names the
    // last field `content`, so rename it here.
    const reshaped = result.matches.map(({ path, line, text }) => ({
      path,
      line,
      content: text,
    }));
    // v1.11.7: matches from secret files are dropped one by one (paths from
    // file_ops.grep are project-relative, so they go straight into the
    // filter); every dropped match counts toward the hidden tally.
    const { kept, note } = filterSecretEntries(reshaped, (m) => m.path);
    return {
      matches: kept,
      total: kept.length,
      truncated: result.truncated,
      ...(note ? { pathguard_note: note } : {}),
    };
  },
};
/**
 * Input schema for find_files: a non-empty glob pattern plus optional
 * search path and positive-integer result limit.
 */
const FindFilesInput = z.object({
  pattern: z.string().min(1),
  path: z.string().optional(),
  max_results: z.number().int().positive().optional(),
});

/** Static type inferred from {@link FindFilesInput}. */
type FindFilesInputT = z.infer<typeof FindFilesInput>;
/**
 * `find_files` tool: filename glob delegated to `file_ops.findFiles`.
 *
 * The requested result count is clamped to `1..MAX_FIND_RESULTS`
 * (defaulting to `DEFAULT_FIND_RESULTS`). Paths on the secret deny list are
 * removed from the response.
 *
 * Result shape: `{ paths, total, truncated, pathguard_note? }`.
 */
export const findFiles: ToolDef<FindFilesInputT> = {
  name: 'find_files',
  description: 'Glob for filenames. Default path is project root. Max 100 results (200 cap).',
  inputSchema: FindFilesInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'find_files',
      description:
        'Glob for filenames under a directory. Default path is project root. Max 100 results (cap 200). Pattern uses standard glob (e.g. "**/*.ts").',
      parameters: {
        type: 'object',
        properties: {
          pattern: { type: 'string' },
          path: { type: 'string' },
          max_results: { type: 'integer' },
        },
        required: ['pattern'],
        additionalProperties: false,
      },
    },
  },
  async execute(input, projectRoot) {
    // Clamp the caller's limit into [1, MAX_FIND_RESULTS].
    const requested = input.max_results ?? DEFAULT_FIND_RESULTS;
    const limit = Math.min(Math.max(requested, 1), MAX_FIND_RESULTS);
    // file_ops answers with { files, total, truncated }; the LLM-visible
    // format is { paths, total, truncated }.
    const found = await fileOpsFindFiles(projectRoot, input.pattern, {
      path: input.path,
      max_results: limit,
    });
    // v1.11.7: secret paths are removed, and `total` is the visible
    // post-filter count rather than file_ops' own total, so the LLM can't
    // infer the hidden count by subtraction.
    const { kept, note } = filterSecretEntries(found.files, (p) => p);
    return {
      paths: kept,
      total: kept.length,
      truncated: found.truncated,
      ...(note ? { pathguard_note: note } : {}),
    };
  },
};
// v1.8 Level 1 branch awareness: a read-only window onto the project's git
// state for the model. The tool takes no path — it works on the
// inference-resolved project root through getGitMeta, whose subprocess runs
// with a 2s timeout (see git_meta).

/** Input schema for git_status: an empty object; unknown keys are rejected. */
const GitStatusInput = z.object({}).strict();

/** Static type inferred from {@link GitStatusInput}. */
type GitStatusInputT = z.infer<typeof GitStatusInput>;
/**
 * `git_status` tool: reports git metadata for the project root.
 *
 * When `getGitMeta` yields `null` the result is a fixed "not a repo"
 * object; otherwise the metadata is returned with `repo: true` placed in
 * front of it.
 */
export const gitStatus: ToolDef<GitStatusInputT> = {
  name: 'git_status',
  description:
    "Returns the current git branch, whether the working tree is dirty, and ahead/behind counts vs upstream. Read-only. Use when you need to know which branch the user is currently working on.",
  inputSchema: GitStatusInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'git_status',
      description:
        'Returns the current git branch, dirty flag, and ahead/behind counts vs upstream. Read-only.',
      parameters: {
        type: 'object',
        properties: {},
        additionalProperties: false,
      },
    },
  },
  async execute(_input, projectRoot) {
    const meta = await getGitMeta(projectRoot);
    return meta === null
      ? { repo: false, branch: null, is_dirty: false, ahead: 0, behind: 0 }
      : { repo: true, ...meta };
  },
};
// Batch 9.6: skill_find, skill_use, skill_resource — lazy-loaded markdown
// playbooks stored at /data/skills/. The work is split across three tools so
// that each call stays cheap: the model lists first, then loads a skill, then
// optionally pulls support files.

/** Input schema for skill_find: an optional free-text query. */
const SkillFindInput = z.object({
  query: z.string().optional(),
});

/** Static type inferred from {@link SkillFindInput}. */
type SkillFindInputT = z.infer<typeof SkillFindInput>;
/**
 * `skill_find` tool: forwards the query (empty string when omitted) to
 * `findSkills` and returns whatever it yields.
 */
export const skillFind: ToolDef<SkillFindInputT> = {
  name: 'skill_find',
  description:
    'Find skills (markdown playbooks under /data/skills) by name or description. Returns up to 5 matches. Empty query or "*" returns all available skills. Call this first to discover what skills are available.',
  inputSchema: SkillFindInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'skill_find',
      description:
        'Find skills by name or description. Returns up to 5 matches. Empty or "*" returns all.',
      parameters: {
        type: 'object',
        properties: {
          query: { type: 'string', description: 'substring matched against skill name and description' },
        },
        additionalProperties: false,
      },
    },
  },
  async execute(input) {
    const query = input.query ?? '';
    const found = await findSkills(query);
    return found;
  },
};
/** Input schema for skill_use: the non-empty name of the skill to load. */
const SkillUseInput = z.object({
  name: z.string().min(1),
});

/** Static type inferred from {@link SkillUseInput}. */
type SkillUseInputT = z.infer<typeof SkillUseInput>;
/**
 * `skill_use` tool: fetches a skill body through `getSkillBody`.
 *
 * Returns `{ body }` on success. When `getSkillBody` yields `null` the
 * result is an `{ error: 'unknown_skill', message }` object rather than a
 * thrown exception.
 */
export const skillUse: ToolDef<SkillUseInputT> = {
  name: 'skill_use',
  description:
    "Load the full body of a skill's SKILL.md by name. Returns the markdown playbook to follow. Discover names via skill_find. Errors: unknown_skill.",
  inputSchema: SkillUseInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'skill_use',
      description: "Load the full body of a skill's SKILL.md by name.",
      parameters: {
        type: 'object',
        properties: {
          name: { type: 'string', description: 'skill name from skill_find' },
        },
        required: ['name'],
        additionalProperties: false,
      },
    },
  },
  async execute(input) {
    const body = await getSkillBody(input.name);
    if (body !== null) {
      return { body };
    }
    return { error: 'unknown_skill', message: `unknown skill: ${input.name}` };
  },
};
/**
 * Input schema for skill_resource: a non-empty skill name and a non-empty
 * resource path.
 */
const SkillResourceInput = z.object({
  name: z.string().min(1),
  path: z.string().min(1),
});

/** Static type inferred from {@link SkillResourceInput}. */
type SkillResourceInputT = z.infer<typeof SkillResourceInput>;
/**
 * `skill_resource` tool: fetches a support file through `getSkillResource`.
 *
 * Returns `{ content }` when the lookup reports `ok`; otherwise relays the
 * lookup's `code` and `message` as `{ error, message }` instead of throwing.
 */
export const skillResource: ToolDef<SkillResourceInputT> = {
  name: 'skill_resource',
  description:
    "Read a support file inside a skill's folder (e.g. references/root-cause-tracing.md). Path is relative to the skill folder. Use skill_use to read SKILL.md itself. Errors: unknown_skill, unknown_resource, path_escape.",
  inputSchema: SkillResourceInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'skill_resource',
      description: "Read a support file inside a skill's folder. Path is relative to the skill folder.",
      parameters: {
        type: 'object',
        properties: {
          name: { type: 'string', description: 'skill name' },
          path: { type: 'string', description: 'relative path under the skill folder' },
        },
        required: ['name', 'path'],
        additionalProperties: false,
      },
    },
  },
  async execute(input) {
    const lookup = await getSkillResource(input.name, input.path);
    if (lookup.ok) {
      return { content: lookup.content };
    }
    return { error: lookup.code, message: lookup.message };
  },
};
// Batch 9.7: ask_user_input — interactive elicitation. The model emits a tool
// call carrying 1-3 structured questions. The inference loop then PAUSES: it
// neither executes the tool server-side nor recurses, and instead waits for
// the frontend to POST /api/chats/:id/answer_user_input with the user's
// selections. The resume path lives in routes/messages.ts; the pause branch
// is in executeToolPhase in services/inference.ts.

/**
 * Input schema for ask_user_input: 1-3 questions, each with text of 1-200
 * characters, a selection mode, and 2-6 options of 1-80 characters.
 */
const AskUserInputInput = z.object({
  questions: z
    .array(
      z.object({
        question: z.string().min(1).max(200),
        type: z.enum(['single_select', 'multi_select']),
        options: z.array(z.string().min(1).max(80)).min(2).max(6),
      }),
    )
    .min(1)
    .max(3),
});

/** Static type inferred from {@link AskUserInputInput}. */
type AskUserInputInputT = z.infer<typeof AskUserInputInput>;
/**
 * `ask_user_input` tool definition.
 *
 * `execute` here only echoes the questions back inside a
 * `{ _pending: true, questions }` sentinel; the real answer comes from the
 * user through the frontend.
 */
export const askUserInput: ToolDef<AskUserInputInputT> = {
  name: 'ask_user_input',
  description:
    "Ask the user 1-3 structured questions through an inline picker UI. Use when you genuinely need a choice the user must make (e.g. scope, options, preferences) before continuing. Each question has 2-6 options and accepts free-text answers in addition. The tool call pauses the conversation until the user submits — the next assistant turn sees their answers as the tool result. Do not use for trivial yes/no clarifications you could infer; prefer it over multi-paragraph speculation about what the user might want.",
  inputSchema: AskUserInputInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'ask_user_input',
      description:
        'Ask the user 1-3 structured questions through an inline picker. Pauses the conversation until the user answers; the next turn sees their selections.',
      parameters: {
        type: 'object',
        properties: {
          questions: {
            type: 'array',
            minItems: 1,
            maxItems: 3,
            items: {
              type: 'object',
              properties: {
                question: { type: 'string', description: '<=200 chars, shown to the user' },
                type: {
                  type: 'string',
                  enum: ['single_select', 'multi_select'],
                  description: 'single_select = at most one option; multi_select = any subset',
                },
                options: {
                  type: 'array',
                  minItems: 2,
                  maxItems: 6,
                  items: { type: 'string' },
                  description: '2-6 strings, each <=80 chars; free-text input is always available alongside',
                },
              },
              required: ['question', 'type', 'options'],
              additionalProperties: false,
            },
          },
        },
        required: ['questions'],
        additionalProperties: false,
      },
    },
  },
  // Deliberately a server-side no-op: the user's reply, collected client-side
  // and posted to /api/chats/:id/answer_user_input, is what "executes" this
  // tool. The inference loop recognises the tool by name and pauses before
  // executeToolCall is reached, so this fallback only fires if that branch is
  // bypassed — and then the pending sentinel has the same shape the pause
  // path produces.
  async execute({ questions }) {
    return { _pending: true, questions };
  },
};
/**
 * Every tool defined in this module, in registration order. Each entry is
 * widened to `ToolDef<unknown>` so the differently-typed tools can share one
 * array; `TOOLS_BY_NAME` and `toolJsonSchemas()` are both derived from it.
 */
export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
  // Codebase readers
  viewFile as ToolDef<unknown>,
  listDir as ToolDef<unknown>,
  grep as ToolDef<unknown>,
  findFiles as ToolDef<unknown>,
  // Git state
  gitStatus as ToolDef<unknown>,
  // Skills
  skillFind as ToolDef<unknown>,
  skillUse as ToolDef<unknown>,
  skillResource as ToolDef<unknown>,
  // User elicitation
  askUserInput as ToolDef<unknown>,
];
// v1.8.2: forward-compatible read-only whitelist. If an agent's `tools` list
// is fully contained in this set it receives a generous default tool budget
// (30); any tool outside the set means the agent can mutate state, which
// earns the tighter default (10). As of v1.8.2 every tool is read-only, so
// the non-RO branch only matters once BooCoder lands write tools.
// Batch 9.6: skill_* joined the list; all are still read-only.
// Batch 9.7: ask_user_input joined the list — it pauses execution but leaves
// project state untouched, so for budget purposes it counts as read-only.

/** Names of the tools treated as read-only. */
export const READ_ONLY_TOOL_NAMES = [
  'view_file',
  'list_dir',
  'grep',
  'find_files',
  'git_status',
  'skill_find',
  'skill_use',
  'skill_resource',
  'ask_user_input',
] as const;
/**
 * Lookup table from tool name to its definition, derived from `ALL_TOOLS`.
 *
 * Built on a null-prototype object: tool names arrive from model output, and
 * on an ordinary object a lookup such as `TOOLS_BY_NAME['constructor']` or
 * `TOOLS_BY_NAME['toString']` would return an inherited `Object.prototype`
 * member instead of `undefined`, slipping past "unknown tool" checks.
 * Because there is no prototype, use `Object.hasOwn(TOOLS_BY_NAME, name)` or
 * `name in TOOLS_BY_NAME` rather than `TOOLS_BY_NAME.hasOwnProperty(name)`.
 */
export const TOOLS_BY_NAME: Record<string, ToolDef<unknown>> = Object.assign(
  Object.create(null) as Record<string, ToolDef<unknown>>,
  Object.fromEntries(ALL_TOOLS.map((t) => [t.name, t])),
);
/**
 * Collect the JSON schema of every registered tool.
 *
 * @returns A new array holding each tool's `jsonSchema`, in `ALL_TOOLS` order.
 */
export function toolJsonSchemas(): ToolJsonSchema[] {
  const schemas: ToolJsonSchema[] = [];
  for (const tool of ALL_TOOLS) {
    schemas.push(tool.jsonSchema);
  }
  return schemas;
}