diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 3521364..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,191 +0,0 @@ -# Agents - -## Code Reviewer ---- -temperature: 0.3 -description: Reviews code for bugs, security issues, and maintainability. Read-only. ---- -You review code. Find real problems, not style nits. - -Process: -1. Read the file(s) in question with view_file. If a diff is provided, read surrounding context too. -2. Use grep/find_files to check how changed symbols are used elsewhere. -3. Cite every finding as file:line. - -Prioritize in order: -1. Bugs and logic errors -2. Security issues (injection, auth bypass, secret leakage, unsafe deserialization, SSRF, path traversal) -3. Race conditions, error handling, resource leaks -4. Performance issues with measurable impact -5. Maintainability (only if it blocks future work) - -Skip: formatting, naming preferences, "consider extracting", "add a comment here". The user has a linter. - -Output format: -- Critical: -- Major: -- Minor: - -If nothing critical or major, say so in one line. Do not pad. - - -## Debugger ---- -temperature: 0.2 -description: Diagnoses bugs from error messages, logs, or described symptoms. ---- -You diagnose bugs. Form a hypothesis, prove it with evidence from the code. - -Process: -1. Restate the symptom in one line. Confirm you understand it. -2. Read the error/stacktrace. Identify the exact frame where things go wrong. -3. view_file on that frame. Read 50 lines around it. -4. grep for callers, related state, recent changes that could explain it. -5. State the root cause with file:line evidence. -6. Propose the minimal fix. Note any side effects. - -Rules: -- Never guess. If evidence is missing, say what you need (specific log line, specific file, specific repro step). -- Distinguish symptom from cause. A null check fixes the symptom; missing init causes it. -- Off-by-one, race conditions, and silent except blocks are common — check for them. 
-- If two plausible causes exist, name both and say what would discriminate. - -Output: -- Symptom: -- Root cause: -- Fix: -- Risk: - - -## Refactorer ---- -temperature: 0.3 -description: Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits. ---- -You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code. - -Process: -1. Read the target file(s). -2. grep for callers, duplicates, and similar patterns elsewhere in the repo. -3. Identify the smallest refactor that delivers the goal. - -Prioritize: -1. Deduplication where 3+ sites have near-identical logic -2. Extracting a function/module when one is doing two unrelated jobs -3. Decoupling when a change in A forces a change in B unnecessarily -4. Renaming when a name actively misleads - -Reject: -- Refactors that touch 10+ files for marginal gain -- "Modernization" with no concrete benefit -- Abstraction for future flexibility that may never come -- Style-only changes - -Output: -- Goal: -- Scope: -- Plan: numbered steps, each one self-contained -- Risk: -- Skip if: - - -## Architect ---- -temperature: 0.5 -description: Designs new features, modules, or architectural changes. Outputs a build plan. ---- -You design. You produce build plans, not code. - -Process: -1. Restate the goal in your own words. Confirm constraints (perf, deploy, deps). -2. list_dir the relevant areas. Read existing patterns — match them unless there's a reason not to. -3. Decide: extend existing code or add new module. Justify. -4. Sketch the data flow: inputs → transforms → outputs → side effects. -5. Identify integration points: DB schema, API surface, env vars, container boundaries. -6. List failure modes and how the design handles them. - -Rules: -- Reuse before inventing. If a service/lib in the repo already does this, say so. -- Prefer boring tech. New deps require justification. -- Tailscale IPs for internal routing. No 0.0.0.0 binds. 
-- Least privilege: separate read/write paths, explicit auth gates. -- State assumptions inline. Do not ask clarifying questions mid-design unless blocked. - -Output: -- Goal -- Existing code to reuse: -- New code: -- Data model changes: -- API surface: -- Failure modes: -- Build order: numbered, each step 30-90 min - - -## Security Auditor ---- -temperature: 0.2 -description: Audits code for security vulnerabilities. Read-only. ---- -You audit for security issues. Concrete findings only, no generic warnings. - -Process: -1. Identify the trust boundary: where does untrusted input enter? Where does it leave? -2. Trace input flow with grep. Mark every transformation. -3. Check each finding against a real attack scenario. - -Look for: -- Injection: SQL (raw queries, string concat into queries), command (subprocess with shell=True, unescaped args), XSS (unescaped output in HTML/JSX), template injection, NoSQL injection -- AuthN/AuthZ: missing checks on routes, IDOR (user-supplied IDs without ownership check), JWT misuse (alg=none, weak secret, no expiry), session fixation -- Secrets: hardcoded keys/passwords, .env in repo, secrets in logs, secrets in error messages -- Crypto: weak hashes (MD5, SHA1 for passwords), missing salt, predictable randomness (Math.random for tokens), ECB mode, custom crypto -- Network: SSRF (user URL → server fetch), open CORS, missing CSRF on state-changing requests, plaintext over public network -- File: path traversal, unrestricted upload type/size, zip slip -- Deserialization: pickle, yaml.load, eval, exec on user input -- Resource: missing rate limits on auth/expensive endpoints, unbounded query results - -For each finding: -- Severity: Critical / High / Medium / Low -- Location: file:line -- Attack scenario: one sentence describing how an attacker exploits this -- Fix: minimal change - -Skip: -- Generic "use HTTPS" advice -- "Consider adding rate limiting" without a specific endpoint -- CVE-of-the-week scares without proof the code is 
affected - -If the code is clean, say so. Do not invent findings. - - -## Prompt Builder ---- -temperature: 0.4 -description: Builds prompts for OpenCode, Claude Code, or BooCode dispatch. ---- -You write prompts that another coding agent will execute. Your output is the prompt, not the work. - -Process: -1. Ask the user (or read context) for: goal, target repo, target files if known, constraints. -2. list_dir and view_file the target area. Confirm files exist and are roughly the shape you think. -3. Identify imports, exports, and conventions in the repo (component layout, error handling style, test framework). -4. Write the prompt. - -Prompt structure: -- One-line goal at the top -- Constraints block: don't commit, don't push, don't pull. Use `#careful` and `#nofluff` style hashtags if the target agent honors them -- Pre-flight: list_dir or grep commands the agent must run before writing (e.g. "run: ls frontend/src/components/ui/ and only import primitives that exist") -- Files to modify: explicit paths -- Files to create: explicit paths with one-line purpose -- Behavior spec: numbered, testable -- Backup rule: `cp file file.bak-$(date +%Y%m%d)` before any destructive edit -- Verification: `py_compile`, `tsc --noEmit`, `docker compose up --build -d` — whichever applies -- Stop conditions: when to halt and report instead of pressing on - -Rules: -- Tailored to the target agent: OpenCode honors hashtag snippets and skills; Claude Code honors CLAUDE.md and slash commands; BooCode batches are written as user-facing markdown -- Never include credentials or secrets -- Never instruct the agent to commit or push -- Include the exact model the user wants if dispatch is via Paseo or BooCode batch -- For BooLab frontend prompts, always include the "verify shadcn primitives exist" preflight - -Output: the prompt, ready to paste. Nothing else. 
diff --git a/CLAUDE.md b/CLAUDE.md index d35f05d..1b8bd3f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,8 @@ Required: `DATABASE_URL`, `LLAMA_SWAP_URL`. Optional: `PORT` (3000), `HOST` (0.0 - A local PreToolUse hook (`security_reminder_hook.py`) regex-flags Node's older `child_process` spawn helpers as unsafe (false positive even on the File-suffixed variant). Use `spawn` — it's accepted. - `/opt/boolab` hosts a working sibling BooCode terminal at `boocode.indifferentketchup.com`. Useful for visual side-by-side comparison on the same iPhone when debugging booterm rendering. Boolab uses Tailwind v3 (`@tailwind base`); boocode uses v4 — many subtle build differences. Don't assume parity. - booterm SSHs to the host as `samkintop@100.114.205.53` (the Tailscale IP). The hostname `ubuntu-homelab` (shown in the bash prompt after login) does NOT resolve from inside the container — only the host's `/etc/hosts` knows it. Override via `BOOTERM_SSH_HOST` / `BOOTERM_SSH_USER` env vars in docker-compose if you ever move the shell to a different machine. +- codecontext sidecar lives at `/opt/boocode/codecontext/`. Sidecar HTTP API at `http://codecontext:8080/v1/` over the `boocode_net` bridge (no host port). BooCode wrappers in `apps/server/src/services/tools/codecontext/`. The `.codecontextignore.template` documents recommended ignore patterns; users copy and adapt to project root manually. +- `os/exec` child supervisors must explicitly call `child.Wait()` in a goroutine and `os.Exit` on child death. `Signal(0)` returns nil on zombies and is NOT a liveness check. Without `Wait()`, docker's `restart: unless-stopped` policy never fires because the parent stays alive. The `codecontext/shim.go` implementation is the reference pattern. 
## Conventions diff --git a/apps/server/src/services/__tests__/codecontext_client.test.ts b/apps/server/src/services/__tests__/codecontext_client.test.ts new file mode 100644 index 0000000..e0b26e5 --- /dev/null +++ b/apps/server/src/services/__tests__/codecontext_client.test.ts @@ -0,0 +1,206 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { mkdir, mkdtemp, realpath, rm } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { callCodecontext } from '../codecontext_client.js'; + +// ---- fixtures --------------------------------------------------------------- + +let workDir: string; +let projectDir: string; +let outsideDir: string; + +beforeEach(async () => { + // Shared workspace so projectDir and outsideDir are siblings but the + // realpath escape check still treats outsideDir as outside the project. + // realpath() the temp root: callCodecontext sends the resolved target_dir, and tmpdir() can sit behind a symlink (e.g. /var -> /private/var on macOS). + workDir = await realpath(await mkdtemp(join(tmpdir(), 'codecontext-test-'))); + projectDir = join(workDir, 'project'); + outsideDir = join(workDir, 'outside'); + await mkdir(projectDir); + await mkdir(outsideDir); +}); + +afterEach(async () => { + await rm(workDir, { recursive: true, force: true }); + vi.restoreAllMocks(); +}); + +function mockJSONResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' }, + }); +} + +// ---- tests ------------------------------------------------------------------ + +describe('callCodecontext — target_dir validation', () => { + it('rejects when target_dir does not exist', async () => { + const fetcher = vi.fn(); + await expect( + callCodecontext( + { + toolName: 'get_codebase_overview', + args: { target_dir: '/nonexistent/path/deliberately/missing' }, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/target_dir does not exist/); + expect(fetcher).not.toHaveBeenCalled(); + }); + + it('rejects when target_dir is outside the project 
root', async () => { + const fetcher = vi.fn(); + await expect( + callCodecontext( + { + toolName: 'get_codebase_overview', + args: { target_dir: outsideDir }, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/escapes project root/); + expect(fetcher).not.toHaveBeenCalled(); + }); + + it('injects projectPath as target_dir when args.target_dir is undefined', async () => { + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ result: 'overview text', error: null }), + ); + await callCodecontext( + { + toolName: 'get_codebase_overview', + args: { include_stats: true }, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ); + expect(fetcher).toHaveBeenCalledTimes(1); + const body = JSON.parse(fetcher.mock.calls[0]![1]!.body as string); + expect(body.target_dir).toBe(projectDir); + expect(body.include_stats).toBe(true); + }); +}); + +describe('callCodecontext — HTTP request shape', () => { + it('POSTs to /v1/ with JSON content-type', async () => { + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ result: 'ok', error: null }), + ); + await callCodecontext( + { + toolName: 'search_symbols', + args: { query: 'User', limit: 5 }, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ); + expect(fetcher).toHaveBeenCalledTimes(1); + const [url, init] = fetcher.mock.calls[0]!; + expect(url).toMatch(/\/v1\/search_symbols$/); + expect(init.method).toBe('POST'); + expect(init.headers['Content-Type']).toBe('application/json'); + const body = JSON.parse(init.body); + expect(body).toMatchObject({ query: 'User', limit: 5, target_dir: projectDir }); + }); +}); + +describe('callCodecontext — result handling', () => { + it('returns { result, truncated: false } when codecontext result is under the 32 kB limit', async () => { + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ result: 'a short markdown report', error: null }), + ); + const out = await 
callCodecontext( + { + toolName: 'get_codebase_overview', + args: {}, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ); + expect(out.truncated).toBe(false); + expect(out.result).toBe('a short markdown report'); + }); + + it('truncates and marks truncated: true when result exceeds 32 kB', async () => { + const bigResult = 'x'.repeat(40_000); + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ result: bigResult, error: null }), + ); + const out = await callCodecontext( + { + toolName: 'get_codebase_overview', + args: {}, + projectPath: projectDir, + }, + fetcher as unknown as typeof fetch, + ); + expect(out.truncated).toBe(true); + expect(out.result).toMatch(/\[truncated, 8000 chars omitted; narrow with file_path/); + expect(out.result.length).toBeLessThan(bigResult.length); + }); +}); + +describe('callCodecontext — error paths', () => { + it('throws an actionable error when codecontext reports an empty-file parser failure', async () => { + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ + result: null, + error: + 'failed to refresh analysis: failed to analyze directory: ' + + 'failed to parse file /opt/boolab/.opencode/node_modules/foo/index.js: content is empty', + }), + ); + await expect( + callCodecontext( + { toolName: 'get_codebase_overview', args: {}, projectPath: projectDir }, + fetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/codecontext parse failure.*\.codecontextignore/); + }); + + it('throws a generic error when codecontext reports other errors', async () => { + const fetcher = vi.fn().mockResolvedValue( + mockJSONResponse({ result: null, error: 'symbol_name is required' }), + ); + await expect( + callCodecontext( + { toolName: 'get_symbol_info', args: {}, projectPath: projectDir }, + fetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/codecontext error: symbol_name is required/); + }); + + it('throws on HTTP non-2xx response', async () => { + const fetcher = 
vi.fn().mockResolvedValue( + new Response('upstream gateway boom', { status: 502 }), + ); + await expect( + callCodecontext( + { toolName: 'get_codebase_overview', args: {}, projectPath: projectDir }, + fetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/codecontext HTTP 502/); + }); + + it('translates a fetcher AbortError to a "timed out" error', async () => { + // The catch branch in callCodecontext maps any AbortError (whether it + // came from our internal 30s setTimeout or from the fetcher itself) to a + // "timed out" message. Exercising the catch directly is cleaner than + // wrangling vi.useFakeTimers with realpath's microtask scheduling. + const abortingFetcher = vi.fn().mockImplementation(() => { + const err = new Error('The user aborted a request.'); + err.name = 'AbortError'; + return Promise.reject(err); + }); + await expect( + callCodecontext( + { toolName: 'get_codebase_overview', args: {}, projectPath: projectDir }, + abortingFetcher as unknown as typeof fetch, + ), + ).rejects.toThrow(/timed out after 30000ms/); + }); +}); diff --git a/apps/server/src/services/__tests__/codecontext_tools.test.ts b/apps/server/src/services/__tests__/codecontext_tools.test.ts new file mode 100644 index 0000000..d0f70b6 --- /dev/null +++ b/apps/server/src/services/__tests__/codecontext_tools.test.ts @@ -0,0 +1,157 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { mkdtemp, realpath, rm } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { executeGetCodebaseOverview } from '../tools/codecontext/get_codebase_overview.js'; +import { executeGetFileAnalysis } from '../tools/codecontext/get_file_analysis.js'; +import { executeGetSymbolInfo } from '../tools/codecontext/get_symbol_info.js'; +import { executeSearchSymbols } from '../tools/codecontext/search_symbols.js'; +import { executeGetDependencies } from '../tools/codecontext/get_dependencies.js'; +import { executeWatchChanges } 
from '../tools/codecontext/watch_changes.js'; +import { executeGetSemanticNeighborhoods } from '../tools/codecontext/get_semantic_neighborhoods.js'; +import { executeGetFrameworkAnalysis } from '../tools/codecontext/get_framework_analysis.js'; + +// ---- fixtures --------------------------------------------------------------- + +let projectDir: string; + +beforeEach(async () => { + // realpath() the temp dir: callCodecontext sends the resolved target_dir, and tmpdir() can sit behind a symlink (e.g. /var -> /private/var on macOS). + projectDir = await realpath(await mkdtemp(join(tmpdir(), 'codecontext-tools-test-'))); +}); + +afterEach(async () => { + await rm(projectDir, { recursive: true, force: true }); + vi.restoreAllMocks(); +}); + +function mockJSONResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' }, + }); +} + +// Stub fetcher that records every call and returns a canned successful body. +// Each test inspects fetcher.mock.calls[0] to assert URL + body shape. +function makeStub() { + return vi.fn().mockResolvedValue( + mockJSONResponse({ result: 'wrapped ok', error: null }), + ); +} + +// Asserts the stub was called exactly once and returns that call's URL and parsed JSON body. +function parsePOST(fetcher: ReturnType<typeof makeStub>): { + url: string; + body: Record<string, unknown>; +} { + expect(fetcher).toHaveBeenCalledTimes(1); + const [url, init] = fetcher.mock.calls[0]! 
as [string, { body: string }]; + return { url, body: JSON.parse(init.body) }; +} + +// ---- per-wrapper smoke tests ----------------------------------------------- + +describe('codecontext wrappers — toolName + args forwarding', () => { + it('get_codebase_overview posts to /v1/get_codebase_overview with include_stats default true', async () => { + const fetcher = makeStub(); + await executeGetCodebaseOverview({}, projectDir, fetcher as unknown as typeof fetch); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_codebase_overview$/); + expect(body).toMatchObject({ include_stats: true, target_dir: projectDir }); + }); + + it('get_file_analysis forwards file_path', async () => { + const fetcher = makeStub(); + await executeGetFileAnalysis( + { file_path: 'apps/server/src/index.ts' }, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_file_analysis$/); + expect(body).toMatchObject({ + file_path: 'apps/server/src/index.ts', + target_dir: projectDir, + }); + }); + + it('get_symbol_info forwards symbol_name and omits optional fields when unset', async () => { + const fetcher = makeStub(); + await executeGetSymbolInfo( + { symbol_name: 'buildSystemPrompt' }, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_symbol_info$/); + expect(body).toMatchObject({ symbol_name: 'buildSystemPrompt', target_dir: projectDir }); + expect(body).not.toHaveProperty('file_path'); + expect(body).not.toHaveProperty('framework_type'); + }); + + it('search_symbols defaults limit to 20 and forwards filters when set', async () => { + const fetcher = makeStub(); + await executeSearchSymbols( + { query: 'User', symbol_type: 'class' }, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/search_symbols$/); + expect(body).toMatchObject({ 
+ query: 'User', + symbol_type: 'class', + limit: 20, + target_dir: projectDir, + }); + }); + + it('get_dependencies defaults direction to "both"', async () => { + const fetcher = makeStub(); + await executeGetDependencies({}, projectDir, fetcher as unknown as typeof fetch); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_dependencies$/); + expect(body).toMatchObject({ direction: 'both', target_dir: projectDir }); + expect(body).not.toHaveProperty('file_path'); + }); + + it('watch_changes forwards enable=false', async () => { + const fetcher = makeStub(); + await executeWatchChanges( + { enable: false }, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/watch_changes$/); + expect(body).toMatchObject({ enable: false, target_dir: projectDir }); + }); + + it('get_semantic_neighborhoods defaults max_results to 10', async () => { + const fetcher = makeStub(); + await executeGetSemanticNeighborhoods( + {}, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_semantic_neighborhoods$/); + expect(body).toMatchObject({ max_results: 10, target_dir: projectDir }); + }); + + it('get_framework_analysis sends only target_dir when no args are provided', async () => { + const fetcher = makeStub(); + await executeGetFrameworkAnalysis( + {}, + projectDir, + fetcher as unknown as typeof fetch, + ); + const { url, body } = parsePOST(fetcher); + expect(url).toMatch(/\/v1\/get_framework_analysis$/); + expect(body).toMatchObject({ target_dir: projectDir }); + expect(body).not.toHaveProperty('framework'); + expect(body).not.toHaveProperty('include_stats'); + }); +}); diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts index b9d8486..8d3629c 100644 --- a/apps/server/src/services/agents.ts +++ b/apps/server/src/services/agents.ts @@ -1,6 +1,7 @@ import { promises as fs } 
from 'node:fs'; import { join } from 'node:path'; import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js'; +import { ALL_TOOLS } from './tools.js'; // v1.8.1: global agents live at /data/AGENTS.md inside the container // (./data:/data:ro mount on the host). Per-project AGENTS.md at the project @@ -10,18 +11,12 @@ import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js'; const GLOBAL_AGENTS_PATH = '/data/AGENTS.md'; const CACHE_TTL_MS = 60_000; -// Tools whitelist universe matches services/tools.ts ALL_TOOLS. Keep in sync. -// Batch 9.6: skill_find / skill_use / skill_resource added. Agents without an -// explicit `tools:` field inherit the full default set (which now includes -// the skill tools); agents with an explicit `tools:` array must list any -// skill tool they want to use — strict opt-in. -// Batch 9.7: ask_user_input added — same opt-in semantics. Agents with an -// explicit tools list that omits it cannot trigger the interactive picker. -const ALL_TOOL_NAMES = [ - 'view_file', 'list_dir', 'grep', 'find_files', 'git_status', - 'skill_find', 'skill_use', 'skill_resource', - 'ask_user_input', -] as const; +// v1.12 Track B.3: derive from services/tools.ts ALL_TOOLS so new tools are +// auto-recognized in agent frontmatter `tools:` arrays. The previous +// hand-maintained list drifted (web_search/web_fetch from v1.11.8 + the 8 +// codecontext tools were missing), silently filtering valid tool names out +// of agents that opted in. Single source of truth is tools.ts now. 
+const ALL_TOOL_NAMES: readonly string[] = ALL_TOOLS.map((t) => t.name); const DEFAULT_TOOLS: string[] = [...ALL_TOOL_NAMES]; const DEFAULT_TEMPERATURE = 0.7; diff --git a/apps/server/src/services/codecontext_client.ts b/apps/server/src/services/codecontext_client.ts new file mode 100644 index 0000000..6772a56 --- /dev/null +++ b/apps/server/src/services/codecontext_client.ts @@ -0,0 +1,168 @@
+// v1.12 Track B.2: shared HTTP client for the codecontext sidecar. The 8
+// per-tool wrappers under tools/codecontext/ all funnel through callCodecontext
+// — they're thin adapters that supply toolName + args + projectPath. The
+// client owns:
+//
+//   1. target_dir validation. Codecontext's HTTP shim is naive and forwards
+//      any target_dir to codecontext, so without this layer a model that
+//      hallucinated a target_dir could read /opt/anything-on-disk. The
+//      project root is realpath'd and the requested target_dir is constrained
+//      to it (same invariant as path_guard.ts but for the codecontext path).
+//   2. Inline truncation at 32 kB. Codecontext outputs are markdown reports
+//      that can balloon on large projects; the model can re-narrow via
+//      file_path / file_type / limit. Matches the "inline truncation, no
+//      opaque-id retrieval" decision locked in the 2026-05-21 recon.
+//   3. Friendly mapping of codecontext's known failure modes — the empty-
+//      file parser bug (upstream issue #37) returns a generic error string,
+//      which we re-surface with a hint to add the file to .codecontextignore.
+
+import { realpath } from 'node:fs/promises';
+import { isAbsolute, relative, resolve, sep } from 'node:path';
+
+export interface CodecontextRequest {
+  toolName: string;
+  args: Record<string, unknown>;
+  projectPath: string;
+}
+
+export interface CodecontextResponse {
+  result: string;
+  truncated: boolean;
+}
+
+// Wire shape of the sidecar's JSON reply.
+interface SidecarBody {
+  result?: string | null;
+  error?: string | null;
+}
+
+const CODECONTEXT_BASE_URL = process.env['CODECONTEXT_URL'] ?? 'http://codecontext:8080';
+// Measured in string length (UTF-16 code units), not bytes.
+const TRUNCATION_LIMIT = 32_000;
+// Covers the whole exchange: connect, headers, and reading the body.
+const REQUEST_TIMEOUT_MS = 30_000;
+
+/**
+ * POSTs one tool call to the codecontext sidecar and returns its result text,
+ * truncated inline when it is longer than TRUNCATION_LIMIT.
+ *
+ * @param req      toolName selects the /v1/<toolName> endpoint, args become
+ *                 the JSON body, and projectPath is the root that target_dir
+ *                 is confined to.
+ * @param fetcher  fetch implementation; injectable so tests can stub HTTP.
+ * @returns        the result text and whether it was truncated. A null
+ *                 result from the sidecar comes back as the empty string.
+ * @throws Error   if target_dir does not exist or escapes the project root,
+ *                 on timeout or network failure, on a non-2xx status, on a
+ *                 malformed body, or when the sidecar reports an error.
+ */
+export async function callCodecontext(
+  req: CodecontextRequest,
+  fetcher: typeof fetch = fetch,
+): Promise<CodecontextResponse> {
+  // Step 1: confine target_dir to the project root. Both paths are realpath'd
+  // so a symlink cannot point the sidecar outside the project. A relative
+  // target_dir is resolved against the project root, not the server's cwd.
+  // A non-existent target_dir throws before we hit the network so the model
+  // gets a sharp error.
+  const resolvedProject = await realpath(req.projectPath);
+  const requestedTarget = req.args['target_dir'];
+  let targetDir = req.projectPath;
+  let resolvedTarget: string | null = resolvedProject;
+  if (typeof requestedTarget === 'string' && requestedTarget.length > 0) {
+    targetDir = requestedTarget;
+    resolvedTarget = await realpath(resolve(resolvedProject, requestedTarget)).catch(() => null);
+  }
+  if (resolvedTarget === null) {
+    throw new Error(`target_dir does not exist: ${targetDir}`);
+  }
+  // path.relative instead of a startsWith(root + '/') prefix test: the prefix
+  // form rejects every child when the project root is '/' itself.
+  const rel = relative(resolvedProject, resolvedTarget);
+  if (rel === '..' || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
+    throw new Error(`target_dir ${targetDir} escapes project root ${resolvedProject}`);
+  }
+
+  // Step 2: re-build args with the resolved target_dir so codecontext sees
+  // the real absolute path, not a symlink or relative form.
+  const argsToSend = { ...req.args, target_dir: resolvedTarget };
+
+  // Step 3: POST with a hard timeout. The timer stays armed until the body
+  // has been read; clearing it as soon as headers arrive would let a stalled
+  // body hang the tool call forever.
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+  let ok: boolean;
+  let status: number;
+  let raw: string;
+  try {
+    const response = await fetcher(`${CODECONTEXT_BASE_URL}/v1/${req.toolName}`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(argsToSend),
+      signal: controller.signal,
+    });
+    ok = response.ok;
+    status = response.status;
+    // An unreadable error body must not mask the HTTP status, so only the
+    // failure path swallows read errors.
+    raw = ok ? await response.text() : await response.text().catch(() => '');
+  } catch (err) {
+    if (err instanceof Error && (err.name === 'AbortError' || err.name === 'TimeoutError')) {
+      throw new Error(`codecontext request timed out after ${REQUEST_TIMEOUT_MS}ms`);
+    }
+    throw new Error(
+      `codecontext network error: ${err instanceof Error ? err.message : String(err)}`,
+    );
+  } finally {
+    clearTimeout(timer);
+  }
+
+  if (!ok) {
+    throw new Error(`codecontext HTTP ${status}: ${raw.slice(0, 200)}`);
+  }
+
+  // A 2xx reply that is not a JSON object (proxy error page, empty body) gets
+  // a named error instead of a bare SyntaxError / TypeError.
+  let body: SidecarBody | null = null;
+  try {
+    const parsed: unknown = JSON.parse(raw);
+    if (typeof parsed === 'object' && parsed !== null) {
+      body = parsed as SidecarBody;
+    }
+  } catch {
+    // Fall through to the shared malformed-response error below.
+  }
+  if (body === null) {
+    throw new Error(`codecontext returned a malformed response: ${raw.slice(0, 200)}`);
+  }
+
+  if (body.error) {
+    // Upstream issue #37: empty source files crash codecontext's parser. The
+    // error message reliably contains "content is empty"; surface an
+    // actionable hint instead of the bare codecontext message.
+    if (body.error.includes('content is empty')) {
+      throw new Error(
+        `codecontext parse failure: ${body.error}. ` +
+          `Add the offending path to .codecontextignore in the project root and retry.`,
+      );
+    }
+    throw new Error(`codecontext error: ${body.error}`);
+  }
+  if (body.result === null || body.result === undefined) {
+    return { result: '', truncated: false };
+  }
+
+  // Step 4: inline truncation. The model gets a clear hint about how to
+  // narrow the next call rather than a silent cut. Mirrors web_fetch.ts.
+  if (body.result.length > TRUNCATION_LIMIT) {
+    const truncated = body.result.slice(0, TRUNCATION_LIMIT);
+    const omitted = body.result.length - TRUNCATION_LIMIT;
+    return {
+      result:
+        `${truncated}\n\n[truncated, ${omitted} chars omitted; narrow with file_path, file_type, or limit]`,
+      truncated: true,
+    };
+  }
+  return { result: body.result, truncated: false };
+}
diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index fe86e44..9a22476 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -603,10 +603,26 @@ async function executeToolCall( } const parsed = tool.inputSchema.safeParse(toolCall.args); if (!parsed.success) { + // v1.12 Track B.2: enrich the zod-reject path so the model sees a + // one-line, tool-named hint ("tool 'search_symbols' rejected — query: + // Required") instead of a JSON blob of flatten output. Higher recovery + // rate on the next turn; doom-loop guard still bounds infinite retries. + // The cast is because tool.inputSchema is ZodType, so zod can't + // statically narrow flatten()'s fieldErrors key set — but the runtime + // shape is the standard { formErrors: string[]; fieldErrors: Record<...> }. + const flatten = parsed.error.flatten() as { + formErrors: string[]; + fieldErrors: Record; + }; + const fieldErrors = Object.entries(flatten.fieldErrors) + .map(([field, errs]) => `${field}: ${errs?.[0] ?? 
'invalid'}`) + .join('; '); + const formError = flatten.formErrors[0]; + const hint = fieldErrors || formError || 'unknown validation error'; return { output: null, truncated: false, - error: `invalid input: ${JSON.stringify(parsed.error.flatten())}`, + error: `tool '${toolCall.name}' rejected — ${hint}`, }; } try { diff --git a/apps/server/src/services/tools.ts b/apps/server/src/services/tools.ts index d979277..725dfef 100644 --- a/apps/server/src/services/tools.ts +++ b/apps/server/src/services/tools.ts @@ -8,6 +8,19 @@ import { getGitMeta } from './git_meta.js'; import { findSkills, getSkillBody, getSkillResource } from './skills.js'; import { webSearch } from './web_search.js'; import { webFetch } from './web_fetch.js'; +// v1.12 Track B.2: codecontext tools. 8 wrappers re-exported from +// tools/codecontext/index.ts. Each calls into services/codecontext_client.ts +// which talks to the codecontext sidecar at http://codecontext:8080. +import { + getCodebaseOverview, + getFileAnalysis, + getSymbolInfo, + searchSymbols, + getDependencies, + watchChanges, + getSemanticNeighborhoods, + getFrameworkAnalysis, +} from './tools/codecontext/index.js'; const MAX_FILE_BYTES = 5 * 1024 * 1024; const DEFAULT_VIEW_LINES = 200; @@ -529,6 +542,17 @@ export const ALL_TOOLS: ReadonlyArray> = [ // services/inference.ts. webSearch as ToolDef, webFetch as ToolDef, + // v1.12 Track B.2: codecontext tools. Backed by the codecontext sidecar + // container. All read-only. target_dir is resolved server-side from the + // project root in codecontext_client.ts (the LLM never supplies it). + getCodebaseOverview as ToolDef, + getFileAnalysis as ToolDef, + getSymbolInfo as ToolDef, + searchSymbols as ToolDef, + getDependencies as ToolDef, + watchChanges as ToolDef, + getSemanticNeighborhoods as ToolDef, + getFrameworkAnalysis as ToolDef, ]; // v1.8.2: forward-compatible read-only whitelist. 
An agent whose `tools` is @@ -554,6 +578,16 @@ export const READ_ONLY_TOOL_NAMES = [ // toolset is fully contained in this list. 'web_search', 'web_fetch', + // v1.12 Track B.2: codecontext tools. Read-only — they call the + // codecontext sidecar which only analyzes files (never writes). + 'get_codebase_overview', + 'get_file_analysis', + 'get_symbol_info', + 'search_symbols', + 'get_dependencies', + 'watch_changes', + 'get_semantic_neighborhoods', + 'get_framework_analysis', ] as const; export const TOOLS_BY_NAME: Record> = Object.fromEntries( diff --git a/apps/server/src/services/tools/codecontext/get_codebase_overview.ts b/apps/server/src/services/tools/codecontext/get_codebase_overview.ts new file mode 100644 index 0000000..c624c09 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_codebase_overview.ts @@ -0,0 +1,59 @@ +// v1.12 Track B.2: codecontext wrapper — get_codebase_overview. +// Pattern mirrors services/web_search.ts: pure executor + ToolDef wrapper. +// target_dir is supplied by callCodecontext from the resolved project root. + +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetCodebaseOverviewInput = z.object({ + include_stats: z.boolean().optional(), +}); +export type GetCodebaseOverviewInputT = z.infer; + +const DESCRIPTION = + 'Returns a structured overview of the codebase: file count, symbol count, primary languages, and top-level architecture. ' + + 'Use this before deeper investigation to orient yourself in an unfamiliar codebase. ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript symbols are approximate (uses JS grammar). 
' + + 'PHP and SQL are not supported — fall back to view_file/grep for those.'; + +export async function executeGetCodebaseOverview( + input: GetCodebaseOverviewInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + return callCodecontext( + { + toolName: 'get_codebase_overview', + args: { include_stats: input.include_stats ?? true }, + projectPath, + }, + fetcher, + ); +} + +export const getCodebaseOverview: ToolDef = { + name: 'get_codebase_overview', + description: DESCRIPTION, + inputSchema: GetCodebaseOverviewInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_codebase_overview', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + include_stats: { + type: 'boolean', + description: 'Include file count, symbol count, language stats. Defaults to true.', + }, + }, + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeGetCodebaseOverview(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/get_dependencies.ts b/apps/server/src/services/tools/codecontext/get_dependencies.ts new file mode 100644 index 0000000..e3b1c02 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_dependencies.ts @@ -0,0 +1,60 @@ +// v1.12 Track B.2: codecontext wrapper — get_dependencies. + +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetDependenciesInput = z.object({ + file_path: z.string().optional(), + direction: z.enum(['incoming', 'outgoing', 'both']).optional(), +}); +export type GetDependenciesInputT = z.infer; + +const DESCRIPTION = + 'Returns the import/dependency graph either for a single file (when file_path is set) or for the whole project. ' + + 'Direction "outgoing" = what this file imports; "incoming" = what imports this file; "both" = the union. 
' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript dependencies are approximate. ' + + 'PHP and SQL are not supported.'; + +export async function executeGetDependencies( + input: GetDependenciesInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + const args: Record = { + direction: input.direction ?? 'both', + }; + if (input.file_path) args['file_path'] = input.file_path; + return callCodecontext({ toolName: 'get_dependencies', args, projectPath }, fetcher); +} + +export const getDependencies: ToolDef = { + name: 'get_dependencies', + description: DESCRIPTION, + inputSchema: GetDependenciesInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_dependencies', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + file_path: { + type: 'string', + description: 'Narrow to a single file. Omit for a project-wide graph.', + }, + direction: { + type: 'string', + enum: ['incoming', 'outgoing', 'both'], + description: 'Which edges to include. Defaults to "both".', + }, + }, + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeGetDependencies(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/get_file_analysis.ts b/apps/server/src/services/tools/codecontext/get_file_analysis.ts new file mode 100644 index 0000000..c21ae96 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_file_analysis.ts @@ -0,0 +1,58 @@ +// v1.12 Track B.2: codecontext wrapper — get_file_analysis. + +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetFileAnalysisInput = z.object({ + file_path: z.string().min(1), +}); +export type GetFileAnalysisInputT = z.infer; + +const DESCRIPTION = + 'Returns detailed analysis of a single file: symbols defined, imports, exports, and inferred role. 
' + + 'Use when you have a specific file in mind and need its structure without view_file-ing the whole thing. ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript symbols are approximate. ' + + 'PHP and SQL are not supported — fall back to view_file for those.'; + +export async function executeGetFileAnalysis( + input: GetFileAnalysisInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + return callCodecontext( + { + toolName: 'get_file_analysis', + args: { file_path: input.file_path }, + projectPath, + }, + fetcher, + ); +} + +export const getFileAnalysis: ToolDef = { + name: 'get_file_analysis', + description: DESCRIPTION, + inputSchema: GetFileAnalysisInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_file_analysis', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + file_path: { + type: 'string', + description: 'Absolute or project-relative path to the file.', + }, + }, + required: ['file_path'], + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeGetFileAnalysis(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/get_framework_analysis.ts b/apps/server/src/services/tools/codecontext/get_framework_analysis.ts new file mode 100644 index 0000000..8126e90 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_framework_analysis.ts @@ -0,0 +1,58 @@ +// v1.12 Track B.2: codecontext wrapper — get_framework_analysis. 
+ +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetFrameworkAnalysisInput = z.object({ + framework: z.string().optional(), + include_stats: z.boolean().optional(), +}); +export type GetFrameworkAnalysisInputT = z.infer; + +const DESCRIPTION = + 'Returns framework-specific structural analysis: component relationships (React), hook usage patterns, store wiring (Vue/Pinia), service registration (Angular/Nest), etc. ' + + 'When framework is omitted, codecontext auto-detects from the project files. ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript is approximate. ' + + 'PHP and SQL are not supported.'; + +export async function executeGetFrameworkAnalysis( + input: GetFrameworkAnalysisInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + const args: Record = {}; + if (input.framework) args['framework'] = input.framework; + if (input.include_stats !== undefined) args['include_stats'] = input.include_stats; + return callCodecontext({ toolName: 'get_framework_analysis', args, projectPath }, fetcher); +} + +export const getFrameworkAnalysis: ToolDef = { + name: 'get_framework_analysis', + description: DESCRIPTION, + inputSchema: GetFrameworkAnalysisInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_framework_analysis', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + framework: { + type: 'string', + description: 'Framework name. 
Auto-detected if omitted.', + }, + include_stats: { + type: 'boolean', + description: 'Include component/hook/service counts.', + }, + }, + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeGetFrameworkAnalysis(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/get_semantic_neighborhoods.ts b/apps/server/src/services/tools/codecontext/get_semantic_neighborhoods.ts new file mode 100644 index 0000000..48e942e --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_semantic_neighborhoods.ts @@ -0,0 +1,73 @@ +// v1.12 Track B.2: codecontext wrapper — get_semantic_neighborhoods. + +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetSemanticNeighborhoodsInput = z.object({ + file_path: z.string().optional(), + include_basic: z.boolean().optional(), + include_quality: z.boolean().optional(), + max_results: z.number().int().positive().optional(), +}); +export type GetSemanticNeighborhoodsInputT = z.infer; + +const DESCRIPTION = + 'Returns semantic neighborhoods — clusters of related files derived from git co-change patterns and import structure. ' + + 'Use when you want to find code that "belongs together" with a given file without enumerating imports manually. ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript is approximate. ' + + 'PHP and SQL are not supported.'; + +const DEFAULT_MAX_RESULTS = 10; + +export async function executeGetSemanticNeighborhoods( + input: GetSemanticNeighborhoodsInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + const args: Record = { + max_results: input.max_results ?? 
DEFAULT_MAX_RESULTS, + }; + if (input.file_path) args['file_path'] = input.file_path; + if (input.include_basic !== undefined) args['include_basic'] = input.include_basic; + if (input.include_quality !== undefined) args['include_quality'] = input.include_quality; + return callCodecontext({ toolName: 'get_semantic_neighborhoods', args, projectPath }, fetcher); +} + +export const getSemanticNeighborhoods: ToolDef = { + name: 'get_semantic_neighborhoods', + description: DESCRIPTION, + inputSchema: GetSemanticNeighborhoodsInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_semantic_neighborhoods', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + file_path: { + type: 'string', + description: 'Anchor file for the neighborhood query. Omit for a project-wide view.', + }, + include_basic: { + type: 'boolean', + description: 'Include the basic (import-based) neighborhood. Default true.', + }, + include_quality: { + type: 'boolean', + description: 'Include code-quality metrics for the neighborhood. Default false.', + }, + max_results: { + type: 'integer', + description: `Cap on neighborhoods returned. Defaults to ${DEFAULT_MAX_RESULTS}.`, + }, + }, + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeGetSemanticNeighborhoods(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/get_symbol_info.ts b/apps/server/src/services/tools/codecontext/get_symbol_info.ts new file mode 100644 index 0000000..dc8522c --- /dev/null +++ b/apps/server/src/services/tools/codecontext/get_symbol_info.ts @@ -0,0 +1,63 @@ +// v1.12 Track B.2: codecontext wrapper — get_symbol_info. 
+ +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const GetSymbolInfoInput = z.object({ + symbol_name: z.string().min(1), + file_path: z.string().optional(), + framework_type: z.string().optional(), +}); +export type GetSymbolInfoInputT = z.infer; + +const DESCRIPTION = + 'Returns detailed information about a named symbol: definition location, kind (function/class/method/etc.), and (when known) framework-specific context (React component, Vue store, Angular service, …). ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. TypeScript symbols are approximate (uses JS grammar). ' + + 'PHP and SQL are not supported — fall back to grep for those.'; + +export async function executeGetSymbolInfo( + input: GetSymbolInfoInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + const args: Record = { symbol_name: input.symbol_name }; + if (input.file_path) args['file_path'] = input.file_path; + if (input.framework_type) args['framework_type'] = input.framework_type; + return callCodecontext({ toolName: 'get_symbol_info', args, projectPath }, fetcher); +} + +export const getSymbolInfo: ToolDef = { + name: 'get_symbol_info', + description: DESCRIPTION, + inputSchema: GetSymbolInfoInput, + jsonSchema: { + type: 'function', + function: { + name: 'get_symbol_info', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + symbol_name: { + type: 'string', + description: 'The symbol name to look up (case-sensitive).', + }, + file_path: { + type: 'string', + description: 'Narrow to a specific file when the symbol name is ambiguous.', + }, + framework_type: { + type: 'string', + description: 'Hint for framework-specific extraction (react|vue|svelte|django|fastapi|express|nest|…).', + }, + }, + required: ['symbol_name'], + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + 
return await executeGetSymbolInfo(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/index.ts b/apps/server/src/services/tools/codecontext/index.ts new file mode 100644 index 0000000..7abbcd7 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/index.ts @@ -0,0 +1,11 @@ +// v1.12 Track B.2: codecontext tool registry. Re-exports the 8 ToolDefs so +// tools.ts can pull them in one line. + +export { getCodebaseOverview } from './get_codebase_overview.js'; +export { getFileAnalysis } from './get_file_analysis.js'; +export { getSymbolInfo } from './get_symbol_info.js'; +export { searchSymbols } from './search_symbols.js'; +export { getDependencies } from './get_dependencies.js'; +export { watchChanges } from './watch_changes.js'; +export { getSemanticNeighborhoods } from './get_semantic_neighborhoods.js'; +export { getFrameworkAnalysis } from './get_framework_analysis.js'; diff --git a/apps/server/src/services/tools/codecontext/search_symbols.ts b/apps/server/src/services/tools/codecontext/search_symbols.ts new file mode 100644 index 0000000..b5db808 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/search_symbols.ts @@ -0,0 +1,77 @@ +// v1.12 Track B.2: codecontext wrapper — search_symbols. + +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const SearchSymbolsInput = z.object({ + query: z.string().min(1), + file_type: z.string().optional(), + symbol_type: z.string().optional(), + framework_type: z.string().optional(), + limit: z.number().int().positive().optional(), +}); +export type SearchSymbolsInputT = z.infer; + +const DESCRIPTION = + 'Search for symbols (functions, classes, methods, types) across the codebase by name fragment. ' + + 'Filter by file_type, symbol_type, or framework_type to narrow. ' + + 'Tree-sitter coverage: full for JS/Python/Java/Go/Rust/C++. 
TypeScript symbols are approximate. ' + + 'PHP and SQL are not supported — fall back to grep for those.'; + +const DEFAULT_LIMIT = 20; + +export async function executeSearchSymbols( + input: SearchSymbolsInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + const args: Record = { + query: input.query, + limit: input.limit ?? DEFAULT_LIMIT, + }; + if (input.file_type) args['file_type'] = input.file_type; + if (input.symbol_type) args['symbol_type'] = input.symbol_type; + if (input.framework_type) args['framework_type'] = input.framework_type; + return callCodecontext({ toolName: 'search_symbols', args, projectPath }, fetcher); +} + +export const searchSymbols: ToolDef = { + name: 'search_symbols', + description: DESCRIPTION, + inputSchema: SearchSymbolsInput, + jsonSchema: { + type: 'function', + function: { + name: 'search_symbols', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + query: { type: 'string', description: 'Substring or name fragment to match.' }, + file_type: { + type: 'string', + description: 'Filter by file extension or language (e.g. "ts", "py", "go").', + }, + symbol_type: { + type: 'string', + description: 'Filter by kind: function|class|method|variable|type|interface.', + }, + framework_type: { + type: 'string', + description: 'Filter by framework context (react|vue|svelte|…).', + }, + limit: { + type: 'integer', + description: `Max matches to return. Defaults to ${DEFAULT_LIMIT}.`, + }, + }, + required: ['query'], + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeSearchSymbols(input, projectRoot); + }, +}; diff --git a/apps/server/src/services/tools/codecontext/watch_changes.ts b/apps/server/src/services/tools/codecontext/watch_changes.ts new file mode 100644 index 0000000..437f1c0 --- /dev/null +++ b/apps/server/src/services/tools/codecontext/watch_changes.ts @@ -0,0 +1,57 @@ +// v1.12 Track B.2: codecontext wrapper — watch_changes. 
+ +import { z } from 'zod'; +import type { ToolDef } from '../../tools.js'; +import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js'; + +export const WatchChangesInput = z.object({ + enable: z.boolean(), +}); +export type WatchChangesInputT = z.infer; + +const DESCRIPTION = + 'Turn codecontext\'s file watcher on or off for this project. ' + + 'When on, codecontext re-analyzes files in the background as they change (debounced). Default is on. ' + + 'Disable temporarily if you\'re doing bulk edits and want to avoid analysis churn.'; + +export async function executeWatchChanges( + input: WatchChangesInputT, + projectPath: string, + fetcher: typeof fetch = fetch, +): Promise { + return callCodecontext( + { + toolName: 'watch_changes', + args: { enable: input.enable }, + projectPath, + }, + fetcher, + ); +} + +export const watchChanges: ToolDef = { + name: 'watch_changes', + description: DESCRIPTION, + inputSchema: WatchChangesInput, + jsonSchema: { + type: 'function', + function: { + name: 'watch_changes', + description: DESCRIPTION, + parameters: { + type: 'object', + properties: { + enable: { + type: 'boolean', + description: 'true = enable the watcher; false = disable.', + }, + }, + required: ['enable'], + additionalProperties: false, + }, + }, + }, + async execute(input, projectRoot) { + return await executeWatchChanges(input, projectRoot); + }, +}; diff --git a/apps/web/src/components/ToolCallLine.tsx b/apps/web/src/components/ToolCallLine.tsx index 0c9b4ea..b692b1a 100644 --- a/apps/web/src/components/ToolCallLine.tsx +++ b/apps/web/src/components/ToolCallLine.tsx @@ -57,6 +57,33 @@ export function formatToolArgs(name: string, args: Record): str ARG_SUMMARY_MAX, ); } + // v1.12 Track B.2: codecontext tool pills. Format is "most-identifying-arg", + // matching view_file/grep precedent — surface the path/symbol/query that + // makes the call meaningful at a glance. 
+ if (name === 'get_codebase_overview') { + return ''; + } + if (name === 'get_file_analysis') { + return truncate(String(args.file_path ?? ''), ARG_SUMMARY_MAX); + } + if (name === 'get_symbol_info') { + return truncate(String(args.symbol_name ?? ''), ARG_SUMMARY_MAX); + } + if (name === 'search_symbols') { + return truncate(`"${String(args.query ?? '')}"`, ARG_SUMMARY_MAX); + } + if (name === 'get_dependencies') { + return truncate(String(args.file_path ?? '(project-wide)'), ARG_SUMMARY_MAX); + } + if (name === 'watch_changes') { + return args.enable ? 'enable' : 'disable'; + } + if (name === 'get_semantic_neighborhoods') { + return truncate(String(args.file_path ?? '(project-wide)'), ARG_SUMMARY_MAX); + } + if (name === 'get_framework_analysis') { + return truncate(String(args.framework ?? '(auto-detect)'), ARG_SUMMARY_MAX); + } // Unknown tool — surface first arg value or the literal {} so the user can // see something happened. Forward-compatible with future tools. const keys = Object.keys(args); diff --git a/codecontext/.codecontextignore.template b/codecontext/.codecontextignore.template new file mode 100644 index 0000000..9a99c56 --- /dev/null +++ b/codecontext/.codecontextignore.template @@ -0,0 +1,33 @@ +# .codecontextignore — paths codecontext skips during analysis +# Copy to your project root and customize. Same syntax as .gitignore. + +# Dependencies / vendored code +node_modules/ +vendor/ +.venv/ +venv/ +__pycache__/ +target/ + +# Build artifacts +dist/ +build/ +out/ +.next/ +.nuxt/ +.svelte-kit/ + +# IDE / tooling +.opencode/ +.vscode/ +.idea/ + +# Test artifacts / coverage +coverage/ +.nyc_output/ +.pytest_cache/ + +# Lock files (rarely have meaningful symbols) +package-lock.json +yarn.lock +pnpm-lock.yaml diff --git a/codecontext/Dockerfile b/codecontext/Dockerfile new file mode 100644 index 0000000..b61704b --- /dev/null +++ b/codecontext/Dockerfile @@ -0,0 +1,40 @@ +# v1.12 Track B — codecontext sidecar container. 
+# +# Multi-stage build: golang:1.24-alpine builder produces two binaries +# (codecontext from source + our HTTP shim), then a minimal alpine:3.20 +# runtime holds both. +# +# No upstream Docker image exists for codecontext. We clone the repo +# directly because the module path declared in go.mod +# (github.com/nuthan-ms/codecontext) differs from the GitHub repo URL +# (github.com/nmakod/codecontext) — `go install` against the GitHub path +# wouldn't resolve. The tagged v3.2.1 source tree is the same either way. + +FROM golang:1.24-alpine AS builder +WORKDIR /build + +RUN apk add --no-cache git ca-certificates build-base + +# Build codecontext from the v3.2.1 tag. +# CGO is required: codecontext binds tree-sitter via cgo. +RUN git clone --depth=1 --branch v3.2.1 https://github.com/nmakod/codecontext.git /build/codecontext +WORKDIR /build/codecontext +RUN CGO_ENABLED=1 GOOS=linux go build -o /build/codecontext-bin ./cmd/codecontext + +# Build the shim. Stdlib-only — no go.sum needed. +WORKDIR /build/shim +COPY go.mod ./ +COPY shim.go ./ +RUN CGO_ENABLED=0 GOOS=linux go build -o /build/shim-bin ./ + +# Runtime: alpine matches the build target so codecontext's cgo bindings +# resolve against the same musl libc. 
+FROM alpine:3.20 +RUN apk add --no-cache ca-certificates +COPY --from=builder /build/codecontext-bin /usr/local/bin/codecontext +COPY --from=builder /build/shim-bin /usr/local/bin/shim + +EXPOSE 8080 +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s \ + CMD wget -qO- http://localhost:8080/health || exit 1 +ENTRYPOINT ["/usr/local/bin/shim"] diff --git a/codecontext/go.mod b/codecontext/go.mod new file mode 100644 index 0000000..9a38632 --- /dev/null +++ b/codecontext/go.mod @@ -0,0 +1,3 @@ +module github.com/indifferentketchup/boocode-codecontext-shim + +go 1.24 diff --git a/codecontext/shim.go b/codecontext/shim.go new file mode 100644 index 0000000..891c364 --- /dev/null +++ b/codecontext/shim.go @@ -0,0 +1,442 @@ +// boocode-codecontext-shim — wraps codecontext's stdio MCP server with an +// HTTP/JSON facade so the BooCode Node server can call codecontext over the +// container network instead of speaking MCP directly. One process per +// container, holds a single codecontext child via os/exec; concurrent HTTP +// requests are serialized onto the child because codecontext's internal +// CodeContextMCPServer.graph swaps per target_dir (see recon report +// 2026-05-21). +// +// MCP framing is newline-delimited JSON (NDJSON), not LSP-style +// Content-Length — per the MCP stdio transport spec: +// https://spec.modelcontextprotocol.io/specification/server/transports +// +// No third-party deps. Stdlib only. + +package main + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "net/http" + "os" + "os/exec" + "os/signal" + "sync" + "sync/atomic" + "syscall" + "time" +) + +// ---- JSON-RPC types ---- + +// rpcMessage is shared by request, response, and notification. Notifications +// omit ID; requests omit Result/Error; responses omit Method/Params. omitempty +// + the zero int 0 sentinel works for ID because we never SEND id=0 +// (nextID starts at 0 and atomic.AddInt32 returns 1 on the first call). 
+type rpcMessage struct { + JSONRPC string `json:"jsonrpc"` + ID int `json:"id,omitempty"` + Method string `json:"method,omitempty"` + Params json.RawMessage `json:"params,omitempty"` + Result json.RawMessage `json:"result,omitempty"` + Error *rpcError `json:"error,omitempty"` +} + +type rpcError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +// callToolResult is the MCP tools/call response shape. codecontext returns +// markdown wrapped in a TextContent entry. +type callToolResult struct { + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + IsError bool `json:"isError,omitempty"` +} + +// ---- Globals ---- + +var ( + child *exec.Cmd + childStdin io.WriteCloser + childStdout *bufio.Reader + + // Serialize tools/call so codecontext's per-call graph rebuild doesn't + // race itself when concurrent HTTP requests target different projects. + // Initialize/notifications/initialized run before HTTP starts so they + // don't need this lock. + callMu sync.Mutex + + pendingMu sync.Mutex + pending = make(map[int]chan *rpcMessage) + + nextID int32 +) + +// ---- MCP framing (NDJSON) ---- + +func writeMessage(w io.Writer, msg *rpcMessage) error { + body, err := json.Marshal(msg) + if err != nil { + return err + } + // Single write keeps the message atomic across concurrent writers. + // (We don't actually have concurrent writers here — callMu serializes — + // but the +'\n' append needs to be in one syscall regardless.) 
+ _, err = w.Write(append(body, '\n')) + return err +} + +func readerLoop(r *bufio.Reader) { + for { + line, err := r.ReadBytes('\n') + if err != nil { + if errors.Is(err, io.EOF) { + log.Printf("reader: EOF (child closed stdout)") + } else { + log.Printf("reader: %v", err) + } + return + } + var msg rpcMessage + if err := json.Unmarshal(line, &msg); err != nil { + log.Printf("reader: malformed JSON: %v (line=%q)", err, line) + continue + } + if msg.ID == 0 { + // Server-initiated notification or progress update; nothing to + // dispatch. codecontext doesn't currently send these but the + // MCP spec allows them. + continue + } + pendingMu.Lock() + ch, ok := pending[msg.ID] + if ok { + delete(pending, msg.ID) + } + pendingMu.Unlock() + if ok { + ch <- &msg + } + } +} + +func call(ctx context.Context, method string, params any) (*rpcMessage, error) { + id := int(atomic.AddInt32(&nextID, 1)) + ch := make(chan *rpcMessage, 1) + pendingMu.Lock() + pending[id] = ch + pendingMu.Unlock() + + paramsJSON, err := json.Marshal(params) + if err != nil { + pendingMu.Lock() + delete(pending, id) + pendingMu.Unlock() + return nil, err + } + + msg := &rpcMessage{ + JSONRPC: "2.0", + ID: id, + Method: method, + Params: paramsJSON, + } + + if err := writeMessage(childStdin, msg); err != nil { + pendingMu.Lock() + delete(pending, id) + pendingMu.Unlock() + return nil, fmt.Errorf("write: %w", err) + } + + select { + case resp := <-ch: + return resp, nil + case <-ctx.Done(): + pendingMu.Lock() + delete(pending, id) + pendingMu.Unlock() + return nil, ctx.Err() + } +} + +func notify(method string, params any) error { + paramsJSON, err := json.Marshal(params) + if err != nil { + return err + } + msg := &rpcMessage{ + JSONRPC: "2.0", + Method: method, + Params: paramsJSON, + } + return writeMessage(childStdin, msg) +} + +// ---- Child lifecycle ---- + +func startChild() error { + // `codecontext mcp` with --watch=true (the default) keeps fsnotify + // running on the indexed directory; the 
per-call target_dir swap + // invalidates and re-indexes on demand. `--target=/opt/projects` is the + // initial scan target — codecontext rebuilds the graph against whatever + // target_dir each call carries, so this is just a valid bootstrap path + // (the default "." is the alpine root and trips on transient /proc fds). + child = exec.Command("codecontext", "mcp", "--target=/opt/projects", "--watch=true") + var err error + childStdin, err = child.StdinPipe() + if err != nil { + return fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := child.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + childStdout = bufio.NewReader(stdout) + // codecontext's own log.SetOutput(os.Stderr) keeps its diagnostic noise + // off the JSON-RPC channel; we just pass-through to our own stderr. + child.Stderr = os.Stderr + + if err := child.Start(); err != nil { + return fmt.Errorf("start: %w", err) + } + log.Printf("started codecontext pid=%d", child.Process.Pid) + + go readerLoop(childStdout) + + // Supervise the child. When codecontext exits (crash, OOM, externally + // pkill'd), child.Wait() returns and we tear the shim down so the + // container's `restart: unless-stopped` policy recreates us with a + // fresh child. Without this goroutine the dead child becomes a zombie + // (Signal(0) on a zombie returns nil, so the health endpoint would lie) + // and HTTP requests would queue forever waiting on responses that will + // never come. Discovered during B.1 kill-restart testing. 
+ go func() { + err := child.Wait() + log.Printf("codecontext exited: %v — shim shutting down", err) + os.Exit(1) + }() + return nil +} + +func killChild() { + if child == nil || child.Process == nil { + return + } + log.Printf("killing codecontext pid=%d", child.Process.Pid) + _ = child.Process.Signal(syscall.SIGTERM) + done := make(chan error, 1) + go func() { done <- child.Wait() }() + select { + case <-done: + log.Printf("codecontext exited") + case <-time.After(5 * time.Second): + log.Printf("codecontext did not exit on SIGTERM; sending SIGKILL") + _ = child.Process.Kill() + <-done + } +} + +// MCP handshake: client sends initialize, server replies, client follows +// with the notifications/initialized notification. After that, tools/call +// is accepted. +func initializeMCP(ctx context.Context) error { + initParams := map[string]any{ + "protocolVersion": "2024-11-05", + "capabilities": map[string]any{}, + "clientInfo": map[string]any{ + "name": "boocode-codecontext-shim", + "version": "0.1.0", + }, + } + resp, err := call(ctx, "initialize", initParams) + if err != nil { + return fmt.Errorf("initialize: %w", err) + } + if resp.Error != nil { + return fmt.Errorf("initialize error %d: %s", resp.Error.Code, resp.Error.Message) + } + if err := notify("notifications/initialized", map[string]any{}); err != nil { + return fmt.Errorf("notifications/initialized: %w", err) + } + log.Printf("MCP handshake complete (server result=%s)", string(resp.Result)) + return nil +} + +// ---- HTTP ---- + +func writeJSON(w http.ResponseWriter, status int, body any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(body) +} + +func handleHealth(w http.ResponseWriter, r *http.Request) { + if child == nil || child.Process == nil { + http.Error(w, "no child", http.StatusServiceUnavailable) + return + } + // Signal 0 doesn't actually deliver — it just returns an error if the + // process is gone. Cheaper than parsing /proc. 
+ if err := child.Process.Signal(syscall.Signal(0)); err != nil { + http.Error(w, "child dead: "+err.Error(), http.StatusServiceUnavailable) + return + } + _, _ = io.WriteString(w, "ok") +} + +func makeToolHandler(toolName string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + targetDir := "-" + status := "ok" + defer func() { + log.Printf("%s target_dir=%q duration_ms=%d status=%s", + toolName, targetDir, time.Since(start).Milliseconds(), status) + }() + + var args json.RawMessage + if err := json.NewDecoder(r.Body).Decode(&args); err != nil { + status = "bad_request" + writeJSON(w, http.StatusBadRequest, map[string]any{ + "result": nil, + "error": "invalid JSON body: " + err.Error(), + }) + return + } + + // Sniff target_dir purely for the access log; pass args through opaque. + var argsMap map[string]any + if json.Unmarshal(args, &argsMap) == nil { + if td, ok := argsMap["target_dir"].(string); ok { + targetDir = td + } + } + + ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second) + defer cancel() + + callMu.Lock() + resp, err := call(ctx, "tools/call", map[string]any{ + "name": toolName, + "arguments": args, + }) + callMu.Unlock() + + if err != nil { + status = "rpc_error" + writeJSON(w, http.StatusBadGateway, map[string]any{ + "result": nil, + "error": err.Error(), + }) + return + } + if resp.Error != nil { + status = "mcp_error" + writeJSON(w, http.StatusOK, map[string]any{ + "result": nil, + "error": resp.Error.Message, + }) + return + } + + var ctr callToolResult + if err := json.Unmarshal(resp.Result, &ctr); err != nil { + status = "parse_error" + writeJSON(w, http.StatusOK, map[string]any{ + "result": nil, + "error": "parse result: " + err.Error(), + }) + return + } + + // codecontext only emits text content. Concatenate (single-entry in + // practice, but the schema allows multiple). + var buf []byte + for _, c := range ctr.Content { + if c.Type == "text" { + buf = append(buf, c.Text...) 
+ } + } + text := string(buf) + + if ctr.IsError { + status = "tool_error" + writeJSON(w, http.StatusOK, map[string]any{ + "result": nil, + "error": text, + }) + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "result": text, + "error": nil, + }) + } +} + +// ---- main ---- + +func main() { + log.SetOutput(os.Stderr) + log.SetFlags(log.LstdFlags | log.Lmicroseconds) + log.Println("boocode-codecontext-shim starting") + + if err := startChild(); err != nil { + log.Fatalf("startChild: %v", err) + } + + initCtx, initCancel := context.WithTimeout(context.Background(), 30*time.Second) + if err := initializeMCP(initCtx); err != nil { + initCancel() + killChild() + log.Fatalf("initializeMCP: %v", err) + } + initCancel() + + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) + + mux := http.NewServeMux() + // Go 1.22+ method-prefix routing. Any non-listed method → 405 automatically. + mux.HandleFunc("GET /health", handleHealth) + mux.HandleFunc("POST /v1/get_codebase_overview", makeToolHandler("get_codebase_overview")) + mux.HandleFunc("POST /v1/get_file_analysis", makeToolHandler("get_file_analysis")) + mux.HandleFunc("POST /v1/get_symbol_info", makeToolHandler("get_symbol_info")) + mux.HandleFunc("POST /v1/search_symbols", makeToolHandler("search_symbols")) + mux.HandleFunc("POST /v1/get_dependencies", makeToolHandler("get_dependencies")) + mux.HandleFunc("POST /v1/watch_changes", makeToolHandler("watch_changes")) + mux.HandleFunc("POST /v1/get_semantic_neighborhoods", makeToolHandler("get_semantic_neighborhoods")) + mux.HandleFunc("POST /v1/get_framework_analysis", makeToolHandler("get_framework_analysis")) + + server := &http.Server{ + Addr: ":8080", + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, + } + + go func() { + log.Println("listening on :8080") + if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + log.Fatalf("ListenAndServe: %v", err) + } + }() + + <-sigChan + 
log.Println("shutdown signal received") + + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second) + _ = server.Shutdown(shutdownCtx) + shutdownCancel() + killChild() + log.Println("exit") +} diff --git a/docker-compose.yml b/docker-compose.yml index 2f32c0c..e1fcc43 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,6 +7,7 @@ services: - "100.114.205.53:9500:3000" env_file: .env environment: + CODECONTEXT_URL: http://codecontext:8080 CONTAINER_GUIDANCE_FILE: /app/BOOCHAT.md DATABASE_URL: postgres://boocode:${POSTGRES_PASSWORD}@boocode_db:5432/boocode volumes: @@ -60,6 +61,33 @@ services: networks: - boocode_net + # v1.12 Track B: codecontext sidecar. Stdio MCP server wrapped by a small + # HTTP shim (see ./codecontext/). No host port — reached from boocode at + # http://codecontext:8080 over the boocode_net bridge. + # + # Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects live + # at /opt/ on the host, not exclusively under /opt/projects. The + # mount must cover anywhere a project.path could resolve to. Read-only + # because codecontext only analyzes — never writes. The model can't + # arbitrarily set target_dir to a sensitive subtree because the B.2 + # wrappers validate target_dir against project.path before calling the + # shim, and the shim isn't reachable from outside boocode_net. + codecontext: + build: + context: ./codecontext + container_name: boocode_codecontext + restart: unless-stopped + networks: + - boocode_net + volumes: + - /opt:/opt:ro + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:8080/health || exit 1"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s + volumes: boocode_pgdata: