diff --git a/apps/coder/src/index.ts b/apps/coder/src/index.ts index f007c2f..99b3d7d 100644 --- a/apps/coder/src/index.ts +++ b/apps/coder/src/index.ts @@ -13,7 +13,7 @@ import type { WsFrame } from '@boocode/contracts/ws-frames'; // v2.0.0 Phase 2C: write tools + adapter for BooChat ToolDef compatibility. import { WRITE_TOOLS } from './services/tools/index.js'; import { adaptWriteTool } from './services/tools/adapter.js'; -import { setInferenceContext, clearInferenceContext } from './services/tools/inference_context.js'; +import { runWithInferenceContext } from './services/tools/inference_context.js'; // Routes import { registerMessageRoutes } from './routes/messages.js'; import { registerSkillRoutes } from './routes/skills.js'; @@ -174,22 +174,27 @@ async function main() { } ); - // Wrap the inference runner to set/clear the write-tool context around each run. - // The inference runner calls enqueue() which fires asynchronously — we hook - // into the enqueue to set context before the run starts. + // Wrap the inference runner to bind the write-tool context around each run. + // enqueue() starts its async loop synchronously, so wrapping the call in + // runWithInferenceContext propagates the per-run context (sql, sessionId, the + // Plan/Ask/Bypass gate) through every awaited tool execution — and concurrent + // runs (a user message racing a dispatcher-polled native task) each get their + // own, instead of clobbering a shared global. const inferenceApi = { - enqueue: (sessionId: string, chatId: string, assistantId: string, user: string) => { - // Set the inference context so write tools can access sql + sessionId. - // The context persists for the duration of the inference run. Since - // BooCoder is single-user and runs one inference at a time per session, - // this module-level state is safe. - setInferenceContext({ sql, sessionId, taskId: null }); - inference.enqueue(sessionId, chatId, assistantId, user); + enqueue: ( + sessionId: string, + chatId: string, + assistantId: string, + user: string, + permissionMode?: 'plan' | 'ask' | 'bypass', + ) => { + runWithInferenceContext({ sql, sessionId, taskId: null, permissionMode }, () => { + inference.enqueue(sessionId, chatId, assistantId, user); + }); }, cancel: async (sessionId: string, chatId: string) => { - const result = await inference.cancel(sessionId, chatId); - clearInferenceContext(); - return result; + // No context to clear — AsyncLocalStorage scopes it to each run's own chain. + return inference.cancel(sessionId, chatId); }, hasActive: (chatId: string) => inference.hasActive(chatId), }; diff --git a/apps/coder/src/routes/messages.ts b/apps/coder/src/routes/messages.ts index 616bd5c..47f3cba 100644 --- a/apps/coder/src/routes/messages.ts +++ b/apps/coder/src/routes/messages.ts @@ -4,7 +4,7 @@ import type { Sql } from '../db.js'; import type { Broker } from '@boocode/server/broker'; import type { WsFrame } from '@boocode/contracts/ws-frames'; import { resolveChatId } from './chat-resolve.js'; -import { applyAll } from '../services/pending_changes.js'; +import { asPermissionMode } from '../services/tools/types.js'; const AnswerUserInputBody = z.object({ tool_call_id: z.string().min(1), @@ -44,7 +44,13 @@ const SendBody = z.object({ }); interface InferenceApi { - enqueue: (sessionId: string, chatId: string, assistantId: string, user: string) => void; + enqueue: ( + sessionId: string, + chatId: string, + assistantId: string, + user: string, + permissionMode?: 'plan' | 'ask' | 'bypass', + ) => void; cancel: (sessionId: string, chatId: string) => Promise; hasActive: (chatId: string) => boolean; } @@ -246,36 +252,16 @@ export function registerMessageRoutes( RETURNING id `; - inference.enqueue(sessionId, chatId, assistantMsg!.id, 'default'); - - // Bypass permission mode (native BooCode): auto-apply staged edits to disk - // once the turn settles. `enqueue` registers synchronously, so hasActive is - // true immediately; poll until it clears, apply, then re-publish - // message_complete so the DiffPanel reflects the now-applied (non-pending) - // state. Best-effort — failures stay in the pending queue for manual apply. - if (mode_id === 'bypass') { - const projectId = sessionRows[0]!.project_id; - const assistantId = assistantMsg!.id; - void (async () => { - try { - const [proj] = await sql<{ path: string }[]>`SELECT path FROM projects WHERE id = ${projectId}`; - if (!proj?.path) return; - for (let i = 0; i < 1200 && inference.hasActive(chatId); i++) { - await new Promise((r) => setTimeout(r, 1000)); - } - const applied = await applyAll(sql, sessionId, proj.path); - if (applied.length > 0) { - broker.publishFrame(sessionId, { - type: 'message_complete', - message_id: assistantId, - chat_id: chatId, - } as unknown as WsFrame); - } - } catch { - /* best-effort auto-apply — leave staged changes for manual apply */ - } - })(); - } + // Native BooCode permission gate (plan/ask/bypass) — threaded into the + // write-tool context so create/edit/delete and apply_pending honor it. + // Plan = read-only, Ask = stage to the queue (agent can't self-apply), + // Bypass = apply each write immediately. Other mode ids (e.g. an external + // fallback's native mode) leave the gate undefined = legacy behavior. + req.log.info( + { provider, mode_id, permissionMode: asPermissionMode(mode_id), chatId }, + 'native enqueue — permission gate', + ); + inference.enqueue(sessionId, chatId, assistantMsg!.id, 'default', asPermissionMode(mode_id)); reply.code(202); return { user_message_id: userMsg!.id, assistant_message_id: assistantMsg!.id }; diff --git a/apps/coder/src/services/__tests__/fuzzy-match.test.ts b/apps/coder/src/services/__tests__/fuzzy-match.test.ts index b25156c..ce7c0a2 100644 --- a/apps/coder/src/services/__tests__/fuzzy-match.test.ts +++ b/apps/coder/src/services/__tests__/fuzzy-match.test.ts @@ -161,6 +161,52 @@ describe('locateMatch — strategy 4: Levenshtein', () => { }); }); +describe('locateMatch — strategy 4: fail-closed on ambiguity (corruption guard)', () => { + it('refuses (ambiguous) when two equally-similar anchored blocks both clear the bar', () => { + // The repetitive-file case that duplicated blocks: two blocks share the same + // first+last anchor lines and their middle lines are EQUALLY similar to the + // (drifted) needle. Tier 4 must refuse rather than splice over one of them. + const content = [ + 'const x = {', + ' total = aa;', + '};', + 'const x = {', + ' total = bb;', + '};', + ].join('\n'); + const needle = ['const x = {', ' total = ab;', '};'].join('\n'); + const result = locateMatch(content, needle); + expect(result.kind).toBe('ambiguous'); + }); + + it('refuses a below-threshold near-miss that the old 0.66 floor would have spliced', () => { + // ~0.7 similar: under the raised 0.85 floor this is now not_found, so the + // caller surfaces a correctable error instead of corrupting the file. + const content = 'const grandTotalAmount = a + b;\n'; + const needle = 'const totalValue = a + b;'; + const result = locateMatch(content, needle); + expect(result).toEqual({ kind: 'not_found' }); + }); + + it('still matches a single genuine high-similarity drift uniquely', () => { + const content = 'const total = sum + tax;\n'; + const needle = 'const totals = sum + tax;'; // one-char typo, ~0.96 + const result = locateMatch(content, needle); + expect(result.kind).toBe('fuzzy'); + const { start, end } = span(result); + expect(content.slice(start, end)).toBe('const total = sum + tax;'); + }); + + it('requires an exact first+last line anchor for multi-line needles', () => { + // First line drifted too far to anchor → no window is scored → not_found, + // even though the middle lines are identical. + const content = ['function compute() {', ' return a + b;', ' return done;', '}'].join('\n'); + const needle = ['totally different opener', ' return a + b;', '}'].join('\n'); + const result = locateMatch(content, needle); + expect(result).toEqual({ kind: 'not_found' }); + }); +}); + describe('locateMatch — edge cases', () => { it('returns not_found for an empty needle', () => { expect(locateMatch('anything', '')).toEqual({ kind: 'not_found' }); diff --git a/apps/coder/src/services/__tests__/pending_changes_integration.test.ts b/apps/coder/src/services/__tests__/pending_changes_integration.test.ts index a75db86..ea862f8 100644 --- a/apps/coder/src/services/__tests__/pending_changes_integration.test.ts +++ b/apps/coder/src/services/__tests__/pending_changes_integration.test.ts @@ -83,6 +83,53 @@ describe.runIf(!!process.env.DATABASE_URL)('pending_changes integration', () => expect(existsSync(resolve(testDir, 'deleteme.txt'))).toBe(false); }); + it('re-emitted identical edits dedupe at queue and never duplicate on apply', async () => { + // Regression: the 2-3x block-stamping corruption. An anchored insert queued + // three times (a local model re-emitting the same tool call) must collapse to + // ONE pending row and apply exactly once. + await queueCreate(sql, testSessionId, null, 'dup.js', '\n'; + + const first = planEdit(before, oldStr, newStr); + expect(first.kind).toBe('apply'); + const after = first.kind === 'apply' ? first.updated : ''; + expect((after.match(/const recordFormats/g) || []).length).toBe(1); + + // Re-applying the identical edit to the already-edited content is a no-op. + const second = planEdit(after, oldStr, newStr); + expect(second).toEqual({ kind: 'noop', reason: 'already-applied' }); + }); + + it('treats an edit whose old_string is gone but new_string is present as already-applied', () => { + const content = 'const total = sum + tax;\n'; + const plan = planEdit(content, 'const subtotal = sum;', 'const total = sum + tax;'); + expect(plan).toEqual({ kind: 'noop', reason: 'already-applied' }); + }); + + it('treats a no-change splice as a noop', () => { + const content = 'a\nfoo\nb\n'; + const plan = planEdit(content, 'foo', 'foo'); + expect(plan).toEqual({ kind: 'noop', reason: 'identical' }); + }); + + it('does not duplicate across three repeated applications', () => { + const oldStr = 'function f() {'; + const newStr = 'function f() {\n const x = 1;'; + let content = 'function f() {\n return x;\n}\n'; + for (let i = 0; i < 3; i++) { + const plan = planEdit(content, oldStr, newStr); + if (plan.kind === 'apply') content = plan.updated; + } + expect((content.match(/const x = 1;/g) || []).length).toBe(1); + }); +}); diff --git a/apps/coder/src/services/dispatcher.ts b/apps/coder/src/services/dispatcher.ts index 3a93aab..3124f99 100644 --- a/apps/coder/src/services/dispatcher.ts +++ b/apps/coder/src/services/dispatcher.ts @@ -4,7 +4,7 @@ import type { Broker } from '@boocode/server/broker'; import type { WsFrame } from '@boocode/contracts/ws-frames'; import type { Config } from '../config.js'; import { createWorktree, diffWorktree, cleanupWorktree, ensureSessionWorktree } from './worktrees.js'; -import { applyAll } from './pending_changes.js'; +import { asPermissionMode } from './tools/types.js'; import { createCheckpoint } from './checkpoints.js'; import { makeDcpStreamStripper } from './dcp-strip.js'; import { dispatchViaAcp } from './acp-dispatch.js'; @@ -32,7 +32,13 @@ import { import { shouldFailOnMissingAgent } from './flow-runner-decisions.js'; interface InferenceRunner { - enqueue: (sessionId: string, chatId: string, assistantId: string, user: string) => void; + enqueue: ( + sessionId: string, + chatId: string, + assistantId: string, + user: string, + permissionMode?: 'plan' | 'ask' | 'bypass', + ) => void; cancel: (sessionId: string, chatId: string) => Promise; hasActive: (chatId: string) => boolean; } @@ -358,8 +364,9 @@ export function createDispatcher(deps: Deps): { `; const assistantId = assistantMsg!.id; - // Enqueue inference - inference.enqueue(sessionId, chatId, assistantId, 'default'); + // Enqueue inference — pass the native permission gate (plan/ask/bypass) + // through to the write-tool context. Non-unified mode ids → undefined. + inference.enqueue(sessionId, chatId, assistantId, 'default', asPermissionMode(task.mode_id)); // Wait for inference to complete (poll message status) const finalStatus = await waitForCompletion(assistantId); @@ -392,22 +399,6 @@ export function createDispatcher(deps: Deps): { WHERE id = ${taskId} `; log.info({ taskId, costTokens }, 'dispatcher: task completed (native)'); - // Bypass permission mode: auto-apply the staged edits to disk after the - // turn. Ask/Plan leave them in the pending-changes queue for review. - if (task.mode_id === 'bypass') { - try { - const [proj] = await sql<{ path: string }[]>`SELECT path FROM projects WHERE id = ${task.project_id}`; - if (proj?.path) { - const applied = await applyAll(sql, sessionId, proj.path); - log.info({ taskId, applied: applied.length }, 'dispatcher: native bypass auto-applied pending changes'); - } - } catch (applyErr) { - log.warn( - { taskId, err: applyErr instanceof Error ? applyErr.message : String(applyErr) }, - 'dispatcher: native bypass auto-apply failed', - ); - } - } } else { const [msg] = await sql<{ content: string | null }[]>` SELECT content FROM messages WHERE id = ${assistantId} diff --git a/apps/coder/src/services/fuzzy-match.ts b/apps/coder/src/services/fuzzy-match.ts index e6b47c3..8dedda2 100644 --- a/apps/coder/src/services/fuzzy-match.ts +++ b/apps/coder/src/services/fuzzy-match.ts @@ -21,7 +21,16 @@ // punctuation to ASCII on both sides; the match is // mapped back to original offsets. // 4. levenshtein — best line-window by normalized edit-distance -// similarity; accepted only at >= SIMILARITY_THRESHOLD. +// similarity; accepted only at >= SIMILARITY_THRESHOLD, +// anchored on an exact first+last line for multi-line +// needles, and REFUSED (ambiguous) when a second window +// scores within AMBIGUITY_EPSILON of the best. Like the +// exact/whitespace tiers, this tier fails CLOSED — it +// never splices over a merely-plausible guess, because a +// wrong-window splice corrupts the file (it leaves the +// real target intact and duplicates it). This mirrors +// opencode/cline/qwen, whose fuzzy tiers all keep the +// unique-match requirement rather than picking a winner. // // Pure and dependency-free (Levenshtein is the standard iterative two-row DP), // reimplemented from the general technique — no vendored source. @@ -31,8 +40,31 @@ export type MatchResult = | { kind: 'ambiguous'; count: number } | { kind: 'not_found' }; -/** Levenshtein similarity floor for the final fuzzy fallback (strategy 4). */ -export const SIMILARITY_THRESHOLD = 0.66; +/** + * Levenshtein similarity floor for the final fuzzy fallback (strategy 4). + * 0.66 was far too low — at two-thirds similarity a structurally-wrong window + * (e.g. one of three near-identical form blocks) clears the bar and gets spliced + * over, leaving the real target intact and duplicated. Competent agents anchor + * far tighter (opencode's BlockAnchor needs an exact anchor; cline needs exact + * first+last lines). 0.85 keeps genuine quantized-model drift (a typo, an indent + * shift) while refusing a different block. + */ +export const SIMILARITY_THRESHOLD = 0.85; + +/** + * If a second candidate window scores within this of the best, the match is + * ambiguous and tier 4 refuses rather than guessing — the same fail-closed + * stance the exact and whitespace tiers take on multiple hits. Repetitive files + * (the duplicate-block corruption case) produce near-tied windows; this is what + * turns that into a clean "add more context" error instead of a wrong splice. + */ +export const AMBIGUITY_EPSILON = 0.05; + +/** Multi-line needles at or above this length must anchor on an exact (after + * trim + unicode-fold) first AND last line before similarity is even scored — + * the cline/opencode block-anchor rule. Below it, threshold + uniqueness alone + * guard the match. */ +const ANCHOR_MIN_LINES = 3; export function locateMatch(content: string, needle: string): MatchResult { // Empty needle has no meaningful match. @@ -252,20 +284,39 @@ function locateByLevenshtein(content: string, needle: string): MatchResult | nul const needleJoined = needleLines.map((l) => l.trim()).join('\n'); - let best = -1; - let bestSpan: { start: number; end: number } | null = null; + // Block-anchor gate for multi-line needles: the first and last lines must match + // exactly (after trim + unicode-fold) or the window is not even scored. This + // stops a high interior-similarity from dragging a structurally-wrong window + // over the threshold — the failure that duplicates blocks in repetitive files. + const anchored = n >= ANCHOR_MIN_LINES; + const needleFirst = canonicalize(needleLines[0]!.trim()); + const needleLast = canonicalize(needleLines[n - 1]!.trim()); + + const scored: Array<{ score: number; start: number; end: number }> = []; for (let i = 0; i + n <= contentLines.length; i++) { const window = contentLines.slice(i, i + n); - const windowJoined = window.map((l) => l.text.trim()).join('\n'); - const score = similarity(windowJoined, needleJoined); - if (score > best) { - best = score; - bestSpan = { start: window[0]!.start, end: window[n - 1]!.end }; + if (anchored) { + const winFirst = canonicalize(window[0]!.text.trim()); + const winLast = canonicalize(window[n - 1]!.text.trim()); + if (winFirst !== needleFirst || winLast !== needleLast) continue; } + const windowJoined = window.map((l) => l.text.trim()).join('\n'); + scored.push({ + score: similarity(windowJoined, needleJoined), + start: window[0]!.start, + end: window[n - 1]!.end, + }); } - if (bestSpan && best >= SIMILARITY_THRESHOLD) { - return { kind: 'fuzzy', start: bestSpan.start, end: bestSpan.end }; - } - return null; + if (scored.length === 0) return null; + scored.sort((a, b) => b.score - a.score); + const best = scored[0]!; + if (best.score < SIMILARITY_THRESHOLD) return null; + + // Uniqueness guard: refuse when a second window is within epsilon of the best. + // Fail closed (ambiguous) rather than silently splicing one of several lookalikes. + const tied = scored.filter((s) => s.score >= best.score - AMBIGUITY_EPSILON); + if (tied.length > 1) return { kind: 'ambiguous', count: tied.length }; + + return { kind: 'fuzzy', start: best.start, end: best.end }; } diff --git a/apps/coder/src/services/pending_changes.ts b/apps/coder/src/services/pending_changes.ts index b8070e8..cf1d914 100644 --- a/apps/coder/src/services/pending_changes.ts +++ b/apps/coder/src/services/pending_changes.ts @@ -1,9 +1,120 @@ -import { readFile, writeFile, unlink, mkdir } from 'node:fs/promises'; -import { dirname } from 'node:path'; +import { readFile, writeFile, unlink, mkdir, rename, realpath } from 'node:fs/promises'; +import { dirname, join, basename } from 'node:path'; +import { randomBytes } from 'node:crypto'; import type { Sql } from '../db.js'; import { resolveWritePath } from './write_guard.js'; import { locateMatch } from './fuzzy-match.js'; +/** + * Write a file atomically: stage to a sibling temp file, then rename over the + * target. rename(2) on the same filesystem is atomic, so a crash mid-write can + * never leave a half-written (truncated/corrupt) source file — readers see + * either the old content or the complete new content. The temp lives in the same + * directory to guarantee a same-filesystem rename. + * + * Symlinks: a plain writeFile FOLLOWS a symlink and writes through to its target; + * a bare rename would REPLACE the link with a regular file. We realpath an + * existing target first so the rename lands on the real file and the link + * survives — preserving the prior follow-through behavior. A missing target + * (create, or a broken link) just writes the literal path. + */ +async function writeFileAtomic(filePath: string, content: string): Promise { + let target = filePath; + try { + target = await realpath(filePath); + } catch { + // ENOENT (new file) or broken link — write the literal path. + } + const tmp = join(dirname(target), `.${basename(target)}.tmp.${process.pid}.${randomBytes(6).toString('hex')}`); + await writeFile(tmp, content, 'utf8'); + try { + await rename(tmp, target); + } catch (err) { + await unlink(tmp).catch(() => {}); + throw err; + } +} + +/** Detect a file's dominant line ending so an edit can preserve it. */ +function detectEol(text: string): '\r\n' | '\n' { + return text.includes('\r\n') ? '\r\n' : '\n'; +} + +/** + * Serialize the read-modify-write of a single file so two concurrent applies + * (e.g. two chat tabs sharing one worktree, or a Bypass write racing an + * apply_pending) can't lose an update. In-process keying is sufficient — + * BooCoder is a single Fastify process. One Map entry per distinct path. + */ +const fileLocks = new Map>(); +async function withFileLock(filePath: string, fn: () => Promise): Promise { + const prev = fileLocks.get(filePath) ?? Promise.resolve(); + let release!: () => void; + const current = new Promise((r) => { release = r; }); + fileLocks.set(filePath, prev.then(() => current)); + await prev.catch(() => {}); + try { + return await fn(); + } finally { + release(); + } +} + +// --- Edit-apply planning (pure, unit-tested) --------------------------------- + +/** + * Decision for applying one queued edit to a file's current content. Pulled out + * of `applyOne` so the splice — the part that actually corrupted files — is pure + * and testable without a DB or filesystem. Mirrors how opencode/cline/qwen keep + * their matchers fail-closed and idempotent. + */ +export type EditPlan = + | { kind: 'apply'; updated: string } + | { kind: 'noop'; reason: 'identical' | 'already-applied' } + | { kind: 'ambiguous'; count: number } + | { kind: 'not_found' }; + +/** + * Decide how (or whether) to apply an `old → new` edit to `content`. + * + * Idempotency is the whole point here: a queued edit can legitimately be + * re-applied (a local model re-emits the same tool call; a turn is retried; the + * same change sits in the queue twice). A naive splice stamps the new text again + * each time — the 2–3× block duplication. Two guards make re-application a no-op: + * + * - already-applied (anchored insert): when `new` is `old` + an appended block + * (`old="anchor"`, `new="anchor\n"`), `old` still matches uniquely after + * the first apply, so a second apply would duplicate ``. If the full + * `new` text is already present at the match site, the edit is already applied. + * - already-applied (old gone): if `old` can't be located but `new` is already + * in the file, the change landed on a prior pass — treat as a no-op, not an error. + * - identical: the splice would not change the file. + * + * Anything ambiguous or genuinely absent fails CLOSED so the caller surfaces a + * correctable error instead of writing a guess. + */ +export function planEdit(content: string, oldStr: string, newStr: string): EditPlan { + const match = locateMatch(content, oldStr); + + if (match.kind === 'ambiguous') return { kind: 'ambiguous', count: match.count }; + + if (match.kind === 'not_found') { + if (newStr.length > 0 && content.includes(newStr)) { + return { kind: 'noop', reason: 'already-applied' }; + } + return { kind: 'not_found' }; + } + + const updated = content.slice(0, match.start) + newStr + content.slice(match.end); + // No-change splice first (covers old === new), then the anchored re-stamp guard: + // the full replacement already sits at the match site (re-emitted anchored insert). + if (updated === content) return { kind: 'noop', reason: 'identical' }; + if (content.slice(match.start, match.start + newStr.length) === newStr) { + return { kind: 'noop', reason: 'already-applied' }; + } + return { kind: 'apply', updated }; +} + // --- Types ------------------------------------------------------------------- export interface PendingChange { @@ -47,6 +158,13 @@ export async function queueEdit( const resolved = resolveWritePath(projectRoot, filePath); const diff = JSON.stringify({ old: oldString, new: newString }); + // Idempotent queue: collapse an identical edit that is still pending. Local + // quantized models re-emit the same edit_file call within a turn, and a retried + // turn re-queues — each duplicate row would apply and stamp another copy. One + // pending row per (session, file, operation, diff) is enough. + const existing = await findPendingDuplicate(sql, sessionId, resolved, 'edit', diff); + if (existing) return existing; + const [row] = await sql` INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff}, ${agent}) @@ -55,6 +173,28 @@ export async function queueEdit( return row!; } +/** Return an identical still-pending change for this (session, file, op, diff), + * or undefined. Used to keep the queue idempotent against re-emitted edits. */ +async function findPendingDuplicate( + sql: Sql, + sessionId: string, + resolvedPath: string, + operation: 'create' | 'edit' | 'delete', + diff: string, +): Promise { + const [row] = await sql` + SELECT * FROM pending_changes + WHERE session_id = ${sessionId} + AND file_path = ${resolvedPath} + AND operation = ${operation} + AND diff = ${diff} + AND status = 'pending' + ORDER BY created_at ASC + LIMIT 1 + `; + return row; +} + export async function queueCreate( sql: Sql, sessionId: string, @@ -68,6 +208,9 @@ export async function queueCreate( ): Promise { const resolved = resolveWritePath(projectRoot, filePath); + const existing = await findPendingDuplicate(sql, sessionId, resolved, 'create', content); + if (existing) return existing; + const [row] = await sql` INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content}, ${agent}) @@ -87,6 +230,9 @@ export async function queueDelete( ): Promise { const resolved = resolveWritePath(projectRoot, filePath); + const existing = await findPendingDuplicate(sql, sessionId, resolved, 'delete', ''); + if (existing) return existing; + const [row] = await sql` INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '', ${agent}) @@ -110,48 +256,60 @@ export async function applyOne( } try { - // Re-validate path in case projectRoot has shifted - resolveWritePath(projectRoot, change.file_path); + return await withFileLock(change.file_path, async () => { + // Re-validate path in case projectRoot has shifted + resolveWritePath(projectRoot, change.file_path); - switch (change.operation) { - case 'create': { - await mkdir(dirname(change.file_path), { recursive: true }); - await writeFile(change.file_path, change.diff, 'utf8'); - break; - } - case 'edit': { - const { old: oldStr, new: newStr } = JSON.parse(change.diff) as { old: string; new: string }; - const content = await readFile(change.file_path, 'utf8'); - const match = locateMatch(content, oldStr); - if (match.kind === 'ambiguous') { - throw new Error( - `old_string matches ${match.count} locations — add surrounding context to disambiguate`, - ); + switch (change.operation) { + case 'create': { + await mkdir(dirname(change.file_path), { recursive: true }); + await writeFileAtomic(change.file_path, change.diff); + break; } - if (match.kind === 'not_found') { - throw new Error( - 'old_string not found in file (even fuzzily) — file may have changed since the edit was queued', - ); + case 'edit': { + const { old: oldStr, new: newStr } = JSON.parse(change.diff) as { old: string; new: string }; + const raw = await readFile(change.file_path, 'utf8'); + // Normalize to LF for matching, then write back in the file's native EOL + // so an LF-emitting model doesn't leave a CRLF file with mixed endings. + const eol = detectEol(raw); + const toLf = (t: string) => t.replaceAll('\r\n', '\n'); + const plan = planEdit(toLf(raw), toLf(oldStr), toLf(newStr)); + if (plan.kind === 'ambiguous') { + throw new Error( + `old_string matches ${plan.count} locations — add surrounding context to disambiguate`, + ); + } + if (plan.kind === 'not_found') { + throw new Error( + 'old_string not found in file (even fuzzily) — file may have changed since the edit was queued', + ); + } + if (plan.kind === 'apply') { + const out = eol === '\r\n' ? plan.updated.replaceAll('\n', '\r\n') : plan.updated; + await writeFileAtomic(change.file_path, out); + } else { + // noop: the edit is already applied (re-emitted / retried) or a no-change. + // Mark it applied without rewriting so it can't stamp a duplicate. + console.log(`[pending] edit ${change.file_path} is a no-op (${plan.reason}) — not rewriting`); + } + break; } - const updated = content.slice(0, match.start) + newStr + content.slice(match.end); - await writeFile(change.file_path, updated, 'utf8'); - break; - } - case 'delete': { - // Stash current content in diff for potential rewind - try { - const existing = await readFile(change.file_path, 'utf8'); - await sql`UPDATE pending_changes SET diff = ${existing} WHERE id = ${changeId}`; - } catch { - // File may already be gone — proceed with status update + case 'delete': { + // Stash current content in diff for potential rewind + try { + const existing = await readFile(change.file_path, 'utf8'); + await sql`UPDATE pending_changes SET diff = ${existing} WHERE id = ${changeId}`; + } catch { + // File may already be gone — proceed with status update + } + await unlink(change.file_path); + break; } - await unlink(change.file_path); - break; } - } - await sql`UPDATE pending_changes SET status = 'applied' WHERE id = ${changeId}`; - return { id: change.id, file_path: change.file_path, operation: change.operation, success: true }; + await sql`UPDATE pending_changes SET status = 'applied' WHERE id = ${changeId}`; + return { id: change.id, file_path: change.file_path, operation: change.operation, success: true }; + }); } catch (err) { const message = err instanceof Error ? err.message : String(err); return { id: change.id, file_path: change.file_path, operation: change.operation, success: false, error: message }; @@ -220,13 +378,13 @@ export async function rewindOne( ); } const reverted = content.slice(0, match.start) + oldStr + content.slice(match.end); - await writeFile(change.file_path, reverted, 'utf8'); + await writeFileAtomic(change.file_path, reverted); break; } case 'delete': { // Reverse a delete: recreate the file (diff holds the original content stashed at apply time) await mkdir(dirname(change.file_path), { recursive: true }); - await writeFile(change.file_path, change.diff, 'utf8'); + await writeFileAtomic(change.file_path, change.diff); break; } } diff --git a/apps/coder/src/services/tools/__tests__/inference_context.test.ts b/apps/coder/src/services/tools/__tests__/inference_context.test.ts new file mode 100644 index 0000000..f395326 --- /dev/null +++ b/apps/coder/src/services/tools/__tests__/inference_context.test.ts @@ -0,0 +1,38 @@ +import { describe, it, expect } from 'vitest'; +import { runWithInferenceContext, getInferenceContext } from '../inference_context.js'; +import type { Sql } from '../../../db.js'; + +const fakeSql = {} as unknown as Sql; + +describe('inference context (AsyncLocalStorage isolation)', () => { + it('throws when read outside a run', () => { + expect(() => getInferenceContext()).toThrow(/outside inference context/); + }); + + it('keeps each run its own context across overlapping awaits', async () => { + // The race the global `let current` had: run B starts (and would overwrite a + // shared global) while run A is awaiting. After A resumes it must still read + // its OWN sessionId, not B's. + const run = (id: string, delay: number) => + runWithInferenceContext({ sql: fakeSql, sessionId: id, taskId: null }, async () => { + await new Promise((r) => setTimeout(r, delay)); + return getInferenceContext().sessionId; + }); + + const [a, b] = await Promise.all([run('A', 20), run('B', 5)]); + expect(a).toBe('A'); + expect(b).toBe('B'); + }); + + it('carries permissionMode and taskId per run', async () => { + const result = await runWithInferenceContext( + { sql: fakeSql, sessionId: 's1', taskId: 't1', permissionMode: 'bypass' }, + async () => { + await Promise.resolve(); + const ctx = getInferenceContext(); + return { taskId: ctx.taskId, mode: ctx.permissionMode }; + }, + ); + expect(result).toEqual({ taskId: 't1', mode: 'bypass' }); + }); +}); diff --git a/apps/coder/src/services/tools/apply_pending.ts b/apps/coder/src/services/tools/apply_pending.ts index c69ffa9..4825d26 100644 --- a/apps/coder/src/services/tools/apply_pending.ts +++ b/apps/coder/src/services/tools/apply_pending.ts @@ -26,6 +26,15 @@ export const applyPendingTool: ToolDef = { }, }, async execute(_input: ApplyPendingInputT, projectRoot: string, context: ToolContext): Promise { + // Under Ask (and Plan) the human approves via the Pending Changes panel — the + // agent must not auto-apply. Bypass and legacy (undefined) may apply. + if (context.permissionMode === 'ask' || context.permissionMode === 'plan') { + return { + status: 'denied', + message: + 'Permission mode is Ask — staged changes must be approved by the user in the Pending Changes panel, not applied by the agent.', + }; + } const results = await applyAll(context.sql, context.sessionId, projectRoot); const succeeded = results.filter((r) => r.success).length; const failed = results.filter((r) => !r.success).length; diff --git a/apps/coder/src/services/tools/create_file.ts b/apps/coder/src/services/tools/create_file.ts index a4aee29..df3d21a 100644 --- a/apps/coder/src/services/tools/create_file.ts +++ b/apps/coder/src/services/tools/create_file.ts @@ -1,6 +1,7 @@ import { z } from 'zod'; import type { ToolDef, ToolContext } from './types.js'; import { queueCreate } from '../pending_changes.js'; +import { denyReadOnly, finalizeWrite } from './write-gate.js'; const CreateFileInput = z.object({ file_path: z.string().min(1), @@ -32,6 +33,7 @@ export const createFileTool: ToolDef = { }, }, async execute(input: CreateFileInputT, projectRoot: string, context: ToolContext): Promise { + if (context.permissionMode === 'plan') return denyReadOnly('create_file'); const change = await queueCreate( context.sql, context.sessionId, @@ -40,12 +42,11 @@ export const createFileTool: ToolDef = { input.content, projectRoot, ); - return { - status: 'queued', - change_id: change.id, - file_path: change.file_path, - operation: 'create', - message: `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`, - }; + return finalizeWrite( + context, + projectRoot, + change, + `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`, + ); }, }; diff --git a/apps/coder/src/services/tools/delete_file.ts b/apps/coder/src/services/tools/delete_file.ts index 391e2f2..986a494 100644 --- a/apps/coder/src/services/tools/delete_file.ts +++ b/apps/coder/src/services/tools/delete_file.ts @@ -1,6 +1,7 @@ import { z } from 'zod'; import type { ToolDef, ToolContext } from './types.js'; import { queueDelete } from '../pending_changes.js'; +import { denyReadOnly, finalizeWrite } from './write-gate.js'; const DeleteFileInput = z.object({ file_path: z.string().min(1), @@ -30,6 +31,7 @@ export const deleteFileTool: ToolDef = { }, }, async execute(input: DeleteFileInputT, projectRoot: string, context: ToolContext): Promise { + if (context.permissionMode === 'plan') return denyReadOnly('delete_file'); const change = await queueDelete( context.sql, context.sessionId, @@ -37,12 +39,11 @@ export const deleteFileTool: ToolDef = { input.file_path, projectRoot, ); - return { - status: 'queued', - change_id: change.id, - file_path: change.file_path, - operation: 'delete', - message: `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`, - }; + return finalizeWrite( + context, + projectRoot, + change, + `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`, + ); }, }; diff --git a/apps/coder/src/services/tools/edit_file.ts b/apps/coder/src/services/tools/edit_file.ts index a361276..7e0fcc9 100644 --- a/apps/coder/src/services/tools/edit_file.ts +++ b/apps/coder/src/services/tools/edit_file.ts @@ -1,6 +1,7 @@ import { z } from 'zod'; import type { ToolDef, ToolContext } from './types.js'; import { queueEdit } from '../pending_changes.js'; +import { denyReadOnly, finalizeWrite } from './write-gate.js'; const EditFileInput = z.object({ file_path: z.string().min(1), @@ -34,6 +35,7 @@ export const editFileTool: ToolDef = { }, }, async execute(input: EditFileInputT, projectRoot: string, context: ToolContext): Promise { + if (context.permissionMode === 'plan') return denyReadOnly('edit_file'); const change = await queueEdit( context.sql, context.sessionId, @@ -43,12 +45,11 @@ export const editFileTool: ToolDef = { input.new_string, projectRoot, ); - return { - status: 'queued', - change_id: change.id, - file_path: change.file_path, - operation: 'edit', - message: `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`, - }; + return finalizeWrite( + context, + projectRoot, + change, + `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`, + ); }, }; diff --git a/apps/coder/src/services/tools/inference_context.ts b/apps/coder/src/services/tools/inference_context.ts index 507c6fd..5633e0c 100644 --- a/apps/coder/src/services/tools/inference_context.ts +++ b/apps/coder/src/services/tools/inference_context.ts @@ -1,36 +1,49 @@ +import { AsyncLocalStorage } from 'node:async_hooks'; import type { Sql } from '../../db.js'; +import type { PermissionMode } from './types.js'; /** - * Module-level inference context for write tools. + * Per-run inference context for write tools. * - * Set via `setInferenceContext()` before each inference run starts. - * Write tools read it via `getInferenceContext()` during execute. - * Same pattern as BooChat's `loadConfig()` singleton — tools need - * ambient state that can't be threaded through the tool-phase execute - * signature (which is `execute(input, projectRoot, extraRoots?)`). + * Write tools need ambient state (sql, sessionId, the permission gate) that the + * BooChat tool-phase `execute(input, projectRoot, extraRoots?)` signature can't + * carry. This used to be a single module-level `let current` — but the inference + * runner's `enqueue()` is fire-and-forget, so two overlapping runs (a user + * message racing a dispatcher-polled native task; two chat tabs streaming) would + * clobber each other's context, and `cancel()` cleared it for ALL in-flight runs. + * + * AsyncLocalStorage gives each run its own context: `enqueue()` starts its async + * loop synchronously inside `runWithInferenceContext`, so the store propagates + * through every awaited tool execution in that run — and only that run. */ export interface InferenceContext { sql: Sql; sessionId: string; taskId: string | null; + /** Native-BooCode permission gate, set per run from the request/task mode. */ + permissionMode?: PermissionMode; } -let current: InferenceContext | null = null; +const storage = new AsyncLocalStorage(); -export function setInferenceContext(ctx: InferenceContext): void { - current = ctx; -} - -export function clearInferenceContext(): void { - current = null; +/** + * Bind `ctx` for the duration of the (possibly detached) async chain `fn` starts. + * The inference runner kicks off its loop synchronously within this call, so all + * downstream `await`s — including write-tool `execute` via the adapter — read the + * same store. Concurrent runs each get their own; nothing is shared or cleared + * out from under an in-flight run. + */ +export function runWithInferenceContext(ctx: InferenceContext, fn: () => T): T { + return storage.run(ctx, fn); } export function getInferenceContext(): InferenceContext { - if (!current) { + const ctx = storage.getStore(); + if (!ctx) { throw new Error( - 'Write tool called outside inference context — setInferenceContext() was not called before this run', + 'Write tool called outside inference context — runWithInferenceContext() did not wrap this run', ); } - return current; + return ctx; } diff --git a/apps/coder/src/services/tools/types.ts b/apps/coder/src/services/tools/types.ts index 1600988..2bb4c48 100644 --- a/apps/coder/src/services/tools/types.ts +++ b/apps/coder/src/services/tools/types.ts @@ -1,6 +1,22 @@ import type { z } from 'zod'; import type { Sql } from '../../db.js'; +/** + * Unified permission ladder for native BooCode inference. Gates the write tools: + * plan — read-only: create/edit/delete are denied (no staging). + * ask — stage to the pending-changes queue; `apply_pending` is denied so the + * agent cannot self-apply (the human approves via the Diff panel). + * bypass — apply each write immediately (no queue, no approval). + * Undefined preserves the historical behavior (stage + `apply_pending` allowed). + */ +export type PermissionMode = 'plan' | 'ask' | 'bypass'; + +/** Narrow a raw task/request mode id to a unified PermissionMode, else undefined + * (e.g. an external agent's native mode id, or null). */ +export function asPermissionMode(id: string | null | undefined): PermissionMode | undefined { + return id === 'plan' || id === 'ask' || id === 'bypass' ? id : undefined; +} + export interface ToolJsonSchema { type: 'function'; function: { @@ -21,6 +37,8 @@ export interface ToolContext { sql: Sql; sessionId: string; taskId: string | null; + /** Native-BooCode permission gate for write tools (undefined = legacy behavior). */ + permissionMode?: PermissionMode; } export interface ToolDef { diff --git a/apps/coder/src/services/tools/write-gate.ts b/apps/coder/src/services/tools/write-gate.ts new file mode 100644 index 0000000..f1a46d9 --- /dev/null +++ b/apps/coder/src/services/tools/write-gate.ts @@ -0,0 +1,53 @@ +/** + * Permission-gate helpers for native BooCode write tools. The gate comes from + * the per-run inference context (`ToolContext.permissionMode`): + * plan — deny the write (read-only); nothing is staged. + * bypass — apply the staged change immediately (no queue, no approval). + * ask / undefined — leave it in the pending-changes queue for review. + */ +import type { ToolContext } from './types.js'; +import { applyOne } from '../pending_changes.js'; + +/** Result returned when a write is denied under Plan (read-only) mode. */ +export function denyReadOnly(operation: string): unknown { + return { + status: 'denied', + operation, + message: `Read-only (Plan) permission mode — ${operation} is not permitted. Switch to Ask or Bypass to make changes.`, + }; +} + +/** Finalize a just-staged change per the permission gate: apply now under Bypass, + * otherwise return it as queued for the human to approve. */ +export async function finalizeWrite( + context: ToolContext, + projectRoot: string, + change: { id: string; file_path: string; operation: string }, + queuedHint: string, +): Promise { + if (context.permissionMode === 'bypass') { + const res = await applyOne(context.sql, change.id, projectRoot); + console.log( + `[write-gate] bypass apply ${change.operation} ${change.file_path} -> ${res.success ? 'applied' : 'FAILED: ' + (res.error ?? '?')}`, + ); + return { + status: res.success ? 'applied' : 'failed', + change_id: change.id, + file_path: change.file_path, + operation: change.operation, + message: res.success + ? `${change.operation} applied to ${change.file_path}.` + : `Apply failed for ${change.file_path}: ${res.error ?? 'unknown error'}. Left in the pending queue.`, + }; + } + console.log( + `[write-gate] ${context.permissionMode ?? 'legacy'} queued ${change.operation} ${change.file_path}`, + ); + return { + status: 'queued', + change_id: change.id, + file_path: change.file_path, + operation: change.operation, + message: queuedHint, + }; +}