fix(coder): harden edit-apply pipeline against block duplication

Root cause: two proven corruption mechanisms — (M1) non-idempotent apply stamped the same block N times when a quantized model re-emitted the same edit_file call or a turn was retried; (M2) Levenshtein tier 4 was fail-open with no uniqueness guard, silently splicing into the wrong location. Fixes applied at every layer of the pipeline: Matcher (fuzzy-match.ts): raise SIMILARITY_THRESHOLD 0.66 → 0.85; add AMBIGUITY_EPSILON uniqueness guard — two windows within 0.05 of the top score → ambiguous, not a guess; add block-anchor gate (≥3-line needles require first+last line exact match before a window is scored). Edit planner (pending_changes.ts): extract planEdit() as a pure function; idempotency guards detect already-applied states (anchored insert re-stamp, old-gone-but-new-present); findPendingDuplicate() collapses identical pending rows at queue time so M1 never reaches applyOne. Atomic writes (pending_changes.ts): temp-file + rename on the same filesystem so a crash can't leave a half-written source file; realpath() first so symlinks survive the rename. Per-file mutex (pending_changes.ts): withFileLock() serializes concurrent read-modify-write on the same path via a chained-Promise Map. EOL preservation (pending_changes.ts): normalize CRLF → LF for matching, restore native line ending on write so Windows-style files stay clean. Context isolation (inference_context.ts): replace module-level singleton with AsyncLocalStorage so concurrent inference runs (arena parallel dispatch, dispatcher poll racing a user message) each get their own scoped context with no clobbering. Tests: plan-edit.test.ts (pure planEdit unit tests), extended fuzzy-match and pending_changes_integration suites, ALS isolation test that proves overlapping runs get correct session IDs. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-07 01:44:37 +00:00
parent dbf1662982
commit cce685b1a7
16 changed files with 644 additions and 157 deletions
--- a/apps/coder/src/services/tools/tests/inference_context.test.ts
+++ b/apps/coder/src/services/tools/tests/inference_context.test.ts
@@ -0,0 +1,38 @@
+import { describe, it, expect } from 'vitest';
+import { runWithInferenceContext, getInferenceContext } from '../inference_context.js';
+import type { Sql } from '../../../db.js';
+
+const fakeSql = {} as unknown as Sql; // stub handle: no test below issues a query through it
+
+describe('inference context (AsyncLocalStorage isolation)', () => {
+  it('throws when read outside a run', () => {
+    expect(() => getInferenceContext()).toThrow(/outside inference context/); // no runWithInferenceContext() scope is active here
+  });
+
+  it('keeps each run its own context across overlapping awaits', async () => {
+    // The race the global `let current` had: run B starts (and would overwrite a
+    // shared global) while run A is awaiting. After A resumes it must still read
+    // its OWN sessionId, not B's.
+    const run = (id: string, delay: number) =>
+      runWithInferenceContext({ sql: fakeSql, sessionId: id, taskId: null }, async () => {
+        await new Promise((r) => setTimeout(r, delay)); // suspend so the two runs overlap
+        return getInferenceContext().sessionId; // read only after the await has resumed
+      });
+
+    const [a, b] = await Promise.all([run('A', 20), run('B', 5)]); // A is still waiting (20 ms) when B (5 ms) resumes
+    expect(a).toBe('A');
+    expect(b).toBe('B');
+  });
+
+  it('carries permissionMode and taskId per run', async () => {
+    const result = await runWithInferenceContext(
+      { sql: fakeSql, sessionId: 's1', taskId: 't1', permissionMode: 'bypass' },
+      async () => {
+        await Promise.resolve(); // cross one await boundary before reading the store
+        const ctx = getInferenceContext();
+        return { taskId: ctx.taskId, mode: ctx.permissionMode };
+      },
+    );
+    expect(result).toEqual({ taskId: 't1', mode: 'bypass' });
+  });
+});
--- a/apps/coder/src/services/tools/apply_pending.ts
+++ b/apps/coder/src/services/tools/apply_pending.ts
@@ -26,6 +26,15 @@ export const applyPendingTool: ToolDef<ApplyPendingInputT> = {
    },
  },
  async execute(_input: ApplyPendingInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
+    // Under Ask (and Plan) the human approves via the Pending Changes panel — the
+    // agent must not auto-apply. Bypass and legacy (undefined) may apply.
+    if (context.permissionMode === 'ask' || context.permissionMode === 'plan') {
+      return {
+        status: 'denied',
+        message:
+          'Permission mode is Ask — staged changes must be approved by the user in the Pending Changes panel, not applied by the agent.',
+      };
+    }
    const results = await applyAll(context.sql, context.sessionId, projectRoot);
    const succeeded = results.filter((r) => r.success).length;
    const failed = results.filter((r) => !r.success).length;
--- a/apps/coder/src/services/tools/create_file.ts
+++ b/apps/coder/src/services/tools/create_file.ts
@@ -1,6 +1,7 @@
 import { z } from 'zod';
 import type { ToolDef, ToolContext } from './types.js';
 import { queueCreate } from '../pending_changes.js';
+import { denyReadOnly, finalizeWrite } from './write-gate.js';

 const CreateFileInput = z.object({
  file_path: z.string().min(1),
@@ -32,6 +33,7 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
    },
  },
  async execute(input: CreateFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
+    if (context.permissionMode === 'plan') return denyReadOnly('create_file');
    const change = await queueCreate(
      context.sql,
      context.sessionId,
@@ -40,12 +42,11 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
      input.content,
      projectRoot,
    );
-    return {
-      status: 'queued',
-      change_id: change.id,
-      file_path: change.file_path,
-      operation: 'create',
-      message: `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
-    };
+    return finalizeWrite(
+      context,
+      projectRoot,
+      change,
+      `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
+    );
  },
 };
--- a/apps/coder/src/services/tools/delete_file.ts
+++ b/apps/coder/src/services/tools/delete_file.ts
@@ -1,6 +1,7 @@
 import { z } from 'zod';
 import type { ToolDef, ToolContext } from './types.js';
 import { queueDelete } from '../pending_changes.js';
+import { denyReadOnly, finalizeWrite } from './write-gate.js';

 const DeleteFileInput = z.object({
  file_path: z.string().min(1),
@@ -30,6 +31,7 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
    },
  },
  async execute(input: DeleteFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
+    if (context.permissionMode === 'plan') return denyReadOnly('delete_file');
    const change = await queueDelete(
      context.sql,
      context.sessionId,
@@ -37,12 +39,11 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
      input.file_path,
      projectRoot,
    );
-    return {
-      status: 'queued',
-      change_id: change.id,
-      file_path: change.file_path,
-      operation: 'delete',
-      message: `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
-    };
+    return finalizeWrite(
+      context,
+      projectRoot,
+      change,
+      `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
+    );
  },
 };
--- a/apps/coder/src/services/tools/edit_file.ts
+++ b/apps/coder/src/services/tools/edit_file.ts
@@ -1,6 +1,7 @@
 import { z } from 'zod';
 import type { ToolDef, ToolContext } from './types.js';
 import { queueEdit } from '../pending_changes.js';
+import { denyReadOnly, finalizeWrite } from './write-gate.js';

 const EditFileInput = z.object({
  file_path: z.string().min(1),
@@ -34,6 +35,7 @@ export const editFileTool: ToolDef<EditFileInputT> = {
    },
  },
  async execute(input: EditFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
+    if (context.permissionMode === 'plan') return denyReadOnly('edit_file');
    const change = await queueEdit(
      context.sql,
      context.sessionId,
@@ -43,12 +45,11 @@ export const editFileTool: ToolDef<EditFileInputT> = {
      input.new_string,
      projectRoot,
    );
-    return {
-      status: 'queued',
-      change_id: change.id,
-      file_path: change.file_path,
-      operation: 'edit',
-      message: `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
-    };
+    return finalizeWrite(
+      context,
+      projectRoot,
+      change,
+      `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
+    );
  },
 };
--- a/apps/coder/src/services/tools/inference_context.ts
+++ b/apps/coder/src/services/tools/inference_context.ts
@@ -1,36 +1,49 @@
+import { AsyncLocalStorage } from 'node:async_hooks';
 import type { Sql } from '../../db.js';
+import type { PermissionMode } from './types.js';

 /**
- * Module-level inference context for write tools.
+ * Per-run inference context for write tools.
 *
- * Set via `setInferenceContext()` before each inference run starts.
- * Write tools read it via `getInferenceContext()` during execute.
- * Same pattern as BooChat's `loadConfig()` singleton — tools need
- * ambient state that can't be threaded through the tool-phase execute
- * signature (which is `execute(input, projectRoot, extraRoots?)`).
+ * Write tools need ambient state (sql, sessionId, the permission gate) that the
+ * BooChat tool-phase `execute(input, projectRoot, extraRoots?)` signature can't
+ * carry. This used to be a single module-level `let current` — but the inference
+ * runner's `enqueue()` is fire-and-forget, so two overlapping runs (a user
+ * message racing a dispatcher-polled native task; two chat tabs streaming) would
+ * clobber each other's context, and `cancel()` cleared it for ALL in-flight runs.
+ *
+ * AsyncLocalStorage gives each run its own context: `enqueue()` starts its async
+ * loop synchronously inside `runWithInferenceContext`, so the store propagates
+ * through every awaited tool execution in that run — and only that run.
 */

 export interface InferenceContext {
  sql: Sql;
  sessionId: string;
  taskId: string | null;
+  /**
+   * Native-BooCode permission gate, set per run from the request/task mode.
+   * Optional: when left undefined the write tools keep their legacy behavior.
+   */
+  permissionMode?: PermissionMode;
 }

-let current: InferenceContext | null = null;
+const storage = new AsyncLocalStorage<InferenceContext>();

-export function setInferenceContext(ctx: InferenceContext): void {
-  current = ctx;
-}
-
-export function clearInferenceContext(): void {
-  current = null;
+/**
+ * Bind `ctx` for the duration of the (possibly detached) async chain `fn` starts.
+ * The inference runner kicks off its loop synchronously within this call, so all
+ * downstream `await`s — including write-tool `execute` via the adapter — read the
+ * same store. Concurrent runs each get their own; nothing is shared or cleared
+ * out from under an in-flight run.
+ *
+ * @param ctx - Context that `getInferenceContext()` returns while `fn` and the
+ *   async work it starts are running.
+ * @param fn - Callback invoked through `storage.run`; it is not awaited here, so
+ *   an async `fn` hands its Promise straight back to the caller.
+ * @returns Whatever `fn` returns.
+ */
+export function runWithInferenceContext<T>(ctx: InferenceContext, fn: () => T): T {
+  return storage.run(ctx, fn);
 }

 export function getInferenceContext(): InferenceContext {
-  if (!current) {
+  const ctx = storage.getStore(); // undefined when no runWithInferenceContext() scope is active
+  if (!ctx) {
    throw new Error(
-      'Write tool called outside inference context — setInferenceContext() was not called before this run',
+      'Write tool called outside inference context — runWithInferenceContext() did not wrap this run',
    );
  }
-  return current;
+  return ctx; // the context bound for the calling run
 }
--- a/apps/coder/src/services/tools/types.ts
+++ b/apps/coder/src/services/tools/types.ts
@@ -1,6 +1,22 @@
 import type { z } from 'zod';
 import type { Sql } from '../../db.js';

+/**
+ * Unified permission ladder for native BooCode inference. Gates the write tools:
+ *   plan   — read-only: create/edit/delete are denied (no staging), and
+ *            `apply_pending` is denied as well.
+ *   ask    — stage to the pending-changes queue; `apply_pending` is denied so the
+ *            agent cannot self-apply (the human approves via the Diff panel).
+ *   bypass — stage, then apply each write immediately with no approval step; a
+ *            failed apply is reported back as left in the pending queue.
+ * Undefined preserves the historical behavior (stage + `apply_pending` allowed).
+ */
+export type PermissionMode = 'plan' | 'ask' | 'bypass';
+
+/**
+ * Narrow a raw task/request mode id to a unified PermissionMode, else undefined
+ * (e.g. an external agent's native mode id, or null).
+ *
+ * @param id - Raw mode id; may be null or undefined.
+ * @returns `id` unchanged when it is exactly 'plan', 'ask' or 'bypass' (strict,
+ *   case-sensitive comparison); undefined for every other value.
+ */
+export function asPermissionMode(id: string | null | undefined): PermissionMode | undefined {
+  return id === 'plan' || id === 'ask' || id === 'bypass' ? id : undefined;
+}
+
 export interface ToolJsonSchema {
  type: 'function';
  function: {
@@ -21,6 +37,8 @@ export interface ToolContext {
  sql: Sql;
  sessionId: string;
  taskId: string | null;
+  /** Native-BooCode permission gate for write tools (undefined = legacy behavior). */
+  permissionMode?: PermissionMode;
 }

 export interface ToolDef<TInput> {
--- a/apps/coder/src/services/tools/write-gate.ts
+++ b/apps/coder/src/services/tools/write-gate.ts
@@ -0,0 +1,53 @@
+/**
+ * Permission-gate helpers for native BooCode write tools. The gate comes from
+ * the per-run inference context (`ToolContext.permissionMode`):
+ *   plan   — deny the write (read-only); nothing is staged.
+ *   bypass — apply the just-staged change immediately (no approval step); a failed apply stays queued.
+ *   ask / undefined — leave it in the pending-changes queue for review.
+ */
+import type { ToolContext } from './types.js';
+import { applyOne } from '../pending_changes.js';
+
+/**
+ * Result returned when a write is denied under Plan (read-only) mode.
+ *
+ * @param operation - Tool name echoed in the result and interpolated into the
+ *   message (the write tools pass 'create_file', 'edit_file' or 'delete_file').
+ * @returns A `{ status: 'denied', operation, message }` object. This helper only
+ *   builds the result; it stages and writes nothing.
+ */
+export function denyReadOnly(operation: string): unknown {
+  return {
+    status: 'denied',
+    operation,
+    message: `Read-only (Plan) permission mode — ${operation} is not permitted. Switch to Ask or Bypass to make changes.`,
+  };
+}
+
+/**
+ * Finalize a just-staged change per the permission gate: apply now under Bypass,
+ * otherwise return it as queued for the human to approve.
+ *
+ * Plan mode is NOT rejected here: any mode other than 'bypass' takes the queued
+ * branch, so callers must return `denyReadOnly()` before staging under Plan.
+ *
+ * @param context - Per-run tool context; `permissionMode` selects the branch and
+ *   `sql` is handed to `applyOne`.
+ * @param projectRoot - Project root forwarded to `applyOne`.
+ * @param change - The already-staged pending change (id, file path, operation).
+ * @param queuedHint - Message returned verbatim when the change stays queued.
+ * @returns `{ status, change_id, file_path, operation, message }`, where `status`
+ *   is 'applied' or 'failed' under Bypass and 'queued' otherwise.
+ */
+export async function finalizeWrite(
+  context: ToolContext,
+  projectRoot: string,
+  change: { id: string; file_path: string; operation: string },
+  queuedHint: string,
+): Promise<unknown> {
+  if (context.permissionMode === 'bypass') {
+    const res = await applyOne(context.sql, change.id, projectRoot); // applies only this change, looked up by id
+    console.log(
+      `[write-gate] bypass apply ${change.operation} ${change.file_path} -> ${res.success ? 'applied' : 'FAILED: ' + (res.error ?? '?')}`,
+    );
+    return {
+      status: res.success ? 'applied' : 'failed',
+      change_id: change.id,
+      file_path: change.file_path,
+      operation: change.operation,
+      message: res.success
+        ? `${change.operation} applied to ${change.file_path}.`
+        : `Apply failed for ${change.file_path}: ${res.error ?? 'unknown error'}. Left in the pending queue.`,
+    };
+  } // every non-bypass mode (ask, or undefined shown as 'legacy') leaves the change queued
+  console.log(
+    `[write-gate] ${context.permissionMode ?? 'legacy'} queued ${change.operation} ${change.file_path}`,
+  );
+  return {
+    status: 'queued',
+    change_id: change.id,
+    file_path: change.file_path,
+    operation: change.operation,
+    message: queuedHint,
+  };
+}