fix(coder): harden edit-apply pipeline against block duplication
Root cause: two proven corruption mechanisms — (M1) non-idempotent apply stamped the same block N times when a quantized model re-emitted the same edit_file call or a turn was retried; (M2) Levenshtein tier 4 was fail-open with no uniqueness guard, silently splicing into the wrong location. Fixes applied at every layer of the pipeline: Matcher (fuzzy-match.ts): raise SIMILARITY_THRESHOLD 0.66 → 0.85; add AMBIGUITY_EPSILON uniqueness guard — two windows within 0.05 of the top score → ambiguous, not a guess; add block-anchor gate (≥3-line needles require first+last line exact match before a window is scored). Edit planner (pending_changes.ts): extract planEdit() as a pure function; idempotency guards detect already-applied states (anchored insert re-stamp, old-gone-but-new-present); findPendingDuplicate() collapses identical pending rows at queue time so M1 never reaches applyOne. Atomic writes (pending_changes.ts): temp-file + rename on the same filesystem so a crash can't leave a half-written source file; realpath() first so symlinks survive the rename. Per-file mutex (pending_changes.ts): withFileLock() serializes concurrent read-modify-write on the same path via a chained-Promise Map. EOL preservation (pending_changes.ts): normalize CRLF → LF for matching, restore native line ending on write so Windows-style files stay clean. Context isolation (inference_context.ts): replace module-level singleton with AsyncLocalStorage so concurrent inference runs (arena parallel dispatch, dispatcher poll racing a user message) each get their own scoped context with no clobbering. Tests: plan-edit.test.ts (pure planEdit unit tests), extended fuzzy-match and pending_changes_integration suites, ALS isolation test that proves overlapping runs get correct session IDs. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { runWithInferenceContext, getInferenceContext } from '../inference_context.js';
|
||||
import type { Sql } from '../../../db.js';
|
||||
|
||||
const fakeSql = {} as unknown as Sql;
|
||||
|
||||
/**
 * Isolation tests for the inference context accessors.
 *
 * Covers three contracts of `runWithInferenceContext` / `getInferenceContext`:
 *   1. reading outside a run throws,
 *   2. two overlapping runs each read back their own context after an await,
 *   3. optional fields (`taskId`, `permissionMode`) travel with the run.
 */
describe('inference context (AsyncLocalStorage isolation)', () => {
  it('throws when read outside a run', () => {
    expect(() => getInferenceContext()).toThrow(/outside inference context/);
  });

  it('keeps each run its own context across overlapping awaits', async () => {
    // The race the global `let current` had: run B starts (and would overwrite a
    // shared global) while run A is awaiting. After A resumes it must still read
    // its OWN sessionId, not B's.
    const run = (id: string, delay: number) =>
      runWithInferenceContext({ sql: fakeSql, sessionId: id, taskId: null }, async () => {
        await new Promise((r) => setTimeout(r, delay));
        return getInferenceContext().sessionId;
      });

    // A sleeps longer than B, so B both starts and finishes while A is suspended.
    const [a, b] = await Promise.all([run('A', 20), run('B', 5)]);
    expect(a).toBe('A');
    expect(b).toBe('B');
  });

  it('carries permissionMode and taskId per run', async () => {
    const result = await runWithInferenceContext(
      { sql: fakeSql, sessionId: 's1', taskId: 't1', permissionMode: 'bypass' },
      async () => {
        // Yield once so the read happens after an async hop, not synchronously.
        await Promise.resolve();
        const ctx = getInferenceContext();
        return { taskId: ctx.taskId, mode: ctx.permissionMode };
      },
    );
    expect(result).toEqual({ taskId: 't1', mode: 'bypass' });
  });
});
|
||||
@@ -26,6 +26,15 @@ export const applyPendingTool: ToolDef<ApplyPendingInputT> = {
|
||||
},
|
||||
},
|
||||
async execute(_input: ApplyPendingInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
|
||||
// Under Ask (and Plan) the human approves via the Pending Changes panel — the
|
||||
// agent must not auto-apply. Bypass and legacy (undefined) may apply.
|
||||
if (context.permissionMode === 'ask' || context.permissionMode === 'plan') {
|
||||
return {
|
||||
status: 'denied',
|
||||
message:
|
||||
'Permission mode is Ask — staged changes must be approved by the user in the Pending Changes panel, not applied by the agent.',
|
||||
};
|
||||
}
|
||||
const results = await applyAll(context.sql, context.sessionId, projectRoot);
|
||||
const succeeded = results.filter((r) => r.success).length;
|
||||
const failed = results.filter((r) => !r.success).length;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
import type { ToolDef, ToolContext } from './types.js';
|
||||
import { queueCreate } from '../pending_changes.js';
|
||||
import { denyReadOnly, finalizeWrite } from './write-gate.js';
|
||||
|
||||
const CreateFileInput = z.object({
|
||||
file_path: z.string().min(1),
|
||||
@@ -32,6 +33,7 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
|
||||
},
|
||||
},
|
||||
async execute(input: CreateFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
|
||||
if (context.permissionMode === 'plan') return denyReadOnly('create_file');
|
||||
const change = await queueCreate(
|
||||
context.sql,
|
||||
context.sessionId,
|
||||
@@ -40,12 +42,11 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
|
||||
input.content,
|
||||
projectRoot,
|
||||
);
|
||||
return {
|
||||
status: 'queued',
|
||||
change_id: change.id,
|
||||
file_path: change.file_path,
|
||||
operation: 'create',
|
||||
message: `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
};
|
||||
return finalizeWrite(
|
||||
context,
|
||||
projectRoot,
|
||||
change,
|
||||
`File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
import type { ToolDef, ToolContext } from './types.js';
|
||||
import { queueDelete } from '../pending_changes.js';
|
||||
import { denyReadOnly, finalizeWrite } from './write-gate.js';
|
||||
|
||||
const DeleteFileInput = z.object({
|
||||
file_path: z.string().min(1),
|
||||
@@ -30,6 +31,7 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
|
||||
},
|
||||
},
|
||||
async execute(input: DeleteFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
|
||||
if (context.permissionMode === 'plan') return denyReadOnly('delete_file');
|
||||
const change = await queueDelete(
|
||||
context.sql,
|
||||
context.sessionId,
|
||||
@@ -37,12 +39,11 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
|
||||
input.file_path,
|
||||
projectRoot,
|
||||
);
|
||||
return {
|
||||
status: 'queued',
|
||||
change_id: change.id,
|
||||
file_path: change.file_path,
|
||||
operation: 'delete',
|
||||
message: `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
};
|
||||
return finalizeWrite(
|
||||
context,
|
||||
projectRoot,
|
||||
change,
|
||||
`File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
import type { ToolDef, ToolContext } from './types.js';
|
||||
import { queueEdit } from '../pending_changes.js';
|
||||
import { denyReadOnly, finalizeWrite } from './write-gate.js';
|
||||
|
||||
const EditFileInput = z.object({
|
||||
file_path: z.string().min(1),
|
||||
@@ -34,6 +35,7 @@ export const editFileTool: ToolDef<EditFileInputT> = {
|
||||
},
|
||||
},
|
||||
async execute(input: EditFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
|
||||
if (context.permissionMode === 'plan') return denyReadOnly('edit_file');
|
||||
const change = await queueEdit(
|
||||
context.sql,
|
||||
context.sessionId,
|
||||
@@ -43,12 +45,11 @@ export const editFileTool: ToolDef<EditFileInputT> = {
|
||||
input.new_string,
|
||||
projectRoot,
|
||||
);
|
||||
return {
|
||||
status: 'queued',
|
||||
change_id: change.id,
|
||||
file_path: change.file_path,
|
||||
operation: 'edit',
|
||||
message: `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
};
|
||||
return finalizeWrite(
|
||||
context,
|
||||
projectRoot,
|
||||
change,
|
||||
`Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,36 +1,49 @@
|
||||
import { AsyncLocalStorage } from 'node:async_hooks';
|
||||
import type { Sql } from '../../db.js';
|
||||
import type { PermissionMode } from './types.js';
|
||||
|
||||
/**
 * Per-run inference context for write tools.
 *
 * Write tools need ambient state (sql, sessionId, the permission gate) that the
 * BooChat tool-phase `execute(input, projectRoot, extraRoots?)` signature can't
 * carry. This used to be a single module-level `let current` — but the inference
 * runner's `enqueue()` is fire-and-forget, so two overlapping runs (a user
 * message racing a dispatcher-polled native task; two chat tabs streaming) would
 * clobber each other's context, and `cancel()` cleared it for ALL in-flight runs.
 *
 * AsyncLocalStorage gives each run its own context: `enqueue()` starts its async
 * loop synchronously inside `runWithInferenceContext`, so the store propagates
 * through every awaited tool execution in that run — and only that run.
 */

/** Ambient state a write tool reads while executing inside an inference run. */
export interface InferenceContext {
  sql: Sql;
  sessionId: string;
  taskId: string | null;
  /** Native-BooCode permission gate, set per run from the request/task mode. */
  permissionMode?: PermissionMode;
}

/** One store per module; each `run()` call scopes its own `InferenceContext`. */
const storage = new AsyncLocalStorage<InferenceContext>();

/**
 * Bind `ctx` for the duration of the (possibly detached) async chain `fn` starts.
 * The inference runner kicks off its loop synchronously within this call, so all
 * downstream `await`s — including write-tool `execute` via the adapter — read the
 * same store. Concurrent runs each get their own; nothing is shared or cleared
 * out from under an in-flight run.
 *
 * @param ctx Context made visible to `getInferenceContext()` inside `fn`.
 * @param fn  Callback executed inside the scoped store.
 * @returns Whatever `fn` returns (a promise when `fn` is async).
 */
export function runWithInferenceContext<T>(ctx: InferenceContext, fn: () => T): T {
  return storage.run(ctx, fn);
}

/**
 * Read the context bound by the enclosing `runWithInferenceContext` call.
 *
 * @returns The context of the current run.
 * @throws Error when called with no active store, i.e. outside any run.
 */
export function getInferenceContext(): InferenceContext {
  const ctx = storage.getStore();
  if (!ctx) {
    throw new Error(
      'Write tool called outside inference context — runWithInferenceContext() did not wrap this run',
    );
  }
  return ctx;
}
|
||||
|
||||
@@ -1,6 +1,22 @@
|
||||
import type { z } from 'zod';
|
||||
import type { Sql } from '../../db.js';
|
||||
|
||||
/**
 * Unified permission ladder for native BooCode inference. Gates the write tools:
 *   plan   — read-only: create/edit/delete are denied (no staging), and
 *            `apply_pending` is denied as well.
 *   ask    — stage to the pending-changes queue; `apply_pending` is denied so the
 *            agent cannot self-apply (the human approves via the Pending Changes
 *            panel).
 *   bypass — each write is staged and then applied immediately, with no approval
 *            step; a write whose apply fails is left in the pending queue.
 * Undefined preserves the historical behavior (stage + `apply_pending` allowed).
 */
export type PermissionMode = 'plan' | 'ask' | 'bypass';
|
||||
|
||||
/**
 * Narrow a raw task/request mode id to a unified PermissionMode.
 *
 * @param id Raw mode id; may be null/undefined or an id this ladder does not
 *           know (e.g. an external agent's native mode id).
 * @returns `id` itself when it is exactly `'plan'`, `'ask'` or `'bypass'`
 *          (case-sensitive), otherwise `undefined`.
 */
export function asPermissionMode(id: string | null | undefined): PermissionMode | undefined {
  return id === 'plan' || id === 'ask' || id === 'bypass' ? id : undefined;
}
|
||||
|
||||
export interface ToolJsonSchema {
|
||||
type: 'function';
|
||||
function: {
|
||||
@@ -21,6 +37,8 @@ export interface ToolContext {
|
||||
sql: Sql;
|
||||
sessionId: string;
|
||||
taskId: string | null;
|
||||
/** Native-BooCode permission gate for write tools (undefined = legacy behavior). */
|
||||
permissionMode?: PermissionMode;
|
||||
}
|
||||
|
||||
export interface ToolDef<TInput> {
|
||||
|
||||
53
apps/coder/src/services/tools/write-gate.ts
Normal file
53
apps/coder/src/services/tools/write-gate.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* Permission-gate helpers for native BooCode write tools. The gate comes from
|
||||
* the per-run inference context (`ToolContext.permissionMode`):
|
||||
* plan — deny the write (read-only); nothing is staged.
|
||||
* bypass — apply the staged change immediately (no queue, no approval).
|
||||
* ask / undefined — leave it in the pending-changes queue for review.
|
||||
*/
|
||||
import type { ToolContext } from './types.js';
|
||||
import { applyOne } from '../pending_changes.js';
|
||||
|
||||
/**
 * Build the result returned when a write is denied under Plan (read-only) mode.
 *
 * @param operation Tool name to report in the denial (callers pass e.g.
 *                  `'create_file'`, `'edit_file'`, `'delete_file'`).
 * @returns A plain object with `status: 'denied'`, the `operation`, and a
 *          message naming the operation that was refused.
 */
export function denyReadOnly(operation: string): unknown {
  return {
    status: 'denied',
    operation,
    message: `Read-only (Plan) permission mode — ${operation} is not permitted. Switch to Ask or Bypass to make changes.`,
  };
}
|
||||
|
||||
/**
 * Finalize a just-staged change per the permission gate: apply now under Bypass,
 * otherwise return it as queued for the human to approve.
 *
 * @param context     Tool context; only `permissionMode` and `sql` are read here.
 * @param projectRoot Project root forwarded to `applyOne` on the bypass path.
 * @param change      The staged change (id, path, operation) to report on.
 * @param queuedHint  Message returned verbatim when the change stays queued.
 * @returns Under `bypass`: `status` is `'applied'` or `'failed'` according to the
 *          `applyOne` result. For any other mode (including undefined):
 *          `status: 'queued'` with `queuedHint` as the message.
 */
export async function finalizeWrite(
  context: ToolContext,
  projectRoot: string,
  change: { id: string; file_path: string; operation: string },
  queuedHint: string,
): Promise<unknown> {
  if (context.permissionMode === 'bypass') {
    const res = await applyOne(context.sql, change.id, projectRoot);
    const outcome = res.success ? 'applied' : 'FAILED: ' + (res.error ?? '?');
    console.log(`[write-gate] bypass apply ${change.operation} ${change.file_path} -> ${outcome}`);
    return {
      status: res.success ? 'applied' : 'failed',
      change_id: change.id,
      file_path: change.file_path,
      operation: change.operation,
      // On failure the change is not discarded; the message tells the caller it
      // is still sitting in the pending queue.
      message: res.success
        ? `${change.operation} applied to ${change.file_path}.`
        : `Apply failed for ${change.file_path}: ${res.error ?? 'unknown error'}. Left in the pending queue.`,
    };
  }

  // Ask and legacy (undefined) modes: leave the change staged for review.
  console.log(
    `[write-gate] ${context.permissionMode ?? 'legacy'} queued ${change.operation} ${change.file_path}`,
  );
  return {
    status: 'queued',
    change_id: change.id,
    file_path: change.file_path,
    operation: change.operation,
    message: queuedHint,
  };
}
|
||||
Reference in New Issue
Block a user