fix(coder): harden edit-apply pipeline against block duplication

Root cause: two proven corruption mechanisms — (M1) non-idempotent apply
stamped the same block N times when a quantized model re-emitted the same
edit_file call or a turn was retried; (M2) Levenshtein tier 4 was fail-open
with no uniqueness guard, silently splicing into the wrong location.

Fixes applied at every layer of the pipeline:

Matcher (fuzzy-match.ts): raise SIMILARITY_THRESHOLD 0.66 → 0.85; add
AMBIGUITY_EPSILON uniqueness guard — two windows within 0.05 of the top
score → ambiguous, not a guess; add block-anchor gate (≥3-line needles
require first+last line exact match before a window is scored).

Edit planner (pending_changes.ts): extract planEdit() as a pure function;
idempotency guards detect already-applied states (anchored insert re-stamp,
old-gone-but-new-present); findPendingDuplicate() collapses identical
pending rows at queue time so M1 never reaches applyOne.

Atomic writes (pending_changes.ts): temp-file + rename on the same
filesystem so a crash can't leave a half-written source file; realpath()
first so symlinks survive the rename.

Per-file mutex (pending_changes.ts): withFileLock() serializes concurrent
read-modify-write on the same path via a chained-Promise Map.

EOL preservation (pending_changes.ts): normalize CRLF → LF for matching,
restore native line ending on write so Windows-style files stay clean.

Context isolation (inference_context.ts): replace module-level singleton
with AsyncLocalStorage so concurrent inference runs (arena parallel
dispatch, dispatcher poll racing a user message) each get their own
scoped context with no clobbering.

Tests: plan-edit.test.ts (pure planEdit unit tests), extended fuzzy-match
and pending_changes_integration suites, ALS isolation test that proves
overlapping runs get correct session IDs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 01:44:37 +00:00
parent dbf1662982
commit cce685b1a7
16 changed files with 644 additions and 157 deletions

View File

@@ -0,0 +1,38 @@
import { describe, it, expect } from 'vitest';
import { runWithInferenceContext, getInferenceContext } from '../inference_context.js';
import type { Sql } from '../../../db.js';
const fakeSql = {} as unknown as Sql;
describe('inference context (AsyncLocalStorage isolation)', () => {
it('throws when read outside a run', () => {
expect(() => getInferenceContext()).toThrow(/outside inference context/);
});
it('keeps each run its own context across overlapping awaits', async () => {
// The race the global `let current` had: run B starts (and would overwrite a
// shared global) while run A is awaiting. After A resumes it must still read
// its OWN sessionId, not B's.
const run = (id: string, delay: number) =>
runWithInferenceContext({ sql: fakeSql, sessionId: id, taskId: null }, async () => {
await new Promise((r) => setTimeout(r, delay));
return getInferenceContext().sessionId;
});
const [a, b] = await Promise.all([run('A', 20), run('B', 5)]);
expect(a).toBe('A');
expect(b).toBe('B');
});
it('carries permissionMode and taskId per run', async () => {
const result = await runWithInferenceContext(
{ sql: fakeSql, sessionId: 's1', taskId: 't1', permissionMode: 'bypass' },
async () => {
await Promise.resolve();
const ctx = getInferenceContext();
return { taskId: ctx.taskId, mode: ctx.permissionMode };
},
);
expect(result).toEqual({ taskId: 't1', mode: 'bypass' });
});
});

View File

@@ -26,6 +26,15 @@ export const applyPendingTool: ToolDef<ApplyPendingInputT> = {
},
},
async execute(_input: ApplyPendingInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
// Under Ask (and Plan) the human approves via the Pending Changes panel — the
// agent must not auto-apply. Bypass and legacy (undefined) may apply.
if (context.permissionMode === 'ask' || context.permissionMode === 'plan') {
return {
status: 'denied',
message:
'Permission mode is Ask — staged changes must be approved by the user in the Pending Changes panel, not applied by the agent.',
};
}
const results = await applyAll(context.sql, context.sessionId, projectRoot);
const succeeded = results.filter((r) => r.success).length;
const failed = results.filter((r) => !r.success).length;

View File

@@ -1,6 +1,7 @@
import { z } from 'zod';
import type { ToolDef, ToolContext } from './types.js';
import { queueCreate } from '../pending_changes.js';
import { denyReadOnly, finalizeWrite } from './write-gate.js';
const CreateFileInput = z.object({
file_path: z.string().min(1),
@@ -32,6 +33,7 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
},
},
async execute(input: CreateFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
if (context.permissionMode === 'plan') return denyReadOnly('create_file');
const change = await queueCreate(
context.sql,
context.sessionId,
@@ -40,12 +42,11 @@ export const createFileTool: ToolDef<CreateFileInputT> = {
input.content,
projectRoot,
);
return {
status: 'queued',
change_id: change.id,
file_path: change.file_path,
operation: 'create',
message: `File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
};
return finalizeWrite(
context,
projectRoot,
change,
`File creation queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
);
},
};

View File

@@ -1,6 +1,7 @@
import { z } from 'zod';
import type { ToolDef, ToolContext } from './types.js';
import { queueDelete } from '../pending_changes.js';
import { denyReadOnly, finalizeWrite } from './write-gate.js';
const DeleteFileInput = z.object({
file_path: z.string().min(1),
@@ -30,6 +31,7 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
},
},
async execute(input: DeleteFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
if (context.permissionMode === 'plan') return denyReadOnly('delete_file');
const change = await queueDelete(
context.sql,
context.sessionId,
@@ -37,12 +39,11 @@ export const deleteFileTool: ToolDef<DeleteFileInputT> = {
input.file_path,
projectRoot,
);
return {
status: 'queued',
change_id: change.id,
file_path: change.file_path,
operation: 'delete',
message: `File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
};
return finalizeWrite(
context,
projectRoot,
change,
`File deletion queued: ${change.file_path}. Use apply_pending to write changes to disk.`,
);
},
};

View File

@@ -1,6 +1,7 @@
import { z } from 'zod';
import type { ToolDef, ToolContext } from './types.js';
import { queueEdit } from '../pending_changes.js';
import { denyReadOnly, finalizeWrite } from './write-gate.js';
const EditFileInput = z.object({
file_path: z.string().min(1),
@@ -34,6 +35,7 @@ export const editFileTool: ToolDef<EditFileInputT> = {
},
},
async execute(input: EditFileInputT, projectRoot: string, context: ToolContext): Promise<unknown> {
if (context.permissionMode === 'plan') return denyReadOnly('edit_file');
const change = await queueEdit(
context.sql,
context.sessionId,
@@ -43,12 +45,11 @@ export const editFileTool: ToolDef<EditFileInputT> = {
input.new_string,
projectRoot,
);
return {
status: 'queued',
change_id: change.id,
file_path: change.file_path,
operation: 'edit',
message: `Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
};
return finalizeWrite(
context,
projectRoot,
change,
`Edit queued for ${change.file_path}. Use apply_pending to write changes to disk.`,
);
},
};

View File

@@ -1,36 +1,49 @@
import { AsyncLocalStorage } from 'node:async_hooks';
import type { Sql } from '../../db.js';
import type { PermissionMode } from './types.js';
/**
* Module-level inference context for write tools.
* Per-run inference context for write tools.
*
* Set via `setInferenceContext()` before each inference run starts.
* Write tools read it via `getInferenceContext()` during execute.
* Same pattern as BooChat's `loadConfig()` singleton — tools need
* ambient state that can't be threaded through the tool-phase execute
* signature (which is `execute(input, projectRoot, extraRoots?)`).
* Write tools need ambient state (sql, sessionId, the permission gate) that the
* BooChat tool-phase `execute(input, projectRoot, extraRoots?)` signature can't
* carry. This used to be a single module-level `let current` — but the inference
* runner's `enqueue()` is fire-and-forget, so two overlapping runs (a user
* message racing a dispatcher-polled native task; two chat tabs streaming) would
* clobber each other's context, and `cancel()` cleared it for ALL in-flight runs.
*
* AsyncLocalStorage gives each run its own context: `enqueue()` starts its async
* loop synchronously inside `runWithInferenceContext`, so the store propagates
* through every awaited tool execution in that run — and only that run.
*/
export interface InferenceContext {
sql: Sql;
sessionId: string;
taskId: string | null;
/** Native-BooCode permission gate, set per run from the request/task mode. */
permissionMode?: PermissionMode;
}
let current: InferenceContext | null = null;
const storage = new AsyncLocalStorage<InferenceContext>();
export function setInferenceContext(ctx: InferenceContext): void {
current = ctx;
}
export function clearInferenceContext(): void {
current = null;
/**
* Bind `ctx` for the duration of the (possibly detached) async chain `fn` starts.
* The inference runner kicks off its loop synchronously within this call, so all
* downstream `await`s — including write-tool `execute` via the adapter — read the
* same store. Concurrent runs each get their own; nothing is shared or cleared
* out from under an in-flight run.
*/
export function runWithInferenceContext<T>(ctx: InferenceContext, fn: () => T): T {
return storage.run(ctx, fn);
}
export function getInferenceContext(): InferenceContext {
if (!current) {
const ctx = storage.getStore();
if (!ctx) {
throw new Error(
'Write tool called outside inference context — setInferenceContext() was not called before this run',
'Write tool called outside inference context — runWithInferenceContext() did not wrap this run',
);
}
return current;
return ctx;
}

View File

@@ -1,6 +1,22 @@
import type { z } from 'zod';
import type { Sql } from '../../db.js';
/**
* Unified permission ladder for native BooCode inference. Gates the write tools:
* plan — read-only: create/edit/delete are denied (no staging).
* ask — stage to the pending-changes queue; `apply_pending` is denied so the
* agent cannot self-apply (the human approves via the Diff panel).
* bypass — apply each write immediately (no queue, no approval).
* Undefined preserves the historical behavior (stage + `apply_pending` allowed).
*/
export type PermissionMode = 'plan' | 'ask' | 'bypass';
/** Narrow a raw task/request mode id to a unified PermissionMode, else undefined
* (e.g. an external agent's native mode id, or null). */
export function asPermissionMode(id: string | null | undefined): PermissionMode | undefined {
return id === 'plan' || id === 'ask' || id === 'bypass' ? id : undefined;
}
export interface ToolJsonSchema {
type: 'function';
function: {
@@ -21,6 +37,8 @@ export interface ToolContext {
sql: Sql;
sessionId: string;
taskId: string | null;
/** Native-BooCode permission gate for write tools (undefined = legacy behavior). */
permissionMode?: PermissionMode;
}
export interface ToolDef<TInput> {

View File

@@ -0,0 +1,53 @@
/**
* Permission-gate helpers for native BooCode write tools. The gate comes from
* the per-run inference context (`ToolContext.permissionMode`):
* plan — deny the write (read-only); nothing is staged.
* bypass — apply the staged change immediately (no queue, no approval).
* ask / undefined — leave it in the pending-changes queue for review.
*/
import type { ToolContext } from './types.js';
import { applyOne } from '../pending_changes.js';
/** Result returned when a write is denied under Plan (read-only) mode. */
export function denyReadOnly(operation: string): unknown {
return {
status: 'denied',
operation,
message: `Read-only (Plan) permission mode — ${operation} is not permitted. Switch to Ask or Bypass to make changes.`,
};
}
/** Finalize a just-staged change per the permission gate: apply now under Bypass,
* otherwise return it as queued for the human to approve. */
export async function finalizeWrite(
context: ToolContext,
projectRoot: string,
change: { id: string; file_path: string; operation: string },
queuedHint: string,
): Promise<unknown> {
if (context.permissionMode === 'bypass') {
const res = await applyOne(context.sql, change.id, projectRoot);
console.log(
`[write-gate] bypass apply ${change.operation} ${change.file_path} -> ${res.success ? 'applied' : 'FAILED: ' + (res.error ?? '?')}`,
);
return {
status: res.success ? 'applied' : 'failed',
change_id: change.id,
file_path: change.file_path,
operation: change.operation,
message: res.success
? `${change.operation} applied to ${change.file_path}.`
: `Apply failed for ${change.file_path}: ${res.error ?? 'unknown error'}. Left in the pending queue.`,
};
}
console.log(
`[write-gate] ${context.permissionMode ?? 'legacy'} queued ${change.operation} ${change.file_path}`,
);
return {
status: 'queued',
change_id: change.id,
file_path: change.file_path,
operation: change.operation,
message: queuedHint,
};
}