fix(coder): harden edit-apply pipeline against block duplication
Root cause: two proven corruption mechanisms — (M1) non-idempotent apply stamped the same block N times when a quantized model re-emitted the same edit_file call or a turn was retried; (M2) Levenshtein tier 4 was fail-open with no uniqueness guard, silently splicing into the wrong location. Fixes applied at every layer of the pipeline: Matcher (fuzzy-match.ts): raise SIMILARITY_THRESHOLD 0.66 → 0.85; add AMBIGUITY_EPSILON uniqueness guard — two windows within 0.05 of the top score → ambiguous, not a guess; add block-anchor gate (≥3-line needles require first+last line exact match before a window is scored). Edit planner (pending_changes.ts): extract planEdit() as a pure function; idempotency guards detect already-applied states (anchored insert re-stamp, old-gone-but-new-present); findPendingDuplicate() collapses identical pending rows at queue time so M1 never reaches applyOne. Atomic writes (pending_changes.ts): temp-file + rename on the same filesystem so a crash can't leave a half-written source file; realpath() first so symlinks survive the rename. Per-file mutex (pending_changes.ts): withFileLock() serializes concurrent read-modify-write on the same path via a chained-Promise Map. EOL preservation (pending_changes.ts): normalize CRLF → LF for matching, restore native line ending on write so Windows-style files stay clean. Context isolation (inference_context.ts): replace module-level singleton with AsyncLocalStorage so concurrent inference runs (arena parallel dispatch, dispatcher poll racing a user message) each get their own scoped context with no clobbering. Tests: plan-edit.test.ts (pure planEdit unit tests), extended fuzzy-match and pending_changes_integration suites, ALS isolation test that proves overlapping runs get correct session IDs. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,120 @@
|
||||
import { readFile, writeFile, unlink, mkdir } from 'node:fs/promises';
|
||||
import { dirname } from 'node:path';
|
||||
import { readFile, writeFile, unlink, mkdir, rename, realpath } from 'node:fs/promises';
|
||||
import { dirname, join, basename } from 'node:path';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import type { Sql } from '../db.js';
|
||||
import { resolveWritePath } from './write_guard.js';
|
||||
import { locateMatch } from './fuzzy-match.js';
|
||||
|
||||
/**
|
||||
* Write a file atomically: stage to a sibling temp file, then rename over the
|
||||
* target. rename(2) on the same filesystem is atomic, so a crash mid-write can
|
||||
* never leave a half-written (truncated/corrupt) source file — readers see
|
||||
* either the old content or the complete new content. The temp lives in the same
|
||||
* directory to guarantee a same-filesystem rename.
|
||||
*
|
||||
* Symlinks: a plain writeFile FOLLOWS a symlink and writes through to its target;
|
||||
* a bare rename would REPLACE the link with a regular file. We realpath an
|
||||
* existing target first so the rename lands on the real file and the link
|
||||
* survives — preserving the prior follow-through behavior. A missing target
|
||||
* (create, or a broken link) just writes the literal path.
|
||||
*/
|
||||
async function writeFileAtomic(filePath: string, content: string): Promise<void> {
|
||||
let target = filePath;
|
||||
try {
|
||||
target = await realpath(filePath);
|
||||
} catch {
|
||||
// ENOENT (new file) or broken link — write the literal path.
|
||||
}
|
||||
const tmp = join(dirname(target), `.${basename(target)}.tmp.${process.pid}.${randomBytes(6).toString('hex')}`);
|
||||
await writeFile(tmp, content, 'utf8');
|
||||
try {
|
||||
await rename(tmp, target);
|
||||
} catch (err) {
|
||||
await unlink(tmp).catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/** Detect a file's dominant line ending so an edit can preserve it. */
|
||||
function detectEol(text: string): '\r\n' | '\n' {
|
||||
return text.includes('\r\n') ? '\r\n' : '\n';
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize the read-modify-write of a single file so two concurrent applies
|
||||
* (e.g. two chat tabs sharing one worktree, or a Bypass write racing an
|
||||
* apply_pending) can't lose an update. In-process keying is sufficient —
|
||||
* BooCoder is a single Fastify process. One Map entry per distinct path.
|
||||
*/
|
||||
const fileLocks = new Map<string, Promise<void>>();
|
||||
async function withFileLock<T>(filePath: string, fn: () => Promise<T>): Promise<T> {
|
||||
const prev = fileLocks.get(filePath) ?? Promise.resolve();
|
||||
let release!: () => void;
|
||||
const current = new Promise<void>((r) => { release = r; });
|
||||
fileLocks.set(filePath, prev.then(() => current));
|
||||
await prev.catch(() => {});
|
||||
try {
|
||||
return await fn();
|
||||
} finally {
|
||||
release();
|
||||
}
|
||||
}
|
||||
|
||||
// --- Edit-apply planning (pure, unit-tested) ---------------------------------
|
||||
|
||||
/**
|
||||
* Decision for applying one queued edit to a file's current content. Pulled out
|
||||
* of `applyOne` so the splice — the part that actually corrupted files — is pure
|
||||
* and testable without a DB or filesystem. Mirrors how opencode/cline/qwen keep
|
||||
* their matchers fail-closed and idempotent.
|
||||
*/
|
||||
export type EditPlan =
|
||||
| { kind: 'apply'; updated: string }
|
||||
| { kind: 'noop'; reason: 'identical' | 'already-applied' }
|
||||
| { kind: 'ambiguous'; count: number }
|
||||
| { kind: 'not_found' };
|
||||
|
||||
/**
|
||||
* Decide how (or whether) to apply an `old → new` edit to `content`.
|
||||
*
|
||||
* Idempotency is the whole point here: a queued edit can legitimately be
|
||||
* re-applied (a local model re-emits the same tool call; a turn is retried; the
|
||||
* same change sits in the queue twice). A naive splice stamps the new text again
|
||||
* each time — the 2–3× block duplication. Two guards make re-application a no-op:
|
||||
*
|
||||
* - already-applied (anchored insert): when `new` is `old` + an appended block
|
||||
* (`old="anchor"`, `new="anchor\n<block>"`), `old` still matches uniquely after
|
||||
* the first apply, so a second apply would duplicate `<block>`. If the full
|
||||
* `new` text is already present at the match site, the edit is already applied.
|
||||
* - already-applied (old gone): if `old` can't be located but `new` is already
|
||||
* in the file, the change landed on a prior pass — treat as a no-op, not an error.
|
||||
* - identical: the splice would not change the file.
|
||||
*
|
||||
* Anything ambiguous or genuinely absent fails CLOSED so the caller surfaces a
|
||||
* correctable error instead of writing a guess.
|
||||
*/
|
||||
export function planEdit(content: string, oldStr: string, newStr: string): EditPlan {
|
||||
const match = locateMatch(content, oldStr);
|
||||
|
||||
if (match.kind === 'ambiguous') return { kind: 'ambiguous', count: match.count };
|
||||
|
||||
if (match.kind === 'not_found') {
|
||||
if (newStr.length > 0 && content.includes(newStr)) {
|
||||
return { kind: 'noop', reason: 'already-applied' };
|
||||
}
|
||||
return { kind: 'not_found' };
|
||||
}
|
||||
|
||||
const updated = content.slice(0, match.start) + newStr + content.slice(match.end);
|
||||
// No-change splice first (covers old === new), then the anchored re-stamp guard:
|
||||
// the full replacement already sits at the match site (re-emitted anchored insert).
|
||||
if (updated === content) return { kind: 'noop', reason: 'identical' };
|
||||
if (content.slice(match.start, match.start + newStr.length) === newStr) {
|
||||
return { kind: 'noop', reason: 'already-applied' };
|
||||
}
|
||||
return { kind: 'apply', updated };
|
||||
}
|
||||
|
||||
// --- Types -------------------------------------------------------------------
|
||||
|
||||
export interface PendingChange {
|
||||
@@ -47,6 +158,13 @@ export async function queueEdit(
|
||||
const resolved = resolveWritePath(projectRoot, filePath);
|
||||
const diff = JSON.stringify({ old: oldString, new: newString });
|
||||
|
||||
// Idempotent queue: collapse an identical edit that is still pending. Local
|
||||
// quantized models re-emit the same edit_file call within a turn, and a retried
|
||||
// turn re-queues — each duplicate row would apply and stamp another copy. One
|
||||
// pending row per (session, file, operation, diff) is enough.
|
||||
const existing = await findPendingDuplicate(sql, sessionId, resolved, 'edit', diff);
|
||||
if (existing) return existing;
|
||||
|
||||
const [row] = await sql<PendingChange[]>`
|
||||
INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
|
||||
VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff}, ${agent})
|
||||
@@ -55,6 +173,28 @@ export async function queueEdit(
|
||||
return row!;
|
||||
}
|
||||
|
||||
/** Return an identical still-pending change for this (session, file, op, diff),
|
||||
* or undefined. Used to keep the queue idempotent against re-emitted edits. */
|
||||
async function findPendingDuplicate(
|
||||
sql: Sql,
|
||||
sessionId: string,
|
||||
resolvedPath: string,
|
||||
operation: 'create' | 'edit' | 'delete',
|
||||
diff: string,
|
||||
): Promise<PendingChange | undefined> {
|
||||
const [row] = await sql<PendingChange[]>`
|
||||
SELECT * FROM pending_changes
|
||||
WHERE session_id = ${sessionId}
|
||||
AND file_path = ${resolvedPath}
|
||||
AND operation = ${operation}
|
||||
AND diff = ${diff}
|
||||
AND status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
`;
|
||||
return row;
|
||||
}
|
||||
|
||||
export async function queueCreate(
|
||||
sql: Sql,
|
||||
sessionId: string,
|
||||
@@ -68,6 +208,9 @@ export async function queueCreate(
|
||||
): Promise<PendingChange> {
|
||||
const resolved = resolveWritePath(projectRoot, filePath);
|
||||
|
||||
const existing = await findPendingDuplicate(sql, sessionId, resolved, 'create', content);
|
||||
if (existing) return existing;
|
||||
|
||||
const [row] = await sql<PendingChange[]>`
|
||||
INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
|
||||
VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content}, ${agent})
|
||||
@@ -87,6 +230,9 @@ export async function queueDelete(
|
||||
): Promise<PendingChange> {
|
||||
const resolved = resolveWritePath(projectRoot, filePath);
|
||||
|
||||
const existing = await findPendingDuplicate(sql, sessionId, resolved, 'delete', '');
|
||||
if (existing) return existing;
|
||||
|
||||
const [row] = await sql<PendingChange[]>`
|
||||
INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
|
||||
VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '', ${agent})
|
||||
@@ -110,48 +256,60 @@ export async function applyOne(
|
||||
}
|
||||
|
||||
try {
|
||||
// Re-validate path in case projectRoot has shifted
|
||||
resolveWritePath(projectRoot, change.file_path);
|
||||
return await withFileLock(change.file_path, async () => {
|
||||
// Re-validate path in case projectRoot has shifted
|
||||
resolveWritePath(projectRoot, change.file_path);
|
||||
|
||||
switch (change.operation) {
|
||||
case 'create': {
|
||||
await mkdir(dirname(change.file_path), { recursive: true });
|
||||
await writeFile(change.file_path, change.diff, 'utf8');
|
||||
break;
|
||||
}
|
||||
case 'edit': {
|
||||
const { old: oldStr, new: newStr } = JSON.parse(change.diff) as { old: string; new: string };
|
||||
const content = await readFile(change.file_path, 'utf8');
|
||||
const match = locateMatch(content, oldStr);
|
||||
if (match.kind === 'ambiguous') {
|
||||
throw new Error(
|
||||
`old_string matches ${match.count} locations — add surrounding context to disambiguate`,
|
||||
);
|
||||
switch (change.operation) {
|
||||
case 'create': {
|
||||
await mkdir(dirname(change.file_path), { recursive: true });
|
||||
await writeFileAtomic(change.file_path, change.diff);
|
||||
break;
|
||||
}
|
||||
if (match.kind === 'not_found') {
|
||||
throw new Error(
|
||||
'old_string not found in file (even fuzzily) — file may have changed since the edit was queued',
|
||||
);
|
||||
case 'edit': {
|
||||
const { old: oldStr, new: newStr } = JSON.parse(change.diff) as { old: string; new: string };
|
||||
const raw = await readFile(change.file_path, 'utf8');
|
||||
// Normalize to LF for matching, then write back in the file's native EOL
|
||||
// so an LF-emitting model doesn't leave a CRLF file with mixed endings.
|
||||
const eol = detectEol(raw);
|
||||
const toLf = (t: string) => t.replaceAll('\r\n', '\n');
|
||||
const plan = planEdit(toLf(raw), toLf(oldStr), toLf(newStr));
|
||||
if (plan.kind === 'ambiguous') {
|
||||
throw new Error(
|
||||
`old_string matches ${plan.count} locations — add surrounding context to disambiguate`,
|
||||
);
|
||||
}
|
||||
if (plan.kind === 'not_found') {
|
||||
throw new Error(
|
||||
'old_string not found in file (even fuzzily) — file may have changed since the edit was queued',
|
||||
);
|
||||
}
|
||||
if (plan.kind === 'apply') {
|
||||
const out = eol === '\r\n' ? plan.updated.replaceAll('\n', '\r\n') : plan.updated;
|
||||
await writeFileAtomic(change.file_path, out);
|
||||
} else {
|
||||
// noop: the edit is already applied (re-emitted / retried) or a no-change.
|
||||
// Mark it applied without rewriting so it can't stamp a duplicate.
|
||||
console.log(`[pending] edit ${change.file_path} is a no-op (${plan.reason}) — not rewriting`);
|
||||
}
|
||||
break;
|
||||
}
|
||||
const updated = content.slice(0, match.start) + newStr + content.slice(match.end);
|
||||
await writeFile(change.file_path, updated, 'utf8');
|
||||
break;
|
||||
}
|
||||
case 'delete': {
|
||||
// Stash current content in diff for potential rewind
|
||||
try {
|
||||
const existing = await readFile(change.file_path, 'utf8');
|
||||
await sql`UPDATE pending_changes SET diff = ${existing} WHERE id = ${changeId}`;
|
||||
} catch {
|
||||
// File may already be gone — proceed with status update
|
||||
case 'delete': {
|
||||
// Stash current content in diff for potential rewind
|
||||
try {
|
||||
const existing = await readFile(change.file_path, 'utf8');
|
||||
await sql`UPDATE pending_changes SET diff = ${existing} WHERE id = ${changeId}`;
|
||||
} catch {
|
||||
// File may already be gone — proceed with status update
|
||||
}
|
||||
await unlink(change.file_path);
|
||||
break;
|
||||
}
|
||||
await unlink(change.file_path);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
await sql`UPDATE pending_changes SET status = 'applied' WHERE id = ${changeId}`;
|
||||
return { id: change.id, file_path: change.file_path, operation: change.operation, success: true };
|
||||
await sql`UPDATE pending_changes SET status = 'applied' WHERE id = ${changeId}`;
|
||||
return { id: change.id, file_path: change.file_path, operation: change.operation, success: true };
|
||||
});
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
return { id: change.id, file_path: change.file_path, operation: change.operation, success: false, error: message };
|
||||
@@ -220,13 +378,13 @@ export async function rewindOne(
|
||||
);
|
||||
}
|
||||
const reverted = content.slice(0, match.start) + oldStr + content.slice(match.end);
|
||||
await writeFile(change.file_path, reverted, 'utf8');
|
||||
await writeFileAtomic(change.file_path, reverted);
|
||||
break;
|
||||
}
|
||||
case 'delete': {
|
||||
// Reverse a delete: recreate the file (diff holds the original content stashed at apply time)
|
||||
await mkdir(dirname(change.file_path), { recursive: true });
|
||||
await writeFile(change.file_path, change.diff, 'utf8');
|
||||
await writeFileAtomic(change.file_path, change.diff);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user