feat: write/edit robustness — fuzzy patch applier + worktree checkpoints (v2.7.1)
#3 Fuzzy patch applier: new pure fuzzy-match.ts (locateMatch: exact → trim → unicode-canon → Levenshtein ≥ 0.66, refuse-on-ambiguous) wired into pending_changes applyOne/rewindOne, so local-model whitespace/unicode drift in old_string no longer loses the edit. #4 Worktree checkpoint + conversation-trim: checkpoints table + checkpoints.ts (shadow-commit of tracked + untracked files into refs/boocode/checkpoints, hooked into the 3 external-agent dispatcher paths) + POST restore route (reset --hard + clean -fd → transcript trim → backend-session reset) + "Restore to here" UI. Built by 3 parallel agents; DB-integration testing caught a created_at self-deletion bug. Coder suite: 234 passing; server + coder build and web tsc clean. Builds on v2.7.0-mit. openspec write-edit-robustness. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
297
apps/coder/src/services/checkpoints.ts
Normal file
297
apps/coder/src/services/checkpoints.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
/**
|
||||
* write-edit-robustness #4 — worktree checkpoints.
|
||||
*
|
||||
* External agents (opencode / goose / qwen / claude) write DIRECTLY into the
|
||||
* shared session worktree (`/tmp/booworktrees/sess-<id>`); BooCode's own `rewind`
|
||||
* only reverses `pending_changes` against the project root, so it has zero coverage
|
||||
* there. A checkpoint is a pre-turn shadow-commit of the worktree tree (tracked +
|
||||
* untracked) captured WITHOUT touching the real index/working tree, stored in a
|
||||
* private GC-safe ref. `restoreCheckpoint` rewinds the worktree to that commit,
|
||||
* trims the transcript from the anchor message forward, and resets the agent
|
||||
* backend so the next turn re-establishes a fresh context consistent with the
|
||||
* restored files.
|
||||
*
|
||||
* All git goes through hostExec + shellEscape (BooCoder runs on the host; the
|
||||
* worktrees live on the host fs). Checkpoint CREATION is best-effort: a failure
|
||||
* logs and returns null — it must NEVER throw into the dispatch turn.
|
||||
*/
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import { hostExec } from './host-exec.js';
|
||||
import { agentPool, OPENCODE_POOL_KEY } from './agent-pool.js';
|
||||
import type { AgentSessionHandle } from './agent-backend.js';
|
||||
|
||||
/**
 * Quote `s` so a POSIX shell reads it as exactly one literal word.
 *
 * The value is wrapped in single quotes; every single quote inside it is
 * rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
 * Used for the paths and ref names interpolated into git commands in this file.
 */
function shellEscape(s: string): string {
  const escapedInner = s.split("'").join("'\\''");
  return `'${escapedInner}'`;
}
|
||||
|
||||
/**
 * Pure builder for the shadow-commit shell command.
 *
 * The command, run inside the worktree:
 *   1. creates a temp file and uses it as `GIT_INDEX_FILE`, so the real
 *      index and working tree are left untouched;
 *   2. seeds that index from HEAD (`read-tree`) and stages tracked + untracked
 *      files into it (`add -A`);
 *   3. writes the tree, commits it parented on HEAD, and points the private ref
 *      `refs/boocode/checkpoints/<id>` at the commit;
 *   4. prints ONLY the resulting SHA on stdout (no trailing newline), so the
 *      caller can parse `stdout.trim()` directly.
 *
 * The temp index is removed by an EXIT trap registered right after `mktemp`,
 * so it is cleaned up even when a git step in the `&&` chain fails (the
 * chain's exit status is preserved — the trap does not call `exit`).
 *
 * @param worktreePath Directory the command `cd`s into; shell-escaped.
 * @param id Checkpoint id used as the last segment of the ref name; the ref is
 *   shell-escaped as a whole.
 * @returns A single shell command string.
 */
export function buildShadowCommitCommand(worktreePath: string, id: string): string {
  const wt = shellEscape(worktreePath);
  const ref = shellEscape(`refs/boocode/checkpoints/${id}`);
  const steps = [
    `cd ${wt}`,
    `TMP=$(mktemp)`,
    // Registered before any git step so a mid-chain failure cannot leak the temp index.
    `trap 'rm -f "$TMP"' EXIT`,
    `GIT_INDEX_FILE="$TMP" git read-tree HEAD`,
    `GIT_INDEX_FILE="$TMP" git add -A`,
    `TREE=$(GIT_INDEX_FILE="$TMP" git write-tree)`,
    `SHA=$(git commit-tree "$TREE" -p HEAD -m "boocode checkpoint")`,
    `git update-ref ${ref} "$SHA"`,
    `printf '%s' "$SHA"`,
  ];
  return steps.join(' && ');
}
|
||||
|
||||
/** Input for `createCheckpoint`: where to snapshot and what to record on the row. */
export interface CreateCheckpointArgs {
  /** Chat the checkpoint belongs to; stored as `chat_id`. */
  chatId: string;
  /** Owning session, or null; stored as `session_id`. */
  sessionId: string | null;
  /** Worktree row id, or null; stored as `worktree_id`. */
  worktreeId: string | null;
  /** Filesystem path of the worktree the shadow commit is taken in. */
  worktreePath: string;
  /** Anchor message, or null; stored as `message_id`. */
  messageId: string | null;
  /** Optional label; stored as NULL when omitted or null. */
  label?: string | null;
}
|
||||
|
||||
/**
 * Best-effort removal of `refs/boocode/checkpoints/<id>` in the worktree's repo.
 * Used when the shadow commit succeeded but no DB row ended up referencing it,
 * so the private ref would otherwise pin the commit forever. Never throws.
 */
async function dropOrphanedCheckpointRef(
  worktreePath: string,
  id: string,
  log?: FastifyBaseLogger,
): Promise<void> {
  try {
    const res = await hostExec(
      `git -C ${shellEscape(worktreePath)} update-ref -d ${shellEscape(`refs/boocode/checkpoints/${id}`)}`,
      { timeoutMs: 30_000 },
    );
    if (res.exitCode !== 0) {
      log?.warn(
        { checkpointId: id, worktreePath, stderr: res.stderr.trim().slice(0, 500) },
        'checkpoint: orphaned ref cleanup did not succeed',
      );
    }
  } catch (err) {
    log?.warn(
      { checkpointId: id, worktreePath, err: err instanceof Error ? err.message : String(err) },
      'checkpoint: orphaned ref cleanup threw',
    );
  }
}

/**
 * Capture a pre-turn checkpoint of the session worktree.
 *
 * Runs the shadow-commit command on the host, then inserts a `checkpoints` row
 * carrying the resulting commit SHA. Best-effort: every failure is logged at
 * `warn` and turned into a `null` return — this function NEVER throws, so the
 * calling turn proceeds either way (a missing checkpoint just means there is no
 * restore point for that turn).
 *
 * The id is minted up front so the git ref name
 * (`refs/boocode/checkpoints/<id>`) matches the DB row id. To keep ref and row
 * in lockstep on failure as well, the ref is deleted again (best-effort) when
 * the shadow commit succeeded but no row was written.
 *
 * @param sql Tagged-template SQL client used for the INSERT.
 * @param args Worktree location and the ids recorded on the row.
 * @param opts Optional abort signal for the host command and a logger.
 * @returns `{ id, commit_sha }` of the inserted row, or `null` on any failure.
 */
export async function createCheckpoint(
  sql: Sql,
  args: CreateCheckpointArgs,
  opts?: { signal?: AbortSignal; log?: FastifyBaseLogger },
): Promise<{ id: string; commit_sha: string } | null> {
  const id = randomUUID();
  // True from the moment the shadow-commit command exits 0 (ref exists) until
  // the row is inserted; while true, a failure must clean the ref up.
  let refNeedsCleanup = false;
  try {
    const cmd = buildShadowCommitCommand(args.worktreePath, id);
    const res = await hostExec(cmd, { signal: opts?.signal, timeoutMs: 30_000 });
    if (res.exitCode !== 0) {
      opts?.log?.warn(
        { chatId: args.chatId, worktreePath: args.worktreePath, stderr: res.stderr.trim().slice(0, 500) },
        'checkpoint: shadow-commit failed (turn proceeds without a checkpoint)',
      );
      return null;
    }
    refNeedsCleanup = true;

    const commitSha = res.stdout.trim();
    if (!commitSha) {
      opts?.log?.warn(
        { chatId: args.chatId, worktreePath: args.worktreePath },
        'checkpoint: shadow-commit produced no SHA (turn proceeds)',
      );
      await dropOrphanedCheckpointRef(args.worktreePath, id, opts?.log);
      return null;
    }

    await sql`
      INSERT INTO checkpoints (id, chat_id, session_id, worktree_id, message_id, commit_sha, label)
      VALUES (${id}, ${args.chatId}, ${args.sessionId}, ${args.worktreeId}, ${args.messageId}, ${commitSha}, ${args.label ?? null})
    `;
    // The row now owns the ref; nothing after this point may delete it.
    refNeedsCleanup = false;

    opts?.log?.info({ checkpointId: id, chatId: args.chatId, commitSha }, 'checkpoint: created');
    return { id, commit_sha: commitSha };
  } catch (err) {
    opts?.log?.warn(
      { chatId: args.chatId, err: err instanceof Error ? err.message : String(err) },
      'checkpoint: create threw (turn proceeds without a checkpoint)',
    );
    if (refNeedsCleanup) {
      await dropOrphanedCheckpointRef(args.worktreePath, id, opts?.log);
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Raised by `restoreCheckpoint` when the checkpoint row does not exist or is
 * not in the requested session. The route maps this error to a 404.
 */
export class CheckpointNotFoundError extends Error {
  constructor(message: string) {
    super(message);
    // Explicit name so logs and serialized errors identify the subclass.
    this.name = 'CheckpointNotFoundError';
  }
}
|
||||
|
||||
/** Outcome summary returned by `restoreCheckpoint`. */
export interface RestoreCheckpointResult {
  /** Id of the checkpoint that was restored. */
  checkpoint_id: string;
  /** Number of transcript messages deleted by the trim step. */
  messages_deleted: number;
  /** True only when both `git reset --hard` and `git clean -fd` exited 0. */
  worktree_reset: boolean;
  /** True when the backend-reset step ran to completion without throwing. */
  backend_reset: boolean;
}
|
||||
|
||||
/** Optional knobs for `restoreCheckpoint`. */
export interface RestoreCheckpointOpts {
  /** Abort signal forwarded to the host git commands. */
  signal?: AbortSignal;
  /** Logger for warnings/info emitted during the restore. */
  log?: FastifyBaseLogger;
  /** If set, the checkpoint MUST belong to this session (route scope guard). */
  sessionId?: string;
}
|
||||
|
||||
/** Shape of the `checkpoints` columns selected by `restoreCheckpoint`. */
interface CheckpointRow {
  /** Row id; also the last segment of the checkpoint's private git ref. */
  id: string;
  chat_id: string;
  session_id: string | null;
  worktree_id: string | null;
  /** Anchor message for the transcript trim, or null. */
  message_id: string | null;
  /** SHA of the shadow commit the worktree is reset to. */
  commit_sha: string;
  created_at: Date;
}
|
||||
|
||||
/**
 * Restore a checkpoint.
 *
 * Steps, in order:
 *   1. load the checkpoint row and apply the optional session scope guard;
 *   2. resolve the worktree path (by `worktree_id`, else the session's active
 *      worktree);
 *   3. `git reset --hard <commit_sha>` then `git clean -fd` in that worktree;
 *   4. delete the chat's messages from the anchor message forward;
 *   5. mark the chat's agent sessions `crashed` and evict pooled backends;
 *   6. delete later checkpoints of the same chat — rows AND their private git
 *      refs — since their anchor messages were just trimmed.
 *
 * Steps 3, 5 and 6 are best-effort: failures are logged and reflected in the
 * returned flags rather than thrown. Errors from the SELECTs and from the
 * transcript DELETE are not caught here and propagate to the caller.
 *
 * @param sql Tagged-template SQL client.
 * @param checkpointId Id of the checkpoint to restore.
 * @param opts Optional abort signal, logger and session scope guard.
 * @returns What was actually done (see `RestoreCheckpointResult`).
 * @throws CheckpointNotFoundError when the checkpoint is missing or not in the
 *   requested session (route → 404).
 */
export async function restoreCheckpoint(
  sql: Sql,
  checkpointId: string,
  opts?: RestoreCheckpointOpts,
): Promise<RestoreCheckpointResult> {
  // 1. Resolve the checkpoint.
  const [cp] = await sql<CheckpointRow[]>`
    SELECT id, chat_id, session_id, worktree_id, message_id, commit_sha, created_at
    FROM checkpoints WHERE id = ${checkpointId}
  `;
  if (!cp) {
    throw new CheckpointNotFoundError('checkpoint not found');
  }
  // The guard only rejects when BOTH sides carry a session id and they differ;
  // a checkpoint stored with a NULL session_id passes it.
  if (opts?.sessionId && cp.session_id && cp.session_id !== opts.sessionId) {
    throw new CheckpointNotFoundError('checkpoint not in session');
  }

  // 2. Resolve the worktree path (by worktree_id, else the session's active one).
  let worktreePath: string | null = null;
  if (cp.worktree_id) {
    const [wt] = await sql<{ path: string }[]>`
      SELECT path FROM worktrees WHERE id = ${cp.worktree_id}
    `;
    worktreePath = wt?.path ?? null;
  }
  if (!worktreePath) {
    const sid = cp.session_id ?? opts?.sessionId ?? null;
    if (sid) {
      const [wt] = await sql<{ path: string }[]>`
        SELECT path FROM worktrees WHERE session_id = ${sid} AND status = 'active' LIMIT 1
      `;
      worktreePath = wt?.path ?? null;
    }
  }

  // 3. Worktree reset — hard-reset to the shadow commit, then clean untracked.
  // NOTE(review): `reset --hard <sha>` also moves the checked-out branch/HEAD onto
  // the shadow commit itself — confirm that is the intended end state.
  let worktreeReset = false;
  if (worktreePath) {
    const resetRes = await hostExec(
      `git -C ${shellEscape(worktreePath)} reset --hard ${shellEscape(cp.commit_sha)}`,
      { signal: opts?.signal, timeoutMs: 30_000 },
    ).catch((err) => {
      opts?.log?.warn(
        { checkpointId, err: err instanceof Error ? err.message : String(err) },
        'checkpoint restore: reset --hard threw',
      );
      return null;
    });
    if (resetRes && resetRes.exitCode === 0) {
      const cleanRes = await hostExec(
        `git -C ${shellEscape(worktreePath)} clean -fd`,
        { signal: opts?.signal, timeoutMs: 30_000 },
      ).catch((err) => {
        opts?.log?.warn(
          { checkpointId, err: err instanceof Error ? err.message : String(err) },
          'checkpoint restore: clean -fd threw',
        );
        return null;
      });
      worktreeReset = cleanRes != null && cleanRes.exitCode === 0;
      if (!worktreeReset) {
        opts?.log?.warn({ checkpointId, worktreePath }, 'checkpoint restore: clean -fd did not succeed');
      }
    } else {
      opts?.log?.warn(
        { checkpointId, worktreePath, stderr: resetRes?.stderr?.trim()?.slice(0, 500) },
        'checkpoint restore: reset --hard did not succeed',
      );
    }
  } else {
    opts?.log?.warn({ checkpointId }, 'checkpoint restore: no worktree path resolved (files not reset)');
  }

  // 4. Trim the transcript from the anchor message forward. message_parts FK to
  // messages is ON DELETE CASCADE (apps/server schema.sql:49), so parts are
  // removed with their messages — no explicit parts delete needed.
  // If the anchor message no longer exists the subquery yields NULL and nothing
  // is deleted.
  let messagesDeleted = 0;
  if (cp.message_id) {
    const deleted = await sql<{ id: string }[]>`
      DELETE FROM messages
      WHERE chat_id = ${cp.chat_id}
        AND created_at >= (SELECT created_at FROM messages WHERE id = ${cp.message_id})
      RETURNING id
    `;
    messagesDeleted = deleted.length;
  }

  // 5. Backend reset — mark the chat's agent sessions crashed so the next turn
  // re-establishes a fresh backend, and evict the live pool session(s) for this
  // (chat, agent). Warm backends hold context server-side with no partial
  // rewind, so a full reset is the only consistent option (proposal §4).
  const agentRows = await sql<{ agent: string; backend: string; agent_session_id: string | null; session_id: string | null; worktree_id: string | null }[]>`
    SELECT agent, backend, agent_session_id, session_id, worktree_id
    FROM agent_sessions WHERE chat_id = ${cp.chat_id}
  `;
  try {
    await sql`
      UPDATE agent_sessions SET status = 'crashed' WHERE chat_id = ${cp.chat_id}
    `;
  } catch (err) {
    // Still best-effort, but no longer silent: a failure here means stale
    // sessions may be reused on the next turn.
    opts?.log?.warn(
      { checkpointId, chatId: cp.chat_id, err: err instanceof Error ? err.message : String(err) },
      'checkpoint restore: marking agent sessions crashed failed',
    );
  }

  let backendReset = false;
  try {
    // opencode runs on the SHARED server (keyed on a sentinel, not the chat) — close
    // just this chat's session(s) on it, mirroring the lifecycle close-hook.
    const ocBackend = agentPool.peek(OPENCODE_POOL_KEY, 'opencode');
    if (ocBackend) {
      for (const row of agentRows) {
        if (row.backend !== 'opencode_server' || !row.agent_session_id) continue;
        const handle: AgentSessionHandle = {
          sessionId: row.session_id ?? '',
          agent: row.agent,
          backend: 'opencode_server',
          chatId: cp.chat_id,
          worktreeId: row.worktree_id ?? '',
          agentSessionId: row.agent_session_id,
          serverPort: null,
        };
        await ocBackend.closeSession(handle).catch((err) => {
          opts?.log?.warn(
            { checkpointId, err: err instanceof Error ? err.message : String(err) },
            'checkpoint restore: opencode closeSession threw',
          );
        });
      }
    }
    // Warm-ACP backends are pooled under the chat id — dispose them (kills the
    // goose/qwen child). closeChat skips busy backends (a live turn isn't torn down).
    const disposed = await agentPool.closeChat(cp.chat_id);
    backendReset = true;
    opts?.log?.info({ checkpointId, chatId: cp.chat_id, disposed }, 'checkpoint restore: backend reset');
  } catch (err) {
    opts?.log?.warn(
      { checkpointId, err: err instanceof Error ? err.message : String(err) },
      'checkpoint restore: backend reset threw',
    );
  }

  // 6. Drop now-orphaned later checkpoints for this chat (their anchor messages were
  // just trimmed). Compare `created_at` SERVER-SIDE via a subquery (NOT the JS
  // Date round-trip, which truncates the stored microsecond precision to ms and
  // would make this checkpoint delete ITSELF), and exclude this checkpoint's own
  // id so it always survives — letting the user re-restore to it.
  let orphanedIds: string[] = [];
  try {
    const dropped = await sql<{ id: string }[]>`
      DELETE FROM checkpoints
      WHERE chat_id = ${cp.chat_id}
        AND id <> ${cp.id}
        AND created_at > (SELECT created_at FROM checkpoints WHERE id = ${cp.id})
      RETURNING id
    `;
    orphanedIds = dropped.map((row) => row.id);
  } catch (err) {
    opts?.log?.warn(
      { checkpointId, chatId: cp.chat_id, err: err instanceof Error ? err.message : String(err) },
      'checkpoint restore: dropping later checkpoints failed',
    );
  }

  // The deleted rows' private refs would otherwise keep their shadow commits
  // alive forever; remove them best-effort in a single host call.
  if (worktreePath && orphanedIds.length > 0) {
    const wt = shellEscape(worktreePath);
    const dropRefsCmd = orphanedIds
      .map((id) => `git -C ${wt} update-ref -d ${shellEscape(`refs/boocode/checkpoints/${id}`)}`)
      .join('; ');
    try {
      const refRes = await hostExec(dropRefsCmd, { signal: opts?.signal, timeoutMs: 30_000 });
      if (refRes.exitCode !== 0) {
        opts?.log?.warn(
          { checkpointId, worktreePath, stderr: refRes.stderr.trim().slice(0, 500) },
          'checkpoint restore: orphaned ref cleanup did not succeed',
        );
      }
    } catch (err) {
      opts?.log?.warn(
        { checkpointId, worktreePath, err: err instanceof Error ? err.message : String(err) },
        'checkpoint restore: orphaned ref cleanup threw',
      );
    }
  }

  return {
    checkpoint_id: checkpointId,
    messages_deleted: messagesDeleted,
    worktree_reset: worktreeReset,
    backend_reset: backendReset,
  };
}
|
||||
Reference in New Issue
Block a user