// File: boocode/apps/coder/src/services/worktrees.ts (392 lines, 16 KiB, TypeScript)

/**
* Git worktree management for external agent dispatch.
*
* Each dispatched task gets its own git worktree so the external agent
* can modify files freely without touching the main working tree.
* After the agent completes, we diff the worktree against HEAD and
* queue the diff into pending_changes.
*/
import type { Sql } from '../db.js';
import { hostExec } from './host-exec.js';
import type { WorktreeRiskReport } from '@boocode/contracts/worktree-risk';
import { checkWorktreeWorkAtRisk } from './worktree-risk.js';
/**
 * Host directory under which this module creates worktrees: per-task worktrees
 * at `<base>/<taskId>` and session worktrees at `<base>/sess-<sessionId>`.
 */
export const WORKTREE_BASE = '/tmp/booworktrees';
/**
 * Provision an isolated git worktree for one dispatched task.
 *
 * The worktree lives at `${WORKTREE_BASE}/<taskId>` on a fresh `task-<taskId>`
 * branch cut from the project's current HEAD.
 *
 * @param projectPath - Repository the worktree is added to (passed to `git -C`).
 * @param taskId - Used for both the directory name and the branch name.
 * @param opts.signal - Forwarded to each `hostExec` call.
 * @returns The path of the new worktree directory.
 * @throws Error when `git worktree add` exits non-zero.
 */
export async function createWorktree(
  projectPath: string,
  taskId: string,
  opts?: { signal?: AbortSignal },
): Promise<string> {
  const signal = opts?.signal;
  const target = `${WORKTREE_BASE}/${taskId}`;
  const branch = `task-${taskId}`;

  // The shared parent directory may not exist yet on a fresh host.
  await hostExec(`mkdir -p ${WORKTREE_BASE}`, { signal });

  // `worktree add <dir> -b <branch> HEAD`: new branch starting at the current HEAD.
  const addCommand = [
    'git',
    '-C',
    shellEscape(projectPath),
    'worktree',
    'add',
    shellEscape(target),
    '-b',
    shellEscape(branch),
    'HEAD',
  ].join(' ');
  const added = await hostExec(addCommand, { signal, timeoutMs: 30_000 });

  if (added.exitCode === 0) {
    return target;
  }
  // Prefer stderr; fall back to stdout when git wrote its message there.
  const detail = added.stderr.trim() || added.stdout.trim();
  throw new Error(`Failed to create worktree: ${detail}`);
}
/**
 * Commit the worktree's pending edits and return its unified diff against a baseline.
 *
 * Steps: stage everything (`git add -A`), return `''` when nothing is staged,
 * otherwise commit on the worktree branch and run
 * `git diff <baseRef>...<worktree HEAD>` from the project repository.
 *
 * @param worktreePath - Worktree directory whose changes are collected.
 * @param projectPath - Main repository; the final diff runs with `git -C` here.
 * @param opts.signal - Forwarded to every `hostExec` call.
 * @param opts.baseRef - Baseline ref for the diff. Defaults to `'HEAD'`, which is
 *   resolved inside `projectPath`. The session-worktree path passes the captured
 *   base commit so the diff stays stable even if project HEAD advances.
 * @returns The diff text, or `''` when `git add -A` staged nothing.
 * @throws Error when staging, committing, or diffing exits non-zero.
 *
 * NOTE(review): the early `''` return fires whenever nothing NEW is staged, even
 * if the worktree branch already carries commits ahead of the baseline — confirm
 * callers do not expect the accumulated diff in that case.
 */
export async function diffWorktree(
  worktreePath: string,
  projectPath: string,
  opts?: { signal?: AbortSignal; baseRef?: string },
): Promise<string> {
  // Stage all changes so the commit below captures the whole working tree.
  const addResult = await hostExec(
    `cd ${shellEscape(worktreePath)} && git add -A`,
    { signal: opts?.signal, timeoutMs: 30_000 },
  );
  if (addResult.exitCode !== 0) {
    throw new Error(`Failed to stage worktree changes: ${addResult.stderr.trim()}`);
  }

  // `git diff --cached --quiet` exits 0 when the index matches HEAD (nothing staged).
  const statusResult = await hostExec(
    `cd ${shellEscape(worktreePath)} && git diff --cached --quiet`,
    { signal: opts?.signal, timeoutMs: 10_000 },
  );
  if (statusResult.exitCode === 0) {
    // No changes
    return '';
  }

  // Commit staged changes (needed to produce a clean branch diff).
  const commitResult = await hostExec(
    `cd ${shellEscape(worktreePath)} && git -c user.email=boocoder@local -c user.name=BooCoder commit -m "task changes" --allow-empty`,
    { signal: opts?.signal, timeoutMs: 15_000 },
  );
  if (commitResult.exitCode !== 0) {
    // Without this check a failed commit (hook, signing, lock) left HEAD stale and
    // the diff below silently omitted the work that was just staged.
    throw new Error(
      `Failed to commit worktree changes: ${commitResult.stderr.trim() || commitResult.stdout.trim()}`,
    );
  }

  // Diff the worktree branch against the baseline. Per-task callers default to the
  // main tree's current HEAD; the session-worktree (opencode) path passes the
  // captured base_commit so the accumulated diff is stable across turns even if
  // project HEAD advances.
  const baseRef = opts?.baseRef ?? 'HEAD';
  const diffResult = await hostExec(
    `git -C ${shellEscape(projectPath)} diff ${shellEscape(baseRef)}...$(git -C ${shellEscape(worktreePath)} rev-parse HEAD)`,
    { signal: opts?.signal, timeoutMs: 60_000 },
  );
  if (diffResult.exitCode !== 0) {
    throw new Error(`Failed to diff worktree: ${diffResult.stderr.trim()}`);
  }
  return diffResult.stdout;
}
/**
 * Tear down a per-task worktree: remove its directory, then delete its branch.
 *
 * Best-effort by design — a rejected `hostExec` call is swallowed and exit codes
 * are not inspected, because the task may already have been cleaned up.
 *
 * @param projectPath - Repository that owns the worktree (passed to `git -C`).
 * @param taskId - Task whose `${WORKTREE_BASE}/<taskId>` directory and
 *   `task-<taskId>` branch are removed.
 */
export async function cleanupWorktree(
  projectPath: string,
  taskId: string,
): Promise<void> {
  const repo = shellEscape(projectPath);
  const steps: Array<{ command: string; timeoutMs: number }> = [
    {
      // --force so a dirty worktree does not block removal.
      command: `git -C ${repo} worktree remove ${shellEscape(`${WORKTREE_BASE}/${taskId}`)} --force`,
      timeoutMs: 15_000,
    },
    {
      // The branch can only be deleted once no worktree has it checked out.
      command: `git -C ${repo} branch -D ${shellEscape(`task-${taskId}`)}`,
      timeoutMs: 10_000,
    },
  ];

  for (const { command, timeoutMs } of steps) {
    await hostExec(command, { timeoutMs }).catch(() => {});
  }
}
// ─── v2.6: session-keyed persistent worktree ────────────────────────────────
/** Handle for a session's persistent worktree, as returned by `ensureSessionWorktree`. */
export interface SessionWorktree {
/** P1.5-b: the `worktrees.id` — stored on agent_sessions informationally. */
worktreeId: string;
/** Worktree directory path (the `worktrees.path` column). */
worktreePath: string;
/**
 * Diff baseline commit (the `worktrees.base_commit` column). Captured from the
 * project's HEAD at creation; null when that HEAD could not be resolved.
 */
baseCommit: string | null;
}
/**
 * v2.6 / P1.5-b: create-or-reuse ONE worktree per BooCode session (shared across
 * all tabs/agents in the session), recorded in `worktrees` (was the superseded
 * `session_worktrees`). Persists — NOT torn down per turn (cleanup is Phase 3) —
 * and now survives session delete (`worktrees.session_id` is ON DELETE SET NULL).
 * Captures the project's current HEAD as `base_commit` for a stable diff baseline.
 *
 * Distinct path namespace (`session-<id>` branch, `/sess-<id>` dir) so it never
 * collides with the per-task worktrees that arena/new_task/MCP still use.
 *
 * Concurrency: two turns may race the create. The loser is recovered at two
 * points — when `git worktree add` fails because the winner already created the
 * branch/dir, and when the guarded INSERT inserts nothing — by re-reading the
 * winner's active row.
 *
 * @param sql - Database handle used to read and insert `worktrees` rows.
 * @param projectPath - Repository the worktree is added to.
 * @param sessionId - Session that owns the worktree.
 * @param opts.signal - Forwarded to every `hostExec` call.
 * @returns The active worktree for the session (existing or newly created).
 * @throws Error when `git worktree add` fails and no active row exists, or when
 *   the INSERT was skipped but no active row can be read back.
 */
export async function ensureSessionWorktree(
  sql: Sql,
  projectPath: string,
  sessionId: string,
  opts?: { signal?: AbortSignal },
): Promise<SessionWorktree> {
  type Row = { id: string; path: string; base_commit: string | null };

  // One lookup shared by the fast path and both race-recovery paths.
  const findActive = async (): Promise<Row | undefined> => {
    const [row] = await sql<Row[]>`
      SELECT id, path, base_commit FROM worktrees
      WHERE session_id = ${sessionId} AND status = 'active'
      LIMIT 1
    `;
    return row;
  };

  const existing = await findActive();
  if (existing) {
    return { worktreeId: existing.id, worktreePath: existing.path, baseCommit: existing.base_commit };
  }

  const worktreePath = `${WORKTREE_BASE}/sess-${sessionId}`;
  const branchName = `session-${sessionId}`;
  await hostExec(`mkdir -p ${WORKTREE_BASE}`, { signal: opts?.signal });

  // Capture the baseline commit BEFORE branching, so the diff is stable even if
  // project HEAD later advances.
  const headResult = await hostExec(
    `git -C ${shellEscape(projectPath)} rev-parse HEAD`,
    { signal: opts?.signal, timeoutMs: 10_000 },
  );
  const baseCommit = headResult.exitCode === 0 ? headResult.stdout.trim() || null : null;

  const result = await hostExec(
    `git -C ${shellEscape(projectPath)} worktree add ${shellEscape(worktreePath)} -b ${shellEscape(branchName)} HEAD`,
    { signal: opts?.signal, timeoutMs: 30_000 },
  );
  if (result.exitCode !== 0) {
    // A concurrent turn may have created the same branch/dir and already recorded
    // it; in that case the failure is just "lost the race", not an error.
    const raced = await findActive();
    if (raced) {
      return { worktreeId: raced.id, worktreePath: raced.path, baseCommit: raced.base_commit };
    }
    throw new Error(`Failed to create session worktree: ${result.stderr.trim() || result.stdout.trim()}`);
  }

  // Insert-or-get: WHERE NOT EXISTS keeps the first writer's row if two turns race
  // the create (the partial unique on active path also backstops it).
  const [inserted] = await sql<Row[]>`
    INSERT INTO worktrees (session_id, path, branch, base_commit, status)
    SELECT ${sessionId}, ${worktreePath}, ${branchName}, ${baseCommit}, 'active'
    WHERE NOT EXISTS (
      SELECT 1 FROM worktrees WHERE session_id = ${sessionId} AND status = 'active'
    )
    RETURNING id, path, base_commit
  `;
  if (inserted) {
    return { worktreeId: inserted.id, worktreePath: inserted.path, baseCommit: inserted.base_commit };
  }

  // Lost the race — another turn inserted first; read its row.
  const winner = await findActive();
  if (!winner) {
    // The row that blocked our INSERT is no longer active. Fail with a clear
    // message rather than dereferencing an undefined row.
    throw new Error(`Active worktree row for session ${sessionId} vanished during creation`);
  }
  return {
    worktreeId: winner.id,
    worktreePath: winner.path,
    baseCommit: winner.base_commit ?? baseCommit,
  };
}
/**
 * v2.6 Phase 3 (3.3 / 3.4): physically remove a session's persistent worktree —
 * directory and branch — then archive its `worktrees` row. Called from the
 * chat/session-close hook (last chat of a session closing) and the orphan reaper.
 *
 * The git side is best-effort: every command's failure is ignored, so an
 * already-missing directory is not an error. The row is soft-deleted
 * (`status = 'archived'`, Paseo's worktree-archive pattern) so
 * history/attribution survives and a re-run is idempotent.
 *
 * SAFETY: callers MUST run `checkWorktreeWorkAtRisk` first and skip at-risk
 * worktrees. This function removes with `--force`, so it only avoids dropping
 * uncommitted/unmerged work when the caller has already cleared or accepted the risk.
 *
 * NOTE(review): a failure of the final UPDATE is also swallowed, which would
 * leave an 'active' row pointing at a removed directory — confirm that is intended.
 *
 * @param sql - Database handle used to archive the row.
 * @param projectPath - Repository that owns the worktree (passed to `git -C`).
 * @param worktree - Row identity; `branch` is optional and skipped when empty.
 * @param opts.signal - Forwarded to every `hostExec` call.
 */
export async function removeSessionWorktree(
  sql: Sql,
  projectPath: string,
  worktree: { id: string; path: string; branch?: string | null },
  opts?: { signal?: AbortSignal },
): Promise<void> {
  const signal = opts?.signal;
  const repo = shellEscape(projectPath);

  const gitSteps: Array<[command: string, timeoutMs: number]> = [
    [`git -C ${repo} worktree remove ${shellEscape(worktree.path)} --force`, 15_000],
  ];
  if (worktree.branch) {
    gitSteps.push([`git -C ${repo} branch -D ${shellEscape(worktree.branch)}`, 10_000]);
  }
  // Prune any stale worktree administrative entries left behind by a partial remove.
  gitSteps.push([`git -C ${repo} worktree prune`, 10_000]);

  for (const [command, timeoutMs] of gitSteps) {
    await hostExec(command, { signal, timeoutMs }).catch(() => {});
  }

  await sql`UPDATE worktrees SET status = 'archived' WHERE id = ${worktree.id}`.catch(() => {});
}
/**
* v2.6 Phase 3 (3.3): the chat-close cleanup. Mark every `agent_sessions` row for
* the chat 'closed', then — only if this was the session's LAST open chat — remove
* the shared session worktree (a worktree is one-per-session, shared across the
* session's chat tabs, so closing one tab must not pull the rug from sibling tabs).
*
* Returns what it did so the route can report it. The actual backend (process /
* server-session) teardown is the pool's job (`agentPool.closeChat` +
* `backend.closeSession`); this owns the DB + git truth.
*
* `worktreeRemoved` is false when other open chats remain (worktree kept) OR when
* the worktree held work at risk (preflight blocked it — never silently dropped).
*/
export interface ChatCloseResult {
/** Number of `agent_sessions` rows this call flipped to 'closed'. */
agentRowsClosed: number;
/** True only when the session worktree removal was actually invoked. */
worktreeRemoved: boolean;
/** True when the preflight reported uncommitted changes or could not complete. */
worktreeAtRisk: boolean;
}
/**
 * Chat-close cleanup: close the chat's `agent_sessions` rows and, when this was
 * the session's last open chat, remove the shared session worktree.
 *
 * @param sql - Database handle.
 * @param chatId - Chat being closed.
 * @param opts.signal - Passed through to the risk check and the removal.
 * @param opts.force - Skip the work-at-risk preflight and remove regardless.
 * @returns Counts/flags describing what was done. `worktreeRemoved` stays false
 *   when sibling chats are still open, when no active worktree or project path
 *   is found, or when the preflight flagged the worktree as at risk.
 */
export async function closeChatBackendState(
  sql: Sql,
  chatId: string,
  opts?: { signal?: AbortSignal; force?: boolean },
): Promise<ChatCloseResult> {
  // Resolve the owning session first. The chat row may already be deleted, in
  // which case the agent_sessions link (SET NULL only on session delete, not
  // chat delete) is the fallback.
  const [chat] = await sql<{ session_id: string | null }[]>`
    SELECT session_id FROM chats WHERE id = ${chatId}
  `;
  let sessionId = chat?.session_id ?? null;
  if (!sessionId) {
    const [agentLink] = await sql<{ session_id: string | null }[]>`
      SELECT session_id FROM agent_sessions WHERE chat_id = ${chatId} AND session_id IS NOT NULL LIMIT 1
    `;
    sessionId = agentLink?.session_id ?? null;
  }

  // Close this chat's (chat, agent) backend rows; already-closed rows are skipped,
  // so repeating the call is harmless.
  const closedRows = await sql<{ agent: string }[]>`
    UPDATE agent_sessions SET status = 'closed'
    WHERE chat_id = ${chatId} AND status <> 'closed'
    RETURNING agent
  `;

  const outcome: ChatCloseResult = {
    agentRowsClosed: closedRows.length,
    worktreeRemoved: false,
    worktreeAtRisk: false,
  };
  if (!sessionId) {
    return outcome;
  }

  // The worktree is shared by every chat tab in the session: keep it while any
  // sibling chat is still open.
  const [siblings] = await sql<{ open_count: number }[]>`
    SELECT COUNT(*)::int AS open_count FROM chats
    WHERE session_id = ${sessionId} AND status = 'open' AND id <> ${chatId}
  `;
  if ((siblings?.open_count ?? 0) !== 0) {
    return outcome;
  }

  const [wt] = await sql<{ id: string; path: string; branch: string | null }[]>`
    SELECT id, path, branch FROM worktrees
    WHERE session_id = ${sessionId} AND status = 'active' LIMIT 1
  `;
  if (!wt) {
    return outcome;
  }

  const [project] = await sql<{ path: string | null }[]>`
    SELECT p.path FROM sessions s JOIN projects p ON p.id = s.project_id WHERE s.id = ${sessionId}
  `;
  const projectPath = project?.path ?? null;

  // Preflight (close-hook semantics). This is a DELIBERATE close: the server's
  // session-delete has already run the full work-at-risk gate
  // (dirty/unpushed/unmerged), and chat-close intentionally discards the tab's
  // staged review. So only UNCOMMITTED working-tree changes (`dirty`) block here —
  // work that never reached the review diff. The session branch's own commits are
  // the diff-staging mechanism and must NOT block, or every real session's
  // worktree would be un-removable. (The orphan reaper keeps the full gate because
  // it runs unattended.) `force` bypasses the preflight entirely.
  const forced = Boolean(opts?.force);
  if (!forced) {
    const risk = await checkWorktreeWorkAtRisk(wt.path, opts);
    outcome.worktreeAtRisk = risk.dirty || risk.error != null;
  }

  if (projectPath && (forced || !outcome.worktreeAtRisk)) {
    await removeSessionWorktree(sql, projectPath, wt, opts);
    outcome.worktreeRemoved = true;
  }
  return outcome;
}
/**
 * v2.6 Phase 3 (3.5): re-baseline a session's worktree diff after a successful
 * `apply_pending`. The applied changes were written to the PROJECT ROOT; the
 * worktree branch still holds the same delta against the ORIGINAL `base_commit`,
 * so the next turn's `diffWorktree(base_commit...worktree-HEAD)` would re-surface
 * the already-applied changes as "pending" — a confusing double-count.
 *
 * Fix: advance the stored `base_commit` to the worktree's CURRENT HEAD (the
 * `diffWorktree` path commits the worktree's accumulated changes before diffing,
 * so HEAD already encodes the applied state). The next turn then diffs against
 * that, surfacing only edits made AFTER the apply. Idempotent: if the worktree has
 * no new commits, the base is unchanged.
 *
 * Diff-baseline-correctness note (design §7): we re-baseline to the worktree's own
 * HEAD, NOT to a moving project HEAD — so an out-of-band edit to the project root
 * after apply doesn't corrupt the baseline. The trade-off is that a manual project
 * edit isn't reflected as "already there"; acceptable, and matches the stored-base
 * (not moving-target) decision in §7.
 *
 * @param sql - Database handle used to read and update the `worktrees` row.
 * @param sessionId - Session whose active worktree is re-baselined.
 * @param opts.signal - Forwarded to every `hostExec` call.
 * @returns `rebaselined: true` with the new commit when the stored base moved;
 *   otherwise `rebaselined: false` with the unchanged base (or `null` when the
 *   session has no active worktree).
 */
export async function rebaselineWorktreeAfterApply(
  sql: Sql,
  sessionId: string,
  opts?: { signal?: AbortSignal },
): Promise<{ rebaselined: boolean; newBaseCommit: string | null }> {
  const [wt] = await sql<{ id: string; path: string; base_commit: string | null }[]>`
    SELECT id, path, base_commit FROM worktrees
    WHERE session_id = ${sessionId} AND status = 'active' LIMIT 1
  `;
  if (!wt) return { rebaselined: false, newBaseCommit: null };

  // Make sure the worktree's accumulated edits are committed so HEAD encodes the
  // just-applied state (the diff path normally does this, but apply may run with no
  // prior diff this turn). Commit ONLY when something is staged — NO --allow-empty,
  // so a re-baseline with no new edits doesn't advance HEAD and stays idempotent.
  //
  // Shell `&&` and `||` have equal precedence and associate left, so an ungrouped
  // `cd X && add && diff --quiet || commit` would run the commit whenever `cd` or
  // `add` failed — in whatever directory the shell started in. Every git call is
  // therefore pinned to the worktree with `-C`, and the `diff || commit` pair is
  // brace-grouped so it only runs after a successful `add`.
  const repo = shellEscape(wt.path);
  await hostExec(
    `git -C ${repo} add -A && ` +
      `{ git -C ${repo} diff --cached --quiet || ` +
      `git -C ${repo} -c user.email=boocoder@local -c user.name=BooCoder commit -q -m "rebaseline after apply"; }`,
    { signal: opts?.signal, timeoutMs: 15_000 },
  ).catch(() => {});

  // Best-effort read of the worktree's HEAD; any failure means "do not re-baseline".
  const headRes = await hostExec(
    `git -C ${repo} rev-parse HEAD`,
    { signal: opts?.signal, timeoutMs: 10_000 },
  ).catch(() => null);
  const newBase = headRes && headRes.exitCode === 0 ? headRes.stdout.trim() || null : null;
  if (!newBase || newBase === wt.base_commit) {
    return { rebaselined: false, newBaseCommit: wt.base_commit };
  }

  await sql`UPDATE worktrees SET base_commit = ${newBase} WHERE id = ${wt.id}`;
  return { rebaselined: true, newBaseCommit: newBase };
}
// ─── Session-delete work-loss guard ─────────────────────────────────────────
// WorktreeRiskReport single-sourced in @boocode/contracts — edit the package, not here.
export type { WorktreeRiskReport };
/**
 * Quote a string for safe interpolation into a POSIX shell command.
 *
 * The value is wrapped in single quotes; each embedded single quote is emitted
 * as `'\''` (close the quote, escaped quote, reopen the quote).
 */
function shellEscape(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return `'${escaped}'`;
}