feat(coder): v2.6 Phase 3 — lifecycle hardening (idle evict, crash recovery, worktree reaper)

Idle TTL eviction per (chat,agent) + LRU cap (never a busy backend); pure lifecycle-decisions.ts (TDD). Crash recovery lifts openchamber's health-monitor + busy-aware-restart + stale-grace state machine into opencode-server.ts (+ port reclaim) and warm-acp.ts; opencode crash -> fresh sessions, ACP -> re-session/new. F.1 turn-guard + U.6 usage preserved (their tests pass). Orphan worktree reaper (1h grace, superset-style dirty/unpushed preflight, Paseo soft-delete) + close hooks + diff re-baseline after apply_pending. 35 new tests + DB-opt-in reconnect test; 215 coder tests pass; tsc + build clean. Completes v2.6. Follow-ups out of scope: apps/server close-hook caller, 3.7 DiffPanel staging hint, live smokes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 01:10:09 +00:00
parent 850d48853f
commit aa3797e356
15 changed files with 1789 additions and 34 deletions

View File

@@ -9,7 +9,7 @@
import type { Sql } from '../db.js';
import { hostExec } from './host-exec.js';
const WORKTREE_BASE = '/tmp/booworktrees';
export const WORKTREE_BASE = '/tmp/booworktrees';
/**
* Create a git worktree for a task on the host.
@@ -197,6 +197,187 @@ export async function ensureSessionWorktree(
};
}
/**
* v2.6 Phase 3 (3.3 / 3.4): physically remove a session's persistent worktree —
* the git worktree dir + its branch — and archive its `worktrees` row. Used by the
* chat/session-close hook (when the last chat in a session closes) and the orphan
* reaper. Best-effort on the git side (a dir already gone is not an error); the DB
* row is flipped to 'archived' (soft-delete, Paseo's worktree-archive pattern) so
* history/attribution survives and a re-run is idempotent.
*
* SAFETY: callers MUST run `checkWorktreeWorkAtRisk` first and skip at-risk
* worktrees — this function force-removes (`--force`), so it never silently drops
* uncommitted/unmerged work unless the caller already cleared/accepted the risk.
*/
export async function removeSessionWorktree(
sql: Sql,
projectPath: string,
worktree: { id: string; path: string; branch?: string | null },
opts?: { signal?: AbortSignal },
): Promise<void> {
await hostExec(
`git -C ${shellEscape(projectPath)} worktree remove ${shellEscape(worktree.path)} --force`,
{ signal: opts?.signal, timeoutMs: 15_000 },
).catch(() => {});
const branch = worktree.branch ?? null;
if (branch) {
await hostExec(
`git -C ${shellEscape(projectPath)} branch -D ${shellEscape(branch)}`,
{ signal: opts?.signal, timeoutMs: 10_000 },
).catch(() => {});
}
// Prune any stale worktree administrative entries left behind by a partial remove.
await hostExec(
`git -C ${shellEscape(projectPath)} worktree prune`,
{ signal: opts?.signal, timeoutMs: 10_000 },
).catch(() => {});
await sql`UPDATE worktrees SET status = 'archived' WHERE id = ${worktree.id}`.catch(() => {});
}
/**
* v2.6 Phase 3 (3.3): the chat-close cleanup. Mark every `agent_sessions` row for
* the chat 'closed', then — only if this was the session's LAST open chat — remove
* the shared session worktree (a worktree is one-per-session, shared across the
* session's chat tabs, so closing one tab must not pull the rug from sibling tabs).
*
* Returns what it did so the route can report it. The actual backend (process /
* server-session) teardown is the pool's job (`agentPool.closeChat` +
* `backend.closeSession`); this owns the DB + git truth.
*
* `worktreeRemoved` is false when other open chats remain (worktree kept) OR when
* the worktree held work at risk (preflight blocked it — never silently dropped).
*/
export interface ChatCloseResult {
agentRowsClosed: number;
worktreeRemoved: boolean;
worktreeAtRisk: boolean;
}
export async function closeChatBackendState(
sql: Sql,
chatId: string,
opts?: { signal?: AbortSignal; force?: boolean },
): Promise<ChatCloseResult> {
// Resolve the chat's session (and that session's project path) before we touch
// anything — a deleted chat row leaves agent_sessions/worktrees pointing nowhere.
const [chatRow] = await sql<{ session_id: string | null }[]>`
SELECT session_id FROM chats WHERE id = ${chatId}
`;
// chat row may already be gone (delete fired first); fall back to agent_sessions'
// session_id link, which SET NULLs only on session delete, not chat delete.
let sessionId = chatRow?.session_id ?? null;
if (!sessionId) {
const [as] = await sql<{ session_id: string | null }[]>`
SELECT session_id FROM agent_sessions WHERE chat_id = ${chatId} AND session_id IS NOT NULL LIMIT 1
`;
sessionId = as?.session_id ?? null;
}
// Mark this chat's (chat,agent) backend rows closed (idempotent).
const closedRows = await sql<{ agent: string }[]>`
UPDATE agent_sessions SET status = 'closed'
WHERE chat_id = ${chatId} AND status <> 'closed'
RETURNING agent
`;
let worktreeRemoved = false;
let worktreeAtRisk = false;
if (sessionId) {
// Other open chats still sharing the session worktree? If so, keep it.
const openRows = await sql<{ open_count: number }[]>`
SELECT COUNT(*)::int AS open_count FROM chats
WHERE session_id = ${sessionId} AND status = 'open' AND id <> ${chatId}
`;
const openCount = openRows[0]?.open_count ?? 0;
if (openCount === 0) {
const [wt] = await sql<{ id: string; path: string; branch: string | null }[]>`
SELECT id, path, branch FROM worktrees
WHERE session_id = ${sessionId} AND status = 'active' LIMIT 1
`;
if (wt) {
const projRows = await sql<{ path: string | null }[]>`
SELECT p.path FROM sessions s JOIN projects p ON p.id = s.project_id WHERE s.id = ${sessionId}
`;
const projectPath = projRows[0]?.path ?? null;
// Preflight (close-hook semantics): a DELIBERATE chat/session close — the
// server's session-delete already ran the full work-at-risk gate
// (dirty/unpushed/unmerged) before calling us, and chat-close discards the
// tab's staged review intentionally. So here we only block on UNCOMMITTED
// working-tree changes (`dirty`) — work the user never even staged into the
// review diff. The session branch's own commits (the diff-staging
// mechanism) are NOT a block; treating them as "unmerged risk" would make
// the worktree un-removable on every real session (the orphan reaper keeps
// the full at-risk gate because it runs unattended). `force` skips this.
if (!opts?.force) {
const risk = await checkWorktreeWorkAtRisk(wt.path, opts);
worktreeAtRisk = risk.dirty || risk.error != null;
}
if (projectPath && (opts?.force || !worktreeAtRisk)) {
await removeSessionWorktree(sql, projectPath, wt, opts);
worktreeRemoved = true;
}
}
}
}
return { agentRowsClosed: closedRows.length, worktreeRemoved, worktreeAtRisk };
}
/**
* v2.6 Phase 3 (3.5): re-baseline a session's worktree diff after a successful
* `apply_pending`. The applied changes were written to the PROJECT ROOT; the
* worktree branch still holds the same delta against the ORIGINAL `base_commit`,
* so the next turn's `diffWorktree(base_commit...worktree-HEAD)` would re-surface
* the already-applied changes as "pending" — a confusing double-count.
*
* Fix: advance the stored `base_commit` to the worktree's CURRENT HEAD (the
* `diffWorktree` path commits the worktree's accumulated changes before diffing,
* so HEAD already encodes the applied state). The next turn then diffs against
* that, surfacing only edits made AFTER the apply. Idempotent: if the worktree has
* no new commits, the base is unchanged.
*
* Diff-baseline-correctness note (design §7): we re-baseline to the worktree's own
* HEAD, NOT to a moving project HEAD — so an out-of-band edit to the project root
* after apply doesn't corrupt the baseline. The trade-off is that a manual project
* edit isn't reflected as "already there"; acceptable, and matches the stored-base
* (not moving-target) decision in §7.
*/
export async function rebaselineWorktreeAfterApply(
sql: Sql,
sessionId: string,
opts?: { signal?: AbortSignal },
): Promise<{ rebaselined: boolean; newBaseCommit: string | null }> {
const [wt] = await sql<{ id: string; path: string; base_commit: string | null }[]>`
SELECT id, path, base_commit FROM worktrees
WHERE session_id = ${sessionId} AND status = 'active' LIMIT 1
`;
if (!wt) return { rebaselined: false, newBaseCommit: null };
// Make sure the worktree's accumulated edits are committed so HEAD encodes the
// just-applied state (the diff path normally does this, but apply may run with no
// prior diff this turn). Commit ONLY when something is staged — NO --allow-empty,
// so a re-baseline with no new edits doesn't advance HEAD and stays idempotent.
await hostExec(
`cd ${shellEscape(wt.path)} && git add -A && ` +
`git diff --cached --quiet || ` +
`git -c user.email=boocoder@local -c user.name=BooCoder commit -q -m "rebaseline after apply"`,
{ signal: opts?.signal, timeoutMs: 15_000 },
).catch(() => {});
const headRes = await hostExec(
`git -C ${shellEscape(wt.path)} rev-parse HEAD`,
{ signal: opts?.signal, timeoutMs: 10_000 },
).catch(() => null);
const newBase = headRes && headRes.exitCode === 0 ? headRes.stdout.trim() || null : null;
if (!newBase || newBase === wt.base_commit) {
return { rebaselined: false, newBaseCommit: wt.base_commit };
}
await sql`UPDATE worktrees SET base_commit = ${newBase} WHERE id = ${wt.id}`;
return { rebaselined: true, newBaseCommit: newBase };
}
// ─── Session-delete work-loss guard ─────────────────────────────────────────
/**